In [1]:
import datetime
import os

import numpy as np
import pandas as pd

from jzset import jz_set
# The license token is a credential, so it is read from the JZ_LICENSE
# environment variable instead of being stored in the notebook. A missing
# variable raises KeyError on this line.
# NOTE(review): the token that used to be committed here should be treated as
# leaked and rotated.
jz_data = jz_set.data_set(mode='local',
                         jz_license=os.environ['JZ_LICENSE'])

jz_dataset: version 220528-1.0.1
Effective time: 3597504116
Permission: ['macro', 'futures']


In [2]:
# Show only the code and full-name columns of the data dictionary.
jz_data.data_dictionary()[['JzCode', 'FullName']]

Unnamed: 0,JzCode,FullName
0,JZ2000027,2010 jz期货分钟数据
1,JZ2000028,2011 jz期货分钟数据
2,JZ2000029,2012 jz期货分钟数据
3,JZ2000030,2013 jz期货分钟数据
4,JZ2000031,2014 jz期货分钟数据
...,...,...
136,JZ0001068,美债十年期实际利率
137,JZ0001070,美债5年期实际利率
138,JZ0001071,"美国国债收益率:1m,3m,6m,1y,2y,3y,5y,7y,10y,20y,30y"
139,JZ0001034,美国标准普尔500波动率指数(VIX)


In [37]:
def get_jz_code(a_datetime):
    """Build the data-set code that corresponds to the year of `a_datetime`.

    The code is 'JZ20000' followed by (year - 2010 + 40), so 2010 maps to
    'JZ2000040' and 2022 maps to 'JZ2000052'.

    NOTE(review): the data dictionary output in this notebook lists
    'JZ2000027' as the 2010 minute data set; confirm that the +40 offset
    addresses the intended data-set family.

    :param a_datetime: any object exposing a `.year` attribute.
    :return: the code as a string.
    """
    years_since_2010 = a_datetime.year - 2010
    return 'JZ20000{}'.format(years_since_2010 + 40)


def get_1mindata_allcontract(inst, date_str):
    """Query `jz_data.bar_data` for one product on a single day.

    :param inst: product identifier passed through as `product`.
    :param date_str: day in 'YYYYMMDD' form; it is used both as `start_date`
        and as `end_date`, and its year selects the `jz_code` through
        `get_jz_code`.
    :return: whatever `jz_data.bar_data` returns.
    :raises ValueError: if `date_str` does not match '%Y%m%d'.
    """
    trade_day = datetime.datetime.strptime(date_str, '%Y%m%d')
    query = dict(product=inst,
                 jz_code=get_jz_code(trade_day),
                 start_date=date_str,
                 end_date=date_str)
    return jz_data.bar_data(**query)


# Some contract names carry fewer than four digits (the decade digit, e.g. the
# leading 2 of 2301, is missing), so it has to be restored when normalising.
def getContract(a_str, date_str):
    """Normalise a contract name to upper-case letters followed by its digits.

    When the name holds fewer than four digits, the missing decade digit is
    chosen as the smallest one for which '20' + digit + digits is not earlier
    than the year-month of `date_str`.

    :param a_str: raw contract name, e.g. 'ag2301' or 'CF205'.
    :param date_str: trading day as 'YYYYMMDD'; only the first six characters
        (year and month) are used.
    :return: the letters of `a_str` upper-cased, followed by the digits.
    """
    contract = "".join(filter(str.isdigit, a_str))
    if len(contract) < 4:
        year_month_str = date_str[:6]
        for decade_digit in range(10):
            candidate = '20{}{}'.format(decade_digit, contract)
            # ">=" rather than ">": a contract whose delivery month is the
            # current month must not be pushed ten years into the future.
            if candidate >= year_month_str:
                contract = '{}{}'.format(decade_digit, contract)
                break
    inst = "".join(filter(str.isalpha, a_str)).upper()
    return inst + contract


def handleData(df, date_str):
    """Clean one day of raw bar data and return it as a new DataFrame.

    Steps:
      * keep the bar columns listed below and sort the rows by index;
      * where |TradeVolume| exceeds 2e8, set TradeVolume and Turnover to NaN;
      * normalise `Code` through `getContract`;
      * reduce `TradeTime` from 'YYYY-mm-dd HH:MM:SS' to a `datetime.time`;
      * replace price values equal to 0 with NaN.

    The input frame is not modified.

    :param df: raw bars containing at least the kept columns.
    :param date_str: trading day as 'YYYYMMDD', forwarded to `getContract`.
    :return: the cleaned DataFrame.
    """
    kept_cols = ['Code', 'TradeDate', 'TradeTime', 'OpenPrice', 'HighPrice',
                 'LowPrice', 'ClosePrice', 'TradeVolume', 'OpenInterest', 'Turnover']
    # sort_index() returns a new frame, so the assignments below never touch
    # the caller's data.
    df = df.loc[:, kept_cols].sort_index()

    overflow_flag = (np.abs(df['TradeVolume']) > 2e8)
    # Whole-column assignment instead of `df[col].loc[mask] = nan`: the chained
    # form is not guaranteed to write through to `df` and is a no-op under
    # pandas Copy-on-Write.
    df['TradeVolume'] = df['TradeVolume'].mask(overflow_flag)
    df['Turnover'] = df['Turnover'].mask(overflow_flag)

    df['Code'] = df['Code'].apply(lambda code: getContract(code, date_str))

    # Vectorised parse; `.dt.time` yields datetime.time objects.
    df['TradeTime'] = pd.to_datetime(df['TradeTime'], format='%Y-%m-%d %H:%M:%S').dt.time

    for col in ('OpenPrice', 'HighPrice', 'LowPrice', 'ClosePrice'):
        df[col] = np.where(df[col] == 0, np.nan, df[col])
    return df

In [69]:
# Fetch the bars of product 'AG' for 2022-05-10 and display them.
res = get_1mindata_allcontract('AG', '20220510')
res

Unnamed: 0,Code,Exchange,TradeDate,TradeTime,OpenPrice,HighPrice,LowPrice,ClosePrice,TradeVolume,OpenInterest,Turnover,Product
0,AG2205,,2022-05-10,2022-05-09 21:01:00,4814,4814,4814,4814,0,3518,0,AG
4446,AG2301,,2022-05-10,2022-05-09 21:07:00,4849,4849,4849,4849,0,6159,0,AG
4445,AG2301,,2022-05-10,2022-05-09 21:06:00,4849,4849,4849,4849,1,6159,72735,AG
4444,AG2301,,2022-05-10,2022-05-09 21:05:00,4850,4851,4850,4851,1,6158,72765,AG
4443,AG2301,,2022-05-10,2022-05-09 21:04:00,4850,4850,4850,4850,0,6157,0,AG
...,...,...,...,...,...,...,...,...,...,...,...,...
2214,AG2208,,2022-05-10,2022-05-10 14:55:00,4814,4815,4814,4815,20,65608,1444485,AG
2213,AG2208,,2022-05-10,2022-05-10 14:54:00,4813,4814,4813,4814,21,65614,1516305,AG
2212,AG2208,,2022-05-10,2022-05-10 14:53:00,4811,4813,4811,4813,19,65611,1371570,AG
2496,AG2209,,2022-05-10,2022-05-10 01:37:00,4816,4816,4815,4815,25,34919,1805625,AG


In [70]:
# Clean the raw bars; handleData returns the cleaned frame.
tmp = handleData(res, '20220510')

In [71]:
# Display the cleaned frame.
tmp

Unnamed: 0,Code,TradeDate,TradeTime,OpenPrice,HighPrice,LowPrice,ClosePrice,TradeVolume,OpenInterest,Turnover
0,AG2205,2022-05-10,21:01:00,4814,4814,4814,4814,0,3518,0
1,AG2205,2022-05-10,21:02:00,4768,4773,4768,4773,8,3512,572610
2,AG2205,2022-05-10,21:03:00,4773,4773,4773,4773,4,3508,286380
3,AG2205,2022-05-10,21:04:00,4773,4773,4773,4773,0,3508,0
4,AG2205,2022-05-10,21:05:00,4773,4773,4773,4773,0,3508,0
...,...,...,...,...,...,...,...,...,...,...
6655,AG2304,2022-05-10,14:56:00,4891,4891,4891,4891,0,2201,0
6656,AG2304,2022-05-10,14:57:00,4891,4891,4891,4891,0,2201,0
6657,AG2304,2022-05-10,14:58:00,4891,4891,4891,4891,0,2201,0
6658,AG2304,2022-05-10,14:59:00,4891,4891,4891,4891,0,2201,0


## 和获取1天的数据不一样，我们需要在这里就直接downsample

In [72]:
import datetime
# Session boundaries used by part_sign (all bounds are inclusive).
# Session 1 starts in the evening and ends after midnight.
STARTTIME1 = datetime.time(hour=21, minute=0)
ENDTIME1 = datetime.time(hour=2, minute=30)
STARTTIME2 = datetime.time(hour=9, minute=0)
ENDTIME2 = datetime.time(hour=11, minute=30)
STARTTIME3 = datetime.time(hour=13, minute=30)
ENDTIME3 = datetime.time(hour=15, minute=0)


# There are several ways to down-sample; keep this part easy to change so that
# other down-sampling schemes can be plugged in later.

def part_sign(a_time):
    """Map a time of day to the number of the session it falls into.

    :param a_time: a `datetime.time`.
    :return: 1 for STARTTIME1..ENDTIME1 (wraps past midnight), 2 for
        STARTTIME2..ENDTIME2, 3 for STARTTIME3..ENDTIME3, and 4 for any time
        outside all three sessions.
    """
    # Session 1 wraps midnight, so it is the union of two half-open ranges.
    if a_time >= STARTTIME1 or a_time <= ENDTIME1:
        return 1
    if STARTTIME2 <= a_time <= ENDTIME2:
        return 2
    if STARTTIME3 <= a_time <= ENDTIME3:
        return 3
    return 4


# Keep in mind for the groupby below: some days have no night session, so a
# contract may have no time_flag == 1 row in the result.
# StableOpenPrice uses the close of the session's first bar as the open,
# instead of that bar's own open price.
def dowm_sample_3part_all_inst(data):
    """Aggregate bars into one row per (Code, time_flag) session.

    Each row's `TradeTime` is mapped to a session number with `part_sign`;
    rows mapped to 4 (outside every session) are dropped. The remaining rows
    are grouped by contract and session and aggregated as follows:

      TradeDate        first
      OpenPrice        first
      HighPrice        max
      LowPrice         min
      ClosePrice       last
      TradeVolume      sum
      OpenInterest     last
      Turnover         sum
      StableOpenPrice  first ClosePrice

    'first' and 'last' are positional, so rows are expected to be in
    chronological order within each contract; pandas skips missing values
    when picking them.

    The input frame is left untouched (no `time_flag` column is added to it).

    :param data: DataFrame with `Code`, `TradeTime` (datetime.time) and the
        columns listed above.
    :return: DataFrame indexed by (Code, time_flag).
    """
    # assign() builds a new frame, so the caller's data is not mutated.
    flagged = data.assign(time_flag=data['TradeTime'].apply(part_sign))
    in_session = flagged.loc[flagged['time_flag'] != 4]

    # String aggregator names replace np.nanmax/np.nanmin/np.nansum: pandas
    # already dispatched those callables to its own max/min/sum and now
    # deprecates passing them. Named aggregation also lets StableOpenPrice be
    # computed in the same pass as a regular column.
    return in_session.groupby(['Code', 'time_flag']).agg(
        TradeDate=('TradeDate', 'first'),
        OpenPrice=('OpenPrice', 'first'),
        HighPrice=('HighPrice', 'max'),
        LowPrice=('LowPrice', 'min'),
        ClosePrice=('ClosePrice', 'last'),
        TradeVolume=('TradeVolume', 'sum'),
        OpenInterest=('OpenInterest', 'last'),
        Turnover=('Turnover', 'sum'),
        StableOpenPrice=('ClosePrice', 'first'),
    )


def fill_index(data_new):
    """Reindex so that every contract has a row for sessions 1, 2 and 3.

    Rows for sessions that were absent are created with NaN values. Their
    `TradeDate` is then filled from the same contract's other sessions: first
    from the following session (backfill), then from the preceding one, so a
    date is never borrowed from a different contract.

    :param data_new: DataFrame indexed by a (Code, time_flag) MultiIndex and
        containing a `TradeDate` column.
    :return: a new DataFrame indexed by every (Code, 1..3) combination.
    """
    # Drop unused levels first so that codes filtered out upstream do not
    # come back as all-NaN rows.
    codes = data_new.index.remove_unused_levels().levels[0]
    full_index = pd.MultiIndex.from_product([codes, [1, 2, 3]], names=['Code', 'time_flag'])
    filled = data_new.reindex(full_index)

    # Per-contract bfill/ffill replaces the deprecated fillna(method='backfill'),
    # which also leaked dates across contract boundaries.
    filled['TradeDate'] = filled['TradeDate'].groupby(level='Code').bfill()
    filled['TradeDate'] = filled['TradeDate'].groupby(level='Code').ffill()
    return filled

In [73]:
# Aggregate the cleaned bars per contract and session, and display the result.
dowm_sample_3part_all_inst(tmp)

Unnamed: 0_level_0,Unnamed: 1_level_0,TradeDate,OpenPrice,HighPrice,LowPrice,ClosePrice,TradeVolume,OpenInterest,Turnover,StableOpenPrice
Code,time_flag,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AG2205,1,2022-05-10,4814,4814,4768,4787,424,3102,30538230,4814
AG2205,2,2022-05-10,4787,4803,4757,4788,110,3048,7884570,4757
AG2205,3,2022-05-10,4788,4790,4780,4782,78,3024,5599530,4788
AG2206,1,2022-05-10,4777,4827,4761,4765,311879,357488,22418864115,4768
AG2206,2,2022-05-10,4765,4819,4760,4804,114513,344081,8225708490,4761
AG2206,3,2022-05-10,4812,4812,4793,4801,57372,337472,4130850840,4807
AG2207,1,2022-05-10,4798,4833,4771,4772,19441,86810,1400152020,4776
AG2207,2,2022-05-10,4772,4825,4769,4809,8906,88132,640367370,4770
AG2207,3,2022-05-10,4814,4815,4799,4808,2823,87817,203531295,4813
AG2208,1,2022-05-10,4800,4842,4782,4786,19578,66087,1412876205,4786


In [74]:
# Keep the aggregated frame so it can be passed to fill_index.
tm1 = dowm_sample_3part_all_inst(tmp)

In [75]:
# Reindex so every contract has rows for time_flag 1, 2 and 3, and display it.
fill_index(tm1)

Unnamed: 0_level_0,Unnamed: 1_level_0,TradeDate,OpenPrice,HighPrice,LowPrice,ClosePrice,TradeVolume,OpenInterest,Turnover,StableOpenPrice
Code,time_flag,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AG2205,1,2022-05-10,4814,4814,4768,4787,424,3102,30538230,4814
AG2205,2,2022-05-10,4787,4803,4757,4788,110,3048,7884570,4757
AG2205,3,2022-05-10,4788,4790,4780,4782,78,3024,5599530,4788
AG2206,1,2022-05-10,4777,4827,4761,4765,311879,357488,22418864115,4768
AG2206,2,2022-05-10,4765,4819,4760,4804,114513,344081,8225708490,4761
AG2206,3,2022-05-10,4812,4812,4793,4801,57372,337472,4130850840,4807
AG2207,1,2022-05-10,4798,4833,4771,4772,19441,86810,1400152020,4776
AG2207,2,2022-05-10,4772,4825,4769,4809,8906,88132,640367370,4770
AG2207,3,2022-05-10,4814,4815,4799,4808,2823,87817,203531295,4813
AG2208,1,2022-05-10,4800,4842,4782,4786,19578,66087,1412876205,4786


In [None]:
# 接下来，对上面的数据进行carry的计算