In [1]:
import datetime
import os

import numpy as np
import pandas as pd

from jzset import jz_set
# The licence token is a credential, so it is read from the environment rather
# than being committed to the notebook.
# NOTE(review): the token that used to be hardcoded here should be treated as
# leaked and rotated.
_jz_license = os.environ.get('JZ_LICENSE')
if not _jz_license:
    raise RuntimeError('Set the JZ_LICENSE environment variable to your jz_set licence token.')
jz_data = jz_set.data_set(mode='local', jz_license=_jz_license)

jz_dataset: version 220528-1.0.1
Effective time: 3597403090
Permission: ['macro', 'futures']


In [2]:
# Show the JzCode and FullName columns of the data dictionary.
jz_data.data_dictionary()[['JzCode', 'FullName']]

Unnamed: 0,JzCode,FullName
0,JZ2000027,2010 jz期货分钟数据
1,JZ2000028,2011 jz期货分钟数据
2,JZ2000029,2012 jz期货分钟数据
3,JZ2000030,2013 jz期货分钟数据
4,JZ2000031,2014 jz期货分钟数据
...,...,...
136,JZ0001068,美债十年期实际利率
137,JZ0001070,美债5年期实际利率
138,JZ0001071,"美国国债收益率:1m,3m,6m,1y,2y,3y,5y,7y,10y,20y,30y"
139,JZ0001034,美国标准普尔500波动率指数(VIX)


In [3]:
def get_jz_code(a_datetime):
    """Return the dataset code ('JZ20000' + number) for the year of ``a_datetime``.

    The number is ``year - 2010 + 40``, so 2010 maps to 'JZ2000040' and
    2022 maps to 'JZ2000052'.

    NOTE(review): the data dictionary output in this notebook lists
    'JZ2000027' for the 2010 minute data; the offset of 40 presumably
    targets a different yearly dataset - confirm against the full dictionary.

    Parameters
    ----------
    a_datetime : object with a ``year`` attribute (e.g. datetime.datetime)

    Returns
    -------
    str
    """
    first_year, first_number = 2010, 40
    number = a_datetime.year - first_year + first_number
    return 'JZ20000' + str(number)


def get_1mindata_allcontract(inst, date_str):
    """Fetch the bar data of one product for a single day.

    Parameters
    ----------
    inst : str
        Product passed to ``jz_data.bar_data`` (e.g. 'P').
    date_str : str
        Day in 'YYYYMMDD' form; used as both start and end date, and its
        year selects the dataset code via ``get_jz_code``.

    Returns
    -------
    Whatever ``jz_data.bar_data`` returns for that query.
    """
    trade_day = datetime.datetime.strptime(date_str, '%Y%m%d')
    query = dict(
        product=inst,
        jz_code=get_jz_code(trade_day),
        start_date=date_str,
        end_date=date_str,
    )
    return jz_data.bar_data(**query)


# Some contract names carry only three digits (the decade digit of the year is
# missing), so the missing digit has to be restored when normalising the name.
def getContract(a_str, date_str):
    """Normalise a contract name to upper-case letters followed by its digits.

    When the name has fewer than four digits, the decade digit is inferred:
    the smallest digit d in 0..9 is chosen such that '20' + d + digits is not
    earlier than the year-month of ``date_str``.

    Parameters
    ----------
    a_str : str
        Raw contract name, e.g. 'cf301' or 'P2207'.
    date_str : str
        Reference date 'YYYYMMDD'; only its first six characters are used.

    Returns
    -------
    str
        e.g. 'CF2301'. If no digit in 0..9 qualifies, the digits are left
        as they were.
    """
    contract = "".join(filter(str.isdigit, a_str))
    if len(contract) < 4:
        year_month_str = date_str[:6]
        for add_year in range(10):
            # ">=" rather than ">": a contract whose delivery month equals the
            # reference month is still the current-decade contract; a strict
            # comparison would push it ten years into the future.
            if '20{}{}'.format(add_year, contract) >= year_month_str:
                contract = '{}{}'.format(add_year, contract)
                break
    inst = "".join(filter(str.isalpha, a_str)).upper()
    return inst + contract


def handleData(df, date_str):
    """Clean a raw minute-bar frame and return a new, trimmed frame.

    Steps:
    * keep only the code, date/time, OHLC, volume, open-interest and turnover
      columns, sorted by index;
    * blank out TradeVolume and Turnover where abs(TradeVolume) > 2e8
      (flagged as overflow by the original author);
    * normalise every contract code with ``getContract(code, date_str)``;
    * reduce TradeTime ('%Y-%m-%d %H:%M:%S' strings) to ``datetime.time``;
    * replace zero prices with NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw bars containing at least the columns listed in ``keep_cols``.
    date_str : str
        Reference date 'YYYYMMDD' forwarded to ``getContract``.

    Returns
    -------
    pandas.DataFrame
        The input frame itself is not modified.
    """
    keep_cols = ['Code', 'TradeDate', 'TradeTime', 'OpenPrice', 'HighPrice',
                 'LowPrice', 'ClosePrice', 'TradeVolume', 'OpenInterest', 'Turnover']
    # Explicit copy: the column assignments below must never write into (or
    # warn about) the caller's frame.
    df = df.loc[:, keep_cols].sort_index().copy()

    overflow_flag = df['TradeVolume'].abs() > 2e8
    # Series.mask instead of the chained ``df[col].loc[flag] = nan``: chained
    # assignment is not guaranteed to reach the frame and does nothing under
    # pandas Copy-on-Write.
    df['TradeVolume'] = df['TradeVolume'].mask(overflow_flag)
    df['Turnover'] = df['Turnover'].mask(overflow_flag)

    df['Code'] = df['Code'].apply(lambda code: getContract(code, date_str))

    df['TradeTime'] = pd.to_datetime(df['TradeTime'], format='%Y-%m-%d %H:%M:%S').dt.time

    # A price of exactly 0 is treated as missing.
    for col in ['OpenPrice', 'HighPrice', 'LowPrice', 'ClosePrice']:
        df[col] = np.where(df[col] == 0, np.nan, df[col])
    return df

In [4]:
# Fetch the bars of product 'P' for 2022-06-24 and display them.
res = get_1mindata_allcontract('P', '20220624')
res

Unnamed: 0,Code,Exchange,TradeDate,TradeTime,OpenPrice,HighPrice,LowPrice,ClosePrice,TradeVolume,OpenInterest,Turnover,Product
0,P2207,,2022-06-24,2022-06-23 21:01:00,10366,10400,10350,10396,109,6055,11312080,P
2751,P2302,,2022-06-24,2022-06-24 14:52:00,9022,9022,9022,9022,0,90,0,P
2752,P2302,,2022-06-24,2022-06-24 14:53:00,9022,9022,9022,9022,0,90,0,P
2753,P2302,,2022-06-24,2022-06-24 14:54:00,9022,9022,9022,9022,0,90,0,P
2754,P2302,,2022-06-24,2022-06-24 14:55:00,9022,9022,9022,9022,0,90,0,P
...,...,...,...,...,...,...,...,...,...,...,...,...
1386,P2211,,2022-06-24,2022-06-23 21:07:00,9526,9548,9526,9540,150,32981,14308020,P
1387,P2211,,2022-06-24,2022-06-23 21:08:00,9540,9542,9528,9528,33,32995,3147140,P
1388,P2211,,2022-06-24,2022-06-23 21:09:00,9528,9530,9528,9530,13,33006,1238900,P
1358,P2210,,2022-06-24,2022-06-24 14:39:00,9086,9086,9062,9074,65,44219,5894620,P


In [5]:
# Pass the same trading date that was fetched: the reference date drives the
# decade inference for short contract codes inside getContract.
tmp = handleData(res, '20220624')

In [6]:
# Display the cleaned frame returned by handleData.
tmp

Unnamed: 0,Code,TradeDate,TradeTime,OpenPrice,HighPrice,LowPrice,ClosePrice,TradeVolume,OpenInterest,Turnover
0,P2207,2022-06-24,21:01:00,10366,10400,10350,10396,109,6055,11312080
1,P2207,2022-06-24,21:02:00,10386,10400,10352,10370,92,6030,9552200
2,P2207,2022-06-24,21:03:00,10370,10396,10370,10380,18,6026,1869460
3,P2207,2022-06-24,21:04:00,10380,10380,10342,10346,23,6024,2381720
4,P2207,2022-06-24,21:05:00,10346,10360,10346,10360,8,6025,828700
...,...,...,...,...,...,...,...,...,...,...
4135,P2306,2022-06-24,14:56:00,9046,9046,9046,9046,0,13,0
4136,P2306,2022-06-24,14:57:00,9046,9046,9046,9046,0,13,0
4137,P2306,2022-06-24,14:58:00,9046,9046,9046,9046,0,13,0
4138,P2306,2022-06-24,14:59:00,9046,9046,9046,9046,0,13,0


## 和获取1天的数据不一样，我们需要在这里就直接downsample

In [7]:
import datetime
# Session boundaries used by part_sign; every comparison there is inclusive.
# Session 1 starts at 21:00 and ends at 02:30, i.e. it wraps past midnight.
STARTTIME1 = datetime.time(hour=21)
ENDTIME1 = datetime.time(hour=2, minute=30)
STARTTIME2 = datetime.time(hour=9)
ENDTIME2 = datetime.time(hour=11, minute=30)
STARTTIME3 = datetime.time(hour=13, minute=30)
ENDTIME3 = datetime.time(hour=15)


# There are several ways to downsample; keep this code open to other schemes.

def part_sign(a_time):
    """Map a time of day to its session number.

    Returns 1 for 21:00-02:30 (wrapping midnight), 2 for 09:00-11:30,
    3 for 13:30-15:00, and 4 for any time outside those three sessions.
    Boundaries are inclusive.
    """
    if a_time >= STARTTIME1 or a_time <= ENDTIME1:
        return 1
    if STARTTIME2 <= a_time <= ENDTIME2:
        return 2
    if STARTTIME3 <= a_time <= ENDTIME3:
        return 3
    return 4


# Keep in mind when grouping: some days have no night session, so the result
# can lack the time_flag == 1 rows.
# StableOpenPrice does not use the first price as the open; it uses the close
# of the first minute instead.
def dowm_sample_3part_all_inst(data):
    """Aggregate minute bars into one row per (Code, session).

    Each row's session comes from ``part_sign(TradeTime)``; rows falling
    outside the three sessions (flag 4) are dropped before grouping.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain Code, TradeDate, TradeTime, the four price columns,
        TradeVolume, OpenInterest and Turnover. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by (Code, time_flag) with first TradeDate/OpenPrice, max
        HighPrice, min LowPrice, last ClosePrice/OpenInterest, summed
        TradeVolume/Turnover, plus StableOpenPrice (first ClosePrice of the
        group).
    """
    # assign() builds a new frame, so the caller's DataFrame does not gain a
    # 'time_flag' column as a side effect.
    flagged = data.assign(time_flag=data['TradeTime'].apply(part_sign))
    flagged = flagged.loc[flagged['time_flag'] != 4, :]

    grouped = flagged.groupby(['Code', 'time_flag'])
    # String names select the NaN-skipping groupby reductions that the numpy
    # callables (np.nanmax / np.nanmin / np.nansum) were mapped to, without
    # the deprecation warning newer pandas emits for those callables.
    data_new = grouped.agg({
        'TradeDate': 'first',
        'OpenPrice': 'first',
        'HighPrice': 'max',
        'LowPrice': 'min',
        'ClosePrice': 'last',
        'TradeVolume': 'sum',
        'OpenInterest': 'last',
        'Turnover': 'sum',
    })

    # Assign a Series (not a one-column DataFrame) for the extra column.
    data_new['StableOpenPrice'] = grouped['ClosePrice'].first()

    return data_new


def fill_index(data_new):
    """Reindex so that every code has rows for all three sessions (1, 2, 3).

    Sessions missing for a code are added as all-NaN rows, except that
    TradeDate is filled from the neighbouring rows.

    Parameters
    ----------
    data_new : pandas.DataFrame
        Frame indexed by a two-level MultiIndex whose first level holds the
        contract codes, and containing a 'TradeDate' column.

    Returns
    -------
    pandas.DataFrame
        Indexed by the full (Code, time_flag) product.
    """
    codes = data_new.index.levels[0]
    full_index = pd.MultiIndex.from_product([codes, [1, 2, 3]], names=['Code', 'time_flag'])
    data_new = data_new.reindex(full_index)
    # Backfill first (as before), then forward-fill so that a missing final
    # session of the last code does not keep a NaN TradeDate.
    # bfill()/ffill() replace the deprecated fillna(method=...).
    data_new['TradeDate'] = data_new['TradeDate'].bfill().ffill()
    return data_new

In [8]:
# Preview the per-(Code, session) aggregation of the cleaned bars.
dowm_sample_3part_all_inst(tmp)

Unnamed: 0_level_0,Unnamed: 1_level_0,TradeDate,OpenPrice,HighPrice,LowPrice,ClosePrice,TradeVolume,OpenInterest,Turnover,StableOpenPrice
Code,time_flag,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
P2207,1,2022-06-24,10366,10498,10176,10268,1697,5956,175476400,10396
P2207,2,2022-06-24,10268,10268,9606,9652,2345,5615,230808840,10182
P2207,3,2022-06-24,9652,9862,9600,9648,1578,5390,152932740,9706
P2208,1,2022-06-24,9900,10082,9770,9836,9604,49348,953408420,9894
P2208,2,2022-06-24,9736,9788,9216,9276,15300,47545,1451667160,9748
P2208,3,2022-06-24,9300,9502,9200,9322,6502,47530,607147620,9344
P2209,1,2022-06-24,9616,9824,9516,9562,517449,358682,49965305420,9642
P2209,2,2022-06-24,9502,9514,9030,9090,570398,368481,52852824180,9496
P2209,3,2022-06-24,9120,9288,9012,9108,376981,360648,34386567620,9174
P2210,1,2022-06-24,9570,9752,9458,9504,5266,43478,506456400,9592


In [9]:
# Keep the aggregated frame for the following steps.
tm1 = dowm_sample_3part_all_inst(tmp)

In [10]:
# Add rows for sessions that are missing for a contract, then display.
q = fill_index(tm1)
q

Unnamed: 0_level_0,Unnamed: 1_level_0,TradeDate,OpenPrice,HighPrice,LowPrice,ClosePrice,TradeVolume,OpenInterest,Turnover,StableOpenPrice
Code,time_flag,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
P2207,1,2022-06-24,10366,10498,10176,10268,1697,5956,175476400,10396
P2207,2,2022-06-24,10268,10268,9606,9652,2345,5615,230808840,10182
P2207,3,2022-06-24,9652,9862,9600,9648,1578,5390,152932740,9706
P2208,1,2022-06-24,9900,10082,9770,9836,9604,49348,953408420,9894
P2208,2,2022-06-24,9736,9788,9216,9276,15300,47545,1451667160,9748
P2208,3,2022-06-24,9300,9502,9200,9322,6502,47530,607147620,9344
P2209,1,2022-06-24,9616,9824,9516,9562,517449,358682,49965305420,9642
P2209,2,2022-06-24,9502,9514,9030,9090,570398,368481,52852824180,9496
P2209,3,2022-06-24,9120,9288,9012,9108,376981,360648,34386567620,9174
P2210,1,2022-06-24,9570,9752,9458,9504,5266,43478,506456400,9592


In [11]:
# Next, compute carry on the data above.
def splitdf(df):
    """Split a frame with a two-level MultiIndex by the values of level 1.

    Returns a list with one sub-frame per entry of ``df.index.levels[1]``,
    in that order; each sub-frame keeps both index levels.
    """
    select = pd.IndexSlice
    return [df.loc[select[:, level_value], :] for level_value in df.index.levels[1]]

In [12]:
# Take the first sub-frame produced by splitdf and display it.
ss = splitdf(tm1)[0]
ss 

Unnamed: 0_level_0,Unnamed: 1_level_0,TradeDate,OpenPrice,HighPrice,LowPrice,ClosePrice,TradeVolume,OpenInterest,Turnover,StableOpenPrice
Code,time_flag,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
P2207,1,2022-06-24,10366,10498,10176,10268,1697,5956,175476400,10396
P2208,1,2022-06-24,9900,10082,9770,9836,9604,49348,953408420,9894
P2209,1,2022-06-24,9616,9824,9516,9562,517449,358682,49965305420,9642
P2210,1,2022-06-24,9570,9752,9458,9504,5266,43478,506456400,9592
P2211,1,2022-06-24,9556,9726,9434,9476,5937,33819,569523200,9564
P2212,1,2022-06-24,9506,9694,9400,9442,3986,12897,380145100,9506
P2301,1,2022-06-24,9462,9654,9366,9400,33904,80871,3222803600,9498
P2302,1,2022-06-24,9418,9598,9418,9598,2,91,191960,9418
P2303,1,2022-06-24,9416,9622,9390,9390,4,135,379380,9416
P2304,1,2022-06-24,9382,9520,9382,9476,5,100,474060,9382


In [128]:
def _contract_month_gap(code, base_code):
    """Months between two contract codes, read from their trailing YYMM digits.

    Kept next to calcCarry so that this cell does not depend on helpers
    defined in later cells.
    """
    def _as_months(contract_code):
        yymm = contract_code[-4:]
        return int(yymm[:2]) * 12 + int(yymm[2:])

    return _as_months(code) - _as_months(base_code)


def calcCarry(df):
    """Price ratios and month gaps of the most traded contracts versus the main one.

    The main contract is the row with the largest Turnover. Contracts whose
    Turnover exceeds 1/20 of that maximum are ranked by Turnover (descending)
    and at most four are kept. Each kept contract after the main one yields
    ``ClosePrice / main ClosePrice - 1`` and its month gap to the main one.

    Parameters
    ----------
    df : pandas.DataFrame
        One session slice with contract codes in index level 0 and the
        columns 'Turnover' and 'ClosePrice'.

    Returns
    -------
    (list, list)
        Price ratios and month gaps, each padded with NaN to length 3.
        Both are all-NaN when fewer than two contracts pass the turnover
        filter, including an empty slice or one whose Turnover is all NaN
        or zero (for example a session without any trading).
    """
    n_out = 3

    # First find the main contract and the contracts liquid enough to compare.
    thresh_turnover = df['Turnover'].max() / 20
    liquid = df.loc[df['Turnover'] > thresh_turnover, :]
    liquid = liquid.sort_values('Turnover', ascending=False).iloc[:n_out + 1, :]

    # "< 2" rather than "== 1": an empty selection used to fall through and
    # fail on close_price_arr[0].
    if len(liquid) < 2:
        return [np.nan] * n_out, [np.nan] * n_out

    close_price_arr = liquid['ClosePrice'].to_numpy().astype(float)
    res_price = list(close_price_arr[1:] / close_price_arr[0] - 1)

    codes = liquid.index.get_level_values(0).to_numpy()
    res_month_diff = [_contract_month_gap(code, codes[0]) for code in codes[1:]]

    padding = [np.nan] * (n_out - len(res_price))
    return res_price + padding, res_month_diff + padding
    

In [129]:
# Carry of the first session slice: (price ratios, month gaps).
calcCarry(ss)

([0.009653725078698772, 0.004407135362014758, 0.001469045120671586], [6, 2, 1])

In [132]:
def calcCarryTotal(df):
    """Run calcCarry on every sub-frame from splitdf and tabulate the results.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame accepted by ``splitdf`` that also has a 'TradeDate' column.

    Returns
    -------
    pandas.DataFrame
        One row per sub-frame, indexed by (first TradeDate of ``df``,
        position of the sub-frame in the splitdf list), with the columns
        ratio_0..ratio_2 followed by month_0..month_2.
    """
    rows = {}
    for position, session_df in enumerate(splitdf(df)):
        ratios, month_gaps = calcCarry(session_df)
        rows[position] = [*ratios, *month_gaps]

    carry = pd.DataFrame(rows).T
    carry.columns = ['ratio_0', 'ratio_1', 'ratio_2', 'month_0', 'month_1', 'month_2']

    first_date = df['TradeDate'].iloc[0]
    carry.index = pd.MultiIndex.from_product([[first_date], carry.index])
    return carry
        

In [133]:
# Carry table for all sessions of the index-filled frame.
calcCarryTotal(q)

Unnamed: 0,Unnamed: 1,ratio_0,ratio_1,ratio_2,month_0,month_1,month_2
2022-05-10,0,0.009654,0.004407,0.001469,6.0,2.0,1.0
2022-05-10,1,0.007494,0.001041,0.002914,6.0,1.0,2.0
2022-05-10,2,0.008332,,,6.0,,


In [16]:
def diff_month(d1, d2):
    """Number of months from contract ``d2`` to contract ``d1``.

    Only the last four characters of each argument are used; they are read
    as a two-digit year followed by a two-digit month. The result is
    negative when ``d1`` is the earlier contract.
    """
    def month_count(code):
        yymm = code[-4:]
        return int(yymm[:2]) * 12 + int(yymm[2:])

    return month_count(d1) - month_count(d2)
    

In [17]:
# Sanity check: the result is negative when the first contract is the earlier one.
diff_month('P2209', 'P2301')

-4

In [121]:
# Scratch check: "+" concatenates two lists.
[1,2,3] + [2,3,4]

[1, 2, 3, 2, 3, 4]

In [123]:
# Scratch check: a dict of lists becomes one row per key after transposing.
pd.DataFrame({1:[2,3,4,5],2:[4,5,6,7]}).T

Unnamed: 0,0,1,2,3
1,2,3,4,5
2,4,5,6,7
