In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
# Load every futures instrument, then keep the commodity contracts whose
# order_book_id contains the "<1-2 word characters>88" pattern
# (presumably the platform's continuous-contract ids; confirm against the data).
all_contracts = all_instruments("Future")
match = r'\w{1,2}88'
continious_contracts = all_contracts[all_contracts.order_book_id.str.contains(match)]
# Plain boolean filtering instead of .where(...).dropna(how="all"): .where() first
# blanks the rejected rows to NaN, which upcasts the remaining columns' dtypes.
# The explicit copy lets later cells modify the frame without chained-assignment warnings.
continious_contracts = continious_contracts[continious_contracts["product"] == "Commodity"].copy()
# Sorted, de-duplicated list of the underlying symbols that survived the filter.
all_commodity = sorted(set(continious_contracts.underlying_symbol))

In [None]:
# Index the contracts by order_book_id. Rebinding (instead of inplace=True) together
# with the column guard keeps this cell safe to re-run: a second in-place call would
# raise KeyError because the column has already been moved into the index.
if "order_book_id" in continious_contracts.columns:
  continious_contracts = continious_contracts.set_index("order_book_id")

In [None]:
# Show the contract names that contain the character "麦" (wheat).
# A vectorised literal substring test replaces the per-row lambda; na=False treats a
# missing name as "no match" instead of raising TypeError on `"麦" in nan`.
continious_contracts.symbol[continious_contracts.symbol.str.contains("麦",regex=False,na=False)]

In [3]:
# Rebar, iron ore
pairs1 = ["RB","I"]
# Coking coal, coke
pairs2 = ["JM","J"]
# Eggs, corn, soybean meal
pairs3 = ["JD","M"]
# ,"C"   <- third symbol (presumably corn) is commented out, so the pair has two legs
# Strong wheat, hard wheat (this pair is commented out)
# pairs4 = ["WH","WT"]
# Silver, gold
pairs5 = ["AG","AU"]
# Copper, zinc
pairs6 = ["CU","ZN"]

In [4]:
def winsorized(series,n = 3):
  """Clip a series to the band median ± n * MAD.

  MAD is the median of the absolute deviations from the series median.
  Values outside the band are replaced by the nearest band edge; values
  inside it are returned unchanged.

  Parameters
  ----------
  series : pandas.Series
      Numeric data to clip.
  n : number, default 3
      Half-width of the band, expressed in MADs.

  Returns
  -------
  pandas.Series
      The clipped series, same index as the input.
  """
  center = series.median()
  mad = (series - center).abs().median()
  lower = center - n*mad
  upper = center + n*mad
  return series.clip(lower,upper)


In [5]:
def get_efficient(pairs,start_date,end_date):
  """Estimate an integer hedge coefficient between the first two symbols of `pairs`.

  Close prices of every symbol's "<symbol>88" contract are fetched with
  get_price. The first two are joined side by side, rows with missing values
  are dropped and each column is winsorized. An OLS regression with an
  intercept is then fitted in both directions, in this order:

  1. pairs[1] regressed on pairs[0]
  2. pairs[0] regressed on pairs[1]

  The first direction whose slope exceeds 1 wins.

  Parameters
  ----------
  pairs : sequence of str
      Underlying symbols; only the first two take part in the regression.
  start_date, end_date
      Passed through to get_price unchanged.

  Returns
  -------
  dict
      {"x": regressor symbol, "y": regressand symbol, "efficient": slope
      truncated to int}. If neither slope exceeds 1 the result is
      {"x": pairs[0], "y": pairs[1], "efficient": 1}.
  """
  closes = [get_price(symbol+"88",start_date,end_date,fields="close") for symbol in pairs]
  pairs_data = pd.concat([closes[0],closes[1]],axis=1).dropna().apply(winsorized)
  pairs_data.columns = [pairs[0],pairs[1]]

  # Try (x=first, y=second) before (x=second, y=first); stop at the first slope above 1.
  for x_pos,y_pos in ((0,1),(1,0)):
    exog = sm.add_constant(pairs_data.iloc[:,x_pos].values)
    endog = pairs_data.iloc[:,y_pos].values
    slope = sm.OLS(endog,exog,missing="drop").fit().params[1]
    if slope>1:
      # int() truncates towards zero, e.g. a slope of 1.9 becomes 1.
      return {"x": pairs[x_pos],"y":pairs[y_pos],"efficient":int(slope)}

  return {"x": pairs[0],"y":pairs[1],"efficient":1}

In [6]:
def get_backTest_sharpe(pairs,k1,k2,windows,start_date="2013-01-01",end_date="2018-01-01",frequency='1m'):
  """Backtest a rolling-band spread strategy on one pair and return its Sharpe ratio.

  The spread is y - efficient * x, where x, y and the coefficient come from
  get_efficient over the same date range. The spread is compared with a rolling
  band (mean + k1 * std above, mean - k2 * std below) to produce a signal:

  *  1 -> spread above the upper band: buy x, sell y
  * -1 -> spread below the lower band: buy y, sell x
  *  0 -> otherwise (including bars where the band is still NaN)

  Each bar's signal is multiplied by the next bar's return difference
  (x return minus y return).

  Parameters
  ----------
  pairs : sequence of str
      Underlying symbols, forwarded to get_efficient.
  k1, k2 : number
      Band half-widths, in rolling standard deviations, above and below the mean.
  windows : int
      Rolling window length in bars.
  start_date, end_date : default "2013-01-01" / "2018-01-01"
      Period used for both the coefficient estimate and the backtest. These were
      previously hard-coded in three places; the defaults keep the old behaviour.
  frequency : str, default '1m'
      Bar frequency passed to get_price for the backtest prices.

  Returns
  -------
  float
      Mean of the per-bar strategy returns divided by their standard deviation
      (not annualised).

  Notes
  -----
  The hedge coefficient only enters the spread; the return difference is the
  plain x return minus y return.
  """
  pairs_results = get_efficient(pairs,start_date,end_date)
  y = get_price(pairs_results["y"]+"88",start_date,end_date,fields="close",frequency=frequency)
  x = get_price(pairs_results["x"]+"88",start_date,end_date,fields="close",frequency=frequency)
  spread = y - x*pairs_results["efficient"]

  rolling_spread = spread.rolling(center=False,window=windows)
  spread_avg = rolling_spread.mean()
  spread_std = rolling_spread.std()
  # Upper and lower bounds of the band
  up_point = spread_avg+k1*spread_std
  down_point = spread_avg-k2*spread_std

  # Sell the spread (+1): y is rich and x is cheap -> buy x, sell y
  # Buy the spread (-1): x is rich and y is cheap -> buy y, sell x
  # The lower-band condition takes precedence, exactly as in the former
  # replace/fillna construction.
  signal = (spread > up_point).astype(int).mask(spread < down_point,-1)

  # Return from the current bar to the next one, aligned with the current signal
  y_chg = y.pct_change().shift(-1)
  x_chg = x.pct_change().shift(-1)

  combined_data = pd.concat([signal,x_chg,y_chg],axis=1)
  combined_data.columns = ["signal","x_chg","y_chg"]

  # Return difference between x and y over the next bar
  combined_data["DIFF"] = combined_data["x_chg"] - combined_data["y_chg"]
  rets = combined_data["signal"]*combined_data["DIFF"]
  sharpe = rets.mean()/rets.std()
  return sharpe
  

In [22]:
# Accumulators for the parameter sweep: `results` maps a run number to that run's
# result row, and `i` is the next run number to assign.
results, i = dict(), 0



In [None]:

# Parameter grid: band widths above (k1) and below (k2) the rolling mean, the rolling
# window length, and the pairs to evaluate.
k1_list = [2,3]
k2_list = [2,3]
windows_list = np.arange(20,90,10)
pairs_list = {"pairs1":pairs1,"pairs2":pairs2,"pairs3":pairs3,"pairs5":pairs5,"pairs6":pairs6}

# Start from a clean slate so that re-running this cell replaces the previous sweep
# instead of appending duplicate rows under ever-growing run numbers.
results = {}
i = 0

for k1 in k1_list:
  for k2 in k2_list:
    for pairs,pair_symbols in pairs_list.items():
      for windows in windows_list:
        # Progress trace: run number followed by the parameters being evaluated.
        print(i,k1,k2,pairs,windows)
        sharpe = get_backTest_sharpe(pair_symbols,k1,k2,windows)
        res = pd.Series([sharpe,k1,k2,pairs,windows],index = ["sharpe","k1","k2","pairs","windows"])
        results[i] = res
        i+=1


      


In [41]:
# One row per sweep run, indexed by (pairs, windows) and sorted on that index.
# from_dict(orient="index") builds the rows directly and infers a dtype per column;
# DataFrame(results).T would leave every column, including "sharpe", as dtype object.
pairs_results = pd.DataFrame.from_dict(results,orient="index").set_index(["pairs","windows"]).sort_index()

In [42]:
# Rank all runs by Sharpe ratio (best first), then show only the "pairs5" rows.
# "pairs" is the first index level, so selecting by name equals selecting level 0.
(
  pairs_results
  .sort_values(by="sharpe",ascending=False)
  .xs("pairs5",level="pairs")
)

Unnamed: 0_level_0,sharpe,k1,k2
windows,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
20,0.0578389,2,2
30,0.0565234,2,2
40,0.0542765,2,2
50,0.0509585,2,2
60,0.0479434,2,2
70,0.0465964,2,2
20,0.045608,3,2
30,0.0442899,3,2
40,0.0441258,3,2
80,0.0439753,2,2
