In [1]:
import numpy as np
import pandas as pd
from decimal import ROUND_HALF_UP, Decimal


In [2]:
stock_list = pd.read_csv('ds/stock_list.csv')
supplemental_stock_prices = pd.read_csv('ds/supplemental_files/stock_prices.csv')
train_stock_prices = pd.read_csv('ds/train_files/stock_prices.csv')
test_stock_prices = pd.read_csv('ds/example_test_files/stock_prices.csv')

In [3]:
def calc_rate_of_change(current_value,old_value):
    return ((current_value - old_value)/old_value)

In [4]:
def create_base_long_short(prices):
    if(prices == "train"):
        model =  train_stock_prices.copy()
        model["daily_return"] = (model["adjusted_close"]-model["Open"])/model["Open"]
        #Open high low close not available
        model=model[model["Date"]!="2020-10-01"]
        model["daily_return"] = model.groupby(["Date"])["daily_return"].apply(lambda x: x.fillna(x.mean()))
        model["Previous Daily Return"] =model.groupby(["SecuritiesCode"])["daily_return"].shift(1)
        model["Previous Daily Return"] =model.groupby(["SecuritiesCode"])["Previous Daily Return"].apply(lambda x: x.fillna(x.mean()))
        model["Rank"] = model.groupby(["Date"])["Previous Daily Return"].rank(method="first")-1
        #Remove first day since no previous daily returns
        model = model[model["Date"]!="2017-01-04"]
        model = model.sort_values(["Date","Rank"]).reset_index(drop=True)
        return model
    elif(prices=="supplemental"):
        model =  pd.concat([train_stock_prices[train_stock_prices["Date"]=="2021-12-03"].copy(),supplemental_stock_prices.copy()])
        model["daily_return"] = (model["adjusted_close"]-model["Open"])/model["Open"]
        #Open high low close not available
        model["daily_return"] = model.groupby(["Date"])["daily_return"].apply(lambda x: x.fillna(x.mean()))
        model["Previous Daily Return"] =model.groupby(["SecuritiesCode"])["daily_return"].shift(1)
        model["Rank"] = model.groupby(["Date"])["Previous Daily Return"].rank(method="first")-1
        #Remove first day since no previous daily returns
        model = model[model["Date"]!="2021-12-03"]
        model = model.sort_values(["Date","Rank"]).reset_index(drop=True)
        return model
    else:
        model = pd.concat([train_stock_prices[train_stock_prices["Date"]=="2021-12-03"].copy(),test_stock_prices.copy()])
        model["daily_return"] = (model["adjusted_close"]-model["Open"])/model["Open"]
        model["Closing Price after 1 day"] = model["adjusted_close"].shift(-1)
        model["Closing Price after 2 days"] = model["adjusted_close"].shift(-2)
        model["Target"] = model.apply( lambda x: calc_rate_of_change(x["Closing Price after 2 days"],x["Closing Price after 1 day"]),axis=1)
        model["daily_return"] = (model["adjusted_close"]-model["Open"])/model["Open"]
        #Open high low close not available
        model["daily_return"] = model.groupby(["Date"])["daily_return"].apply(lambda x: x.fillna(x.mean()))
        model["Previous Daily Return"] =model.groupby(["SecuritiesCode"])["daily_return"].shift(1)
        model["Rank"] = model.groupby(["Date"])["Previous Daily Return"].rank(method="first")-1
        #Remove first day since no previous daily returns
        model = model[model["Date"]!="2021-12-03"]
        model = model.sort_values(["Date","Rank"]).reset_index(drop=True)
        return model

In [5]:
def calc_adjusted_close(df):
    df = df.sort_values("Date",ascending=False)
    df.loc[:,"cummulative_adjustment_factor"] = df["AdjustmentFactor"].cumprod()
    df.loc[:,"adjusted_close"] = (df["cummulative_adjustment_factor"]*df["Close"]).map(lambda x: float(Decimal(str(x)).quantize(Decimal("0.1"),rounding=ROUND_HALF_UP)))
    df = df.sort_values("Date")
    df.loc[df["adjusted_close"]==0,"adjusted_close"] = np.nan
    df.loc[:,"adjusted_close"] = df.loc[:,"adjusted_close"].ffill()
    return df

In [6]:
train_stock_prices["Date"] = pd.to_datetime(train_stock_prices["Date"])
test_stock_prices["Date"] = pd.to_datetime(test_stock_prices["Date"])
supplemental_stock_prices["Date"] = pd.to_datetime(supplemental_stock_prices["Date"])

In [7]:
train_stock_prices = train_stock_prices.drop(["RowId"],axis=1)
test_stock_prices = test_stock_prices.drop(["RowId"],axis=1)
supplemental_stock_prices = supplemental_stock_prices.drop(["RowId"],axis=1)

In [8]:
train_stock_prices.sort_values("Date",ascending=False)

Unnamed: 0,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target
2332530,2021-12-03,9997,690.0,711.0,686.0,696.0,381100,1.0,,False,0.018414
2331201,2021-12-03,4526,1587.0,1640.0,1577.0,1638.0,58600,1.0,,False,0.032297
2331188,2021-12-03,4488,6400.0,6700.0,6300.0,6700.0,52200,1.0,,False,-0.011688
2331189,2021-12-03,4490,5330.0,5520.0,5240.0,5520.0,85700,1.0,,False,0.094961
2331190,2021-12-03,4493,1913.0,1984.0,1899.0,1974.0,173800,1.0,,False,0.050183
...,...,...,...,...,...,...,...,...,...,...,...
1248,2017-01-04,7315,465.0,494.0,465.0,493.0,41100,1.0,,False,-0.004032
1249,2017-01-04,7408,2459.0,2518.0,2447.0,2500.0,110900,1.0,,False,0.004421
1250,2017-01-04,7412,719.0,725.0,719.0,721.0,201400,1.0,,False,0.000000
1251,2017-01-04,7414,1360.0,1400.0,1360.0,1399.0,33700,1.0,,False,-0.007153


In [9]:
train_stock_prices = train_stock_prices.groupby("SecuritiesCode").apply(calc_adjusted_close).reset_index(drop=True).sort_values(["Date","SecuritiesCode"]).reset_index(drop=True)
test_stock_prices = test_stock_prices.groupby("SecuritiesCode").apply(calc_adjusted_close).reset_index(drop=True).sort_values(["Date","SecuritiesCode"]).reset_index(drop=True)
supplemental_stock_prices = supplemental_stock_prices.groupby("SecuritiesCode").apply(calc_adjusted_close).reset_index(drop=True).sort_values(["Date","SecuritiesCode"]).reset_index(drop=True)

In [10]:
standard_mkt_cap = create_base_long_short('supplemental')
standard_mkt_cap = standard_mkt_cap.join(stock_list[["SecuritiesCode","MarketCapitalization"]].set_index("SecuritiesCode"),on="SecuritiesCode")
standard_mkt_cap["mkt_cap_rank"] = standard_mkt_cap.groupby(["Date"])["MarketCapitalization"].rank(method="first")-1
standard_mkt_cap["is_not_small_cap"] = standard_mkt_cap.groupby(["Date"])["mkt_cap_rank"].apply(lambda x: ((x/x.max())>=0.3).astype(int))
standard_mkt_cap["above_median_small_cap_rank"]=standard_mkt_cap.groupby(["Date"]).apply(lambda x: x["Rank"]>x[x["is_not_small_cap"]==0]["Rank"].median()).astype(int).values
standard_mkt_cap.loc[(standard_mkt_cap["above_median_small_cap_rank"]==1)&(standard_mkt_cap["is_not_small_cap"]==0),["is_not_small_cap"]]=3
standard_mkt_cap_small = standard_mkt_cap.sort_values(["Date","is_not_small_cap"]).reset_index(drop=True)
standard_mkt_cap_small["Rank"]=standard_mkt_cap_small.groupby(["Date"])["Rank"].transform(lambda x: np.linspace(x.min(),x.max(),x.count()))
standard_mkt_cap_small = standard_mkt_cap_small.set_index(["Date","SecuritiesCode"])
standard_mkt_cap_small["Rank"] = standard_mkt_cap_small["Rank"].astype("int")


In [11]:
standard_mkt_cap_small

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,cummulative_adjustment_factor,adjusted_close,daily_return,Previous Daily Return,Rank,MarketCapitalization,mkt_cap_rank,is_not_small_cap,above_median_small_cap_rank
Date,SecuritiesCode,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-12-06,6378,949.0,962.0,914.0,930.0,759700,1.0,,False,0.027689,1.000000,930.0,-0.020021,-0.079612,0,1.744820e+10,141.0,0,0
2021-12-06,7065,2605.0,2605.0,2504.0,2545.0,12400,1.0,,False,0.006255,1.000000,2545.0,-0.023033,-0.047184,1,2.089648e+10,275.0,0,0
2021-12-06,4978,228.0,231.0,221.0,221.0,2061400,1.0,,False,0.004464,1.000000,221.0,-0.030702,-0.046025,2,1.918279e+10,192.0,0,0
2021-12-06,1930,1100.0,1105.0,1075.0,1095.0,13500,1.0,,False,-0.017548,0.833333,912.5,-0.170455,-0.033913,3,2.279760e+10,334.0,0,0
2021-12-06,4026,2600.0,2673.0,2505.0,2641.0,67600,1.0,,False,0.027362,1.000000,2641.0,0.015769,-0.031950,4,2.204664e+10,312.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-28,4310,1958.0,1969.0,1898.0,1961.0,26000,1.0,,False,0.022200,1.000000,1961.0,0.001532,0.067517,1993,1.649947e+10,98.0,3,1
2022-04-28,9932,2123.0,2192.0,2123.0,2182.0,12000,1.0,,False,0.035829,1.000000,2182.0,0.027791,0.070717,1994,2.770015e+10,484.0,3,1
2022-04-28,7726,2258.0,2348.0,2156.0,2160.0,102100,1.0,,False,0.022449,1.000000,2160.0,-0.043401,0.074519,1995,1.652092e+10,99.0,3,1
2022-04-28,6848,920.0,940.0,896.0,902.0,23400,1.0,,False,0.008484,1.000000,902.0,-0.019565,0.079545,1996,1.759435e+10,143.0,3,1


In [12]:
standard_mkt_cap_small[standard_mkt_cap_small.index.get_level_values('SecuritiesCode')==9441]

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,cummulative_adjustment_factor,adjusted_close,daily_return,Previous Daily Return,Rank,MarketCapitalization,mkt_cap_rank,is_not_small_cap,above_median_small_cap_rank
Date,SecuritiesCode,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-12-06,9441,8140.0,8300.0,8020.0,8090.0,800,1.0,,False,-0.014833,0.333333,2696.7,-0.668710,0.000000,93,1.728258e+10,131.0,0,0
2021-12-07,9441,8090.0,8090.0,7960.0,8090.0,600,1.0,,False,-0.011292,0.333333,2696.7,-0.666663,-0.668710,0,1.728258e+10,131.0,0,0
2021-12-08,9441,8090.0,8090.0,7960.0,7970.0,600,1.0,,False,0.019035,0.333333,2656.7,-0.671607,-0.666663,0,1.728258e+10,131.0,0,0
2021-12-09,9441,7880.0,7880.0,7880.0,7880.0,100,1.0,,False,0.003736,0.333333,2626.7,-0.666662,-0.671607,0,1.728258e+10,131.0,0,0
2021-12-10,9441,8030.0,8030.0,8030.0,8030.0,100,1.0,,False,0.000000,0.333333,2676.7,-0.666663,-0.666662,1,1.728258e+10,131.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-22,9441,1898.0,1898.0,1890.0,1890.0,600,1.0,,False,-0.021053,1.000000,1890.0,-0.004215,0.004664,1759,1.728258e+10,131.0,3,1
2022-04-25,9441,1930.0,1949.0,1890.0,1900.0,2000,1.0,,False,0.005376,1.000000,1900.0,-0.015544,-0.004215,228,1.728258e+10,131.0,0,0
2022-04-26,9441,1876.0,1876.0,1860.0,1860.0,500,1.0,,False,0.010695,1.000000,1860.0,-0.008529,-0.015544,65,1.728258e+10,131.0,0,0
2022-04-27,9441,1855.0,1870.0,1855.0,1870.0,1200,1.0,,False,0.000000,1.000000,1870.0,0.008086,-0.008529,134,1.728258e+10,131.0,0,0


In [13]:
def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size: int = 200, toprank_weight_ratio: float = 2) -> float:
    """
    Args:
        df (pd.DataFrame): predicted results
        portfolio_size (int): # of equities to buy/sell
        toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
    Returns:
        (float): sharpe ratio
    """
    def _calc_spread_return_per_day(df, portfolio_size, toprank_weight_ratio):
        """
        Args:
            df (pd.DataFrame): predicted results
            portfolio_size (int): # of equities to buy/sell
            toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
        Returns:
            (float): spread return
        """
        assert df['Rank'].min() == 0
        assert df['Rank'].max() == len(df['Rank']) - 1
        weights = np.linspace(start=toprank_weight_ratio, stop=1, num=portfolio_size)
        purchase = (df.sort_values(by='Rank')['Target'][:portfolio_size] * weights).sum() / weights.mean()
        short = (df.sort_values(by='Rank', ascending=False)['Target'][:portfolio_size] * weights).sum() / weights.mean()
        return purchase - short

    buf = df.groupby('Date').apply(_calc_spread_return_per_day, portfolio_size, toprank_weight_ratio)
    sharpe_ratio = buf.mean() / buf.std()
    return sharpe_ratio

In [14]:
calc_spread_return_sharpe(standard_mkt_cap_small)

0.2823306258699462

In [18]:
standard_mkt_cap_small.loc['2021-12-07']

Unnamed: 0_level_0,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,cummulative_adjustment_factor,adjusted_close,daily_return,Previous Daily Return,Rank,MarketCapitalization,mkt_cap_rank,is_not_small_cap,above_median_small_cap_rank
SecuritiesCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
9441,8090.0,8090.0,7960.0,8090.0,600,1.0,,False,-0.011292,0.333333,2696.7,-0.666663,-0.668710,0,1.728258e+10,131.0,0,0
7723,4670.0,4815.0,4670.0,4815.0,2000,1.0,,False,0.002056,0.333333,1605.0,-0.656317,-0.666311,1,2.765320e+10,482.0,0,0
4169,6500.0,6550.0,5870.0,6070.0,496200,1.0,,False,-0.024433,0.500000,3035.0,-0.533077,-0.555712,2,2.934851e+10,530.0,0,0
2987,3465.0,3465.0,3070.0,3120.0,107600,0.5,,False,-0.022837,0.500000,1560.0,-0.549784,-0.527338,3,1.531418e+10,44.0,0,0
2217,5200.0,5300.0,5180.0,5280.0,3200,1.0,,False,0.003824,0.500000,2640.0,-0.492308,-0.516791,4,2.164843e+10,306.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7047,1363.0,1511.0,1336.0,1382.0,703100,1.0,,False,-0.011724,1.000000,1382.0,0.013940,0.043028,1995,1.711219e+10,126.0,3,1
6182,1250.0,1340.0,1223.0,1335.0,710700,1.0,,False,-0.005904,1.000000,1335.0,0.068000,0.054434,1996,1.592254e+10,75.0,3,1
4235,1935.0,1950.0,1864.0,1878.0,38300,1.0,,False,-0.010444,1.000000,1878.0,-0.029457,0.066304,1997,1.486386e+10,17.0,3,1
6912,1450.0,1499.0,1310.0,1394.0,211200,1.0,,False,-0.033582,1.000000,1394.0,-0.038621,0.080330,1998,1.560240e+10,61.0,3,1


In [15]:
for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    sample_prediction["Rank"]=sample_prediction.apply(lambda x: standard_mkt_cap_small.loc[(x["Date"],x["SecuritiesCode"])]["Rank"] , axis=1)
    env.predict(sample_prediction)

NameError: name 'iter_test' is not defined