In [1]:
import numpy as np
import pandas as pd
from decimal import ROUND_HALF_UP, Decimal


In [3]:
stock_list = pd.read_csv('ds/stock_list.csv')
supplemental_stock_prices = pd.read_csv('ds/supplemental_files/stock_prices.csv')
train_stock_prices = pd.read_csv('ds/train_files/stock_prices.csv')
test_stock_prices = pd.read_csv('ds/example_test_files/stock_prices.csv')

In [4]:
def calc_rate_of_change(current_value,old_value):
    return ((current_value - old_value)/old_value)

In [5]:
def create_base_long_short(prices):
    if(prices == "train"):
        model =  train_stock_prices.copy()
        model["daily_return"] = (model["adjusted_close"]-model["Open"])/model["Open"]
        #Open high low close not available
        model=model[model["Date"]!="2020-10-01"]
        model["daily_return"] = model.groupby(["Date"])["daily_return"].apply(lambda x: x.fillna(x.mean()))
        model["Previous Daily Return"] =model.groupby(["SecuritiesCode"])["daily_return"].shift(1)
        model["Previous Daily Return"] =model.groupby(["SecuritiesCode"])["Previous Daily Return"].apply(lambda x: x.fillna(x.mean()))
        model["Rank"] = model.groupby(["Date"])["Previous Daily Return"].rank(method="first")-1
        #Remove first day since no previous daily returns
        model = model[model["Date"]!="2017-01-04"]
        model = model.sort_values(["Date","Rank"]).reset_index(drop=True)
        return model
    elif(prices=="supplemental"):
        model =  pd.concat([train_stock_prices[train_stock_prices["Date"]=="2021-12-03"].copy(),supplemental_stock_prices.copy()])
        model["daily_return"] = (model["adjusted_close"]-model["Open"])/model["Open"]
        #Open high low close not available
        model["daily_return"] = model.groupby(["Date"])["daily_return"].apply(lambda x: x.fillna(x.mean()))
        model["Previous Daily Return"] =model.groupby(["SecuritiesCode"])["daily_return"].shift(1)
        model["Rank"] = model.groupby(["Date"])["Previous Daily Return"].rank(method="first")-1
        #Remove first day since no previous daily returns
        model = model[model["Date"]!="2021-12-03"]
        model = model.sort_values(["Date","Rank"]).reset_index(drop=True)
        return model
    else:
        model = pd.concat([train_stock_prices[train_stock_prices["Date"]=="2021-12-03"].copy(),test_stock_prices.copy()])
        model["daily_return"] = (model["adjusted_close"]-model["Open"])/model["Open"]
        model["Closing Price after 1 day"] = model["adjusted_close"].shift(-1)
        model["Closing Price after 2 days"] = model["adjusted_close"].shift(-2)
        model["Target"] = model.apply( lambda x: calc_rate_of_change(x["Closing Price after 2 days"],x["Closing Price after 1 day"]),axis=1)
        model["daily_return"] = (model["adjusted_close"]-model["Open"])/model["Open"]
        #Open high low close not available
        model["daily_return"] = model.groupby(["Date"])["daily_return"].apply(lambda x: x.fillna(x.mean()))
        model["Previous Daily Return"] =model.groupby(["SecuritiesCode"])["daily_return"].shift(1)
        model["Rank"] = model.groupby(["Date"])["Previous Daily Return"].rank(method="first")-1
        #Remove first day since no previous daily returns
        model = model[model["Date"]!="2021-12-03"]
        model = model.sort_values(["Date","Rank"]).reset_index(drop=True)
        return model

In [6]:
def calc_adjusted_close(df):
    df = df.sort_values("Date",ascending=False)
    df.loc[:,"cummulative_adjustment_factor"] = df["AdjustmentFactor"].cumprod()
    df.loc[:,"adjusted_close"] = (df["cummulative_adjustment_factor"]*df["Close"]).map(lambda x: float(Decimal(str(x)).quantize(Decimal("0.1"),rounding=ROUND_HALF_UP)))
    df = df.sort_values("Date")
    df.loc[df["adjusted_close"]==0,"adjusted_close"] = np.nan
    df.loc[:,"adjusted_close"] = df.loc[:,"adjusted_close"].ffill()
    return df

In [7]:
train_stock_prices["Date"] = pd.to_datetime(train_stock_prices["Date"])
test_stock_prices["Date"] = pd.to_datetime(test_stock_prices["Date"])
supplemental_stock_prices["Date"] = pd.to_datetime(supplemental_stock_prices["Date"])

In [8]:
train_stock_prices = train_stock_prices.drop(["RowId"],axis=1)
test_stock_prices = test_stock_prices.drop(["RowId"],axis=1)
supplemental_stock_prices = supplemental_stock_prices.drop(["RowId"],axis=1)

In [9]:
train_stock_prices = train_stock_prices.groupby("SecuritiesCode").apply(calc_adjusted_close).reset_index(drop=True).sort_values(["Date","SecuritiesCode"]).reset_index(drop=True)
test_stock_prices = test_stock_prices.groupby("SecuritiesCode").apply(calc_adjusted_close).reset_index(drop=True).sort_values(["Date","SecuritiesCode"]).reset_index(drop=True)
supplemental_stock_prices = supplemental_stock_prices.groupby("SecuritiesCode").apply(calc_adjusted_close).reset_index(drop=True).sort_values(["Date","SecuritiesCode"]).reset_index(drop=True)

In [10]:
standard_mkt_cap = create_base_long_short('supplemental')
standard_mkt_cap = standard_mkt_cap.join(stock_list[["SecuritiesCode","MarketCapitalization"]].set_index("SecuritiesCode"),on="SecuritiesCode")
standard_mkt_cap["mkt_cap_rank"] = standard_mkt_cap.groupby(["Date"])["MarketCapitalization"].rank(method="first")-1
standard_mkt_cap["is_not_small_cap"] = standard_mkt_cap.groupby(["Date"])["mkt_cap_rank"].apply(lambda x: ((x/x.max())>=0.3).astype(int))
standard_mkt_cap["above_median_small_cap_rank"]=standard_mkt_cap.groupby(["Date"]).apply(lambda x: x["Rank"]>x[x["is_not_small_cap"]==0]["Rank"].median()).astype(int).values
standard_mkt_cap.loc[(standard_mkt_cap["above_median_small_cap_rank"]==1)&(standard_mkt_cap["is_not_small_cap"]==0),["is_not_small_cap"]]=3
standard_mkt_cap_small = standard_mkt_cap.sort_values(["Date","is_not_small_cap"]).reset_index(drop=True)
standard_mkt_cap_small["Rank"]=standard_mkt_cap_small.groupby(["Date"])["Rank"].transform(lambda x: np.linspace(x.min(),x.max(),x.count()))
standard_mkt_cap_small = standard_mkt_cap_small.set_index(["Date","SecuritiesCode"])
standard_mkt_cap_small["Rank"] = standard_mkt_cap_small["Rank"].astype("int")


In [11]:
standard_mkt_cap_small

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,cummulative_adjustment_factor,adjusted_close,daily_return,Previous Daily Return,Rank,MarketCapitalization,mkt_cap_rank,is_not_small_cap,above_median_small_cap_rank
Date,SecuritiesCode,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-12-06,6378,949.0,962.0,914.0,930.0,759700,1.0,,False,0.027689,1.000000,930.0,-0.020021,-0.079612,0,1.744820e+10,141.0,0,0
2021-12-06,7065,2605.0,2605.0,2504.0,2545.0,12400,1.0,,False,0.006255,1.000000,2545.0,-0.023033,-0.047184,1,2.089648e+10,275.0,0,0
2021-12-06,4978,228.0,231.0,221.0,221.0,2061400,1.0,,False,0.004464,1.000000,221.0,-0.030702,-0.046025,2,1.918279e+10,192.0,0,0
2021-12-06,1930,1100.0,1105.0,1075.0,1095.0,13500,1.0,,False,-0.017548,0.833333,912.5,-0.170455,-0.033913,3,2.279760e+10,334.0,0,0
2021-12-06,4026,2600.0,2673.0,2505.0,2641.0,67600,1.0,,False,0.027362,1.000000,2641.0,0.015769,-0.031950,4,2.204664e+10,312.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-28,4310,1958.0,1969.0,1898.0,1961.0,26000,1.0,,False,0.022200,1.000000,1961.0,0.001532,0.067517,1993,1.649947e+10,98.0,3,1
2022-04-28,9932,2123.0,2192.0,2123.0,2182.0,12000,1.0,,False,0.035829,1.000000,2182.0,0.027791,0.070717,1994,2.770015e+10,484.0,3,1
2022-04-28,7726,2258.0,2348.0,2156.0,2160.0,102100,1.0,,False,0.022449,1.000000,2160.0,-0.043401,0.074519,1995,1.652092e+10,99.0,3,1
2022-04-28,6848,920.0,940.0,896.0,902.0,23400,1.0,,False,0.008484,1.000000,902.0,-0.019565,0.079545,1996,1.759435e+10,143.0,3,1


In [10]:
for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    sample_prediction["Rank"]=sample_prediction.apply(lambda x: standard_mkt_cap_small.loc[(x["Date"],x["SecuritiesCode"])]["Rank"] , axis=1)
    env.predict(sample_prediction)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
