# Simple moving average

Each day, rank stocks by (adjusted close − moving average).
Go long the stocks furthest above their moving average and short the ones furthest below it.

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('display.max_colwidth', None)
from decimal import ROUND_HALF_UP, Decimal
from statsmodels.api import OLS
import random
import statsmodels.api as sm
from ta.trend import sma_indicator,wma_indicator,ema_indicator
from tqdm.auto import tqdm
tqdm.pandas()

In [2]:
# Load the main training prices and the supplemental prices from CSV.
train_stock_prices = pd.read_csv('ds/train_files/stock_prices.csv')
supplemental_stock_prices = pd.read_csv('ds/supplemental_files/stock_prices.csv')
# Stack both frames row-wise with a fresh 0..n-1 index.
# NOTE(review): the experiment cells visible in this notebook only use train_stock_prices.
data_stock_prices = pd.concat([train_stock_prices,supplemental_stock_prices],ignore_index=True)

In [3]:
def calc_adjusted_close(df):
    """Add adjusted closing prices to a price history.

    ``AdjustmentFactor`` is compounded from the most recent date backwards and
    multiplied into ``Close``; the product is rounded half-up to one decimal
    place. Adjusted closes equal to zero are treated as missing and replaced by
    the last earlier non-missing value.

    Args:
        df (pd.DataFrame): rows with ``Date``, ``Close`` and
            ``AdjustmentFactor`` columns (presumably one security at a time;
            the visible caller applies this per ``SecuritiesCode``).
    Returns:
        (pd.DataFrame): a new frame sorted by ascending ``Date`` with the
        columns ``cummulative_adjustment_factor`` and ``adjusted_close`` added.
    """
    def _round_half_up(value):
        # Go through str() so Decimal sees the printed value rather than the
        # exact binary expansion of the float.
        return float(Decimal(str(value)).quantize(Decimal("0.1"), rounding=ROUND_HALF_UP))

    # Newest row first, so the running product accumulates backwards in time.
    newest_first = df.sort_values("Date", ascending=False)
    newest_first["cummulative_adjustment_factor"] = newest_first["AdjustmentFactor"].cumprod()
    scaled = newest_first["cummulative_adjustment_factor"] * newest_first["Close"]
    newest_first["adjusted_close"] = scaled.map(_round_half_up)

    oldest_first = newest_first.sort_values("Date")
    adjusted = oldest_first["adjusted_close"]
    # A zero price is a gap: blank it out, then carry the previous value forward.
    oldest_first["adjusted_close"] = adjusted.mask(adjusted == 0).ffill()
    return oldest_first

In [18]:

def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size: int = 200, toprank_weight_ratio: float = 2) -> float:
    """
    Sharpe ratio of the daily long/short spread return implied by ``Rank``.

    Args:
        df (pd.DataFrame): predicted results with 'Date', 'Rank' and 'Target' columns
        portfolio_size (int): # of equities to buy/sell
        toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
    Returns:
        (float): mean of the daily spread returns divided by their standard deviation
    """
    def _daily_spread(day, size, top_ratio):
        """
        Weighted return of the best-ranked names minus that of the worst-ranked names for one day.

        Args:
            day (pd.DataFrame): predicted results of a single date
            size (int): # of equities to buy/sell
            top_ratio (float): the relative weight of the most highly ranked stock compared to the least.
        Returns:
            (float): spread return
        """
        ranks = day['Rank']
        # Ranks within a day are expected to run from 0 to n-1.
        assert ranks.min() == 0
        assert ranks.max() == len(ranks) - 1

        # Linearly decaying weights: top_ratio for the first pick down to 1 for the last.
        weights = np.linspace(start=top_ratio, stop=1, num=size)
        scale = weights.mean()

        # Target is the rate of change
        best_first = day.sort_values(by='Rank')['Target'][:size]
        worst_first = day.sort_values(by='Rank', ascending=False)['Target'][:size]
        long_leg = (best_first * weights).sum() / scale
        short_leg = (worst_first * weights).sum() / scale
        return long_leg - short_leg

    daily = df.groupby('Date').apply(_daily_spread, portfolio_size, toprank_weight_ratio)
    return daily.mean() / daily.std()

In [29]:
def hull(close,window):
    """Return ``close`` minus a Hull-style moving average of ``close``.

    The average is built from three ``wma_indicator`` passes: twice the WMA
    over ``round(window / 2)`` periods, minus the WMA over ``window`` periods,
    smoothed by a WMA over ``round(sqrt(window))`` periods.

    Args:
        close: price series passed straight to ``wma_indicator``
        window: base look-back length
    Returns:
        the difference ``close - hull_average``
    """
    half_span = int(round(window / 2))
    # Double the short WMA and subtract the long one before smoothing.
    raw = 2 * wma_indicator(close, half_span) - wma_indicator(close, window)
    smoothing_span = int(round(np.sqrt(window)))
    hull_average = wma_indicator(raw, smoothing_span)
    return close - hull_average

In [55]:
def _per_security_indicator(df, indicator, window, show_progress=False):
    """Evaluate ``indicator(adjusted_close, window)`` separately for each SecuritiesCode.

    ``transform`` is used instead of ``apply`` so the result always carries
    ``df``'s own index, even when there is only one security (where ``apply``
    would return a wide DataFrame instead of a Series).
    """
    grouped = df.groupby("SecuritiesCode")["adjusted_close"]
    run = grouped.transform
    if show_progress:
        # progress_transform only exists after tqdm.pandas() has been called;
        # fall back to the plain transform otherwise.
        run = getattr(grouped, "progress_transform", run)
    return run(lambda close: indicator(close, window))


def create_features(df,window,tech):
    """Build the daily ranking for one moving-average strategy.

    Steps: parse ``Date``, drop ``RowId``, remove the rows dated 2020-10-01,
    compute ``adjusted_close`` per security, compute the chosen indicator per
    security, and rank securities within each date by
    ``adjusted_close - indicator`` (largest gap gets ``Rank`` 0).

    Args:
        df (pd.DataFrame): raw stock prices; must contain ``RowId``, ``Date``,
            ``SecuritiesCode`` plus the columns ``calc_adjusted_close`` needs.
            The input frame is not modified.
        window (int): look-back length handed to the indicator.
        tech (str): ``"SMA"``, ``"EMA"`` or ``"HULL"``; any other value is
            treated as ``"WMA"``.
    Returns:
        (pd.DataFrame): rows whose indicator gap is defined, with the added
        columns ``<tech>`` (not for HULL), ``<tech>_diff`` and ``Rank``.
    """
    df = df.copy()
    df["Date"] = pd.to_datetime(df["Date"])
    df = df.drop(["RowId"],axis=1)
    # NOTE(review): hard-coded exclusion; the reason is not visible in this
    # notebook (possibly a day without regular trading). Confirm.
    df = df[df["Date"]!="2020-10-01"]

    # Iterate the groups explicitly rather than using groupby.apply: group
    # frames obtained this way always include SecuritiesCode, whereas apply
    # passing the grouping column to the function is deprecated in pandas.
    per_security = [calc_adjusted_close(group) for _, group in df.groupby("SecuritiesCode")]
    # pd.concat rejects an empty list, so handle a frame with no rows directly.
    df = pd.concat(per_security) if per_security else calc_adjusted_close(df)
    df = df.reset_index(drop=True).sort_values(["Date","SecuritiesCode"]).reset_index(drop=True)

    if tech == "HULL":
        # hull() already returns close minus the Hull average.
        diff_col = "HULL_diff"
        df[diff_col] = _per_security_indicator(df, hull, window, show_progress=True)
    else:
        # (column name, indicator, show progress bar); unknown names fall back to WMA.
        name, indicator, show_progress = {
            "SMA": ("SMA", sma_indicator, False),
            "EMA": ("EMA", ema_indicator, False),
        }.get(tech, ("WMA", wma_indicator, True))
        diff_col = name + "_diff"
        df[name] = _per_security_indicator(df, indicator, window, show_progress=show_progress)
        df[diff_col] = df["adjusted_close"] - df[name]

    # Copy so that adding Rank below writes to an independent frame.
    df = df.dropna(subset=[diff_col]).copy()
    df["Rank"] = df.groupby("Date")[diff_col].rank(method="first", ascending=False) - 1
    return df

In [14]:
# Sweep SMA windows 0..19 on the training prices and print each window's Sharpe ratio.
# NOTE(review): window 0 yields no usable result (see the empty Series printed
# for i=0 in the recorded output); consider starting the sweep at 1.
for i in range(20):
    test = create_features(train_stock_prices,i,"SMA")
    print(i,calc_spread_return_sharpe(test))

0 Series([], dtype: float64)
1 0.00572789399375571
2 -0.030196665384828923
3 -0.0553855609890768
4 -0.05759953516355983
5 -0.05767727460909306
6 -0.05312975851327651
7 -0.05186208572959556
8 -0.047825038046860134
9 -0.046824450219794075
10 -0.0480595072853108
11 -0.04963878060960106
12 -0.04770506709934199
13 -0.04816097915688667
14 -0.0487736225430018
15 -0.04717100045852948
16 -0.048455801273579895
17 -0.049425677418197214
18 -0.04856500791980775
19 -0.04677526973561797


In [17]:
# Sweep EMA windows 2..19 on the training prices and print each window's Sharpe ratio.
for i in range(2,20):
    test = create_features(train_stock_prices,i,"EMA")
    print(i,calc_spread_return_sharpe(test))

2 -0.05063871582275966
3 -0.05777050099420525
4 -0.05657765463508878
5 -0.05517163000371047
6 -0.05535035994034471
7 -0.05516567411061257
8 -0.05457629432399737
9 -0.055436326829603
10 -0.05408081905927273
11 -0.055239995543182485
12 -0.05505330750469431
13 -0.056848741427761654
14 -0.05970346783523429
15 -0.06019483094026366
16 -0.05934141945999082
17 -0.0611412750591002
18 -0.060785927603553044
19 -0.06094792179209723


In [56]:
# Sweep WMA windows 2..19 on the training prices and print each window's Sharpe ratio.
# create_features shows a per-security progress bar for WMA.
for i in range(2,20):
    test = create_features(train_stock_prices,i,"WMA")
    print(i,calc_spread_return_sharpe(test))

  0%|          | 0/2000 [00:00<?, ?it/s]

2 -0.03001058303139861


  0%|          | 0/2000 [00:00<?, ?it/s]

3 -0.049035592135028394


  0%|          | 0/2000 [00:00<?, ?it/s]

4 -0.05475626055456376


  0%|          | 0/2000 [00:00<?, ?it/s]

5 -0.06045066964195395


  0%|          | 0/2000 [00:00<?, ?it/s]

6 -0.05816424488924153


  0%|          | 0/2000 [00:00<?, ?it/s]

7 -0.056280408380574985


  0%|          | 0/2000 [00:00<?, ?it/s]

8 -0.055782028702700166


  0%|          | 0/2000 [00:00<?, ?it/s]

9 -0.05671564896614801


  0%|          | 0/2000 [00:00<?, ?it/s]

10 -0.050260047920571004


  0%|          | 0/2000 [00:00<?, ?it/s]

11 -0.05085870560426859


  0%|          | 0/2000 [00:00<?, ?it/s]

12 -0.051631927904158444


  0%|          | 0/2000 [00:00<?, ?it/s]

13 -0.04924111270221629


  0%|          | 0/2000 [00:00<?, ?it/s]

14 -0.05130891431327744


  0%|          | 0/2000 [00:00<?, ?it/s]

15 -0.05205851723937231


  0%|          | 0/2000 [00:00<?, ?it/s]

16 -0.0509866047116529


  0%|          | 0/2000 [00:00<?, ?it/s]

17 -0.050931574498458115


  0%|          | 0/2000 [00:00<?, ?it/s]

18 -0.05123787186214216


  0%|          | 0/2000 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [58]:
    # Hull strategy with window=2 on the training prices; print its Sharpe ratio.
    test = create_features(train_stock_prices,2,"HULL")
    print(calc_spread_return_sharpe(test))

  0%|          | 0/2000 [00:00<?, ?it/s]

0.030699504226180525


In [60]:
    # Hull strategy with window=5 on the training prices; print its Sharpe ratio.
    test = create_features(train_stock_prices,5,"HULL")
    print(calc_spread_return_sharpe(test))

  0%|          | 0/2000 [00:00<?, ?it/s]

0.011570752111339873


In [62]:
    # Hull strategy with window=20 on the training prices; print its Sharpe ratio.
    test = create_features(train_stock_prices,20,"HULL")
    print(calc_spread_return_sharpe(test))

  0%|          | 0/2000 [00:00<?, ?it/s]

-0.03831473871998391


In [65]:
    # Hull strategy with window=10 on the training prices; print its Sharpe ratio.
    test = create_features(train_stock_prices,10,"HULL")
    print(calc_spread_return_sharpe(test))

  0%|          | 0/2000 [00:00<?, ?it/s]

-0.04551828059338785


In [66]:
    # Hull strategy with window=50 on the training prices; print its Sharpe ratio.
    test = create_features(train_stock_prices,50,"HULL")
    print(calc_spread_return_sharpe(test))

  0%|          | 0/2000 [00:00<?, ?it/s]

-0.033941830443256917


In [67]:
    # EMA strategy with window=50 on the training prices; print its Sharpe ratio.
    test = create_features(train_stock_prices,50,"EMA")
    print(calc_spread_return_sharpe(test))

-0.057013966211592604
