In [None]:
# pip install watermark lightgbm plotly cufflinks numpy pandas optuna torch pandas_ta gluonts pandas_datareader

In [None]:
# pip install -U git+https://github.com/unit8co/darts.git@master

In [None]:
# Notebook-wide IPython setup:
# 1. magic to render matplotlib figures inline
# 2. magic to (re)load the `watermark` extension, used to print version info
# 3. magic to enable retina (high resolution) plots
#    https://gist.github.com/minrk/3301035
# NOTE(review): no `%autoreload` magic is enabled in this cell, so edits to local
# modules imported by the notebook are not picked up automatically.
%matplotlib inline
%reload_ext watermark
%config InlineBackend.figure_format='retina'

In [None]:
%watermark

In [None]:
# conda install -c conda-forge 'u8darts'

### Library imports

In [None]:
import warnings
# Suppress every warning for the rest of the session. This also hides deprecation
# notices from the libraries imported below.
warnings.filterwarnings('ignore')

import os
import darts
import pandas as pd
import numpy as np 
from datetime import datetime
# NOTE(review): numpy is already imported two lines above; this repeat is redundant.
import numpy as np

import plotly
import plotly.express as px
import plotly.graph_objects as go

# pip install matplotlib==3.1.2
import matplotlib
import matplotlib.pyplot as plt

import plotly.offline
import cufflinks as cf
# NOTE(review): go_offline() is immediately followed by a config call that passes
# offline=False — confirm which of the two settings is actually intended.
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

In [None]:
# pip install -U "u8darts[torch]"

In [None]:
darts.__version__

### Reproducibility

In [None]:
import random

import numpy as np
import torch

# Single seed shared by every random number generator used in the notebook.
# Change it here (and only here) to re-run the whole analysis with another seed.
SEED = 0

random.seed(SEED)        # Python standard-library RNG
np.random.seed(SEED)     # NumPy legacy global RNG
torch.manual_seed(SEED)  # PyTorch default generator

In [None]:
# Read the M6 asset universe; the first CSV column is used as the frame's index.
df_m6 = pd.read_csv(filepath_or_buffer="M6_Universe.csv", index_col=0)
# Preview the first five rows.
df_m6.head()

In [None]:
# Symbols of the universe split by the value of the "class" column.
stoks = df_m6.loc[df_m6["class"].eq("Stock"), "symbol"].values
etfs = df_m6.loc[df_m6["class"].eq("ETF"), "symbol"].values

In [None]:
# Number of tickers, taken from the head of the universe list, that are downloaded.
SAMPLE_SIZE = 100
# Number of rows by which the rolling-mean forecast is shifted forward before scoring.
FORECAST_HORIZON = 20 #days
# Step passed to `pct_change` when computing returns; the look-back scan further down
# also runs from 1*PERIODS to 8*PERIODS.
PERIODS = 20

In [None]:
%%time 

from tqdm.notebook import tqdm
from utils import get_ticker_historical_data
import pandas_datareader as pdr

directory = './tickers'
save = False  # set to True to also dump every downloaded frame to `directory` as CSV

# exist_ok=True replaces the separate exists() check and its check-then-create race.
os.makedirs(directory, exist_ok=True)

tickers = df_m6["symbol"].to_list()
tickers_data = dict()
from_date = pd.to_datetime("2020-01-01")

# End of the download window: today's date on the New York calendar, at midnight.
# pandas Timestamps are immutable, so tz conversion / truncation return NEW objects
# that have to be assigned; calling them as bare statements has no effect.
# The time zone is dropped at the end so `to_date` stays comparable with the
# tz-naive `from_date`.
to_date = pd.Timestamp.now(tz='America/New_York').normalize().tz_localize(None)

#to_date = pd.to_datetime("2022-02-06")
interval = '1d'

for ticker in tqdm(tickers[:SAMPLE_SIZE]):
    # Alternative loader from the local `utils` module, kept for reference:
#     data = get_ticker_historical_data(ticker=ticker,
#                                       from_date=from_date,
#                                       to_date=to_date,
#                                       interval=interval
#                                       )
    # This returns a data frame of scraped stock data from yahoo
    data = pdr.DataReader(ticker, 'yahoo', from_date, to_date)
    tickers_data[ticker] = data
    if save:
        data.reset_index().to_csv(os.path.join(directory, f'{ticker}_{interval}.csv'))

In [None]:
def calculate_pct_returns(x: pd.Series, periods: int) -> pd.Series:
    """Return the gross change factor of ``x`` over ``periods`` steps.

    The result is ``1 + x.pct_change(periods)``, i.e. ``1.05`` for a 5% rise, not
    ``0.05``. The first ``periods`` entries are NaN.
    """
    relative_change = x.pct_change(periods=periods)
    return relative_change + 1

def calculate_cum_pct_returns(x: pd.Series, periods: int) -> pd.Series:
    """Compound the ``periods``-step change factors of ``x`` and express them in percent.

    Each factor is ``1 + x.pct_change(periods)``; the running product minus one is
    multiplied by 100.

    Note: for ``periods > 1`` the compounded factors come from overlapping windows, so
    the result is not the total change since the first observation.
    """
    growth = x.pct_change(periods=periods) + 1
    compounded = growth.cumprod()
    return (compounded - 1) * 100

def calculate_cum_log_returns(x: pd.Series, periods: int) -> pd.Series:
    """Running sum of the ``periods``-step log change factors of ``x``.

    Each term is ``log(1 + x.pct_change(periods))``.

    Note: for ``periods > 1`` the summed terms come from overlapping windows, so the
    result is not the log change since the first observation.
    """
    growth = x.pct_change(periods=periods) + 1
    return np.log(growth).cumsum()

def calculate_log_returns(x: pd.Series, periods: int) -> pd.Series:
    """Log of the ``periods``-step change factor of ``x``.

    Computed as ``log(1 + x.pct_change(periods))``; the first ``periods`` entries are NaN.
    """
    growth = x.pct_change(periods=periods) + 1
    return np.log(growth)

# 'Adj Close' series of every downloaded ticker, one column per ticker.
df = pd.DataFrame(
    {symbol: history['Adj Close'] for symbol, history in tickers_data.items()}
)

# Column-wise return transforms, all using the same PERIODS step.
df_stock_cum_log_returns = df.apply(calculate_cum_log_returns, periods=PERIODS)
df_stock_cum_prt_returns = df.apply(calculate_cum_pct_returns, periods=PERIODS)
df_stock_log_returns = df.apply(calculate_log_returns, periods=PERIODS)
df_stock_prc_returns = df.apply(calculate_pct_returns, periods=PERIODS)

#### Predicting Ranks

In [None]:
labels = ['Rank 1','Rank 2','Rank 3','Rank 4','Rank 5']

# Quintile of every asset on every date, 1 = lowest return ... 5 = highest return.
# The rank is taken ACROSS assets within each row (axis=1), so each date's assets are
# split into len(labels) equally sized buckets. With n assets, rank r (1..n) maps to
# bucket (r - 1) * 5 // n + 1; ties use pandas' default average rank.
# Rows with any missing return are dropped first so every date ranks the same assets.
df_stock_returns_quantiles = (
    df_stock_prc_returns
    .dropna()
    .rank(axis=1, ascending=True)
    .pipe(lambda ranks: (ranks - 1) * len(labels) // ranks.shape[1] + 1)
    .clip(upper=len(labels))
    .astype(int)
)
df_stock_returns = df_stock_returns_quantiles.copy()

In [None]:
from metrics import portfolio_rps

# Mean RPS of every scanned look-back window, in scan order (the name is historical).
min_values = list()
test_start = '2022-01-01'

# One-hot outcome frame per asset. It does not depend on the look-back window, so it
# is built once. reindex() guarantees one column per rank even if an asset never
# landed in some rank, which the MultiIndex assignment below relies on.
df_rank_true = [
    pd.get_dummies(df_stock_returns[column]).reindex(
        columns=list(range(1, len(labels) + 1)), fill_value=0
    )
    for column in df_stock_returns.columns
]
rank_columns = pd.MultiIndex.from_product([df_stock_returns.columns, labels])

df_true = pd.concat(df_rank_true, axis=1).loc[test_start:, :]
df_true.columns = rank_columns

for period in range(1*PERIODS, 8*PERIODS+1):
    # Forecast = share of each rank over the trailing window, shifted forward.
    # NOTE(review): the window is `period` calendar days (offset string) while the
    # shift is FORECAST_HORIZON rows — confirm the mixed units are intended.
    df_rank_predicts = [
        one_hot.rolling(f'{period}D').mean().shift(FORECAST_HORIZON)
        for one_hot in df_rank_true
    ]

    df_pred = pd.concat(df_rank_predicts, axis=1).dropna().loc[test_start:, :]
    df_pred.columns = rank_columns

    rps_df = pd.DataFrame(columns=['RPS'], data=np.nan, index=df_pred.index)
    # Iterate over the index only: binding the row to a name such as `df` would
    # overwrite the notebook-level price frame of the same name.
    # `probs` / `trues` stay loop-level names because a later cell reads `probs`.
    for idx in df_pred.index:
        probs = df_pred.loc[idx, :].unstack().values
        trues = df_true.loc[idx, :].unstack().values
        rps_df.loc[idx, 'RPS'] = portfolio_rps(probs=probs, outcome=trues)
    mean_value = rps_df['RPS'].mean()#.round(4)
    min_values.append(mean_value)
    print(f"RPS {period} from {rps_df.index.date[0]} to {rps_df.index.date[-1]} is {mean_value}")

In [None]:
# Mean RPS indexed by the look-back period it was computed for. The scan above starts
# at 1*PERIODS and advances by one, so position i corresponds to period PERIODS + i.
rps_by_period = pd.Series(
    min_values,
    index=range(PERIODS, PERIODS + len(min_values)),
    name='mean RPS',
)
ax = rps_by_period.plot()
ax.set(xlabel='look-back period', ylabel='mean RPS')

# Zero-based position of the best (lowest) score within `min_values`.
indx = rps_by_period.reset_index(drop=True).idxmin()
# Print the scalar minimum rather than the repr of a whole pandas object.
print(f"Min index: {indx} (period {PERIODS + indx}), min value: {rps_by_period.min()}")
plt.show()

In [None]:
from metrics import portfolio_rps

# Mean RPS of every scanned lag, in scan order (the name is historical).
min_values = list()
test_start = '2022-01-01'

# One-hot outcome frame per asset. It does not depend on the lag, so it is built
# once. reindex() guarantees one column per rank even if an asset never landed in
# some rank, which the MultiIndex assignment below relies on.
df_rank_true = [
    pd.get_dummies(df_stock_returns[column]).reindex(
        columns=list(range(1, len(labels) + 1)), fill_value=0
    )
    for column in df_stock_returns.columns
]
rank_columns = pd.MultiIndex.from_product([df_stock_returns.columns, labels])

df_true = pd.concat(df_rank_true, axis=1).loc[test_start:, :]
df_true.columns = rank_columns

for period in range(1*PERIODS, 8*PERIODS+1):
    # Forecast = the one-hot outcome observed `period` rows earlier.
    df_rank_predicts = [one_hot.shift(period) for one_hot in df_rank_true]

    df_pred = pd.concat(df_rank_predicts, axis=1).dropna().loc[test_start:, :]
    df_pred.columns = rank_columns

    rps_df = pd.DataFrame(columns=['RPS'], data=np.nan, index=df_pred.index)
    # Iterate over the index only: binding the row to a name such as `df` would
    # overwrite the notebook-level price frame of the same name.
    # `probs` / `trues` stay loop-level names because a later cell reads `probs`.
    for idx in df_pred.index:
        probs = df_pred.loc[idx, :].unstack().values
        trues = df_true.loc[idx, :].unstack().values
        rps_df.loc[idx, 'RPS'] = portfolio_rps(probs=probs, outcome=trues)
    mean_value = rps_df['RPS'].mean()#.round(4)
    min_values.append(mean_value)
    print(f"RPS {period} from {rps_df.index.date[0]} to {rps_df.index.date[-1]} is {mean_value}")

In [None]:
# Mean RPS indexed by the lag it was computed for. The scan above starts at 1*PERIODS
# and advances by one, so position i corresponds to lag PERIODS + i.
rps_by_period = pd.Series(
    min_values,
    index=range(PERIODS, PERIODS + len(min_values)),
    name='mean RPS',
)
ax = rps_by_period.plot()
ax.set(xlabel='lag (rows)', ylabel='mean RPS')

# Zero-based position of the best (lowest) score within `min_values`.
indx = rps_by_period.reset_index(drop=True).idxmin()
# Print the scalar minimum rather than the repr of a whole pandas object.
print(f"Min index: {indx} (period {PERIODS + indx}), min value: {rps_by_period.min()}")
plt.show()

In [None]:
# bins = list(i-0.5 for i in range(1,7))
# df = backtests[2].pd_dataframe()
# df['cuts'] = pd.cut(df.values.reshape(-1), bins=bins, labels=labels, retbins=False)
# df = pd.get_dummies(df['cuts']).rolling(f'{forecast_horizon}D').mean()

In [None]:
# for ticket in df_stock_returns_quantiles.columns:
#     df_stock_returns_quantiles[[ticket]].plot()#(kind='hist')
#     plt.show()

In [None]:
# !pip install joypy
import joypy

# `probs` is the array left behind by the last iteration of the RPS loop above (one
# row per asset, one column per rank). joyplot selects `column=labels` by name, so the
# values are wrapped in a frame carrying those column names first.
probs_df = pd.DataFrame(probs, columns=labels)

# Draw Plot
# joyplot builds its own figure from `figsize`; no separate plt.figure() call is made
# beforehand because it would only render as an additional empty figure.
fig, axes = joypy.joyplot(probs_df, column=labels, ylim='own', figsize=(6,4))

# Decoration
plt.title('Joy Plot', fontsize=22)
plt.show()