In [None]:
# 1. magic for inline plot
# 2. magic to print version
# 3. magic so that the notebook will reload external python modules
# 4. magic to enable retina (high resolution) plots
# https://gist.github.com/minrk/3301035
%matplotlib inline

%load_ext watermark
%autoreload 2
%config InlineBackend.figure_format='retina'

In [None]:
# Print the environment report provided by the watermark extension loaded in the setup cell.
%watermark

In [None]:
import pandas as pd
import numpy as np 
import plotly
import plotly.express as px
import plotly.graph_objects as go

import warnings
# Suppresses every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [None]:
import plotly.offline
import cufflinks as cf
# Switch cufflinks to offline mode; presumably required by the .iplot() calls further down.
cf.go_offline()
# NOTE(review): offline=False looks at odds with go_offline() above — confirm the intended setting.
cf.set_config_file(offline=False, world_readable=True)

In [None]:
# conda install darts
import darts

In [None]:
from pathlib import Path
import pandas as pd

# The data folder is a sibling of the current working directory.
data_path = Path.cwd().parent.joinpath("data")

# Universe definition file; its first column becomes the index.
df_m6 = pd.read_csv(data_path.joinpath("template", "M6_Universe.csv"), index_col=0)
df_m6.head()

In [None]:
%%time 

from tqdm.notebook import tqdm
from datetime import datetime
from src.utils import get_ticker_historical_data
import os

directory = './tickers'
if not os.path.exists(directory):
    os.makedirs(directory)

tickers = df_m6["symbol"].to_list()
tickers_data = dict()
from_date = pd.to_datetime("2000-01-01")

to_date = pd.Timestamp.today()
to_date.tz_localize(tz='Europe/Moscow').tz_convert(tz='America/New_York')
to_date.replace(hour=0, minute=0, second=0, microsecond=0)

# to_date = pd.to_datetime("2022-01-30")
interval = '1d'

for ticker in tqdm(tickers[:]): 
    #print(f"Ticker: {ticker}")
    data = get_ticker_historical_data(ticker=ticker,
                                      from_date=from_date,
                                      to_date=to_date,
                                      interval=interval
                                      )
    tickers_data[ticker] = data
    data.reset_index().to_csv(os.path.join(directory,f'{ticker}_{interval}.csv'))

In [None]:
# !pip install ta -q
from ta.trend import MACD

# Work on a copy of the AVB history, attach the 20- and 50-row moving
# averages of the adjusted close, and drop every row that contains a NaN.
df = (
    tickers_data['AVB']
    .copy()
    .assign(
        MA20=lambda frame: frame['Adj Close'].rolling(window=20).mean(),
        MA50=lambda frame: frame['Adj Close'].rolling(window=50).mean(),
    )
    .dropna(axis=0)
)

# MACD is built from the cleaned adjusted-close series, before df is shortened below.
macd = MACD(
    close=df['Adj Close'],
    window_slow=50,
    window_fast=20,
    window_sign=20,
)

# Keep only the last 500 rows.
df = df.iloc[-500:]

In [None]:
# based on https://python.plainenglish.io/a-simple-guide-to-plotly-for-plotting-financial-chart-54986c996682
# Imported here because no other visible cell brings make_subplots into scope.
from plotly.subplots import make_subplots

# `macd` was computed on a longer history than the rows kept in `df`, so every
# MACD series is aligned to df's index before being drawn against df.index.
# Without this, plotly pairs the dates with the wrong (oldest) MACD values.
macd_line = macd.macd().reindex(df.index)
macd_signal = macd.macd_signal().reindex(df.index)
macd_hist = macd.macd_diff().reindex(df.index)

# Three stacked panels sharing the date axis: price, volume, MACD.
# The figure is created once; a second make_subplots call used to overwrite
# it and silently dropped vertical_spacing and row_heights.
fig = make_subplots(rows=3, cols=1, shared_xaxes=True,
                    vertical_spacing=0.01,
                    row_heights=[0.5, 0.2, 0.2])

# Row 1: OHLC candles plus both moving averages
fig.add_trace(go.Candlestick(x=df.index,
                             open=df['Open'],
                             high=df['High'],
                             low=df['Low'],
                             close=df['Close'],
                             showlegend=False),
              row=1, col=1)

fig.add_trace(go.Scatter(x=df.index,
                         y=df['MA50'],
                         opacity=0.7,
                         line=dict(color='blue', width=2),
                         name='MA 50'),
              row=1, col=1)
fig.add_trace(go.Scatter(x=df.index,
                         y=df['MA20'],
                         opacity=0.7,
                         line=dict(color='orange', width=2),
                         name='MA 20'),
              row=1, col=1)

# Row 2: volume bars, green when the close is at or above the open so the
# colours agree with plotly's default candlestick colouring (increasing = green).
volume_colors = np.where(df['Close'] >= df['Open'], 'green', 'red')
fig.add_trace(go.Bar(x=df.index,
                     y=df['Volume'],
                     marker_color=volume_colors,
                     name='Volume'
                    ), row=2, col=1)

# Row 3: MACD histogram (green for non-negative values), MACD line and signal line
macd_colors = np.where(macd_hist >= 0, 'green', 'red')
fig.add_trace(go.Bar(x=df.index,
                     y=macd_hist,
                     marker_color=macd_colors,
                     opacity=0.7,
                     name='MACD diff'
                    ), row=3, col=1)

fig.add_trace(go.Scatter(x=df.index,
                         y=macd_line,
                         line=dict(color='black', width=2),
                         name='MACD'
                        ), row=3, col=1)

fig.add_trace(go.Scatter(x=df.index,
                         y=macd_signal,
                         line=dict(color='blue', width=1),
                         name='MACD signal'
                        ), row=3, col=1)

# remove rangeslider
fig.update_layout(xaxis_rangeslider_visible=False)

# add chart title (the frame plotted here is built from the AVB ticker)
fig.update_layout(title="AVB")

# fig.update_layout(
#     title="Plot Title",
#     xaxis_title="X Axis Title",
#     yaxis_title="Y Axis Title",
#     legend_title="Legend Title",
#     font=dict(
#         family="Courier New, monospace",
#         size=18,
#         color="RebeccaPurple"
#     )
# )

# removing all empty dates
# build complete timeline from start date to end date
dt_all = pd.date_range(start=df.index[0], end=df.index[-1])
# retrieve the dates that ARE in the original dataset (a set gives O(1) lookups)
dt_obs = set(pd.to_datetime(df.index).strftime("%Y-%m-%d"))
# define dates with missing values
dt_breaks = [d for d in dt_all.strftime("%Y-%m-%d") if d not in dt_obs]
fig.update_xaxes(rangebreaks=[dict(values=dt_breaks)])

# removing white space
fig.update_layout(margin=go.layout.Margin(
        l=40, #left margin
        r=40, #right margin
        b=40, #bottom margin
        t=40  #top margin
    ))

# update y-axis label
fig.update_yaxes(title_text="Price", row=1, col=1)
fig.update_yaxes(title_text="Volume", row=2, col=1)
fig.update_yaxes(title_text="MACD", showgrid=False, row=3, col=1)

fig.show()

In [None]:
# Rebind the working frame to a copy of the AEP history and preview it.
# Everything between the copy and the preview is a disabled feature experiment.
df = tickers_data['AEP'].copy()
# if 'Close' in df.columns:
#     df.drop('Close', axis=1, inplace=True)
# # df.columns = [x.lower() for x in df.columns]
# df['Pct_change'] = df['Adj Close'].pct_change()
# df['Cumsum_Pct_change'] = (df['Pct_change']).cumprod()
# df['Realized_volatility'] = df['Pct_change'].rolling(3).std()
# # df_month = df.copy()

# df_month = df_month.resample('M').agg({'Open': np.mean,
#                                        'High': np.mean, 
#                                        'Close': np.mean,
#                                        'Adj Close': np.mean,
#                                        'Volume':np.mean
#                                        })
# df_month['monthly_returns'] = df_month['Adj Close'].pct_change()
# df_month
df.head()

In [None]:
# import pandas_ta as ta
# df.ta.indicators()
# df.ta.log_return(cumulative=True, append=True)
# df.ta.percent_return(cumulative=True, append=True)
# df.ta.sma(length=50, append=True)
# df.ta.sma(length=20, append=True)
# df.ta.strategy("Momentum") 
# df.ta.strategy(fast=10, slow=50, verbose=True)

In [None]:
# import plotly.express as px

# fig = px.line(df, x=df.index, y="cumsum_pct_change", 
#               title='Cumulative returns', #text='Date'
#              )
# fig.update_traces(textposition="bottom right")
# fig.show()
# df['Pct_change'] = df['Adj Close'].pct_change()
# df['Cumsum_Pct_change'] = (df['Pct_change']).cumprod()
# df['Realized_volatility'] = df['Pct_change'].rolling(3).std()

In [None]:
from strategy import SMAStrategy, EMAStrategy, TestStrategy

# TestStrategy wrapping an SMAStrategy instance, configured with a 50-row
# short window, a 200-row long window and 'Adj Close' as the close column.
sma = TestStrategy(
    close_name='Adj Close',
    long_window=200,
    short_window=50,
    strategy=SMAStrategy(),
)

# Run it on a fresh copy of the AVB history; the result replaces the working frame.
df = sma.run(data=tickers_data['AVB'].copy())

In [None]:
# Show the first row of the frame returned by the strategy run.
df.head(1)

In [None]:
# Cumulative daily return in percent: compound the day-over-day growth
# factors of the adjusted close, rebase to zero and scale by 100.
df['cum_return_pct'] = (
    df['Adj Close']
    .pct_change(periods=1)
    .add(1)
    .cumprod()
    .sub(1)
    .mul(100)
)

fig = px.line(
    df,
    x=df.index,
    y='cum_return_pct',  # color='ticker',
    title='Performance - Daily Cumulative Returns',
    labels={'cum_return_pct': 'daily cumulative returns (%)'},
)
fig.show()

In [None]:
# Histogram of the 'ma_return' column; presumably added to df by the strategy run — TODO confirm.
df['ma_return'].iplot(kind="hist")

In [None]:
# Cumulative daily return in percent, compounded through log returns:
# exp(sum(log(1 + r))) - 1 equals prod(1 + r) - 1.
# The result is scaled by 100 so the values match the "_pct" column name and
# the "(%)" axis label; previously the column held fractions.
df['cum_logg_return_pct'] = np.expm1(
    np.log1p(df['Adj Close'].pct_change(periods=1)).cumsum()
) * 100

# The labels key must name the plotted column, otherwise plotly ignores it
# and the axis shows the raw column name.
fig = px.line(df, x=df.index,
              y='cum_logg_return_pct', #color='ticker',
              title='Performance - Daily Cumulative Returns',
              labels={'cum_logg_return_pct':'daily cumulative returns (%)', })
fig.show()

# log_ret = np.log(1+simple_ret)
#     np.exp(log_ret.cumsum()[-1]) -1

In [None]:
# Cumulative percent return computed on a rolling mean of the adjusted close,
# plotted next to the plain cumulative return (the 'cum_return_pct' column must already exist in df).
# NOTE(review): rolling(1) is a one-row window, so its mean is the price itself and both
# columns coincide — presumably a larger window was intended; confirm.
df['cum_return_pct_roll']=(((1 + df['Adj Close'].rolling(1).mean().pct_change(periods=1)).cumprod() - 1))*100
df[['cum_return_pct_roll','cum_return_pct']].iplot()

In [None]:
from reduce_memory import ReduceMemoryTransformer
from nyse_holidays import NYSECalendar

# Pipeline: 
"""
A. Make regression for 50 tickers (stocks + ETF groups) for 19 days
    - global DL model on all series and datetime covariates 
    - ensemble model per series with financial and datetime covariates 
    - standalone lightgbm model per series with financial and datetime covariates 
    - hyperparameter optimization
B. Make covariance matrix from the forecast residuals
C. Create random forecast variable and sample 100 times
D. Optimize portfolio to minimize risks 
E. Train model on residuals 

""" 


In [None]:

# Toy input: three identical rows of five class probabilities and the
# matching one-hot outcomes (class index 3 is the realised one).
# NOTE(review): `portfolio_rps` and `rps` are not defined or imported in any
# cell visible here — confirm where they come from.
probs = np.array([[0.0, 0.2, 0.3, 0.4, 0.1]] * 3)
outco = np.array([[0, 0, 0, 1, 0]] * 3)
portfolio_rps(probs, outco)

# Same check on a single row.
rps(
    probs=np.array([0.0, 0.2, 0.3, 0.4, 0.1]),
    outcome=np.array([0, 0, 0, 1, 0]),
)

from sklearn.metrics import make_scorer

# Wrap rps as a scikit-learn scorer; greater_is_better=False marks it as a loss.
rps_score = make_scorer(rps, greater_is_better=False)


In [None]:
# Bucket the numeric 'short_result' values into five labelled groups.
# pd.cut uses right-closed intervals by default, e.g. (-300, -11], (-11, -5], ...
bins=[-300, -11, -5, 5, 11, 300]
group_names = ['strong sell', 'sell', 'hold', 'buy', 'strong buy']
# NOTE(review): `stocks` is not defined in any cell visible here — confirm where it is loaded.
# The column is overwritten with its labels, so running this cell a second
# time would pass labels instead of numbers to pd.cut.
stocks['short_result'] = pd.cut(stocks['short_result'], bins=bins, labels=group_names, ordered=False)
stocks['short_result'].unique()
# Quantile-based counterpart on a toy range. The number of quantile bins must
# equal the number of labels: the former four-bin edge list
# [0, .25, .5, .75, 1.] raised ValueError with five labels.
pd.qcut(range(5), q=len(group_names), labels=group_names)

In [None]:
def model_building(df_data, asset_id, test_size=0.25, random_state=24):
    """Train a LightGBM regressor for one asset.

    Parameters
    ----------
    df_data : table with an 'Asset_ID' column; rows for `asset_id` are selected
        and passed to `get_features`, whose output must contain a 'Target' column.
    asset_id : value compared against ``df_data['Asset_ID']``.
    test_size : fraction of rows held out at the END of the frame (the split is
        not shuffled). Defaults to the previously hard-coded 0.25.
    random_state : seed forwarded to the split and to the regressor.
        Defaults to the previously hard-coded 24.

    Returns
    -------
    (X_train, y_train, model) : the standardised training features as returned
        by ``StandardScaler.fit_transform``, the training target, and the fitted
        ``LGBMRegressor``.

    Notes
    -----
    NOTE(review): `get_features`, `reduce_memory_usage`, `train_test_split`,
    `StandardScaler` and `LGBMRegressor` are not imported in the cells visible
    here — confirm they are in scope before this cell runs.
    NOTE(review): the fitted scaler is not returned, so callers cannot apply the
    same standardisation to new data; consider exposing it.
    """
    print(f"Building asset {asset_id}")
    data_set = df_data[df_data['Asset_ID'] == asset_id]
    df = get_features(data_set)
    # Infinite values are treated as missing and every incomplete row is dropped.
    df = df.replace([np.inf, -np.inf], np.nan).dropna(how="any")

    df = reduce_memory_usage(df)

    X = df.drop(['Target'], axis=1)
    y = df["Target"]

    # Ordered split: the model is trained on the leading rows only. The held-out
    # tail was previously scaled but never used, so that dead work is removed.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state, shuffle=False
    )

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)

    model = LGBMRegressor(
        n_estimators=1500,
        num_leaves=700,
        objective="regression",
        metric="rmse",
        boosting_type="gbdt",
        learning_rate=0.01,
        random_state=random_state,
        verbose=0,
        force_col_wise=True,
    )

    model.fit(X_train, y_train)

    return X_train, y_train, model

In [None]:
%%time
Xs = {}
ys = {}
models = {}
print('Training Starting...')

for asset, asset_name in zip(asset_id["Asset_ID"], asset_id["Asset_Name"]):
    X, y, model = model_building(data, asset)
    Xs[asset], ys[asset], models[asset] = X, y, model
print('Training Completed !!!')