In [None]:
# 1. magic for inline plot
# 2. magic to print version
# 3. magic so that the notebook will reload external python modules
# 4. magic to enable retina (high resolution) plots
# https://gist.github.com/minrk/3301035
# %matplotlib inline
%reload_ext watermark
%autoreload 2
%config InlineBackend.figure_format='retina'

In [None]:
# Print environment/version information through the watermark extension loaded above.
%watermark

### Library imports

In [None]:
import os

import darts
import pandas as pd
import numpy as np 
from datetime import datetime
import numpy as np

import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# pio.renderers.default = "notebook"
pio.templates.default = "simple_white"

import plotly.offline
import cufflinks as cf
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

import warnings
warnings.filterwarnings('ignore')

### Reproducibility

In [None]:
# Seed Python's built-in RNG so anything based on `random` is repeatable.
import random
random.seed(0)

# Seed NumPy's legacy global RNG, which the `np.random.*` calls in later cells draw from.
import numpy as np
np.random.seed(0)

In [None]:
# NOTE(review): hard-coded absolute path to one developer's checkout - the notebook
# only runs on a machine that has this directory. Later cells resolve "data" and
# "./tickers" relative to the working directory set here.
cd /Users/alekseimashlakov/ProgrammingProjects/time_series_forecasting/M6 competition/

In [None]:
from pathlib import Path
import pandas as pd

# The M6 asset universe lives under <working dir>/data/template.
data_path = Path.cwd().joinpath("data")
universe_csv = data_path / "template" / "M6_Universe.csv"
df_m6 = pd.read_csv(universe_csv, index_col=0)
# df_m6 = pd.read_csv("M6_Universe.csv", index_col=0)
df_m6.head()

In [None]:
# NOTE(review): OneHotEncoder is imported here but not used in the visible cells.
from sklearn.preprocessing import OneHotEncoder

# Number of assets per GICS sector / ETF type.
df_m6['GICS_sector/ETF_type'].value_counts()

In [None]:
# Split the universe by asset class using explicit boolean masks.
is_stock = df_m6["class"].eq("Stock")
is_etf = df_m6["class"].eq("ETF")
df_m6_stocks = df_m6.loc[is_stock]
df_m6_etf = df_m6.loc[is_etf]

In [None]:
# %%time 

from tqdm.notebook import tqdm
from src.io import get_ticker_historical_data

directory = './tickers'
save = False

# Create the output folder only when files are actually going to be written.
if save:
    os.makedirs(directory, exist_ok=True)

# Facebook now trades as META. Replace the exact symbol only:
# `.str.replace("FB", ...)` would also rewrite any ticker that merely contains "FB".
tickers = df_m6["symbol"].replace({"FB": "META"}).to_list()

tickers_data = dict()
from_date = pd.to_datetime("2010-01-01")

# Timestamps are immutable: `tz_localize`, `tz_convert` and `replace` return new
# objects, so their results have to be assigned. Take "now" directly in New York
# time (independent of the machine's own timezone), truncate to midnight, and drop
# the timezone so the helper still receives a naive timestamp as before.
to_date = pd.Timestamp.now(tz='America/New_York').normalize().tz_localize(None)

# to_date = pd.to_datetime("2022-01-30")
interval = '1d'

for ticker in tqdm(tickers[:]):
    try:
        data = get_ticker_historical_data(ticker=ticker,
                                          from_date=from_date,
                                          to_date=to_date,
                                          interval=interval
                                          )
        tickers_data[ticker] = data
        if save:
            data.reset_index().to_csv(os.path.join(directory, f'{ticker}_{interval}.csv'))
    except Exception as exc:
        # Best effort: keep going with the other tickers, but report why this one
        # failed. `Exception` (not a bare except) lets Ctrl-C interrupt the loop.
        print(f"Error with {ticker}: {exc!r}")
        continue

In [None]:
# iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip')
# iris

In [None]:
# pip install -U ppscore

In [None]:
# Default horizon (in rows) for the per-period return helpers below.
periods = 20

def calculate_pct_returns(x: pd.Series, periods: int = periods) -> pd.Series:
    """Gross simple return over `periods` rows: 1 + pct_change (1.0 means unchanged).

    The first `periods` values are NaN because there is no earlier price to compare with.
    """
    return 1 + x.pct_change(periods=periods)

def calculate_cum_pct_returns(x: pd.Series) -> pd.Series:
    """Cumulative simple return in percent since the first valid price.

    Compounds consecutive one-row returns. Compounding `periods`-row returns
    row by row would multiply overlapping windows together and overstate the
    cumulative return roughly `periods`-fold.
    """
    return ((1 + x.pct_change()).cumprod() - 1) * 100

def calculate_cum_log_returns(x: pd.Series) -> pd.Series:
    """Cumulative log return since the first valid price (sum of one-row log returns)."""
    return np.log(1 + x.pct_change()).cumsum()

def calculate_log_returns(x: pd.Series, periods: int = periods) -> pd.Series:
    """Log return over `periods` rows: log(1 + pct_change)."""
    return np.log(calculate_pct_returns(x, periods=periods))


# One adjusted-close column per downloaded ticker.
adj_close_by_ticker = {ticker: frame['Adj Close'] for ticker, frame in tickers_data.items()}
df = pd.DataFrame.from_dict(adj_close_by_ticker)

# Column-wise return tables (DataFrame.apply works per column by default).
df_stock_cum_log_returns = df.apply(calculate_cum_log_returns)
df_stock_cum_prt_returns = df.apply(calculate_cum_pct_returns)
df_stock_log_returns = df.apply(calculate_log_returns)
df_stock_prc_returns = df.apply(calculate_pct_returns)

In [None]:
# NOTE: this binds a second name to the same DataFrame (no copy), so in-place
# edits to `df_stock_prc_returns` are visible through `df_stock_returns` too.
df_stock_returns = df_stock_prc_returns

In [None]:
import matplotlib.pyplot as plt

# One histogram of gross returns per ticker. Each figure is titled so it can be
# told apart from the others, and closed after display so figures do not pile
# up in memory.
for key in tickers_data.keys():
    hist_fig, hist_ax = plt.subplots()
    df_stock_prc_returns[key].plot(kind='hist', bins=100, ax=hist_ax)
    hist_ax.set_xlim([0.5, 1.5])
    hist_ax.set_title(f"{key}: distribution of gross returns")
    hist_ax.set_xlabel("Gross return (1 + pct change)")
    plt.show()
    plt.close(hist_fig)

In [None]:
# automatically box-cox transform a time series
from scipy.stats import boxcox

# The leading rows of the returns are NaN (no earlier price to compare with) and
# would poison the lambda fit, so drop them first. The result is stored in its
# own Series instead of overwriting the raw column, which keeps the returns
# table intact and makes this cell safe to re-run.
abbv_returns = df_stock_prc_returns['ABBV'].dropna()
abbv_boxcox_values, lambda_val = boxcox(abbv_returns.to_numpy())
df_stock_prc_returns_boxcox = pd.Series(abbv_boxcox_values, index=abbv_returns.index, name='ABBV')
#scipy.special.inv_boxcox(y, lmbda)
print(f'best lambda: {lambda_val}')

In [None]:
# plt.plot(df_stock_prc_returns_boxcox)
# plt.plot(df_stock_prc_returns['CARR'].dropna().values-1)

In [None]:
# Independent copy: from here on, edits to one of the two frames no longer affect the other.
df_stock_returns = df_stock_prc_returns.copy()

In [None]:
from numpy import polyfit

def fit(X, y, degree=3):
    """Least-squares polynomial of the given degree through (X, y), evaluated back at X."""
    coef = polyfit(X, y, degree)
    trendpoly = np.poly1d(coef)
    return trendpoly(X)

def get_season(s, yearly_periods=4, degree=3):
    """Polynomial fit of `s` against its position inside a repeating cycle.

    The cycle length is 365 / `yearly_periods` rows (previously the divisor was
    hard-coded to 4 and the argument was ignored). Returns a Series on `s.index`.
    NOTE(review): rows are counted, not calendar days - confirm 365 is the
    intended year length for this index.
    """
    cycle_length = 365 / yearly_periods
    X = [i % cycle_length for i in range(len(s))]
    seasonal = fit(X, s.values, degree)
    return pd.Series(data=seasonal, index=s.index)

def get_trend(s, degree=3):
    """Polynomial fit of `s` against its row position; returns a Series on `s.index`."""
    X = list(range(len(s)))
    trend = fit(X, s.values, degree)
    return pd.Series(data=trend, index=s.index)

import seaborn as sns

stock_ticket = "ABBV"

# Work on a NaN-free, single-column copy of the chosen asset.
stock = df_stock_returns[[stock_ticket]].dropna().copy()

# Linear trend first, then a seasonal fit on what the trend leaves behind.
stock['trend'] = get_trend(stock[stock_ticket], degree=1)
detrended = stock[stock_ticket] - stock['trend']
stock['season'] = get_season(detrended, yearly_periods=12)
stock['residuals'] = detrended - stock['season']

plt.figure(figsize=(12, 6))
sns.lineplot(data=stock[[stock_ticket, 'season', 'trend', 'residuals']])
plt.ylabel('Stock change');

# scaled_series[0].pd_dataframe()}

# scaled_series[0].pd_dataframe()}

In [None]:
import statsmodels.api as sm

# Partial autocorrelation (top) and autocorrelation (bottom) of the ABBV series.
fig, axs = plt.subplots(2)
sm.graphics.tsa.plot_pacf(stock['ABBV'], lags=50, ax=axs[0])
sm.graphics.tsa.plot_acf(stock['ABBV'], lags=50, ax=axs[1])
# Lay out after drawing, so the spacing accounts for the titles the two plot
# calls add. Ending on this call also avoids echoing the returned Figure, which
# would render the plot a second time.
fig.tight_layout()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
# Additive decomposition of the NaN-free ABBV returns with a 52-row seasonal period.
result = seasonal_decompose(df_stock_prc_returns[['ABBV']].dropna(), model='additive', period=52)
# Draw the decomposition with the result object's built-in plot method.
result.plot()

In [None]:
# 100 draws from a normal distribution with mean 50 and standard deviation 5.
gauss_data = 5 * np.random.randn(100) + 50
gauss_data

In [None]:
import itertools
import matplotlib.pyplot as plt
from plotly.express.colors import sample_colorscale
from plotly.subplots import make_subplots
from statsmodels.graphics.gofplots import qqplot

# Normal Q-Q plot of the most recent 100 rows, one colour per asset.
df = df_stock_returns.iloc[-100:]

x = np.linspace(0, 1, len(df.columns))
c = sample_colorscale('rainbow', list(x), colortype='rgb')


fig = go.Figure()

for i, column in enumerate(df.columns.to_list()):
    # NaNs would turn the fitted reference line into NaN; skip assets with no data.
    series = df[column].dropna()
    if series.empty:
        continue

    # statsmodels draws onto a new matplotlib figure; only its line data is
    # needed, so the figure is closed again below instead of leaving one open
    # (and rendered) per asset.
    mpl_fig = qqplot(series, line='s')
    sample_points, reference_line = mpl_fig.gca().lines[:2]

    fig.add_trace({
        'type': 'scatter',
        'x': sample_points.get_xdata(),
        'y': sample_points.get_ydata(),
        'mode': 'markers',
        'marker': {
            'color': c[i]
        },
        'legendgroup': column,
        'name': column,
        'showlegend': True
    })

    fig.add_trace({
        'type': 'scatter',
        'x': reference_line.get_xdata(),
        'y': reference_line.get_ydata(),
        'mode': 'lines',
        'line': {
            'color': c[i]
        },
        'legendgroup': column,
        'name': column,
        'showlegend': False
    })

    plt.close(mpl_fig)

fig.update_layout({
    'title': 'Quantile-Quantile Plot',
    'xaxis': {
        'title': 'Theoretical Quantiles',
        'zeroline': False
    },
    'yaxis': {
        'title': 'Sample Quantiles'
    },
    'showlegend': False,
    'width': 800,
    'height': 700,
})

# Standard Plotly rendering, as used by the other figures in this notebook.
fig.show()

In [None]:
import matplotlib.pyplot as plt
from statsmodels.graphics.gofplots import qqplot

# Reference example: Q-Q plot of a synthetic normal sample (mean 50, std 5).
gauss_data = 5 * np.random.randn(100) + 50

# statsmodels draws onto a matplotlib figure; copy out the line data and close
# the figure so it is not rendered next to the Plotly version.
mpl_fig = qqplot(gauss_data, line='s')
sample_points, reference_line = mpl_fig.gca().lines[:2]
qq_x, qq_y = sample_points.get_xdata(), sample_points.get_ydata()
ref_x, ref_y = reference_line.get_xdata(), reference_line.get_ydata()
plt.close(mpl_fig)

fig = go.Figure()

fig.add_trace({
    'type': 'scatter',
    'x': qq_x,
    'y': qq_y,
    'mode': 'markers',
    'marker': {
        'color': '#19d3f3'
    }
})

fig.add_trace({
    'type': 'scatter',
    'x': ref_x,
    'y': ref_y,
    'mode': 'lines',
    'line': {
        'color': '#636efa'
    }
})

fig.update_layout({
    'title': 'Quantile-Quantile Plot',
    'xaxis': {
        'title': 'Theoretical Quantiles',
        'zeroline': False
    },
    'yaxis': {
        'title': 'Sample Quantiles'
    },
    'showlegend': False,
    'width': 800,
    'height': 700,
})

# Standard Plotly rendering, as used by the other figures in this notebook.
fig.show()

In [None]:
import plotly.graph_objects as go
import pandas as pd

# Scatter-plot-matrix example on the public iris dataset (downloaded on every run).
# NOTE: this rebinds the notebook-wide name `df` to the iris table.
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/iris-data.csv')
# Integer code per species, used below as the marker colour.
index_vals = df['class'].astype('category').cat.codes

fig = go.Figure(data=go.Splom(
                dimensions=[dict(label='sepal length',
                                 values=df['sepal length']),
                            dict(label='sepal width',
                                 values=df['sepal width']),
                            dict(label='petal length',
                                 values=df['petal length']),
                            dict(label='petal width',
                                 values=df['petal width'])],
                showupperhalf=False, # hide the panels above the diagonal
                text=df['class'],
                marker=dict(color=index_vals,
                            showscale=False, # colors encode categorical variables
                            line_color='white', line_width=0.5)
                ))


fig.update_layout(
    title='Iris Data set',
    width=600,
    height=600,
)

fig.show()

In [None]:
# conda install stumpy

In [None]:
import stumpy
import numpy as np

window_size = 50  # Approximately, how many data points might be found in a pattern

# Matrix profile of the NaN-free ABBV returns as a 1-D float array.
# (The unused random demo series and the `__main__` guard were dropped: the guard
# is always true in a notebook and the series was never read.)
abbv_returns_1d = df_stock_returns["ABBV"].dropna().to_numpy(dtype=np.float64)
matrix_profile = stumpy.stump(abbv_returns_1d, m=window_size)

In [None]:
from sklearn.preprocessing import FunctionTransformer

# NOTE(review): the usual cyclical encoding is sin/cos(2*pi*x/period). These two
# helpers use pi/2 (a quarter cycle per period) and scale the result by 360.
# The polar plot further down passes `cos_dayofyear` in as an angle, so confirm
# the intent before changing either side.
def sin_transformer(period):
	return FunctionTransformer(lambda x: np.sin(x / period * np.pi/ 2) * 360)

def cos_transformer(period):
	return FunctionTransformer(lambda x: np.cos(x / period * np.pi / 2) * 360)

# Calendar features are derived from the index of the NaN-free returns table.
test_df = df_stock_prc_returns.dropna().copy()
test_df["dayofweek"] = test_df.index.dayofweek
test_df["dayofyear"] = test_df.index.dayofyear
test_df["month"] = test_df.index.month

test_df["sin_dayofyear"] = sin_transformer(365).fit_transform(test_df["dayofyear"])
test_df["cos_dayofyear"] = cos_transformer(365).fit_transform(test_df["dayofyear"])
test_df["sin_month"] = sin_transformer(12).fit_transform(test_df["month"])
test_df["cos_month"] = cos_transformer(12).fit_transform(test_df["month"])
# Keep only the rows dated 2022.
test_df = test_df[test_df.index.year == 2022]

In [None]:
# Cross-sectional mean of the gross returns per date (rows containing any NaN are dropped first).
df_stock_prc_returns.dropna().mean(axis=1).plot()

In [None]:
# Interactive line chart with one line per asset (rows containing any NaN are dropped first).
px.line(df_stock_prc_returns.dropna())

In [None]:
import plotly.graph_objects as go

import pandas as pd

# Polar view of the 2022 returns: radius = gross return, angle = position in the year.
# The angle is a linear map of day-of-year onto 0-360 degrees. Using the cosine
# feature as the angle squeezed the first half of the year into roughly a
# quarter of the circle.
day_angle = test_df["dayofyear"] / 365 * 360

fig = go.Figure()
for column in df_stock_prc_returns.columns.to_list()[::-1]:
    fig.add_trace(go.Scatterpolar(
            r = test_df[column],
            theta = day_angle,
            mode = 'lines',
            name = column,
    ))

fig.update_layout(
    title = 'Stocks in 2022',
    showlegend = True
)

fig.show()

In [None]:
import plotly.graph_objects as go

#df_stock_returns.transform(lambda x: (np.exp(x)-1)*100).iplot()

# Heatmap of the returns table: one column per asset, one row per date.
fig = go.Figure(data=go.Heatmap(
        z=df_stock_prc_returns,
        y=df_stock_prc_returns.index,
        x=df_stock_prc_returns.columns,
        colorscale='Viridis'))

# The table holds gross simple returns (1 + pct change), not log returns.
fig.update_layout(
    title='Gross returns (1 + pct change) per stock/ETF',
    xaxis_nticks=100)

fig.show()

In [None]:
# 3-D surface of the returns table; values of 2.0 or more are masked to NaN by the
# boolean indexing, which removes those points from the surface.
fig = go.Figure(data=[go.Surface(z=df_stock_returns[df_stock_returns<2.0].values)])

fig.update_layout(title='3d surface', autosize=False,
                  width=500, height=500,
                  margin=dict(l=65, r=50, b=65, t=90),
                  scene = dict(
                    xaxis_title='Asset',
                    yaxis_title='Time index',
                    zaxis_title='Returns',
                    # yaxis = dict(
                    #     nticks=4,
                    #     ticktext= df_stock_returns.columns.to_list(),
                    #     tickvals= list(range(100))/100.0)
                        ),
)

fig.show()

In [None]:
# `df_stock_returns` holds gross simple returns (1 + pct change), so the percent
# change is (value - 1) * 100. The exp(x) - 1 conversion only applies to log returns.
((df_stock_returns - 1) * 100).iplot()

In [None]:
def calculate_na_per_column(df: pd.DataFrame) -> pd.Series:
    """Return the percentage (0-100) of missing values in each column of `df`.

    The result is a Series indexed by column name. The mean of the boolean NaN
    mask is the missing fraction, which replaces the former
    "100 - percent present" arithmetic.
    """
    return df.isna().mean() * 100

# Percentage of missing values per asset, reused by the missing-values figure in the next cell.
missing = calculate_na_per_column(df=df_stock_returns)

In [None]:
import plotly.express as px

from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Top panel: missing-value mask (1 = missing) per date and asset.
# Bottom panel: percentage of missing values per asset.
fig = make_subplots(rows=2, cols=1, row_heights=[0.8, 0.2], vertical_spacing=0.05, shared_xaxes=True)

# Both traces share one colour axis. Once a trace references a colour axis, the
# colorscale and colorbar come from the layout, so they are configured once in
# `update_layout` below instead of per trace (where they were overridden).
fig.add_trace(
    go.Heatmap(z=df_stock_returns.isna().values*1,
               x=df_stock_returns.columns,
               coloraxis="coloraxis"),
    row=1, col=1
)

fig.add_trace(
    go.Bar(x=missing.index, y=missing.values,
           marker={'color': missing.values/100.0,
                   'coloraxis': "coloraxis"}),
    secondary_y=False,
    row=2, col=1,
)

fig.update_layout(height=600, width=800, title_text="Missing values")
fig.update_yaxes(range=[0, 101], showline=False, showgrid=True, row=2, col=1)
fig.update_xaxes(tickangle=-90, showline=True, showgrid=True, row=2, col=1)
fig.update_layout(coloraxis=dict(
    colorscale="Sunset",
    # Fixed 0-1 range so the mask and the missing fractions use the same scale.
    cmin=0.0,
    cmax=1.0,
    colorbar=dict(
        y=.55,
        len=.9,
        thickness=20.0,
        # `titleside` is deprecated and removed in newer plotly; `title.side` replaces it.
        title=dict(side="top"),
        tickmode="array",
        tickvals=[0.0, 1.0],
        ticktext=["Present (%)", "Missing (%)"],
        ticks="outside"
    )
))
fig.show()

In [None]:
import plotly.graph_objects as go
from plotly.colors import n_colors
import numpy as np
np.random.seed(1)

# 12 sets of normal distributed random data, with increasing mean and standard deviation
# data = (np.linspace(1, 2, 12)[:, np.newaxis] * np.random.randn(12, 200) +
#             (np.arange(12) + 2 * np.random.random(12))[:, np.newaxis])

# One colour per calendar month present in the index.
months = df_stock_returns.index.month.unique().to_list()
colors = n_colors('rgb(5, 200, 200)', 'rgb(200, 10, 10)', len(months), colortype='rgb')
# fig = make_subplots(rows=100, cols=1, vertical_spacing=0.01, shared_xaxes=True)

row_months = df_stock_returns.index.month  # computed once, reused for every mask

fig = go.Figure()
for column in df_stock_returns.columns.to_list()[::-1]:
    # One half-violin per (asset, month); the violins of an asset share its name,
    # so they stack on the same row of the ridgeline.
    for month, month_color in zip(months, colors):
        data_line = df_stock_returns.loc[row_months == month, column].dropna()
        # legendgroup/scalegroup are string-valued attributes, so pass strings.
        fig.add_trace(go.Violin(x=data_line, legendgroup=str(month), scalegroup=str(month),
                                line_color=month_color, name=f'{column}'))

fig.update_traces(orientation='h', side='positive', width=2, points=False)
fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
fig.update_layout(violingap=0, violinmode='overlay')
fig.update_layout(title='Ridgeline/Joy plot of returns per stock/ETF')
fig.show()
# TODO: group by day, month in 2022

In [None]:
# Sorted list of the distinct day-of-week values present in the returns index.
cols = list(np.sort(df_stock_prc_returns.index.dayofweek.unique()))
cols

In [None]:
import plotly.express as px


# Mean and variance of each asset's returns per calendar month, reshaped to one
# row per (ticker, month) with `mean` and `var` columns.
df = df_stock_prc_returns.dropna()
df = (df
.groupby([df.index.month])
.agg({k: ['mean', 'var'] for k in df.columns})
.unstack(1)
.unstack(1)
.reset_index()
# The groups are calendar months (index.month), so label them as such - they
# were previously labelled "Day of week".
.rename(columns={'level_0': 'ticker', 'Date': 'Month'})
)
# Local name that does not shadow `plotly.colors.n_colors` imported earlier in the notebook.
n_months = 12
colors = px.colors.sample_colorscale("plasma", [n/(n_months - 1) for n in range(n_months)])

df['Month'] = df['Month'].astype('category')

fig = px.scatter(df, x="mean", y="ticker", color="Month",
    size="var", size_max=45, log_x=False, color_discrete_sequence = colors)

fig.update_layout(legend=dict(
    orientation="v",
    yanchor="bottom",
    y=0.05,
    xanchor="right",
    x=1.05
),     yaxis_autorange='reversed',
)
fig.update_xaxes(showline=True, linewidth=0.01, linecolor='grey', gridcolor='grey')
fig.update_yaxes(showline=True, linewidth=0.01, linecolor='grey', gridcolor='grey')

fig.show()

In [None]:
# from pandas import DataFrame
# from pandas import Grouper

# groups = df_stock_returns.groupby(Grouper(freq='D'))
# years = DataFrame()
# for name, group in groups:
#     years[name.day] = group.values

# # hours = pd.unique(df['Hour'].values).ravel('K')
# hours = pd.unique(df['Hour'].values).ravel('K')
# days = pd.unique(df.index.dayofyear.values).ravel('K')
# z = years#np.random.poisson(size=(len(hours), len(days)))

# fig = go.Figure(data=go.Heatmap(
#                                 z=z,
#                                 x=days,
#                                 y=hours,
#                                 colorscale='YlGnBu'))

# fig.update_layout(
#     title='Load profile change with EV scenario 9',
#     xaxis_nticks=36,
#     yaxis_nticks=24)
# # py.offline.plot(fig, filename='./figures/HELEN/Suburban/EV2/Helen_suburban__load_profile__EV_9sc2.html')

# fig.show()

In [None]:

df = df_stock_returns

fig = go.Figure()

N = len(df.columns)     # Number of boxes

# generate an array of rainbow colors by fixing the saturation and lightness of the HSL
# representation of colour and marching around the hue.
# Plotly accepts any CSS color format, see e.g. http://www.w3schools.com/cssref/css_colors_legal.asp.
# endpoint=False: hue 0 and hue 360 are the same colour, so including both ends
# would give the first and the last box an identical colour.
c = ['hsl('+str(h)+',50%'+',50%)' for h in np.linspace(0, 360, N, endpoint=False)]


# One box per asset, in column order.
for i, column in enumerate(df.columns[:]):
    fig.add_trace(go.Box(y=df[column], name=column,  marker_color=c[i]))

# format the layout
# fig.update_layout(
#     xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
#     yaxis=dict(zeroline=False, gridcolor='white'),
#     paper_bgcolor='rgb(233,233,233)',
#     plot_bgcolor='rgb(233,233,233)',
# )
fig.show()

In [None]:
from statsmodels.tsa.stattools import acf, pacf
from typing import Callable
from functools import partial

def apply_to_dataframe(df: pd.DataFrame, func: Callable[..., pd.DataFrame], axis: int=0):
    """Run `func` on every column (axis=0) or row (axis=1) of `df`.

    Thin wrapper around `DataFrame.apply` with `result_type='expand'`; returns
    whatever `DataFrame.apply` assembles from the individual results.
    """
    def _invoke(values):
        # Forward each column/row to the user-supplied callable unchanged.
        return func(values)

    return df.apply(_invoke, axis=axis, result_type='expand')

# Autocorrelation per asset up to lag 50 (rows containing any NaN are dropped first).
df_acf = apply_to_dataframe(df_stock_returns.dropna(), func=partial(acf, nlags=50, fft=True, missing="drop"), axis=0)
# Partial autocorrelation per asset up to lag 50, on the same NaN-free rows.
df_pacf = apply_to_dataframe(df_stock_returns.dropna(), func=partial(pacf, nlags=50), axis=0)

In [None]:
# modified from https://community.plotly.com/t/plot-pacf-plot-acf-autocorrelation-plot-and-lag-plot/24108/3

from plotly.express.colors import sample_colorscale
x = np.linspace(0, 1, len(df_stock_returns.columns))
c = sample_colorscale('rainbow', list(x), colortype='rgb')
# 'rgb(r, g, b)' -> 'rgba(r, g, b, 0.05)' for the faint confidence band.
rgb_to_rgba = lambda x: "rgba" + x[3:-1] + ", 0.05)"
names = {'Plot 1':'Autocorrelation (ACF)', 'Plot 2':'Partial Autocorrelation (PACF)'}
fig = make_subplots(2, 1, shared_xaxes=True, shared_yaxes=False, subplot_titles=("Plot 1", "Plot 2",), vertical_spacing=0.05,)

for j, func in zip(range(1,3), [acf, pacf]):
    for i, column in enumerate(df_stock_returns.columns.to_list()):
        series = df_stock_returns[column]
        # With alpha set, both functions return (values, confidence intervals).
        corr_array = func(series.dropna(), alpha=0.05)
        corr_values = corr_array[0]
        lags = np.arange(len(corr_values))
        # Confidence band centred on zero.
        lower_y = corr_array[1][:,0] - corr_values
        upper_y = corr_array[1][:,1] - corr_values
        band_color = rgb_to_rgba(c[i])

        fig.add_scatter(x=lags, y=corr_values, mode='markers', marker_color=c[i],
                        marker_size=12, name=column, row=j, col=1)

        # All stems of one asset in a single trace: each stem is (lag, 0) -> (lag, value)
        # followed by a NaN point, which breaks the line. This replaces one trace
        # per lag per asset, which produced thousands of traces.
        gap = np.full(len(lags), np.nan)
        stem_x = np.column_stack([lags, lags, gap]).ravel()
        stem_y = np.column_stack([np.zeros(len(lags)), corr_values, gap]).ravel()
        fig.add_scatter(x=stem_x, y=stem_y, mode='lines', line_color='#3f3f3f', line_width=0.1,
                        name=column, row=j, col=1)

        # The upper bound must be added immediately before the lower bound: 'tonexty' fills to the previous trace.
        fig.add_scatter(x=lags, y=upper_y, mode='lines', line_color=band_color, name=column, row=j, col=1)
        fig.add_scatter(x=lags, y=lower_y, mode='lines', fillcolor=band_color, name=column,
                        fill='tonexty', line_color=band_color, row=j, col=1)
fig.update_traces(showlegend=False)
fig.update_xaxes(range=[-1,36])
fig.update_yaxes(zerolinecolor='#000000')
fig.update_layout(
    showlegend = False,
    width=800,
    height=800
)
# Replace the placeholder subplot titles with the descriptive names.
fig.for_each_annotation(lambda a: a.update(text = names[a.text]))
fig.show()

In [None]:
names = {'Plot 1':'Autocorrelation (ACF)', 'Plot 2':'Partial Autocorrelation (PACF)'}
fig = make_subplots(2, 1, shared_xaxes=True, shared_yaxes=False,
                    subplot_titles=("Plot 1", "Plot 2",), vertical_spacing=0.1,)

# Rows = assets, columns = lags. Both panels share one colour axis, so the
# colorscale is set on that axis in the layout below. A per-trace colorscale is
# overridden once the trace references a colour axis.
fig.add_trace(
    go.Heatmap(z=df_acf.T,
               y=df_acf.T.index,
               x=df_acf.T.columns,
               coloraxis="coloraxis"),
    row=1, col=1
)

# The PACF panel takes its labels from the PACF table itself rather than borrowing the ACF table's.
fig.add_trace(
    go.Heatmap(z=df_pacf.T,
               y=df_pacf.T.index,
               x=df_pacf.T.columns,
               coloraxis="coloraxis"),
    row=2, col=1
)

fig.update_layout(
    showlegend = False,
    width=800,
    height=800,
    coloraxis=dict(colorscale="Rainbow"),
)
# Replace the placeholder subplot titles with the descriptive names.
fig.for_each_annotation(lambda a: a.update(text = names[a.text]))

fig.show()

In [None]:
# (debug leftover disabled) `locs` is first assigned in the following cell, so
# evaluating it here raises NameError on Restart & Run All.
# locs

In [None]:
import itertools
from plotly.express.colors import sample_colorscale
from plotly.subplots import make_subplots

# Lag plots (lags 1-9 on a 3x3 grid) for the most recent 20 rows.
df = df_stock_returns.iloc[-20:]

x = np.linspace(0, 1, len(df.columns))
c = sample_colorscale('rainbow', list(x), colortype='rgb')
# (row, col) grid position of each lag, in row-major order.
locs = list(itertools.product(range(1, 4), repeat=2))

fig = make_subplots(3, 3, shared_xaxes=True, shared_yaxes=True,
                    subplot_titles=[f"Lag {i}" for i in range(1,10)],
                    vertical_spacing=0.05, horizontal_spacing=0.05,)

for i, column in enumerate(df.columns.to_list()):
    series = df[column]
    for lag, (grid_row, grid_col) in zip(range(1, 10), locs):
        shifted = series.shift(lag)
        # Only the first panel contributes a legend entry per asset.
        fig.add_scatter(x=shifted.values[lag:], y=series.values[lag:],
                        mode='markers', marker_color=c[i],
                        legendgroup=column,
                        name=column,
                        marker_size=12, row=grid_row, col=grid_col,
                        showlegend=(lag == 1))
fig.update_layout(
    title="Lag plot",
    width=800,
    height=800
)
fig.show()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
# result = seasonal_decompose(df_stock_prc_returns[['ABBV']].dropna(), model='additive', period=52)
# NOTE: rebinds the notebook-wide name `df` to the NaN-free returns table.
df = df_stock_prc_returns.dropna()
# One additive decomposition (52-row period) per asset column; later cells index
# the result by ticker and read the component attributes from each entry.
df_tsa = apply_to_dataframe(df, func=partial(seasonal_decompose, model='additive', period=52), axis=0)


In [None]:
# Peek at one component of one asset. The original expression relied on `attr`,
# which is only assigned by the loop in a later cell (NameError on a fresh run),
# and on `column` left over from earlier loops.
df_tsa["ABBV"].trend.head()

In [None]:
# Call the method: without the parentheses the cell only displays the bound-method object.
df_stock_prc_returns.columns.to_list()

In [None]:
from plotly.subplots import make_subplots
import itertools

# One heatmap per decomposition component: rows = assets, columns = dates.
results = ["observed", "trend", "resid", "seasonal"]

fig = make_subplots(4, 1, horizontal_spacing=0.01, shared_xaxes=True, shared_yaxes=False,
                    subplot_titles=results)

# Reversed column order, computed once and reused for every component.
columns_reversed = df_stock_prc_returns.columns.to_list()[::-1]

# for idx, attr in zip(itertools.product(range(1,3), repeat=2), results):
for idx, attr in zip(list(range(1,5)), results):
    data = pd.concat([getattr(df_tsa[column], attr) for column in columns_reversed], axis=1)
    data.columns = columns_reversed
    fig.add_trace(
        go.Heatmap(
            # Heatmap expects z[row][col] to match y[row] and x[col]. `data` is
            # dates x assets, so transpose it to assets x dates.
            z=data.T.values,
            x=data.index,
            y=data.columns,
            # Name the trace after the component it shows (was a stale `column` global).
            name=attr,
            coloraxis='coloraxis'
    ),
    row=idx, col=1)

fig.update_layout(
    title = 'Seasonal decomposition per stock/ETF',
    showlegend = True,
    width=800,
    height=1000,
    coloraxis=dict(colorscale='Plasma', colorbar_x=1.02, colorbar_thickness=20),
)

fig.show()

In [None]:
# NOTE(review): debug leftover - `attr` is whatever value the loop in the previous
# cell left behind, so this only works after that cell has run.
pd.concat([getattr(df_tsa[column], attr) for column in df_stock_prc_returns.columns.to_list()[::-1]], axis=1)

In [None]:
import plotly.express as px

# Build the plotting table as a new frame. Writing df["Mean"] through an alias
# added a "Mean" column to `df_stock_returns` itself, which then showed up as an
# extra asset in every later cell (correlation, scaling, missing values).
df = (
    df_stock_returns
    .assign(Mean=df_stock_returns.mean(axis=1))
    .reset_index()
)
# Plot every column except the date itself: mixing the datetime column into `y`
# gives Plotly Express wide-form data of different types.
value_columns = [name for name in df.columns if name != "Date"]
fig = px.line(df, x="Date", y=value_columns,
              hover_data={"Date": "|%B %d, %Y"},
              title='Gross returns per stock/ETF with cross-sectional mean')

fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    ))
fig.update_layout(
    xaxis_tickformatstops = [
        # dict(dtickrange=[None, 1000], value="%H:%M:%S.%L ms"),
        # dict(dtickrange=[1000, 60000], value="%H:%M:%S s"),
        # dict(dtickrange=[60000, 3600000], value="%H:%M m"),
        # dict(dtickrange=[3600000, 86400000], value="%H:%M h"),
        # dict(dtickrange=[86400000, 604800000], value="%e. %b d"),
        # dict(dtickrange=[604800000, "M1"], value="%e. %b w"),
        dict(dtickrange=["M1", "M12"], value="%b '%y M"),
        dict(dtickrange=["M12", None], value="%Y Y")
    ]
)
fig.update_yaxes(range = [0.25,1.75])

fig.show()

In [None]:

import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go

# Switch the default Plotly template for every figure created from here on.
pio.templates.default = "plotly_white"


# Pairwise correlation between the columns of the returns table.
corr = df_stock_returns.corr()
# Boolean matrix that is True on and above the diagonal; used to hide those cells.
mask = np.triu(np.ones_like(corr, dtype=bool))

# fig = px.imshow(corr, text_auto=True, aspect="auto", origin='lower')



In [None]:
title = 'Asset Correlation Matrix'
tick_font = dict(family='Helvetica', size=6, color='black')

# Lower triangle only: the cells flagged by `mask` are blanked out.
data = go.Heatmap(
    z=corr.mask(mask),
    x=corr.columns,
    y=corr.columns,
    colorscale=px.colors.diverging.RdBu,
    zmin=-1,
    zmax=1,
)

layout = go.Layout(
    title_text=title,
    title_x=0.5,
    width=600,
    height=600,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    yaxis_autorange='reversed',
)

fig = go.Figure(data=[data], layout=layout)
# Small tick labels so every ticker fits on both axes.
fig.update_layout(xaxis_tickfont=tick_font, yaxis_tickfont=tick_font)
fig.show()

In [None]:
import ppscore as pps
import seaborn as sns
# Score for every (x, y) column pair, pivoted into a square table
# (rows = y, columns = x, values = ppscore).
matrix_df = pps.matrix(df_stock_prc_returns.dropna())[['x', 'y', 'ppscore']].pivot(columns='x', index='y', values='ppscore')
# sns.heatmap(matrix_df, vmin=0, vmax=1, cmap="Blues", linewidths=0.5, annot=True)
# fig = px.imshow(matrix_df, x=matrix_df.index, y=matrix_df.columns, color_continuous_scale='Blues')
# fig.show()
# sns.heatmap(matrix_df, vmin=0, vmax=1, cmap="Blues", linewidths=0.5, annot=True)
# fig = px.imshow(matrix_df, x=matrix_df.index, y=matrix_df.columns, color_continuous_scale='Blues')
# fig.show()

In [None]:
# imshow draws rows on the y-axis and columns on the x-axis, so label x with the
# table's columns (the "x" features) and y with its index (the "y" targets).
# The two were swapped.
fig = px.imshow(matrix_df, x=matrix_df.columns, y=matrix_df.index, color_continuous_scale='Blues')
fig.show()

In [None]:
# Reuse the names from the correlation cells for the PPS table.
# NOTE(review): PPS is directional (score of x -> y need not equal y -> x), so
# masking the upper triangle hides half of the distinct scores - confirm this is intended.
corr = matrix_df
# Boolean matrix that is True on and above the diagonal.
mask = np.triu(np.ones_like(corr, dtype=bool))

In [None]:
# Lower-triangle heatmap of the table currently bound to `corr`; the cells
# flagged by `mask` are blanked out.
data = go.Heatmap(
        z=corr.mask(mask),
        x=corr.columns,
        y=corr.columns,
        colorscale=px.colors.sequential.Blues,
        zmin=0,
        zmax=1,
)

# Title typo fixed ("PPsore").
title = 'Asset PPScore Matrix'

layout = go.Layout(
    title_text=title,
    title_x=0.5,
    width=600,
    height=600,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    yaxis_autorange='reversed'
)

fig=go.Figure(data=[data], layout=layout)
# Small tick labels so every ticker fits on both axes.
fig.update_layout(
   yaxis = dict(
      tickfont=dict(family='Helvetica', size=6, color='black')
      ),
   xaxis = dict(
      tickfont=dict(family='Helvetica', size=6, color='black')
      )
)
fig.show()

In [None]:
import plotly.express as px

# Heatmap of the returns table (rows = assets, columns = dates). The title used
# to read "Autocorrelation plot", which is not what is drawn here.
fig = px.imshow(df_stock_returns.T,
                color_continuous_scale="Cividis_r",
                origin='upper',
                title="Gross returns per stock/ETF",
               #  zmax=1.75,
                range_color=(0.5,1.75)
               )
# Small tick labels so every ticker fits on the y-axis.
fig.update_layout(
   yaxis = dict(
      tickfont=dict(family='Helvetica', size=8, color='black')
      #tickmode = 'linear',
      #   tickmode = 'array',
      #   tickvals = list(range(100)),
      #   ticktext = df_acf.columns.to_list() # ['One', 'Three', 'Five', 'Seven', 'Nine', 'Eleven']
   )
)
fig.show()

### Plot percentage of missing values

In [None]:
# Reuse the helper defined earlier in the notebook instead of repeating its arithmetic inline.
calculate_na_per_column(df=df_stock_returns).iplot(kind='bar')

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every asset column; the scaler returns a bare array, so wrap it
# back into a frame that carries the original index and column labels.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df_stock_returns)
df_stock_returns_scaled = pd.DataFrame(
    scaled_values,
    index=df_stock_returns.index,
    columns=df_stock_returns.columns,
)

In [None]:
# Histogram (200 bins) of the scaled returns, drawn through the DataFrame's `iplot` method.
df_stock_returns_scaled.iplot(kind="hist", bins=200)