In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import plotly.graph_objects as go
from typing import Tuple, Optional, NamedTuple, Any, List
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from pathlib import Path
import common
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
import seaborn

# Plan:

1. add all stocks data together into one dataframe
2. save into csv
3. calculate correlations between pairs of stocks

In [None]:

def get_relevant_data(ticker):
    """Fetch the 'date', 'minute' and 'close' columns for ``ticker``.

    Delegates to ``common.get_stock_data`` and returns its result unchanged.
    Callers in this notebook treat a ``None`` result as "no data for this ticker".
    """
    wanted_columns = ['date', 'minute', 'close']
    return common.get_stock_data(ticker, columns=wanted_columns)
    

def preprocess_data(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``dataframe`` indexed by a combined, sorted timestamp.

    The 'date' ('%Y-%m-%d') and 'minute' ('%H:%M') string columns are joined
    and parsed into a single ``datetime`` value per row. Both source columns
    are dropped, rows are sorted by that timestamp, and the timestamp becomes
    the index (named 'datetime').

    The input frame is NOT modified: the previous implementation added a
    'datetime' column to, and deleted 'date' from, the caller's object before
    rebinding to a sorted copy, leaving the caller with a half-processed frame.

    Args:
        dataframe: frame with string columns 'date' and 'minute'; any other
            columns are carried through untouched.

    Returns:
        A new DataFrame indexed by 'datetime' in ascending order.

    Raises:
        KeyError: if 'date' or 'minute' is missing.
        ValueError: if a date/minute pair does not match the expected format.
    """
    timestamps = pd.to_datetime(
        dataframe['date'] + "," + dataframe['minute'],
        format='%Y-%m-%d,%H:%M',
    )
    return (
        dataframe
        .drop(columns=['date', 'minute'])   # returns a new frame; input untouched
        .assign(datetime=timestamps)
        .sort_values(by=['datetime'])
        .set_index('datetime')
    )



In [None]:
# Ticker symbols are the file stems found in the download directory.
# iterdir() yields entries in arbitrary order, so sort them to make the
# column order of main_df (and of everything derived from it) reproducible.
data_dir = Path('data/relevant/snp500_from_iex')
all_snp_stocks = sorted(i.stem for i in data_dir.iterdir())

# Build one wide frame: one column of close prices per ticker, outer-joined
# on the 'datetime' index so that no timestamp from any ticker is lost.
main_df = None
first_ticker = None  # first ticker that actually had data; it seeds main_df
for ticker in all_snp_stocks:
    df = get_relevant_data(ticker)
    if df is None:
        # No data for this ticker. Every ticker, including the first, is
        # checked so a missing first file cannot crash the cell.
        continue
    df = preprocess_data(df).rename(columns={'close': ticker})
    if main_df is None:
        first_ticker, main_df = ticker, df
    else:
        main_df = main_df.merge(df, on='datetime', how="outer", copy=False)
    print(f"ticker {ticker} added")

if main_df is None:
    raise ValueError(f"no stock data could be loaded from {data_dir}")

main_df

In [None]:
# Pairwise Kendall rank correlation between every pair of columns in main_df
# (one column per ticker, see the merge cell above).
kendall_correlations = main_df.corr(method='kendall')
kendall_correlations

In [None]:
# Write the correlation matrix to disk; the next cell reads this file back.
kendall_correlations.to_csv("kendall_correlations_of_all_stocks.csv")

In [None]:
# Reload the saved correlation matrix from CSV.
kendall_correlations = pd.read_csv("kendall_correlations_of_all_stocks.csv")
# The first CSV column holds the row labels; it is read back under the
# header "Unnamed: 0" (presumably because to_csv wrote the index without a name).
kendall_correlations = kendall_correlations.set_index("Unnamed: 0", drop=True)
# Blank out the index name so the placeholder header does not show up in displays.
kendall_correlations.index = kendall_correlations.index.rename("")
kendall_correlations

In [None]:
# Boolean mask that hides the diagonal and the upper triangle: a correlation
# matrix is symmetric, so those cells only repeat the lower triangle.
# (Previously this was a float array of 0.0/1.0; seaborn expects a bool mask.)
mask = np.triu(np.ones_like(kendall_correlations, dtype=bool))

# generate plot
seaborn.heatmap(kendall_correlations, cmap='RdYlGn', vmax=1.0,
                vmin=-1.0, mask=mask, linewidths=2.5)
plt.title("Kendall correlations between stock close prices")
plt.yticks(rotation=0)
plt.xticks(rotation=90)
plt.show()

In [None]:
import plotly.express as px

# Interactive version of the correlation heatmap.
# The raw ``.values`` array carries no labels, so pass the tickers explicitly;
# otherwise both axes are just numbered 0..N-1 and cells cannot be identified.
fig = px.imshow(
    kendall_correlations.values,
    x=list(kendall_correlations.columns),
    y=list(kendall_correlations.index),
)
fig.show()

In [None]:
# Flatten the matrix into one value per (column, row) pair and rank the pairs
# by absolute correlation, strongest first.
# NOTE: this still contains every ticker paired with itself and both orderings
# of each pair; get_top_abs_correlations below filters those out.
kendall_correlations.abs().unstack().sort_values(ascending = False)

# Abandoned alternative, kept for reference:
# kendall_correlations.corr().unstack().sort_values().drop_duplicates()


In [None]:
def get_redundant_pairs(df):
    """Return the label pairs that duplicate information in a symmetric matrix.

    Produces the set of ``(columns[i], columns[j])`` tuples for every
    ``j <= i``, i.e. each column paired with itself and with every column
    that precedes it.
    """
    labels = df.columns
    return {
        (labels[outer], labels[inner])
        for outer in range(df.shape[1])
        for inner in range(outer + 1)
    }

def get_top_abs_correlations(df, n=5):
    """Return the ``n`` largest absolute entries of correlation matrix ``df``.

    The matrix is flattened to a Series keyed by label pair, the pairs listed
    by ``get_redundant_pairs`` (self-pairs and mirrored duplicates) are
    removed, and the remainder is sorted in descending order.
    """
    redundant = get_redundant_pairs(df)
    ranked = (
        df.abs()
        .unstack()
        .drop(labels=redundant)
        .sort_values(ascending=False)
    )
    return ranked.iloc[:n]


# Show the 50 strongest pairwise correlations by absolute value.
get_top_abs_correlations(kendall_correlations, 50)

In [None]:
# Re-display the merged per-ticker close-price frame for reference.
main_df

In [None]:
# Load, preprocess and plot the GOOG close-price series.
goog_data = preprocess_data(get_relevant_data('GOOG'))
goog_data.plot()

# Same for GOOGL, drawn as a separate figure.
googl_data = preprocess_data(get_relevant_data('GOOGL'))
googl_data.plot()

In [None]:
import plotly.graph_objects as go

# Overlay both close-price series on one interactive figure, one line each.
fig = go.Figure()
for label, frame in (('goog', goog_data), ('googl', googl_data)):
    line = go.Scatter(x=frame.index, y=frame['close'], mode='lines', name=label)
    fig.add_trace(line)

fig.show()

In [None]:
def drow_stocks(*tickers):
    """Draw the close-price series of each ticker as a line on one plotly figure.

    Each ticker is loaded with ``get_relevant_data`` and run through
    ``preprocess_data``; its 'close' column is plotted against the resulting
    datetime index. The figure is shown and nothing is returned.

    Tickers for which ``get_relevant_data`` returns ``None`` are skipped with
    a printed notice (the same None convention the merge loop earlier in the
    notebook relies on) instead of failing inside ``preprocess_data``.

    Args:
        *tickers: ticker symbols to overlay.
    """
    fig = go.Figure()
    for ticker in tickers:
        data = get_relevant_data(ticker)
        if data is None:
            print(f"ticker {ticker} skipped: no data")
            continue
        data = preprocess_data(data)
        fig.add_trace(go.Scatter(y=data['close'], x=data.index,
                    mode='lines',
                    name=ticker))

    fig.show()
    
    
# Overlay the V and MA close-price series on one chart.
drow_stocks('V', 'MA')

In [None]:
# Overlay the GL and AMP close-price series on one chart.
drow_stocks('GL', 'AMP')

In [None]:
# Overlay the DVN and EOG close-price series on one chart.
drow_stocks('DVN', 'EOG')

In [None]:
# Overlay the C and SWK close-price series on one chart.
drow_stocks('C', 'SWK')