Correlation between individual stocks, their sector mean, the S&P 500 and the NASDAQ

In [2]:
import pandas as pd
import numpy as np
import os
from os import path

In [3]:
# Table of symbols to analyse; later cells read its "Symbol" and "GICS Sector" columns.
# Backslashes are escaped explicitly: "\T" and "\s" are not valid escape sequences, so the
# unescaped form only worked by accident and triggers a warning on current Python versions.
symbols = pd.read_csv("D:\\Trading\\sp500_symbols_list.csv")

In [4]:
# Folder holding one "<ticker>.csv" file per symbol (raw string, so backslashes stay literal).
tickers_path = r"D:\Trading\raw_data\tickers"

In [5]:
# One DataFrame per symbol listed in `symbols`, read from "<tickers_path>/<ticker>.csv"
# and indexed by its parsed "Date" column.
tickers_dfs = {}
for ticker in symbols['Symbol']:
    csv_file = path.join(tickers_path, f"{ticker}.csv")
    tickers_dfs[ticker] = pd.read_csv(csv_file, parse_dates=True, index_col="Date")

In [6]:
# Load SP500.csv and NSDQ.csv from the "special" folder, both indexed by their parsed "Date" column.
s_p_df, nsdq_df = (
    pd.read_csv(path.join("D:\\Trading\\raw_data\\special", file_name), parse_dates=True, index_col="Date")
    for file_name in ("SP500.csv", "NSDQ.csv")
)

## Autocorrelation

In [12]:
# Autocorrelation of day-to-day absolute Close changes for lags 1..5.
# autocorrs[lag] is an array with one value per ticker, in the order of symbols['Symbol'].
close_diffs = [tickers_dfs[ticker]["Close"].diff() for ticker in symbols['Symbol']]
autocorrs = {}
for lag in range(1, 6):
    autocorrs[lag] = np.array([close_diff.autocorr(lag=lag) for close_diff in close_diffs])

In [13]:
# Summarise the cross-ticker distribution of the autocorrelation for every lag.
for lag in range(1, 6):
    lag_values = autocorrs[lag]
    print(f"Autocorr lag: {lag}")
    print(f"Median: {np.median(lag_values)}, Mean: {np.mean(lag_values)}, Max: {np.max(lag_values)}, Min: {np.min(lag_values)}")

Autocorr lag: 1
Median: -0.029069693898538057, Mean: -0.032104408449437155, Max: 0.12364255974715051, Min: -0.22241851581963
Autocorr lag: 2
Median: 0.004320297397610031, Mean: 0.005048153943142921, Max: 0.07843800207562823, Min: -0.12259684762300502
Autocorr lag: 3
Median: -0.011848383311203076, Mean: -0.01274977626596836, Max: 0.08162981965074169, Min: -0.24756414903070134
Autocorr lag: 4
Median: -0.018303694941026682, Mean: -0.018532690289495838, Max: 0.21267490090354751, Min: -0.08779306148885804
Autocorr lag: 5
Median: 0.01145428374223368, Mean: 0.011976409881366218, Max: 0.1213809601689234, Min: -0.09732817112007146


In [18]:
# Lag-1 autocorrelation of the sign (+1 / 0 / -1) of the index's daily percentage change.
# The first row is skipped because pct_change() has no previous value there.
sp500_daily_returns = s_p_df["Close"].pct_change().iloc[1:]
np.sign(sp500_daily_returns).autocorr()

-0.05855697050585328

In [28]:
def one_sign_chains(pct_change, min_len=3):
    """Return the lengths of runs of consecutive same-sign changes.

    A "chain" is a maximal run of at least ``min_len`` consecutive values that
    are all strictly positive or all strictly negative.

    Parameters
    ----------
    pct_change : array-like of float
        Sequence of changes (pandas Series, NumPy array, list, ...). Only the
        sign of each value is used; the index of a Series is ignored.
    min_len : int, default 3
        Shortest run that is reported as a chain.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        ``(positive_chains, negative_chains)``: integer arrays holding the
        length of every positive / negative chain, in order of appearance.

    Notes
    -----
    * A chain that is still running at the end of the input is reported too
      (previously it was silently dropped).
    * Runs of zero change and NaN values belong to neither direction and are
      not reported (previously zero runs were counted as negative chains).
    """
    # Work on a plain NumPy array so that element access is positional
    # regardless of the index a pandas Series may carry.
    signs = np.sign(np.asarray(pct_change, dtype=float))

    positive_chains = []
    negative_chains = []

    def _record(run_sign, run_len):
        # Store a finished run if it is long enough and has a direction.
        if run_len < min_len:
            return
        if run_sign > 0:
            positive_chains.append(run_len)
        elif run_sign < 0:
            negative_chains.append(run_len)

    current_sign = None
    current_len = 0
    for sign in signs.tolist():
        # NaN never compares equal, so every NaN starts (and ends) its own run.
        if current_len and sign == current_sign:
            current_len += 1
            continue
        if current_len:
            _record(current_sign, current_len)
        current_sign, current_len = sign, 1

    # Flush the run that reaches the end of the series.
    if current_len:
        _record(current_sign, current_len)

    return np.array(positive_chains, dtype=int), np.array(negative_chains, dtype=int)

In [35]:
# Chain lengths per ticker, in the order of symbols['Symbol'].
# The first pct_change() row is skipped because it has no previous value.
chains_per_ticker = [
    one_sign_chains(tickers_dfs[ticker]["Close"].pct_change().iloc[1:])
    for ticker in symbols['Symbol']
]
pos_lens = [positive for positive, _ in chains_per_ticker]
neg_lens = [negative for _, negative in chains_per_ticker]

In [47]:
# Share of the first ticker's positive chains whose length is exactly 3.
t = pos_lens[0]
is_length_three = t == 3
is_length_three.mean()

0.5252525252525253

In [40]:
# Cross-ticker summary of positive chains: per-ticker median and 80th percentile of
# chain length, and the summed chain length divided by the ticker's number of rows.
pos_medians = [np.median(pos) for pos in pos_lens]
pos_q80 = [np.quantile(pos, 0.8) for pos in pos_lens]
pos_time_share = [
    pos.sum() / len(tickers_dfs[ticker])
    for pos, ticker in zip(pos_lens, symbols['Symbol'])
]
print(f"Positive median: {np.median(pos_medians)}")
print(f"Positive 80%: {np.median(pos_q80)}")
print(f"Mean Positive % of all time: {np.mean(pos_time_share)}")

Positive median: 4.0
Positive 80%: 5.0
Mean Positive % of all time: 0.26106802568190307


In [102]:
# Median over tickers of the share of positive chains with length <= 4.
# The label now states the threshold actually used (it previously said "3").
print(f"Positive prob of length <= 4: {np.median([(pos <= 4).mean() for pos in pos_lens])}")

Positive prob of 3: 0.7493569014885499


In [41]:
# Cross-ticker summary of negative chains: per-ticker median and 80th percentile of
# chain length, and the summed chain length divided by the ticker's number of rows.
neg_medians = [np.median(neg) for neg in neg_lens]
neg_q80 = [np.quantile(neg, 0.8) for neg in neg_lens]
neg_time_share = [
    neg.sum() / len(tickers_dfs[ticker])
    for neg, ticker in zip(neg_lens, symbols['Symbol'])
]
print(f"Negative median: {np.median(neg_medians)}")
print(f"Negative 80%: {np.median(neg_q80)}")
print(f"Mean Negative % of all time: {np.mean(neg_time_share)}")

Negative median: 3.0
Negative 80%: 5.0
Mean Negative % of all time: 0.213096571160183


In [100]:
# Median over tickers of the share of negative chains with length <= 4.
# The label now states the threshold actually used (it previously said "3").
print(f"Negative prob of length <= 4: {np.median([(neg <= 4).mean() for neg in neg_lens])}")

Negative prob of 3: 0.7857405884446844


### Correlation between stocks and their sector mean

Всего рассматриваемых секторов

In [53]:
# Distinct values of the "GICS Sector" column, in order of first appearance.
symbols.loc[:, "GICS Sector"].unique()

array(['Industrials', 'Information Technology', 'Financials', 'Energy'],
      dtype=object)

#### Industrials

In [54]:
sector = "Industrials"
# Rows of the symbols table that belong to the chosen sector.
tickets_in_sector = symbols.loc[symbols["GICS Sector"] == sector]
print(f"Stocks in {sector}: {len(tickets_in_sector)}")

Stocks in Industrials: 71


In [55]:
# Empty frame that carries the Date index of the sector's first ticker;
# columns added later are aligned to this index.
first_symbol = tickets_in_sector["Symbol"].iloc[0]
sector_tickers_merged_closes = pd.DataFrame(index=tickers_dfs[first_symbol].index)

Абсолютные изменения

In [56]:
# One column per ticker holding the day-to-day absolute change of Close,
# then drop the first row (diff() has no previous value there).
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(
    **{symbol: tickers_dfs[symbol]["Close"].diff() for symbol in tickets_in_sector["Symbol"]}
).iloc[1:]

In [57]:
# Row-wise mean over the columns currently in the frame.
sector_abs_mean = sector_tickers_merged_closes.mean(axis="columns")

In [58]:
# Append the sector mean as an extra column next to the per-ticker columns.
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(Sector_Mean=sector_abs_mean)

In [59]:
# Add the sector mean shifted down by 1..5 rows as Sector_Mean_s1 .. Sector_Mean_s5.
sector_mean = sector_tickers_merged_closes["Sector_Mean"]
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(
    **{f"Sector_Mean_s{lag}": sector_mean.shift(lag) for lag in range(1, 6)}
)

In [61]:
# Keep only rows without any missing value (rebinding instead of mutating in place).
sector_tickers_merged_closes = sector_tickers_merged_closes.dropna(how="any")

In [65]:
# Correlation of every column with the sector mean and its five lags; the last six
# rows of the correlation matrix are dropped by position before summarising.
sector_mean_columns = [
    "Sector_Mean",
    "Sector_Mean_s1",
    "Sector_Mean_s2",
    "Sector_Mean_s3",
    "Sector_Mean_s4",
    "Sector_Mean_s5",
]
sector_tickers_merged_closes.corr()[sector_mean_columns].iloc[:-6].describe()

Unnamed: 0,Sector_Mean,Sector_Mean_s1,Sector_Mean_s2,Sector_Mean_s3,Sector_Mean_s4,Sector_Mean_s5
count,71.0,71.0,71.0,71.0,71.0,71.0
mean,0.672026,-0.027329,0.022245,-0.010073,-0.043216,0.040198
std,0.100164,0.03038,0.031857,0.028661,0.027197,0.0282
min,0.426425,-0.09919,-0.041568,-0.070702,-0.102902,-0.02441
25%,0.615028,-0.043889,-0.000933,-0.028793,-0.061857,0.022065
50%,0.694471,-0.031565,0.02267,-0.00945,-0.047312,0.040928
75%,0.739295,-0.013653,0.042534,0.012706,-0.025191,0.056434
max,0.855329,0.048455,0.097796,0.062826,0.043564,0.112417


Относительные изменения

In [69]:
# Start over with an empty frame on the Date index of the sector's first ticker;
# columns added later are aligned to this index.
first_symbol = tickets_in_sector["Symbol"].iloc[0]
sector_tickers_merged_closes = pd.DataFrame(index=tickers_dfs[first_symbol].index)

In [70]:
# One column per ticker holding the day-to-day percentage change of Close,
# then drop the first row (pct_change() has no previous value there).
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(
    **{symbol: tickers_dfs[symbol]["Close"].pct_change() for symbol in tickets_in_sector["Symbol"]}
).iloc[1:]

In [71]:
# Row-wise mean over the columns currently in the frame.
sector_rel_mean = sector_tickers_merged_closes.mean(axis="columns")

In [72]:
# Append the sector mean as an extra column next to the per-ticker columns.
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(Sector_Mean=sector_rel_mean)

In [73]:
# Add the sector mean shifted down by 1..5 rows as Sector_Mean_s1 .. Sector_Mean_s5.
sector_mean = sector_tickers_merged_closes["Sector_Mean"]
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(
    **{f"Sector_Mean_s{lag}": sector_mean.shift(lag) for lag in range(1, 6)}
)

In [74]:
# Keep only rows without any missing value (rebinding instead of mutating in place).
sector_tickers_merged_closes = sector_tickers_merged_closes.dropna(how="any")

In [75]:
# Correlation of every column with the sector mean and its five lags; the last six
# rows of the correlation matrix are dropped by position before summarising.
sector_mean_columns = [
    "Sector_Mean",
    "Sector_Mean_s1",
    "Sector_Mean_s2",
    "Sector_Mean_s3",
    "Sector_Mean_s4",
    "Sector_Mean_s5",
]
sector_tickers_merged_closes.corr()[sector_mean_columns].iloc[:-6].describe()

Unnamed: 0,Sector_Mean,Sector_Mean_s1,Sector_Mean_s2,Sector_Mean_s3,Sector_Mean_s4,Sector_Mean_s5
count,71.0,71.0,71.0,71.0,71.0,71.0
mean,0.718476,-0.056331,0.060597,-0.029684,-0.064944,0.049984
std,0.099037,0.041232,0.041868,0.0404,0.036774,0.036225
min,0.459557,-0.158787,-0.025456,-0.13186,-0.155846,-0.065301
25%,0.669791,-0.076665,0.031175,-0.054847,-0.083931,0.03216
50%,0.737932,-0.056487,0.053781,-0.030372,-0.070851,0.048736
75%,0.784495,-0.037396,0.091398,-0.001269,-0.04107,0.068436
max,0.881381,0.068035,0.147736,0.094335,0.031321,0.143331


#### Information Technology

In [76]:
sector = "Information Technology"
# Rows of the symbols table that belong to the chosen sector.
tickets_in_sector = symbols.loc[symbols["GICS Sector"] == sector]
print(f"Stocks in {sector}: {len(tickets_in_sector)}")

Stocks in Information Technology: 62


In [77]:
# Empty frame that carries the Date index of the sector's first ticker;
# columns added later are aligned to this index.
first_symbol = tickets_in_sector["Symbol"].iloc[0]
sector_tickers_merged_closes = pd.DataFrame(index=tickers_dfs[first_symbol].index)

In [78]:
# One column per ticker holding the day-to-day percentage change of Close,
# then drop the first row (pct_change() has no previous value there).
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(
    **{symbol: tickers_dfs[symbol]["Close"].pct_change() for symbol in tickets_in_sector["Symbol"]}
).iloc[1:]

In [79]:
# Row-wise mean over the columns currently in the frame.
sector_rel_mean = sector_tickers_merged_closes.mean(axis="columns")

In [80]:
# Append the sector mean as an extra column next to the per-ticker columns.
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(Sector_Mean=sector_rel_mean)

In [81]:
# Add the sector mean shifted down by 1..5 rows as Sector_Mean_s1 .. Sector_Mean_s5.
sector_mean = sector_tickers_merged_closes["Sector_Mean"]
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(
    **{f"Sector_Mean_s{lag}": sector_mean.shift(lag) for lag in range(1, 6)}
)

In [82]:
# Keep only rows without any missing value (rebinding instead of mutating in place).
sector_tickers_merged_closes = sector_tickers_merged_closes.dropna(how="any")

In [83]:
# Correlation of every column with the sector mean and its five lags; the last six
# rows of the correlation matrix are dropped by position before summarising.
sector_mean_columns = [
    "Sector_Mean",
    "Sector_Mean_s1",
    "Sector_Mean_s2",
    "Sector_Mean_s3",
    "Sector_Mean_s4",
    "Sector_Mean_s5",
]
sector_tickers_merged_closes.corr()[sector_mean_columns].iloc[:-6].describe()

Unnamed: 0,Sector_Mean,Sector_Mean_s1,Sector_Mean_s2,Sector_Mean_s3,Sector_Mean_s4,Sector_Mean_s5
count,62.0,62.0,62.0,62.0,62.0,62.0
mean,0.686596,-0.074337,0.041493,-0.004864,-0.032181,0.020529
std,0.099606,0.042576,0.023941,0.021371,0.020967,0.02094
min,0.433075,-0.16694,-0.026985,-0.062718,-0.082405,-0.031436
25%,0.630617,-0.108108,0.02393,-0.017319,-0.043474,0.009529
50%,0.689451,-0.074916,0.041696,-0.004925,-0.034894,0.019023
75%,0.770402,-0.036804,0.057564,0.006766,-0.017188,0.0334
max,0.833234,0.004849,0.098921,0.042867,0.026246,0.070885


#### Financials

In [84]:
sector = "Financials"
# Rows of the symbols table that belong to the chosen sector.
tickets_in_sector = symbols.loc[symbols["GICS Sector"] == sector]
print(f"Stocks in {sector}: {len(tickets_in_sector)}")

Stocks in Financials: 69


In [85]:
# Empty frame that carries the Date index of the sector's first ticker;
# columns added later are aligned to this index.
first_symbol = tickets_in_sector["Symbol"].iloc[0]
sector_tickers_merged_closes = pd.DataFrame(index=tickers_dfs[first_symbol].index)

In [86]:
# One column per ticker holding the day-to-day percentage change of Close,
# then drop the first row (pct_change() has no previous value there).
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(
    **{symbol: tickers_dfs[symbol]["Close"].pct_change() for symbol in tickets_in_sector["Symbol"]}
).iloc[1:]

In [87]:
# Row-wise mean over the columns currently in the frame.
sector_rel_mean = sector_tickers_merged_closes.mean(axis="columns")

In [88]:
# Append the sector mean as an extra column next to the per-ticker columns.
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(Sector_Mean=sector_rel_mean)

In [89]:
# Add the sector mean shifted down by 1..5 rows as Sector_Mean_s1 .. Sector_Mean_s5.
sector_mean = sector_tickers_merged_closes["Sector_Mean"]
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(
    **{f"Sector_Mean_s{lag}": sector_mean.shift(lag) for lag in range(1, 6)}
)

In [90]:
# Keep only rows without any missing value (rebinding instead of mutating in place).
sector_tickers_merged_closes = sector_tickers_merged_closes.dropna(how="any")

In [91]:
# Correlation of every column with the sector mean and its five lags; the last six
# rows of the correlation matrix are dropped by position before summarising.
sector_mean_columns = [
    "Sector_Mean",
    "Sector_Mean_s1",
    "Sector_Mean_s2",
    "Sector_Mean_s3",
    "Sector_Mean_s4",
    "Sector_Mean_s5",
]
sector_tickers_merged_closes.corr()[sector_mean_columns].iloc[:-6].describe()

Unnamed: 0,Sector_Mean,Sector_Mean_s1,Sector_Mean_s2,Sector_Mean_s3,Sector_Mean_s4,Sector_Mean_s5
count,69.0,69.0,69.0,69.0,69.0,69.0
mean,0.754056,-0.096025,0.073546,-0.017472,-0.06537,0.03728
std,0.112597,0.031812,0.033121,0.028327,0.028129,0.026249
min,0.381932,-0.164154,0.005759,-0.081821,-0.112578,-0.018155
25%,0.693324,-0.120046,0.053306,-0.039046,-0.085504,0.021501
50%,0.767598,-0.097242,0.068871,-0.013246,-0.069875,0.034286
75%,0.844876,-0.075519,0.093787,0.001336,-0.050057,0.04879
max,0.896719,-0.025838,0.172169,0.048022,0.028372,0.106575


#### Energy

In [92]:
sector = "Energy"
# Rows of the symbols table that belong to the chosen sector.
tickets_in_sector = symbols.loc[symbols["GICS Sector"] == sector]
print(f"Stocks in {sector}: {len(tickets_in_sector)}")

Stocks in Energy: 20


In [93]:
# Empty frame that carries the Date index of the sector's first ticker;
# columns added later are aligned to this index.
first_symbol = tickets_in_sector["Symbol"].iloc[0]
sector_tickers_merged_closes = pd.DataFrame(index=tickers_dfs[first_symbol].index)

In [94]:
# One column per ticker holding the day-to-day percentage change of Close,
# then drop the first row (pct_change() has no previous value there).
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(
    **{symbol: tickers_dfs[symbol]["Close"].pct_change() for symbol in tickets_in_sector["Symbol"]}
).iloc[1:]

In [95]:
# Row-wise mean over the columns currently in the frame.
sector_rel_mean = sector_tickers_merged_closes.mean(axis="columns")

In [96]:
# Append the sector mean as an extra column next to the per-ticker columns.
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(Sector_Mean=sector_rel_mean)

In [97]:
# Add the sector mean shifted down by 1..5 rows as Sector_Mean_s1 .. Sector_Mean_s5.
sector_mean = sector_tickers_merged_closes["Sector_Mean"]
sector_tickers_merged_closes = sector_tickers_merged_closes.assign(
    **{f"Sector_Mean_s{lag}": sector_mean.shift(lag) for lag in range(1, 6)}
)

In [98]:
# Keep only rows without any missing value (rebinding instead of mutating in place).
sector_tickers_merged_closes = sector_tickers_merged_closes.dropna(how="any")

In [99]:
# Correlation of every column with the sector mean and its five lags; the last six
# rows of the correlation matrix are dropped by position before summarising.
sector_mean_columns = [
    "Sector_Mean",
    "Sector_Mean_s1",
    "Sector_Mean_s2",
    "Sector_Mean_s3",
    "Sector_Mean_s4",
    "Sector_Mean_s5",
]
sector_tickers_merged_closes.corr()[sector_mean_columns].iloc[:-6].describe()

Unnamed: 0,Sector_Mean,Sector_Mean_s1,Sector_Mean_s2,Sector_Mean_s3,Sector_Mean_s4,Sector_Mean_s5
count,20.0,20.0,20.0,20.0,20.0,20.0
mean,0.80683,-0.011014,0.022173,0.0041,-0.00592,0.022383
std,0.095389,0.030695,0.01886,0.021538,0.02656,0.021175
min,0.516017,-0.071543,-0.021639,-0.046082,-0.08303,-0.01596
25%,0.778422,-0.028358,0.010457,-0.005248,-0.016411,0.006528
50%,0.839506,-0.024058,0.025837,0.003452,-0.001489,0.024034
75%,0.87236,0.011137,0.035988,0.009042,0.003943,0.038156
max,0.894221,0.058106,0.057032,0.048882,0.034092,0.054716
