In [2]:
import pandas as pd
import pandas_datareader as pdr

import requests_cache
# Shared HTTP session handed to every DataReader call below; responses are
# cached in a SQLite-backed store named 'stocks-cache'.
session = requests_cache.CachedSession(
    'stocks-cache',
    backend='sqlite',
)

In [3]:
# Ticker strings requested from the 'yahoo' data source.
symbols = ['CL=F', 'BTC-USD', 'RUB=X', 'GC=F', '^GSPC', '^IXIC']

# One Close series per symbol, forward-filled and renamed to its ticker so the
# ticker becomes the column label after concatenation.
# NOTE(review): the 'yahoo' source of pandas_datareader may no longer work
# against the current upstream service -- confirm in the target environment.
ticks = [
    pdr.DataReader(
        sym,
        start='2014-01-01',
        end='2020-05-01',
        data_source='yahoo',
        session=session
    )['Close'].ffill().rename(sym)  # .ffill() replaces the deprecated fillna(method='ffill')
    for sym in symbols
]

# Keep only the first occurrence of any repeated index label
# (presumably so the column-wise concat below aligns on unique labels).
ticks = [t[~t.index.duplicated()] for t in ticks]

# join='inner' keeps only the index labels present in every series.
ticks_df = pd.concat(ticks, join='inner', axis=1)

# Drop every row in which at least one instrument is below 1.
# NOTE(review): the threshold 1 looks like a guard against zero/negative
# quotes -- confirm the intent.
ticks_df = ticks_df[~(ticks_df < 1).any(axis=1)]

# Pairwise correlation of the price levels, coloured on a diverging map
# pinned to the full [-1, 1] range.
ticks_df.corr().style.background_gradient(cmap='bwr', vmin=-1, vmax=1)

Unnamed: 0,CL=F,BTC-USD,RUB=X,GC=F,^GSPC,^IXIC
CL=F,1.0,0.299952,-0.666507,-0.073561,0.31255,0.251984
BTC-USD,0.299952,1.0,0.065245,0.609711,0.838919,0.833857
RUB=X,-0.666507,0.065245,1.0,0.179455,0.11421,0.178153
GC=F,-0.073561,0.609711,0.179455,1.0,0.692176,0.714099
^GSPC,0.31255,0.838919,0.11421,0.692176,1.0,0.991147
^IXIC,0.251984,0.833857,0.178153,0.714099,0.991147,1.0


In [18]:
from scipy.spatial.distance import pdist, squareform
from sklearn.feature_selection import mutual_info_regression

# Mutual-information estimate for every ordered pair of columns (a, b):
# column `a` is the single feature, column `b` the target.
#
# - The feature is reshaped explicitly to (n_samples, 1); indexing a Series
#   with `[:, None]` is no longer supported by current pandas.
# - random_state is pinned because the estimator is randomised; without it
#   the matrix changes between runs and is not symmetric.
mi = [
    (
        a,
        b,
        mutual_info_regression(
            ticks_df[a].to_numpy().reshape(-1, 1),
            ticks_df[b],
            random_state=0,
        )[0],
    )
    for a in ticks_df.columns
    for b in ticks_df.columns
]

# Long (a, b, value) records -> square matrix. pivot() arguments are passed by
# keyword because current pandas rejects positional ones. The colour scale is
# fixed to [0, 5].
pd.DataFrame(mi).pivot(index=0, columns=1, values=2).style.background_gradient(cmap='Reds', vmin=0, vmax=5)

1,BTC-USD,CL=F,GC=F,RUB=X,^GSPC,^IXIC
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BTC-USD,5.866615,0.900403,1.031517,0.826901,1.636635,1.670144
CL=F,0.900506,5.806011,0.660202,0.864246,1.033375,1.044913
GC=F,1.032647,0.659061,5.81182,0.70055,1.047131,1.069223
RUB=X,0.826923,0.864066,0.700387,5.861713,1.014284,1.059265
^GSPC,1.636461,1.033188,1.047214,1.01409,5.866145,2.638685
^IXIC,1.67021,1.045208,1.069791,1.059222,2.638379,5.864063


In [3]:
# Row-over-row relative change of every column; rows containing NaN (such as
# the first row, which has no predecessor) are dropped.
change_df = (
    ticks_df
    .pct_change()
    .dropna()
)

# Correlation of the changes rather than of the price levels.
(
    change_df
    .corr()
    .style
    .background_gradient(cmap='bwr', vmin=-1, vmax=1)
)

Unnamed: 0,CL=F,BTC-USD,RUB=X,GC=F,^GSPC,^IXIC
CL=F,1.0,0.041884,-0.13753,0.106187,0.286956,0.266799
BTC-USD,0.041884,1.0,-0.029105,0.038291,0.109061,0.102011
RUB=X,-0.13753,-0.029105,1.0,-0.058473,-0.131074,-0.127754
GC=F,0.106187,0.038291,-0.058473,1.0,-0.009778,-0.021037
^GSPC,0.286956,0.109061,-0.131074,-0.009778,1.0,0.959762
^IXIC,0.266799,0.102011,-0.127754,-0.021037,0.959762,1.0


In [48]:
# Relative change of the 5-row rolling sum of each column. This is the change
# of a smoothed series, not a 5-period return. Incomplete windows and the
# first change row are dropped.
change_df = (
    ticks_df
    .rolling(5)
    .sum()
    .dropna()
    .pct_change()
    .dropna()
)

# Correlation matrix of the smoothed changes.
(
    change_df
    .corr()
    .style
    .background_gradient(cmap='bwr', vmin=-1, vmax=1)
)

Unnamed: 0,CL=F,BTC-USD,RUB=X,GC=F,^GSPC,^IXIC
CL=F,1.0,0.086011,-0.413454,0.080145,0.354026,0.296728
BTC-USD,0.086011,1.0,-0.080126,0.06281,0.177097,0.181907
RUB=X,-0.413454,-0.080126,1.0,-0.099211,-0.351818,-0.315893
GC=F,0.080145,0.06281,-0.099211,1.0,0.043839,0.014652
^GSPC,0.354026,0.177097,-0.351818,0.043839,1.0,0.954748
^IXIC,0.296728,0.181907,-0.315893,0.014652,0.954748,1.0


In [50]:
# Same smoothed-change computation, restricted to rows whose index is strictly
# before 2017-01-01.
change_df = (
    ticks_df
    .loc[ticks_df.index < '2017-01-01']
    .rolling(5)
    .sum()
    .pct_change()
    .dropna()
)

# Correlation matrix for the restricted window.
(
    change_df
    .corr()
    .style
    .background_gradient(cmap='bwr', vmin=-1, vmax=1)
)

Unnamed: 0,CL=F,BTC-USD,RUB=X,GC=F,^GSPC,^IXIC
CL=F,1.0,0.035747,-0.466252,-0.074728,0.387988,0.344987
BTC-USD,0.035747,1.0,-0.129088,-0.086469,0.061517,0.071176
RUB=X,-0.466252,-0.129088,1.0,-0.02959,-0.353168,-0.30858
GC=F,-0.074728,-0.086469,-0.02959,1.0,-0.205004,-0.260066
^GSPC,0.387988,0.061517,-0.353168,-0.205004,1.0,0.956612
^IXIC,0.344987,0.071176,-0.30858,-0.260066,0.956612,1.0


In [5]:
# The 'WALCL' series from the 'fred' data source, fetched through the shared
# cached session.
walcl = pdr.DataReader(
    'WALCL',
    data_source='fred',
    start='2003-01-01',
    end='2020-05-01',
    session=session,
)

# Close column of '^GSPC' from the 'yahoo' data source over the same date
# range, renamed to the ticker so it labels its column after concatenation.
snp = pdr.DataReader(
    '^GSPC',
    data_source='yahoo',
    start='2003-01-01',
    end='2020-05-01',
    session=session,
)['Close'].rename('^GSPC')

# Inner join: keep only the index labels present in both inputs.
stat = pd.concat([walcl, snp], axis=1, join='inner')

# Correlation between the two columns.
(
    stat
    .corr()
    .style
    .background_gradient(cmap='bwr', vmin=-1, vmax=1)
)

Unnamed: 0,WALCL,^GSPC
WALCL,1.0,0.778495
^GSPC,0.778495,1.0


In [17]:
# Mutual-information estimate for every ordered pair of columns (a, b) of
# `stat`: column `a` is the single feature, column `b` the target.
#
# - The feature is reshaped explicitly to (n_samples, 1); indexing a Series
#   with `[:, None]` is no longer supported by current pandas.
# - random_state is pinned because the estimator is randomised; without it
#   the matrix changes between runs and is not symmetric.
mi = [
    (
        a,
        b,
        mutual_info_regression(
            stat[a].to_numpy().reshape(-1, 1),
            stat[b],
            random_state=0,
        )[0],
    )
    for a in stat.columns
    for b in stat.columns
]

# Long (a, b, value) records -> square matrix. pivot() arguments are passed by
# keyword because current pandas rejects positional ones. The colour scale is
# fixed to [0, 5].
pd.DataFrame(mi).pivot(index=0, columns=1, values=2).style.background_gradient(cmap='Reds', vmin=0, vmax=5)

1,WALCL,^GSPC
0,Unnamed: 1_level_1,Unnamed: 2_level_1
WALCL,5.541265,1.910814
^GSPC,1.910598,5.539777
