In [1]:
import pandas as pd
import plotly.express as px

# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [2]:
# Load the per-date asset returns: one column per ticker, indexed by the first
# CSV column (the date). `parse_dates=True` converts that index to a
# DatetimeIndex instead of leaving the dates as plain strings, so date-based
# slicing, sorting, and resampling behave as expected.
returns = pd.read_csv('data/returns.csv', index_col=0, parse_dates=True)
returns.head()

Unnamed: 0_level_0,AAPL,AMZN,BRK-A,BTC-USD,BYDDF,CL=F,COIN,CSPX.AS,ETH-USD,GC=F,GOOGL,IQQH.F,IT,META,MSFT,MSTR,NVDA,PYPL,SI=F,SOL-USD,TSLA,UA,VAPU.SW
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2021-01-05,0.012364,0.010004,-0.004963,0.063197,0.067736,0.048509,,0.001695,0.057461,0.004114,0.008064,0.012309,0.010375,0.007548,0.000965,0.008019,0.02221,0.012892,0.010519,-0.131938,0.007317,0.025896,0.008708
2021-01-06,-0.033662,-0.024897,0.012782,0.083311,-0.028047,0.01402,,0.016355,0.097369,-0.023455,-0.009868,0.094345,0.025546,-0.028269,-0.025929,0.12113,-0.058953,-0.034396,-0.021689,-0.105692,0.02839,0.003884,-0.007751
2021-01-07,0.034123,0.007577,0.011266,0.069157,0.065613,0.00395,,0.008633,0.01538,0.002832,0.029869,0.077108,0.014714,0.020622,0.028457,0.114868,0.05783,0.036194,0.008416,0.229901,0.079447,0.005158,0.013517
2021-01-08,0.008631,0.006496,0.00557,0.036234,0.01225,0.02774,,0.004667,-0.001208,-0.040893,0.013239,-0.033681,0.017449,-0.004354,0.006093,-0.007671,-0.00504,0.031569,-0.09625,0.35703,0.078403,-0.021809,0.02559
2021-01-11,-0.023249,-0.021519,-0.00565,-0.128217,0.030255,0.000191,,0.006746,-0.109502,0.008451,-0.023106,-0.019807,-0.011354,-0.040102,-0.009699,-0.067997,0.025966,-0.020457,0.026727,-0.034721,-0.078214,0.017705,-0.010252


In [3]:
# Pairwise Pearson correlation between all ticker columns.
# `DataFrame.corr` drops missing values pair by pair, so a ticker with gaps
# (e.g. COIN, which is empty in the first rows) is correlated with each other
# ticker over their overlapping dates only.
corr_matrix = returns.corr(
    method='pearson',
)
corr_matrix.head(5)

Unnamed: 0,AAPL,AMZN,BRK-A,BTC-USD,BYDDF,CL=F,COIN,CSPX.AS,ETH-USD,GC=F,GOOGL,IQQH.F,IT,META,MSFT,MSTR,NVDA,PYPL,SI=F,SOL-USD,TSLA,UA,VAPU.SW
AAPL,1.0,0.572423,0.446538,0.260227,0.315148,0.031474,0.409605,0.404733,0.264191,0.039142,0.624941,0.277077,0.40556,0.498974,0.688557,0.36594,0.562026,0.467397,0.073991,0.260637,0.522761,0.368327,0.250835
AMZN,0.572423,1.0,0.365067,0.28377,0.302065,0.047419,0.481839,0.375034,0.280696,0.08633,0.647101,0.238356,0.390467,0.590038,0.668876,0.410667,0.562658,0.510862,0.135129,0.261921,0.430476,0.38181,0.195795
BRK-A,0.446538,0.365067,1.0,0.213635,0.13365,0.170141,0.251247,0.395886,0.21946,0.074073,0.412724,0.189494,0.41849,0.32216,0.397668,0.251645,0.28636,0.351674,0.130659,0.160828,0.226732,0.376516,0.276205
BTC-USD,0.260227,0.28377,0.213635,1.0,0.174067,0.038774,0.545296,0.18112,0.807876,0.037275,0.260878,0.260928,0.217522,0.199818,0.308867,0.711464,0.309746,0.297927,0.103533,0.56085,0.283985,0.23351,0.147763
BYDDF,0.315148,0.302065,0.13365,0.174067,1.0,0.103275,0.23801,0.23223,0.17618,0.036486,0.294267,0.314093,0.217091,0.232275,0.286514,0.270341,0.325815,0.317429,0.090562,0.16685,0.376354,0.241866,0.334074


In [4]:
# Persist the correlation matrix (ticker labels are written as the first
# column and the header row) so it can be reloaded without recomputing.
corr_matrix.to_csv(path_or_buf='data/corr_matrix.csv')

# Visualization and EDA of the correlation matrix

## Key observations

- Almost all correlations are positive (negative ones are close to zero).
- Highly correlated assets tend to belong to the same industry or serve a similar purpose, e.g.:
  - Microsoft, Google, Apple, and Amazon
  - Coinbase and MicroStrategy
  - Bitcoin and Ethereum
  - Gold and Silver
- Commodities (gold, silver, and crude oil) show little to no correlation with the equity and crypto markets.

In [5]:
# Draw the correlation matrix as a 700x700 px heatmap. The colour axis is
# pinned to the full correlation range [-1, 1] so that the diverging RdBu
# scale is centred on zero and colours are comparable between runs.
fig = (
    px.imshow(
        corr_matrix,
        title='Correlation Matrix Heatmap',
        width=700,
        height=700,
        aspect='auto',
        text_auto=False,
    )
    .update_coloraxes(colorscale='RdBu', cmin=-1, cmax=1)
)
fig.show()