In [1]:
import datetime as dt
from datetime import date
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
import numpy as np
import time
from datetime import date
import statsmodels.formula.api as sm

#returns dataframe of price with close price
def ticker_df(ticker, start, end):
    source = 'yahoo'
    df = web.DataReader(ticker, source, start, end)
    return df.drop(['High', 'Low', 'Open', 'Adj Close', 'Volume'], axis = 1)

def stock_correlation(stock1, stock2):
    correlation = stock1['Close'].pct_change().corr(stock2['Close'].pct_change())
    return(correlation)

def get_correlation(stocks, start, end):
    
    assets ={}
    assets2 = {}

    for stock in stocks:
        assets[stock] = ticker_df(stock, start, end)

    for stock in stocks:
        assets2[stock] = list(assets[stock]['Close'])

    df = pd.DataFrame(data = assets2, columns = assets.keys())
    corr_matrix = df.corr()

    for stock1 in stocks:
        for stock2 in stocks:
            corr_matrix[stock1][stock2] = stock_correlation(assets[stock1], assets[stock2])
    
    return(corr_matrix)

#takes holdings {'ticker':allocation}, returns simulation of portfolio
def sim_port(holdings, start, end):
    portfolio = ticker_df('SPY', start, end).drop('Close', axis =1)

    for stock in holdings.keys():
        portfolio[stock + ' close'] = ticker_df(stock, start, end)
        portfolio[stock + ' pct change'] = portfolio[stock + ' close'].pct_change()

    for stock in holdings.keys():

        portfolio[stock + ' sim close'] = [0.0]*len(portfolio)
        portfolio[stock + ' sim close'][0] = holdings[stock]

        for i in range(1, len(portfolio)):
            a = portfolio[stock + ' sim close'][i-1]
            b = 1 + portfolio[stock + ' pct change'][i]
            portfolio[stock + ' sim close'][i] =  a*b

    portfolio['Close'] = [0.0]*len(portfolio)

    for stock in holdings.keys():
        for i in range(len(portfolio)):
            portfolio['Close'][i] += portfolio[stock + ' sim close'][i]

    return(portfolio)

#takes list of stocks (strings), returns dictionary of time series
def asset_dict(stocks, start, end):
    assets = {}
    for stock in stocks:
        assets[stock] = ticker_df(stock,start,end)['Close']
    return(assets)

#takes list of stocks, returns dictionary of time series
def asset_timeseries(stocks, start, end):
    assets = {}
    for stock in stocks:
        assets[stock] = list(ticker_df(stock, start, end)['Close'])
    return(assets)

def series_corr(ser1, ser2):
    correlation = ser1.pct_change().corr(ser2.pct_change())
    return(correlation)

def create_corr_matrix(stock_dict):
    data = pd.DataFrame(data = stock_dict, columns = stock_dict.keys())
    corr_matrix = data.corr()

    for stock1 in stock_dict.keys():
        for stock2 in stock_dict.keys():
            corr_matrix[stock1][stock2] = series_corr(stock_dict[stock1], stock_dict[stock2])
    return(corr_matrix)


Date
2007-01-03     52.630001
2007-01-04     53.590000
2007-01-05     53.180000
2007-01-08     53.250000
2007-01-09     53.540001
                 ...    
2020-06-24    272.459991
2020-06-25    276.079987
2020-06-26    270.980011
2020-06-29    273.739990
2020-06-30    278.709991
Name: Close, Length: 3397, dtype: float64

In [28]:
start = dt.datetime(2014,1,1)
end = dt.datetime(2020,6,30)

holdings = {'VGT':0.174358, 'VTI':0.026624, 'GLD':0.140878, 'USO':0.012532, 'LQD':0.050989, 'IEF':0.45764}
stocks = ['SPY', 'IWM', 'QQQ', 'GDX', 'GDXJ', 'GLD', 'SLV', 'USO', 'XLK', 'XLE', 'XRT', 'XRT', 'XLU', 'XLF','XLI','XLV', 'XAR', 'ARKK', 'TLT', 'LQD', 'IEF']
assets = asset_dict(stocks, start, end)

portfolio = sim_port(holdings, start, end)
assets['portfolio'] = portfolio['Close']
corr_matrix = create_corr_matrix(assets)
corr_matrix

Unnamed: 0,SPY,IWM,QQQ,GDX,GDXJ,GLD,SLV,USO,XLK,XLE,...,XLU,XLF,XLI,XLV,XAR,ARKK,TLT,LQD,IEF,portfolio
SPY,1.0,0.895597,0.934694,0.08303,0.134228,-0.065763,0.14714,0.364461,0.941555,0.76763,...,0.611474,0.902053,0.926237,0.87438,0.842673,0.760716,-0.421056,0.167227,-0.424373,0.75186
IWM,0.895597,1.0,0.817163,0.092573,0.136567,-0.062198,0.162321,0.343233,0.812173,0.754327,...,0.489081,0.863185,0.874357,0.76506,0.859616,0.789719,-0.386664,0.163854,-0.396705,0.662021
QQQ,0.934694,0.817163,1.0,0.056781,0.105791,-0.063496,0.127332,0.292883,0.975436,0.62467,...,0.490158,0.762053,0.801778,0.828362,0.732843,0.797257,-0.376214,0.123991,-0.386397,0.785041
GDX,0.08303,0.092573,0.056781,1.0,0.947701,0.755451,0.642651,0.154289,0.052387,0.180174,...,0.1956,-0.003922,0.078772,0.057965,0.101239,0.077527,0.234539,0.250227,0.272406,0.363191
GDXJ,0.134228,0.136567,0.105791,0.947701,1.0,0.737495,0.634546,0.153526,0.099893,0.208453,...,0.229474,0.05337,0.124792,0.101684,0.137473,0.123491,0.182096,0.270921,0.225241,0.393475
GLD,-0.065763,-0.062198,-0.063496,0.755451,0.737495,1.0,0.775173,0.029311,-0.057721,-0.006358,...,0.146871,-0.155772,-0.080244,-0.055564,-0.039339,-0.021323,0.322718,0.314169,0.389425,0.384529
SLV,0.14714,0.162321,0.127332,0.642651,0.634546,0.775173,1.0,0.197567,0.129983,0.209213,...,0.175227,0.074258,0.14182,0.111139,0.163158,0.181708,0.146697,0.269874,0.18442,0.43155
USO,0.364461,0.343233,0.292883,0.154289,0.153526,0.029311,0.197567,1.0,0.307482,0.622826,...,0.110667,0.331642,0.349759,0.233154,0.326443,0.263697,-0.210565,0.039048,-0.205545,0.259891
XLK,0.941555,0.812173,0.975436,0.052387,0.099893,-0.057721,0.129983,0.307482,1.0,0.647774,...,0.520872,0.785681,0.823761,0.798026,0.750566,0.778445,-0.380601,0.148419,-0.387323,0.812978
XLE,0.76763,0.754327,0.62467,0.180174,0.208453,-0.006358,0.209213,0.622826,0.647774,1.0,...,0.430255,0.747948,0.770197,0.594481,0.704279,0.569463,-0.334327,0.142437,-0.33062,0.542189


In [14]:
corr_matrix.to_csv('corr_matrix.csv')


In [19]:
start = dt.datetime(2007,1,1)
end = dt.datetime(2020,6,30)

holdings = {'VGT':0.174358, 'VTI':0.026624, 'GLD':0.140878, 'USO':0.012532, 'LQD':0.050989, 'IEF':0.45764}
stocks = list(holdings.keys())
assets = asset_dict(stocks, start, end)

portfolio = sim_port(holdings, start, end)
assets['portfolio'] = portfolio['Close']
corr_matrix = create_corr_matrix(assets)
corr_matrix

Unnamed: 0,VGT,VTI,GLD,USO,LQD,IEF,portfolio
VGT,1.0,0.926073,0.013246,0.3612,0.113264,-0.417901,0.615485
VTI,0.926073,1.0,0.037299,0.412618,0.135153,-0.437997,0.550017
GLD,0.013246,0.037299,1.0,0.187172,0.111776,0.181682,0.592225
USO,0.3612,0.412618,0.187172,1.0,0.040333,-0.246003,0.299291
LQD,0.113264,0.135153,0.111776,0.040333,1.0,0.441053,0.381695
IEF,-0.417901,-0.437997,0.181682,-0.246003,0.441053,1.0,0.205937
portfolio,0.615485,0.550017,0.592225,0.299291,0.381695,0.205937,1.0


In [19]:
start = dt.datetime(2005,1,1)
end = dt.datetime(2020,6,30)

fin_instr = ['CADUSD=X', 'GLD', 'SLV', 'USO','VTI']
fin_instr_ser = asset_dict(fin_instr, start, end)

corr_matrix_curr = create_corr_matrix(fin_instr_ser)
corr_matrix_curr

Unnamed: 0,CADUSD=X,GLD,SLV,USO,VTI
CADUSD=X,1.0,0.184902,0.20614,0.239634,0.289892
GLD,0.184902,1.0,0.805231,0.19883,0.042223
SLV,0.20614,0.805231,1.0,0.31391,0.220169
USO,0.239634,0.19883,0.31391,1.0,0.405425
VTI,0.289892,0.042223,0.220169,0.405425,1.0


Unnamed: 0,SPY,IWM,QQQ,GDX,GDXJ,GLD,SLV,USO,XLK,XLE,XRT,XLU,XLF,XLI,XLV,XAR,ARKK,TLT,LQD,portfolio
SPY,1.0,0.901847,0.935922,0.081083,0.140907,-0.056771,0.16596,0.378633,0.943215,0.77361,0.784478,0.625923,0.902025,0.926928,0.878083,0.848603,0.761728,-0.418516,0.184954,0.919144
IWM,0.901847,1.0,0.817599,0.084537,0.138616,-0.057306,0.179606,0.357117,0.817076,0.767254,0.850378,0.510905,0.872527,0.881299,0.772601,0.868747,0.792791,-0.388749,0.184461,0.895084
QQQ,0.935922,0.817599,1.0,0.051894,0.110603,-0.054475,0.144819,0.308289,0.977861,0.631612,0.69672,0.506058,0.760708,0.801708,0.829209,0.734721,0.797392,-0.373177,0.140136,0.89136
GDX,0.081083,0.084537,0.051894,1.0,0.951425,0.755357,0.633158,0.148195,0.050723,0.17153,0.039304,0.207333,-0.006613,0.080245,0.056321,0.110923,0.074283,0.255688,0.258909,0.297382
GDXJ,0.140907,0.138616,0.110603,0.951425,1.0,0.73703,0.627762,0.149633,0.10746,0.205583,0.092687,0.250332,0.056991,0.134201,0.106086,0.154597,0.12144,0.201869,0.287669,0.34745
GLD,-0.056771,-0.057306,-0.054475,0.755357,0.73703,1.0,0.766109,0.007532,-0.047268,-0.015975,-0.083399,0.166693,-0.151811,-0.071835,-0.040891,-0.025383,-0.024167,0.342837,0.331516,0.111946
SLV,0.16596,0.179606,0.144819,0.633158,0.627762,0.766109,1.0,0.190252,0.149658,0.211815,0.121558,0.2029,0.089141,0.161786,0.131524,0.187696,0.184817,0.162659,0.295608,0.317889
USO,0.378633,0.357117,0.308289,0.148195,0.149633,0.007532,0.190252,1.0,0.321061,0.62634,0.280387,0.116741,0.346334,0.363764,0.251513,0.342842,0.267796,-0.224575,0.0422,0.374542
XLK,0.943215,0.817076,0.977861,0.050723,0.10746,-0.047268,0.149658,0.321061,1.0,0.655206,0.692238,0.534804,0.784741,0.824845,0.805184,0.755098,0.778624,-0.377186,0.16261,0.885217
XLE,0.77361,0.767254,0.631612,0.17153,0.205583,-0.015975,0.211815,0.62634,0.655206,1.0,0.648199,0.44147,0.761526,0.779056,0.606881,0.721107,0.575273,-0.33831,0.156726,0.73546


In [60]:
# fama french data https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
# tutorial on excel https://www.youtube.com/watch?v=b2bO23z7cwg
# tutorial (text) https://www.codingfinance.com/post/2019-07-01-analyze-ff-factor-python/
# tutorial (text) https://randlow.github.io/posts/finance-economics/asset-pricing-regression/

# https://www.codingfinance.com/
# https://www.codingfinance.com/post/2019-07-01-analyze-ff-factor-python, adjust later
# https://seekingalpha.com/article/2035813-a-look-inside-the-fama-french-3-factor-model 
# https://www.codingfinance.com/post/2018-04-25-portfolio-beta-py/
# https://www.codingfinance.com/post/2018-03-27-retirement-py/
# https://www.codingfinance.com/post/2018-04-20-portfolio-stats-py/

In [3]:
factor_df = web.DataReader("F-F_Research_Data_Factors_daily", "famafrench")[0]
factor_df = factor_df.apply(lambda x: x/ 100)

In [4]:
#fama french 3 factor regression

factor_df_last = factor_df.index[factor_df.shape[0] - 1].date()
end = factor_df_last
factor_df_first = factor_df.index[0].date()
start = factor_df_first

holdings= {'XSU.TO':25.0, 'XQQ.TO':25.0, 'VEF.TO':30.0, 'GDX': 10.0, 'ARKK':5.0, 'XAR':5.0}
#holdings = {'SQQQ':100.0}
portfolio = sim_port(holdings, start, end)

factor_df['portfolio'] = portfolio['Close'].pct_change()
factor_df = factor_df[1:]
factor_df.rename(columns={"Mkt-RF":"mkt_excess"}, inplace=True)
factor_df['portfolio_excess'] = factor_df['portfolio'] - factor_df['RF']

model = smf.formula.ols(formula = "portfolio_excess ~ mkt_excess + SMB + HML", data = factor_df).fit()
print(model.params)


NameError: name 'smf' is not defined

In [3]:
def ticker_df(ticker, start, end):
    source = 'yahoo'
    df = web.DataReader(ticker, source, start, end)
    return df.drop(['High', 'Low', 'Open', 'Adj Close', 'Volume'], axis = 1)

In [24]:
start = dt.datetime(2010,12,1)
end = dt.datetime(2021,3,12)
BTC_USD= 'BTC-USD'
SPY_USD = 'SPY'
MSFT_USD = 'MSFT'
BTC=ticker_df(BTC_USD, start, end)

SPY = ticker_df(SPY_USD, start, end)

GLD = ticker_df('GLD', start, end)
MSFT = ticker_df(MSFT_USD,start,end)
QQQ = ticker_df('QQQ', start,end)
TNX = ticker_df("^TNX", start, end)

In [15]:
stock_correlation(BTC, SPY)

0.31943549943457583

In [16]:
stock_correlation(GLD, SPY)

0.20799405988821074

In [17]:
stock_correlation(MSFT,SPY)

0.6214500771949476

In [18]:
stock_correlation(QQQ,SPY)

0.863160073763301

In [23]:
stock_correlation(BTC, TNX)
#past 6 months

0.053644230610290354

In [25]:
stock_correlation(BTC, TNX)
#past 11 years 

0.006172402347731589