In [7]:
import datetime as dt
from datetime import date
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
import numpy as np
import time


def ticker_df(ticker, start, end):
    """Fetch price history for one ticker and strip everything but the close.

    Parameters
    ----------
    ticker :
        Symbol handed to ``web.DataReader``.
    start, end :
        Date bounds handed to ``web.DataReader`` unchanged.

    Returns
    -------
    The frame produced by ``web.DataReader`` with the 'High', 'Low', 'Open',
    'Adj Close' and 'Volume' columns removed.
    """
    # NOTE(review): the 'yahoo' backend of pandas-datareader has reportedly been
    # unreliable in recent releases — confirm it still works in this environment.
    unwanted_columns = ['High', 'Low', 'Open', 'Adj Close', 'Volume']
    prices = web.DataReader(ticker, 'yahoo', start, end)
    return prices.drop(columns=unwanted_columns)

def stock_correlation(stock1, stock2):
    """Correlate the period-over-period returns of two price frames.

    Both arguments must expose a 'Close' column. Each column is converted to
    percentage changes before the correlation of the two return series is
    computed.
    """
    returns_a = stock1['Close'].pct_change()
    returns_b = stock2['Close'].pct_change()
    return returns_a.corr(returns_b)

def get_correlation(stocks, start, end):
    """Return the pairwise correlation matrix of returns for a list of tickers.

    Parameters
    ----------
    stocks : iterable of str
        Ticker symbols. Repeated symbols are collapsed (first occurrence wins
        for ordering), so each ticker is downloaded only once.
    start, end :
        Date bounds forwarded unchanged to ``ticker_df``.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled by the unique tickers; each cell is
        the correlation of the two tickers' period-over-period percentage
        changes in 'Close'.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order.
    tickers = list(dict.fromkeys(stocks))

    # Returns are computed on each ticker's own history *before* the series
    # are combined, so tickers with different date coverage are aligned on
    # their index instead of raising a length mismatch.
    returns = pd.DataFrame(
        {ticker: ticker_df(ticker, start, end)['Close'].pct_change() for ticker in tickers}
    )

    # DataFrame.corr correlates every column pair over the rows both share,
    # which is what the per-pair Series.corr calls did. Building the matrix in
    # one step also avoids chained-indexing assignment, which pandas
    # Copy-on-Write silently ignores.
    return returns.corr()



#takes holdings {'ticker': allocation}, returns simulation of portfolio
def sim_port(holdings, start, end):
    """Simulate a buy-and-hold portfolio from per-ticker starting allocations.

    Parameters
    ----------
    holdings : dict
        Maps ticker -> starting allocation, i.e. the simulated value of that
        position on the first row.
    start, end :
        Date bounds forwarded unchanged to ``ticker_df``.

    Returns
    -------
    pandas.DataFrame
        Indexed like the 'SPY' frame returned by ``ticker_df``. For every
        ticker it holds '<ticker> close', '<ticker> pct change' and
        '<ticker> sim close'; the final 'Close' column is the row-wise sum of
        all '<ticker> sim close' columns.
    """
    # SPY is fetched only to supply the row index; its own close is discarded.
    portfolio = ticker_df('SPY', start, end).drop(columns='Close')

    for stock in holdings:
        # Select the 'Close' Series explicitly rather than assigning a whole
        # DataFrame to a single column.
        portfolio[stock + ' close'] = ticker_df(stock, start, end)['Close']
        portfolio[stock + ' pct change'] = portfolio[stock + ' close'].pct_change()

    for stock, allocation in holdings.items():
        # value[0] = allocation; value[i] = value[i-1] * (1 + pct_change[i]).
        # Seeding the first growth factor with the allocation turns that
        # recurrence into a cumulative product with the same multiplication
        # order. skipna=False keeps a missing return propagating forward.
        growth = 1 + portfolio[stock + ' pct change']
        if len(growth):
            growth.iloc[0] = allocation
        portfolio[stock + ' sim close'] = growth.cumprod(skipna=False)

    # Accumulate position values in holdings order, starting from 0.0.
    total = pd.Series(0.0, index=portfolio.index)
    for stock in holdings:
        total = total + portfolio[stock + ' sim close']
    portfolio['Close'] = total

    return portfolio

# Takes a list of ticker strings; returns {ticker: 'Close' column from ticker_df}.
def asset_dict(stocks, start, end):
    """Map each ticker in ``stocks`` to the 'Close' column of its ``ticker_df`` frame.

    ``start`` and ``end`` are forwarded unchanged to ``ticker_df``. A ticker
    listed more than once is fetched again and overwrites its earlier entry.
    """
    return {symbol: ticker_df(symbol, start, end)['Close'] for symbol in stocks}

# Takes a list of ticker strings; returns {ticker: plain list of 'Close' values}.
def asset_timeseries(stocks, start, end):
    """Map each ticker in ``stocks`` to a plain list of its 'Close' values.

    Unlike ``asset_dict`` the values are Python lists, so the index carried by
    the ``ticker_df`` frame is dropped. ``start`` and ``end`` are forwarded
    unchanged to ``ticker_df``.
    """
    return {symbol: list(ticker_df(symbol, start, end)['Close']) for symbol in stocks}


def series_corr(ser1, ser2):
    """Correlate the period-over-period percentage changes of two series."""
    changes_1 = ser1.pct_change()
    changes_2 = ser2.pct_change()
    return changes_1.corr(changes_2)

def create_corr_matrix(stock_dict):
    """Return the pairwise correlation matrix of returns for named price series.

    Parameters
    ----------
    stock_dict : dict
        Maps a label to a price series (anything with ``pct_change``).

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled by the dict's keys; each cell is the
        correlation of the two series' period-over-period percentage changes.
    """
    # Take returns per series first, then combine: DataFrame.corr correlates
    # every column pair over the rows both share, matching a Series.corr call
    # per pair. Building the matrix in one step avoids chained-indexing
    # assignment, which pandas Copy-on-Write silently ignores (leaving
    # price-level correlations in place).
    returns = pd.DataFrame(
        {label: prices.pct_change() for label, prices in stock_dict.items()}
    )
    return returns.corr()


In [10]:
# Date window used for every download in the cells below.
start = dt.datetime(2018,1,1)
end = dt.datetime(2020,7,7)
# Tickers to compare. 'XRT' used to appear twice, which only triggered a
# redundant download; the resulting matrix has one row/column per unique ticker.
stocks = ['SPY', 'IWM', 'QQQ', 'GDX', 'GDXJ', 'GLD', 'SLV', 'USO', 'XLK', 'XLE', 'XRT', 'XLU', 'XLF','XLI','XLV']

get_correlation(stocks, start, end)

Unnamed: 0,SPY,IWM,QQQ,GDX,GDXJ,GLD,SLV,USO,XLK,XLE,XRT,XLU,XLF,XLI,XLV
SPY,1.0,0.917689,0.947217,0.101234,0.200278,0.027448,0.241528,0.399859,0.955246,0.804302,0.833209,0.696182,0.917887,0.932206,0.911848
IWM,0.917689,1.0,0.839013,0.117185,0.206882,0.031732,0.275883,0.377697,0.843867,0.828902,0.892533,0.610986,0.898629,0.90727,0.79896
QQQ,0.947217,0.839013,1.0,0.075948,0.171765,0.034475,0.230178,0.352865,0.982011,0.680796,0.758116,0.574074,0.792264,0.818196,0.860064
GDX,0.101234,0.117185,0.075948,1.0,0.951964,0.689546,0.549139,0.105775,0.057174,0.142957,0.094093,0.181447,0.059256,0.101133,0.069528
GDXJ,0.200278,0.206882,0.171765,0.951964,1.0,0.69051,0.55736,0.112589,0.153311,0.203598,0.187673,0.276782,0.154042,0.189619,0.155521
GLD,0.027448,0.031732,0.034475,0.689546,0.69051,1.0,0.762517,-0.020073,0.021739,0.002652,-0.013912,0.179682,-0.03274,-0.004374,0.034392
SLV,0.241528,0.275883,0.230178,0.549139,0.55736,0.762517,1.0,0.20802,0.222577,0.257838,0.213383,0.231704,0.193574,0.230904,0.192895
USO,0.399859,0.377697,0.352865,0.105775,0.112589,-0.020073,0.20802,1.0,0.360969,0.576703,0.314931,0.14623,0.37786,0.39385,0.309873
XLK,0.955246,0.843867,0.982011,0.057174,0.153311,0.021739,0.222577,0.360969,1.0,0.704558,0.758415,0.594691,0.819797,0.840364,0.856698
XLE,0.804302,0.828902,0.680796,0.142957,0.203598,0.002652,0.257838,0.576703,0.704558,1.0,0.727531,0.514771,0.824572,0.828394,0.676118


In [None]:
def get_correlation(stocks, start, end):
    """Return the pairwise correlation matrix of returns for a list of tickers.

    NOTE(review): this cell re-defines ``get_correlation`` and, once run,
    shadows any earlier definition of the same name — consider keeping only
    one copy in the notebook.

    Parameters
    ----------
    stocks : iterable of str
        Ticker symbols. Repeated symbols are collapsed (first occurrence wins
        for ordering), so each ticker is downloaded only once.
    start, end :
        Date bounds forwarded unchanged to ``ticker_df``.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled by the unique tickers; each cell is
        the correlation of the two tickers' period-over-period percentage
        changes in 'Close'.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order.
    tickers = list(dict.fromkeys(stocks))

    # Returns are computed per ticker before combining, so histories with
    # different date coverage are aligned on their index.
    returns = pd.DataFrame(
        {ticker: ticker_df(ticker, start, end)['Close'].pct_change() for ticker in tickers}
    )

    # The previous body filled the matrix via chained indexing and never
    # returned it, so callers received None.
    return returns.corr()

In [25]:
# Tickers to load. 'XRT' used to appear twice, which only caused a redundant
# download because asset_dict keys its result by ticker.
stocks = ['SPY', 'IWM', 'QQQ', 'GDX', 'GDXJ', 'GLD', 'SLV', 'USO', 'XLK', 'XLE', 'XRT', 'XLU', 'XLF','XLI','XLV']
# {ticker: 'Close' series}; relies on `start` / `end` from an earlier cell.
stock_dict = asset_dict(stocks, start, end)

In [26]:
# Add the portfolio's 'Close' series to the dict under the key 'portfolio'.
# NOTE(review): `portfolio` is not defined in any visible cell — presumably the
# frame returned by sim_port(...); confirm and add the defining cell so the
# notebook survives Restart & Run All.
stock_dict['portfolio'] = portfolio['Close']

In [27]:
# Combine every series in stock_dict into one frame and take the price-level
# correlation; this only seeds the matrix layout (rows/columns per key).
# The columns previously came from `ETF_dict`, which is not defined in the
# visible notebook; stock_dict's own keys match how the matrix is indexed below.
data = pd.DataFrame(data = stock_dict, columns = stock_dict.keys())
corr_matrix = data.corr()

In [38]:
# Overwrite every cell with the correlation of the two series' returns.
# A single .loc[row, column] write replaces the chained corr_matrix[col][row]
# assignment, which pandas Copy-on-Write silently ignores.
for stock1 in stock_dict.keys():
    for stock2 in stock_dict.keys():
        corr_matrix.loc[stock2, stock1] = series_corr(stock_dict[stock1], stock_dict[stock2])

In [35]:
# Spot check of a single pair: return correlation between QQQ and XLK.
series_corr(stock_dict['QQQ'], stock_dict['XLK'])

0.9820108760895104

In [29]:
# NOTE(review): a bare name only displays the function's repr; this looks like
# leftover inspection — consider removing the cell.
stock_correlation

<function __main__.stock_correlation(stock1, stock2)>