In [62]:
%pip install yfinance
import yfinance as yf
import pandas as pd
import numpy as np

Note: you may need to restart the kernel to use updated packages.


In [63]:
tickers = 'AAPL,TSLA,MSFT,'
indices = 'SPY,'

md = yf.download(
    tickers = indices + tickers,
    group_by='ticker',
    start='2020-05-19', end='2021-05-19',
    interval = '1d',
    auto_adjust = True,
    threads = True
    )

print(f'Downloaded {len(md)} days of market data for indices : {indices} and tickers : {tickers}')

md['SPY'].tail()
print(f"Index : {md.index[:5]}...")
print(f"Columns : {md.columns[:5]}...")



[*********************100%***********************]  4 of 4 completed
Downloaded 253 days of market data for indices : SPY, and tickers : AAPL,TSLA,MSFT,
Index : DatetimeIndex(['2020-05-18', '2020-05-19', '2020-05-20', '2020-05-21',
               '2020-05-22'],
              dtype='datetime64[ns]', name='Date', freq=None)...
Columns : MultiIndex([('TSLA',   'Open'),
            ('TSLA',   'High'),
            ('TSLA',    'Low'),
            ('TSLA',  'Close'),
            ('TSLA', 'Volume')],
           )...


In [64]:
def getPriceAndReturn(md):
    priceField = 'Close'
    prices = pd.DataFrame()
    returns = pd.DataFrame()
    for ticker, kind in md.columns:
        if kind == priceField:
            prices[ticker] = md[ticker][[priceField]].values.reshape(1, -1).ravel()
            returns[ticker] = prices[ticker].pct_change()
    return prices, returns

prices, returns = getPriceAndReturn(md)

print(prices['SPY'].head())
print(returns['SPY'].head())



0    290.343842
1    287.361664
2    292.243408
3    290.225769
4    290.776947
Name: SPY, dtype: float64
0         NaN
1   -0.010271
2    0.016988
3   -0.006904
4    0.001899
Name: SPY, dtype: float64


In [141]:
def getStats(prices, returns):
    mean = []
    std = []
    sharpe = []
    totReturn = []

    for ticker in returns.columns:
        pr = prices[ticker]
        ret = returns[ticker]

        totReturn.append((pr.iloc[-1] - pr.iloc[0]) / pr.iloc[0])
        mean.append(ret.mean())
        std.append(ret.std())

        sharpe.append(mean[-1] / std[-1] * (252**0.5)) #TODO consider adding risk free rate

    return pd.DataFrame.from_dict(
        {'DailyMean' : mean, 'DailyStd' : std, 'AnualSR' : sharpe, 'TotalReturn' : totReturn},
        orient='index', columns=returns.columns)

print(f"Daily return SPY mean : {stats['SPY']['DailyMean']}")
print(f"Daily return SPY std  : {stats['SPY']['DailyStd']}")

stats = getStats(prices, returns)
print(f"Basic return stats (mean, std, sharpe ratio, total return) :\n{stats}")

Daily return SPY mean : 0.0014474968820360192
Daily return SPY std  : 0.010791323551446576
Basic return stats (mean, std, sharpe ratio, total return) :
                 TSLA      MSFT       SPY      AAPL
DailyMean    0.006091  0.001280  0.001447  0.002102
DailyStd     0.046060  0.017525  0.010791  0.022180
AnualSR      2.099086  1.159051  2.129331  1.504477
TotalReturn  2.551184  0.327944  0.418801  0.596282


In [142]:
def getCovAndCorr(returns, stats):
    cov = []
    corr = []

    for i in range(len(returns.columns)):
        covColumn = []
        corrColumn = []
        
        iname = returns.columns[i]
        for j in range(i):
            covColumn.append(cov[j][i])
            corrColumn.append(corr[j][i])
        iStd = stats[iname]["DailyStd"]
        covColumn.append(iStd * iStd)
        corrColumn.append(1)
        for j in range(i+1, len(returns.columns)):
            jname = returns.columns[j]
            jStd = stats[jname]["DailyStd"]
            tickerCov = returns[iname].cov(returns[jname])
            covColumn.append(tickerCov)
            corrColumn.append(tickerCov / (iStd * jStd))
        cov.append(covColumn)
        corr.append(corrColumn)

    covDf = pd.DataFrame(cov, index = returns.columns, columns = returns.columns)
    corrDf = pd.DataFrame(corr, index = returns.columns, columns = returns.columns)
    return covDf, corrDf

cov, corr = getCovAndCorr(returns, stats)

print(f"Correlation matrix :\n{corr}\n")
print(f"Test Correlation TSLA/MSFT: {returns['TSLA'].cov(returns['MSFT'])/(stats['TSLA']['DailyStd'] * stats['MSFT']['DailyStd'])}")
'''
print(f"Covariance :\n{cov}\n")
print(f"Test Covariance TSLA/MSFT: {returns['TSLA'].cov(returns['MSFT'])}")
print(f"Covarience SPY column :\n{cov['SPY']}")
print(f"Covarience TSLA row :\n{cov.loc['TSLA']}")
print(f"corr.loc['SPY','TSLA'] : {corr.loc['SPY','TSLA']}")
'''
print(f"corr.loc['TSLA','SPY'] : {corr.loc['TSLA','SPY']}")


Correlation matrix :
          TSLA      MSFT       SPY      AAPL
TSLA  1.000000  0.488779  0.464253  0.477201
MSFT  0.488779  1.000000  0.735201  0.700225
SPY   0.464253  0.735201  1.000000  0.674270
AAPL  0.477201  0.700225  0.674270  1.000000

Test Correlation TSLA/MSFT: 0.48877904276818845
corr.loc['TSLA','SPY'] : 0.4642533884028333


In [158]:
def getAlphaBetaBenchmark(indices, tickers, cov, stats):
    #TODO consider adding risk free rate
    benchmark = {}
    for index in indices:
        alpha = []
        beta = []
        for ticker in tickers:
            indexVarience = cov[index][index]
            indexTickerCovarience = cov[index][ticker]
            indexTickerBeta = indexTickerCovarience / indexVarience
            indexTickerAlpha = stats[ticker]['TotalReturn'] - indexTickerBeta * stats[index]['TotalReturn']
            beta.append(indexTickerBeta)
            alpha.append(indexTickerAlpha)

        benchmark[index] = pd.DataFrame.from_dict({'alpha' : alpha, 'beta' : beta}, orient='index', columns=tickers)
    return benchmark

abBench = getAlphaBetaBenchmark(
    list(filter(None, indices.split(','))),
    list(filter(None, tickers.split(','))),
    cov, stats)

abBench['SPY']

Unnamed: 0,AAPL,TSLA,MSFT
alpha,0.015872,1.721312,-0.172086
beta,1.385885,1.981545,1.193959
