In [70]:
# Import package dependencies
import pandas as pd  # for dealing with data
import numpy as np
import matplotlib.pyplot as plt  # for plotting
import seaborn as sns  # for making plots look nicer
sns.set()  # implementing Seaborn's style and themes
from scipy.stats import linregress

In [59]:
# Load the AAPL / NASDAQ price history from CSV.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run on another machine after pointing it at a local copy of the file.
BETA_DATA_PATH = '/Users/anaghabhole/Documents/Projects/Investment Analysis/beta_data.csv'
df = pd.read_csv(BETA_DATA_PATH)
df

Unnamed: 0,Date,aapl_price,nasdaq_price
0,5/23/12,17.422821,2850.120117
1,5/24/12,17.262812,2839.379883
2,5/25/12,17.170284,2837.530029
3,5/29/12,17.475040,2870.989990
4,5/30/12,17.685741,2837.360107
...,...,...,...
2511,5/16/22,145.539993,11662.790040
2512,5/17/22,149.240005,11984.519530
2513,5/18/22,140.820007,11418.150390
2514,5/19/22,137.350006,11388.500000


In [60]:
# Use the 'Date' column as the index so the dates label the rows (and the
# x-axis of any plot) instead of the default integer positions.
# The guard makes this cell safe to re-run: once 'Date' has become the index it
# is no longer a column, and calling set_index('Date') again raises KeyError.
if 'Date' in df.columns:
    df = df.set_index('Date')
df.head()

Unnamed: 0_level_0,aapl_price,nasdaq_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
5/23/12,17.422821,2850.120117
5/24/12,17.262812,2839.379883
5/25/12,17.170284,2837.530029
5/29/12,17.47504,2870.98999
5/30/12,17.685741,2837.360107


In [61]:
# One-period simple returns for every price column: price_t / price_(t-1) - 1.
# The first row has no previous price, so its returns are NaN.
returns_df = df.pct_change(periods=1)
returns_df.head(5)

Unnamed: 0_level_0,aapl_price,nasdaq_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
5/23/12,,
5/24/12,-0.009184,-0.003768
5/25/12,-0.00536,-0.000651
5/29/12,0.017749,0.011792
5/30/12,0.012057,-0.011714


In [5]:
# Discard rows that contain a missing return (e.g. the leading NaN row
# produced by pct_change) and keep the cleaned frame under the same name.
returns_df = returns_df.dropna()
returns_df

Unnamed: 0_level_0,aapl_price,nasdaq_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
5/24/12,-0.009184,-0.003768
5/25/12,-0.005360,-0.000651
5/29/12,0.017749,0.011792
5/30/12,0.012057,-0.011714
5/31/12,-0.002486,-0.003531
...,...,...
5/16/22,-0.010672,-0.012047
5/17/22,0.025423,0.027586
5/18/22,-0.056419,-0.047258
5/19/22,-0.024641,-0.002597


In [68]:
# create function here:
def getBeta(df, stock_price_col, market_price_col):
    """Estimate a stock's beta against a market index from two price columns.

    Prices are converted to one-period simple returns. Beta is then the sample
    covariance of the stock and market returns divided by the sample variance
    of the market returns, both using an ``n - 1`` denominator.

    Only the two named columns are read, so other columns in ``df`` (for
    example an unparsed date column) cannot break the return calculation or
    cause otherwise valid rows to be discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data. Each row is compared with the row immediately before it,
        so rows are expected to be in chronological order.
    stock_price_col : str
        Name of the column holding the stock's prices.
    market_price_col : str
        Name of the column holding the market index's prices.

    Returns
    -------
    tuple
        ``(market_variance, covariance, beta)`` where ``market_variance`` is
        the sample variance of the market returns, ``covariance`` is the sample
        covariance between stock and market returns, and ``beta`` is their
        ratio.

    Raises
    ------
    KeyError
        If either column name is not present in ``df``.
    ValueError
        If fewer than two periods have a return for both series, in which case
        the ``n - 1`` denominator would be zero or negative.
    """
    # One-period simple returns, computed per column so unrelated columns in
    # ``df`` are never touched.
    stock_returns = df[stock_price_col].pct_change(1)
    market_returns = df[market_price_col].pct_change(1)

    # Keep only the periods where both returns exist; this always removes the
    # first row, which has no previous price to compare against.
    paired = stock_returns.notna() & market_returns.notna()
    stock_returns = stock_returns[paired]
    market_returns = market_returns[paired]

    n_obs = len(market_returns)
    if n_obs < 2:
        raise ValueError(
            "getBeta needs at least two paired returns (three price rows); "
            "got {}".format(n_obs)
        )

    # Deviations of each return series from its own mean.
    stock_dev = stock_returns - stock_returns.mean()
    market_dev = market_returns - market_returns.mean()

    # Sample variance of the market and sample covariance of stock vs market.
    market_variance = (market_dev ** 2).sum() / (n_obs - 1)
    covariance = (stock_dev * market_dev).sum() / (n_obs - 1)

    beta = covariance / market_variance

    return market_variance, covariance, beta
    
    

In [69]:
# Beta of AAPL against the NASDAQ index; the cell displays the tuple
# returned by getBeta (variance, covariance, beta).
getBeta(df, stock_price_col='aapl_price', market_price_col='nasdaq_price')

(0.00015251403716210567, 0.00016606664904514131, 1.0888614066955078)

In [66]:
# Calculating beta using numpy:

# Drop any rows with a missing return so both series line up.
returns_df = returns_df.dropna()

# Sample variance of the market returns. ddof=1 matches the (n - 1)
# normalisation that np.cov uses by default; np.var's own default (ddof=0) is
# the population variance, which would make cov / var inconsistent.
var_nasdaq = np.var(returns_df['nasdaq_price'], ddof=1)

# 2x2 sample covariance matrix: the variances sit on the diagonal and the
# AAPL/NASDAQ covariance on the off-diagonal.
cov_aapl_nasdaq = np.cov(returns_df['aapl_price'], returns_df['nasdaq_price'])

# Beta = cov(stock, market) / var(market).
beta_numpy = cov_aapl_nasdaq[0, 1] / var_nasdaq

cov_aapl_nasdaq

array([[0.0003225 , 0.00016607],
       [0.00016607, 0.00015251]])

In [72]:
# Least-squares regression of AAPL returns (y) on NASDAQ returns (x);
# the slope of the fitted line is the beta estimate.
market_returns = returns_df['nasdaq_price']
stock_returns = returns_df['aapl_price']
linregress(x=market_returns, y=stock_returns)

LinregressResult(slope=1.088861406695505, intercept=0.00030156557836544273, rvalue=0.7487982102913376, pvalue=0.0, stderr=0.019226169985479855, intercept_stderr=0.00023769469041410194)