In [1]:
# Import libraries and dependencies
import numpy as np
import pandas as pd
from pathlib import Path
%matplotlib inline

### Read CSVs in as DataFrames

In [2]:
# Read the daily closing prices of AMZN, set the `date` as a datetime index.
# `parse_dates=True` parses the index column as dates; the deprecated
# `infer_datetime_format` flag is intentionally omitted (pandas >= 2.0 warns
# on it and already infers a consistent format by default).
amzn_data = Path("../Resources/amzn_data.csv")
amzn_df = pd.read_csv(amzn_data, index_col="date", parse_dates=True)

# Read the daily closing prices of S&P 500, set the `date` as a datetime index
sp500_data = Path("../Resources/sp500_data.csv")
sp500_df = pd.read_csv(sp500_data, index_col="date", parse_dates=True)

# Preview the first rows to confirm the index and price column loaded
sp500_df.head()

Unnamed: 0_level_0,close
date,Unnamed: 1_level_1
2014-05-20,1872.829956
2014-05-21,1888.030029
2014-05-22,1892.48999
2014-05-23,1900.530029
2014-05-27,1911.910034


### Combine DataFrames, Sort Index, and Rename Columns

In [3]:
# Create a new dataframe where the columns are the closing prices for each ticker.
# The inner join keeps only the dates present in both source frames.
assets = [amzn_df, sp500_df]
portfolio_df = pd.concat(assets, axis=1, join='inner')

# Set column names to 'AMZN' and 'SP500' (same order as `assets`)
columns = ['AMZN', 'SP500']
portfolio_df.columns = columns

# Sort datetime index in ascending order (past to present).
# `sort_index` targets the index directly instead of relying on `sort_values`
# resolving 'date' as an index-level name, and reassigning (rather than
# mutating in place) keeps the cell safe to re-run.
portfolio_df = portfolio_df.sort_index()

# Display the combined frame (pandas truncates long frames in the output)
portfolio_df

Unnamed: 0_level_0,AMZN,SP500
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-05-20,301.19,1872.829956
2014-05-21,305.01,1888.030029
2014-05-22,304.91,1892.489990
2014-05-23,312.24,1900.530029
2014-05-27,310.82,1911.910034
...,...,...
2019-05-13,1822.68,2811.870117
2019-05-14,1840.12,2834.409912
2019-05-15,1871.15,2850.959961
2019-05-16,1907.57,2876.320068


### Calculate Daily Returns

In [4]:
# Daily return = percentage change of each column's closing price versus the
# previous row, computed with `pct_change`
price_changes = portfolio_df.pct_change()
# Drop rows containing NaN (e.g. the first row, which has no prior price)
daily_returns = price_changes.dropna()

### Calculate Covariance of AMZN returns vs. S&P 500 returns

In [5]:
# Covariance of AMZN daily returns with S&P 500 daily returns, over all rows
amzn_returns = daily_returns['AMZN']
sp500_returns = daily_returns['SP500']
covariance = amzn_returns.cov(sp500_returns)
covariance

9.344294535060871e-05

### Calculate Variance of S&P 500 returns

In [6]:
# Variance of all S&P 500 daily returns (the market benchmark for beta).
# ddof=1 is the pandas default (sample variance); it is spelled out here only
# to make the estimator explicit.
variance = daily_returns['SP500'].var(ddof=1)
variance

7.030950113534627e-05

### Beta of AMZN vs Correlation to SP500

In [10]:
# Beta of AMZN over all daily returns:
#   beta = Cov(AMZN, SP500) / Var(SP500)
amzn_beta = covariance / variance
amzn_beta

1.3290230173974695

In [15]:
# Pairwise Pearson correlation coefficients between the columns of the
# daily_returns dataframe
correlation = daily_returns.corr(method="pearson")

Note that the Beta value and the correlation do **not** match! Beta is a measure of volatility relative to the market. We would conclude that this stock is approximately 33% more volatile than the market (Beta of 1.329). The Pearson Correlation (which is R, not R-squared) is an indication of the extent of the linear relationship between AMZN and the S&P 500. The two are related by $\beta = R \cdot \sigma_{AMZN} / \sigma_{SP500}$, so they are equal only when both return series have the same standard deviation.

In [16]:
# Display the correlation matrix computed in the previous cell
correlation

Unnamed: 0,AMZN,SP500
AMZN,1.0,0.585244
SP500,0.585244,1.0
