To Do List:

- Incorporate CAPM Model
- Incorporate more IEX data into analysis
- Visualization in webapp

In [1]:
import pandas as pd
import numpy as np
import math
import scipy.stats as stats
import json
import requests

from functools import reduce

from matplotlib import pyplot as plt

import datetime as dt
from datetime import datetime, date, time, timedelta
from dateutil.relativedelta import relativedelta

from config import api_token

In [2]:
# Scrape the list of Dow component stocks from Yahoo Finance

dow_components_url = 'https://finance.yahoo.com/quote/%5EDJI/components?p=%5EDJI'

# read_html returns a list with one DataFrame per table on the page; keep the first
scraped_tables = pd.read_html(dow_components_url)
dow_components_table = scraped_tables[0]

dow_components_table


Unnamed: 0,Symbol,Company Name,Last Price,Change,% Change,Volume
0,PG,The Procter & Gamble Company,122.62,0.04,+0.03%,1792683
1,INTC,Intel Corporation,60.12,0.02,+0.04%,5598061
2,UNH,UnitedHealth Group Incorporated,289.41,-0.13,-0.04%,970926
3,HD,"The Home Depot, Inc.",218.75,-0.18,-0.08%,1603209
4,PFE,Pfizer Inc.,38.9,-0.03,-0.09%,4365351
5,WMT,Walmart Inc.,117.75,-0.14,-0.12%,1792267
6,IBM,International Business Machines Corporation,134.17,-0.17,-0.12%,826713
7,GS,"The Goldman Sachs Group, Inc.",231.29,-0.29,-0.13%,1029966
8,MSFT,Microsoft Corporation,158.41,-0.21,-0.13%,7039615
9,WBA,"Walgreens Boots Alliance, Inc.",59.19,0.11,+0.19%,2539062


In [3]:
# Market (DIA) 1yr prices and daily log returns DataFrame

dow_stocks = dow_components_table["Symbol"]
# dow_company = dow_components_table["Company Name"]
market_basket = "DIA"

base_url = "https://sandbox.iexapis.com/stable/stock/"

# Query link for market API call (1 Year Data)
market_url = base_url + market_basket + "/chart/1y?token=" + api_token

# Fetch the market data. Fail loudly on an HTTP error instead of trying to
# parse an error body as price records, and never hang on a dead connection.
market_response = requests.get(market_url, timeout=30)
market_response.raise_for_status()
iex_market_response = market_response.json()

# One entry per record in the API response
market_date = [record["date"] for record in iex_market_response]
market_close = [record["close"] for record in iex_market_response]

dow_df = pd.DataFrame({"Date": market_date,
                       "DIA": market_close})

# Daily log return: log(close_t) - log(close_{t-1}); the first row has no
# previous close and is therefore NaN
dow_df["DIA_log_returns"] = np.log(dow_df["DIA"]).diff()

dow_df

    

Unnamed: 0,Date,DIA,DIA_log_returns
0,2019-01-04,243.13,
1,2019-01-07,245.76,0.010759
2,2019-01-08,241.25,-0.018522
3,2019-01-09,240.26,-0.004112
4,2019-01-10,240.00,-0.001083
...,...,...,...
247,2019-12-27,289.34,0.000864
248,2019-12-30,284.91,-0.015429
249,2019-12-31,289.00,0.014253
250,2020-01-02,294.88,0.020142


In [4]:
# Function to return Stock 1yr Prices and Daily Returns

def returns(stock, timeout=30):
    """Fetch one year of daily closes for ``stock`` and compute daily log returns.

    Parameters
    ----------
    stock : str
        Ticker symbol, used both in the API path and as the price column name.
    timeout : float, optional
        Seconds to wait for the API before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``<stock>`` (close) and ``<stock>_log_returns``.
        The first row's return is NaN because it has no previous close.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    symbol = str(stock)

    base_url = "https://sandbox.iexapis.com/stable/stock/"

    # Query link for stock API call (1 Year Data); the token is passed as a
    # query parameter so it is URL-encoded rather than concatenated by hand
    stock_url = base_url + symbol + "/chart/1y"
    response = requests.get(stock_url, params={"token": api_token}, timeout=timeout)

    # Surface HTTP errors here instead of failing later while indexing an
    # error payload as if it were a list of price records
    response.raise_for_status()
    iex_stock_response = response.json()

    # Build the stock DataFrame from the "date" and "close" fields of each record
    stock_data = pd.DataFrame({
        "Date": [record["date"] for record in iex_stock_response],
        symbol: [record["close"] for record in iex_stock_response],
    })

    # Daily log return: log(close_t) - log(close_{t-1})
    stock_return_str = symbol + "_" + "log_returns"
    stock_data[stock_return_str] = np.log(stock_data[symbol]).diff()

    return stock_data
    

# Smoke test: call returns() for a single symbol (AAPL) and display the resulting DataFrame
returns("AAPL")
    


Unnamed: 0,Date,AAPL,AAPL_log_returns
0,2019-01-04,153.77,
1,2019-01-07,150.62,-0.020698
2,2019-01-08,155.66,0.032914
3,2019-01-09,153.83,-0.011826
4,2019-01-10,160.00,0.039326
...,...,...,...
247,2019-12-27,298.50,0.024588
248,2019-12-30,295.97,-0.008512
249,2019-12-31,300.37,0.014757
250,2020-01-02,303.28,0.009641


In [5]:
# Iterate through the Dow component symbols, collecting one
# prices-and-returns DataFrame per component
dataframes = [returns(symbol) for symbol in dow_stocks]

dataframes
    

[           Date      PG  PG_log_returns
 0    2019-01-04   93.71             NaN
 1    2019-01-07   92.99       -0.007713
 2    2019-01-08   93.38        0.004185
 3    2019-01-09   92.19       -0.012826
 4    2019-01-10   91.47       -0.007841
 ..          ...     ...             ...
 247  2019-12-27  131.83        0.024651
 248  2019-12-30  125.68       -0.047774
 249  2019-12-31  128.90        0.025298
 250  2020-01-02  126.67       -0.017452
 251  2020-01-03  127.23        0.004411
 
 [252 rows x 3 columns],            Date   INTC  INTC_log_returns
 0    2019-01-04  47.39               NaN
 1    2019-01-07  47.64          0.005262
 2    2019-01-08  49.08          0.029779
 3    2019-01-09  49.65          0.011547
 4    2019-01-10  49.46         -0.003834
 ..          ...    ...               ...
 247  2019-12-27  62.80          0.033682
 248  2019-12-30  60.06         -0.044611
 249  2019-12-31  59.87         -0.003169
 250  2020-01-02  61.07          0.019845
 251  2020-01-03  63

In [97]:
########  Merge all stock DataFrames into one wide frame keyed on Date

def _outer_join_on_date(left, right):
    """Outer-join two frames on their shared "Date" column."""
    return left.merge(right, on=['Date'], how='outer')


stock_dataframes = list(dataframes)

# Fold the list pairwise, left to right, into a single merged frame
stock_returns_dfs = reduce(_outer_join_on_date, stock_dataframes)
stock_returns_dfs



Unnamed: 0,Date,PG,PG_log_returns,INTC,INTC_log_returns,UNH,UNH_log_returns,HD,HD_log_returns,PFE,...,BA,BA_log_returns,AXP,AXP_log_returns,JNJ,JNJ_log_returns,MCD,MCD_log_returns,DIS,DIS_log_returns
0,2019-01-04,93.71,,47.39,,246.49,,177.22,,44.00,...,328.79,,100.60,,130.22,,180.58,,114.30,
1,2019-01-07,92.99,-0.007713,47.64,0.005262,246.92,0.001743,181.66,0.024745,44.05,...,339.56,0.032231,98.83,-0.017751,129.34,-0.006781,185.60,0.027420,111.03,-0.029026
2,2019-01-08,93.38,0.004185,49.08,0.029779,247.52,0.002427,186.49,0.026241,44.50,...,343.75,0.012264,99.38,0.005550,130.57,0.009465,185.80,0.001077,115.60,0.040336
3,2019-01-09,92.19,-0.012826,49.65,0.011547,251.69,0.016707,180.48,-0.032758,43.51,...,349.77,0.017361,100.99,0.016071,133.26,0.020393,183.45,-0.012729,113.43,-0.018950
4,2019-01-10,91.47,-0.007841,49.46,-0.003834,246.58,-0.020512,186.49,0.032758,42.47,...,356.41,0.018806,98.91,-0.020811,130.92,-0.017716,185.81,0.012782,113.00,-0.003798
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,2019-12-27,131.83,0.024651,62.80,0.033682,310.75,0.027139,227.03,-0.000396,40.67,...,343.47,0.031949,129.53,-0.006234,146.85,-0.026742,204.54,0.009678,149.65,-0.020176
248,2019-12-30,125.68,-0.047774,60.06,-0.044611,301.50,-0.030219,225.01,-0.008937,39.98,...,330.70,-0.037888,125.60,-0.030810,146.00,-0.005805,203.65,-0.004361,148.24,-0.009467
249,2019-12-31,128.90,0.025298,59.87,-0.003169,294.04,-0.025054,222.78,-0.009960,39.30,...,326.73,-0.012077,127.57,0.015563,152.85,0.045850,207.23,0.017426,145.59,-0.018038
250,2020-01-02,126.67,-0.017452,61.07,0.019845,302.80,0.029357,221.29,-0.006711,39.32,...,343.42,0.049820,126.79,-0.006133,146.73,-0.040863,201.73,-0.026899,153.00,0.049643


In [112]:
# Join the market frame with the merged stock frame, keeping only dates present in both
dow_returns = dow_df.merge(stock_returns_dfs, on="Date", how="inner")

# Index by date, then discard the first row
dow_returns_df = dow_returns.set_index("Date")
final_dow_df = dow_returns_df.iloc[1:]
final_dow_df

Unnamed: 0_level_0,DIA,DIA_log_returns,PG,PG_log_returns,INTC,INTC_log_returns,UNH,UNH_log_returns,HD,HD_log_returns,...,BA,BA_log_returns,AXP,AXP_log_returns,JNJ,JNJ_log_returns,MCD,MCD_log_returns,DIS,DIS_log_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-07,245.76,0.010759,92.99,-0.007713,47.64,0.005262,246.92,0.001743,181.66,0.024745,...,339.56,0.032231,98.83,-0.017751,129.34,-0.006781,185.60,0.027420,111.03,-0.029026
2019-01-08,241.25,-0.018522,93.38,0.004185,49.08,0.029779,247.52,0.002427,186.49,0.026241,...,343.75,0.012264,99.38,0.005550,130.57,0.009465,185.80,0.001077,115.60,0.040336
2019-01-09,240.26,-0.004112,92.19,-0.012826,49.65,0.011547,251.69,0.016707,180.48,-0.032758,...,349.77,0.017361,100.99,0.016071,133.26,0.020393,183.45,-0.012729,113.43,-0.018950
2019-01-10,240.00,-0.001083,91.47,-0.007841,49.46,-0.003834,246.58,-0.020512,186.49,0.032758,...,356.41,0.018806,98.91,-0.020811,130.92,-0.017716,185.81,0.012782,113.00,-0.003798
2019-01-11,241.27,0.005278,93.72,0.024301,51.29,0.036332,260.03,0.053111,181.75,-0.025746,...,365.10,0.024090,103.46,0.044975,130.19,-0.005592,188.94,0.016705,118.24,0.045329
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-27,289.34,0.000864,131.83,0.024651,62.80,0.033682,310.75,0.027139,227.03,-0.000396,...,343.47,0.031949,129.53,-0.006234,146.85,-0.026742,204.54,0.009678,149.65,-0.020176
2019-12-30,284.91,-0.015429,125.68,-0.047774,60.06,-0.044611,301.50,-0.030219,225.01,-0.008937,...,330.70,-0.037888,125.60,-0.030810,146.00,-0.005805,203.65,-0.004361,148.24,-0.009467
2019-12-31,289.00,0.014253,128.90,0.025298,59.87,-0.003169,294.04,-0.025054,222.78,-0.009960,...,326.73,-0.012077,127.57,0.015563,152.85,0.045850,207.23,0.017426,145.59,-0.018038
2020-01-02,294.88,0.020142,126.67,-0.017452,61.07,0.019845,302.80,0.029357,221.29,-0.006711,...,343.42,0.049820,126.79,-0.006133,146.73,-0.040863,201.73,-0.026899,153.00,0.049643


In [99]:
# Market proxy first, followed by every Dow component symbol
symbol_list = ["DIA", *dow_stocks]

# Build the statistics DataFrame; the mean column starts as an empty-string placeholder
stat_data = pd.DataFrame(
    {
        "Stock": symbol_list,
        "Mean_of_Daily_Returns": "",
    }
)

stat_data.head()



Unnamed: 0,Stock,Mean_of_Daily_Returns
0,DIA,
1,PG,
2,INTC,
3,UNH,
4,HD,


In [100]:
# Number of trading days in sample: one per record in the market API response.
# Used further down as the annualisation factor for daily standard deviations.
trading_days = len(iex_market_response)
trading_days

252

In [113]:
# Iterating and printing column names

# DataFrame.items() yields (column name, column Series) pairs; it replaces
# iteritems(), which was removed in pandas 2.0
for columnName, columnData in final_dow_df.items():
    print('Column Name : ', columnName)
    #print('Column Contents : ', columnData.values)

Column Name :  DIA
Column Name :  DIA_log_returns
Column Name :  PG
Column Name :  PG_log_returns
Column Name :  INTC
Column Name :  INTC_log_returns
Column Name :  UNH
Column Name :  UNH_log_returns
Column Name :  HD
Column Name :  HD_log_returns
Column Name :  PFE
Column Name :  PFE_log_returns
Column Name :  WMT
Column Name :  WMT_log_returns
Column Name :  IBM
Column Name :  IBM_log_returns
Column Name :  GS
Column Name :  GS_log_returns
Column Name :  MSFT
Column Name :  MSFT_log_returns
Column Name :  WBA
Column Name :  WBA_log_returns
Column Name :  CVX
Column Name :  CVX_log_returns
Column Name :  KO
Column Name :  KO_log_returns
Column Name :  CSCO
Column Name :  CSCO_log_returns
Column Name :  VZ
Column Name :  VZ_log_returns
Column Name :  MRK
Column Name :  MRK_log_returns
Column Name :  MMM
Column Name :  MMM_log_returns
Column Name :  UTX
Column Name :  UTX_log_returns
Column Name :  TRV
Column Name :  TRV_log_returns
Column Name :  CAT
Column Name :  CAT_log_returns
Colu

In [114]:
# Daily log returns of the overall market (DIA) and their variance

market_returns = final_dow_df.loc[:, "DIA_log_returns"]

# Reuse the Series selected above rather than selecting the column a second time
market_variance = market_returns.var()

print(market_variance)


0.00042728237122261875


In [115]:
# Per-column statistics of the daily log returns, measured against the market (DIA)

means = []
variances = []
standard_deviations = []
annual_sds = []
covariances = []
betas = []
coefficients = []
systematic_risks = []
unsystematic_risks = []

# Price columns are named by the bare ticker; every other column is a
# "<ticker>_log_returns" column and is the one analysed below.
price_columns = set(symbol_list)

# DataFrame.items() replaces iteritems(), which was removed in pandas 2.0
for columnName, columnData in final_dow_df.items():

    # Skip raw price columns; statistics are computed on returns only
    if columnName in price_columns:
        continue

    # Calculate Stock Returns Mean
    mean = columnData.mean()
    means.append(mean)

    # Calculate Stock Returns Variance
    var = columnData.var()
    variances.append(var)

    # Calculate Stock Returns Standard Deviation
    sd = math.sqrt(var)
    standard_deviations.append(sd)

    # Calculate Annualized Standard Deviation
    sd_annual = sd * math.sqrt(trading_days)
    annual_sds.append(sd_annual)

    # Calculate Covariance with the market.
    # NOTE: np.cov does not skip NaN, so a column with missing returns
    # produces nan here (and in beta and the risk split derived from it).
    covars = np.cov(columnData, market_returns, ddof=1)[0][1]
    covariances.append(covars)

    # Beta = Covariance / Market Variance
    beta = covars / market_variance
    betas.append(beta)

    # Calculate Correlation Coefficient. The `ddof` argument of np.corrcoef
    # is deprecated and has no effect on the result, so it is not passed.
    corr_coef = np.corrcoef(columnData, market_returns)[0][1]
    coefficients.append(corr_coef)

    # Total Risk
    total_risk = var

    # Calculate Systematic Risk (% of total risk explained by the market)
    systematic = ((np.square(beta) * market_variance) / total_risk) * 100
    systematic_risks.append(systematic)

    # Calculate Unsystematic Risk (% of total risk not explained by the market)
    unsystematic = 100 - systematic
    unsystematic_risks.append(unsystematic)


# print(means)
# print(variances)
# print(standard_deviations)
# print(annual_sds)
print(covariances)

[0.00042728237122261885, -6.984947650602694e-06, 7.806156745637727e-05, 9.589510084568471e-05, 5.713459560405104e-05, 0.00011199155022501633, -9.175099313952683e-06, 1.3523339463379062e-05, 5.760943641417976e-05, 7.586405744094636e-05, 3.487169232090925e-05, 4.1942611227797874e-05, 2.278901991675222e-05, 8.575782166786134e-05, 1.4988642035022035e-05, 3.436299328597265e-05, 9.957433978990939e-05, 4.026370189426074e-05, 6.304443811870611e-05, 7.780267900677832e-05, 2.9864051788858493e-06, -5.69088440544536e-06, 3.2270632980836895e-05, nan, -5.233065666864509e-07, -5.058389621918774e-05, 9.579370992279871e-05, 3.84447932944663e-05, -2.0385804981597878e-05, -1.281127987903656e-05, -2.835484423250312e-05]




In [None]:
# # Total Risk
# total_risk = stock_var
# print("Total Risk: " + str(total_risk))

# # Systematic Risk (Proportion of total risk explained by market)
# systematic_risk = ((np.square(beta) * market_var) / total_risk) * 100
# print("Systematic Risk (%): " + str(systematic_risk))

# # Unsystematic Risk (Proportion of total risk unexplained by market)
# unsystematic_risk = 100 - systematic_risk
# print("Unsystematic Risk (%): " + str(unsystematic_risk))

In [116]:
# Attach each list of computed statistics to stat_data, in display order
computed_columns = [
    ("Mean_of_Daily_Returns", means),
    ("Variance_of_Daily_Returns", variances),
    ("Standard_Deviation_of_Daily_Returns", standard_deviations),
    ("Annual_Standard_Deviation", annual_sds),
    ("Covariance", covariances),
    ("Beta", betas),
    ("Correlation_Coefficient", coefficients),
    ("Systematic_Risk", systematic_risks),
    ("Unsystematic_Risk", unsystematic_risks),
]

for column_label, column_values in computed_columns:
    stat_data[column_label] = column_values

# DataFrame for Dow Components statistical data
stat_data

# Note that the statistics for symbol "DOW" have Null Values because data for the entire year cannot be pulled from IEX


Unnamed: 0,Stock,Mean_of_Daily_Returns,Variance_of_Daily_Returns,Standard_Deviation_of_Daily_Returns,Annual_Standard_Deviation,Covariance,Beta,Correlation_Coefficient,Systematic_Risk,Unsystematic_Risk
0,DIA,0.000832,0.000427,0.020671,0.328139,0.0004272824,1.0,1.0,100.0,-4.263256e-14
1,PG,0.001218,0.00042,0.020491,0.325289,-6.984948e-06,-0.016347,-0.016491,0.027194,99.97281
2,INTC,0.001134,0.000781,0.027944,0.4436,7.806157e-05,0.182693,0.135141,1.826317,98.17368
3,UNH,0.000822,0.000648,0.025454,0.404062,9.58951e-05,0.22443,0.18226,3.321865,96.67814
4,HD,0.000971,0.000613,0.024759,0.393033,5.71346e-05,0.133716,0.111638,1.246307,98.75369
5,PFE,-0.000411,0.000576,0.02401,0.381144,0.0001119916,0.262102,0.225652,5.091878,94.90812
6,WMT,0.000891,0.000482,0.021947,0.348401,-9.175099e-06,-0.021473,-0.020224,0.040902,99.9591
7,IBM,0.000579,0.000515,0.022691,0.360208,1.352334e-05,0.03165,0.028832,0.083128,99.91687
8,GS,0.001096,0.00062,0.024901,0.39529,5.760944e-05,0.134828,0.111923,1.252685,98.74731
9,MSFT,0.001932,0.000537,0.023175,0.367897,7.586406e-05,0.17755,0.158363,2.50787,97.49213


In [None]:
# # Convert DataFrame into Dictionary
# returns_dict = returns_df.to_dict(orient="records")
# returns_dict

In [None]:
# Inputs for the scatter chart and regression: market returns (x) vs. stock returns (y)
# NOTE(review): `returns_df` is not defined in any visible cell of this notebook —
# confirm where it comes from before running this cell on a fresh kernel.
x_values = returns_df["Market_log_returns"]
y_values = returns_df["Stock_log_returns"]

# Perform a linear regression of stock returns on market returns
vc_slope, vc_int, vc_r, vc_p, vc_std_err = stats.linregress(x_values, y_values)

# Fitted regression line: slope * market return + intercept, evaluated at each x value
vc_fit = vc_slope * x_values + vc_int

In [None]:
# Scatter chart of stock vs. market returns with the fitted line overlaid
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, vc_fit, "--", color="red")
# plt.xticks(x_values, rotation=90)
ax.set_xlabel('Market Returns')
ax.set_ylabel('Stock Returns')
plt.show()

In [None]:
# Mean of the stock's daily log returns
stock_mean = returns_df["Stock_log_returns"].mean()
print(f"Stock Daily Returns Mean: {stock_mean!s}")

In [None]:
# Mean of the market's daily log returns
market_mean = returns_df["Market_log_returns"].mean()
print(f"Market Daily Returns Mean: {market_mean!s}")

In [None]:
# Number of trading days in sample: one per record in the market API response.
# NOTE(review): this repeats the earlier trading_days cell; the value is used
# in the following cells to annualise the daily standard deviations.
trading_days = len(iex_market_response)
trading_days

In [None]:
# Individual Stock (CSCO) Variance of daily log returns
stock_var = returns_df["Stock_log_returns"].var()
print(f"Stock Variance of Daily Returns: {stock_var!s}")

# Individual Stock (CSCO) Standard Deviation = square root of the variance
stock_sd = math.sqrt(stock_var)
print(f"Stock Standard Deviation of Daily Returns: {stock_sd!s}")

# Annualise by scaling with the square root of the number of trading days
stock_sd_annual = stock_sd * math.sqrt(trading_days)
print(f"Annual Stock Standard Deviation: {stock_sd_annual!s}")

In [None]:
# Market (SP500) Variance of daily log returns
market_var = returns_df["Market_log_returns"].var()
print(f"Market Variance of Daily Returns: {market_var!s}")

# Market (SP500) Standard Deviation = square root of the variance
market_sd = math.sqrt(market_var)
print(f"Market Standard Deviation of Daily Returns: {market_sd!s}")

# Annualise by scaling with the square root of the number of trading days
market_sd_annual = market_sd * math.sqrt(trading_days)
print(f"Annual Market Standard Deviation: {market_sd_annual!s}")

In [None]:
# Covariance between stock and market returns: the off-diagonal entry
# of the 2x2 covariance matrix
covariance_matrix = np.cov(returns_df["Stock_log_returns"], returns_df["Market_log_returns"], ddof=1)
covariance = covariance_matrix[0][1]
print(f"Covariance: {covariance!s}")

# Another Formula to calculate it another way and show in a DataFrame
#returns_df[["Stock_log_returns", "Market_log_returns"]].cov()

In [None]:
# Beta is the stock/market covariance scaled by the market variance
beta = covariance / market_var
print(f"Beta: {beta!s}")

In [None]:
# Correlation Coefficient between stock and market returns: the off-diagonal
# entry of the 2x2 correlation matrix. The `ddof` argument of np.corrcoef is
# deprecated and has no effect on the result, so it is not passed.
correlation_coefficient = np.corrcoef(returns_df["Stock_log_returns"], returns_df["Market_log_returns"])[0][1]
correlation_coefficient

# returns_df[["Stock_log_returns", "Market_log_returns"]].corr()

In [None]:
# Total Risk is the variance of the stock's daily returns
total_risk = stock_var
print(f"Total Risk: {total_risk!s}")

# Systematic Risk (Proportion of total risk explained by market)
market_explained_variance = np.square(beta) * market_var
systematic_risk = (market_explained_variance / total_risk) * 100
print(f"Systematic Risk (%): {systematic_risk!s}")

# Unsystematic Risk (Proportion of total risk unexplained by market)
unsystematic_risk = 100 - systematic_risk
print(f"Unsystematic Risk (%): {unsystematic_risk!s}")