In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date

from pandas_datareader import data
from scipy.stats import norm

%matplotlib inline

In [2]:
def cumm_prod(price_df, start=None, end=None):
    """
    Cumulative simple return of each price series over a date window.

    The period-over-period percentage changes are compounded,
    ``(1 + r_1) * (1 + r_2) * ... - 1``, so every column is expressed relative to
    its first price in the window and different stocks can be compared.

    Parameters
    ----------
    price_df : pandas.DataFrame or pandas.Series
        Prices indexed by date (one column per ticker for a DataFrame).
    start, end : index label, optional
        Inclusive bounds handed to ``.loc``. A bound left as ``None`` keeps that
        side of the slice open, so ``start`` alone means "from start to the last
        available row" and no bounds at all means the whole frame.

    Returns
    -------
    pandas.DataFrame or pandas.Series
        Cumulative returns as fractions (0.10 == +10%). Rows containing a NaN are
        dropped; this always removes the first row of the window, which has no
        previous price to compare against.
    """
    # An open-ended slice replaces the former ``end = date.today()`` default: a
    # ``date`` object cannot be compared with string index labels (which is what
    # ``read_csv(index_col='Date')`` produces), so the old default raised TypeError.
    window = price_df.loc[start:end]
    cumulative = (window.pct_change() + 1).cumprod() - 1
    return cumulative.dropna()

def annualized_return(adjClose_return_df, n_years=None):
    """
    Convert the final cumulative return into an annualized return, in percent.

    Only the last row of ``adjClose_return_df`` is used (``tail(1)``), and it is
    treated as the total return over ``n_years``:

        annualized = ((1 + total_return) ** (1 / n_years) - 1) * 100

    Parameters
    ----------
    adjClose_return_df : pandas.DataFrame or pandas.Series
        Cumulative returns as fractions, e.g. the output of ``cumm_prod``.
    n_years : number
        Length, in years, of the period the cumulative return covers
        (1 for one year of data, 3 for three years, ...).

    Returns
    -------
    Same type as the input, holding one row with the annualized return in percent,
    or ``None`` (after printing a reminder) when ``n_years`` is missing or zero.
    """
    # Guard clause: without a usable period length there is nothing to compute.
    if n_years is None or n_years == 0:
        print("Input How many years does your data covers!")
        return None

    total_growth = 1 + adjClose_return_df.tail(1)
    yearly_rate = total_growth ** (1 / n_years) - 1
    return yearly_rate * 100

def annualized_historical_volatility(closePrice_df, start=None, end=None, n_days=None):
    """
    Annualized close-to-close historical volatility (HV).

    HV is the sample standard deviation of the log returns ``ln(P_t / P_{t-1})``
    scaled by ``sqrt(252)`` (trading days per year).

    The price window is chosen in this order of precedence:

    1. ``n_days`` (non-zero): the last ``n_days`` price rows. Note this yields
       ``n_days - 1`` returns, because the first row has no previous price.
    2. ``start``: rows from ``start`` to ``end`` inclusive, or to the last
       available row when ``end`` is omitted.
    3. otherwise: all available data (``end`` on its own is ignored).

    Parameters
    ----------
    closePrice_df : pandas.DataFrame or pandas.Series
        Prices indexed by date (one column per ticker for a DataFrame).
    start : index label, optional
        First date of the window.
    end : index label, optional
        Last date of the window; only honoured together with ``start``.
    n_days : int, optional
        Number of most recent price rows to use.

    Returns
    -------
    pandas.Series (one value per column) for a DataFrame input, scalar for a Series.
    """
    if n_days is not None and n_days != 0:
        prices = closePrice_df.tail(n_days)
    elif start is not None:
        # ``end=None`` leaves the slice open instead of substituting date.today():
        # a ``date`` object cannot be compared with string index labels, so the
        # old default raised TypeError on frames loaded with a plain string index.
        prices = closePrice_df.loc[start:end]
    else:
        # No window requested: use the whole history.
        prices = closePrice_df

    log_returns = np.log(prices / prices.shift(1))
    return np.sqrt(log_returns.var()) * np.sqrt(252)

def rolling_historical_volatility(closePrice_df, start=None, end=None, window=None):
    """
    Rolling annualized close-to-close historical volatility.

    For every date, the standard deviation of the last ``window`` log returns
    ``ln(P_t / P_{t-1})`` is scaled by ``sqrt(252)``.

    Parameters
    ----------
    closePrice_df : pandas.DataFrame or pandas.Series
        Prices indexed by date.
    start, end : index label, optional
        Inclusive ``.loc`` bounds; ``None`` leaves that side of the slice open.
    window : int
        Number of observations per rolling window. It defaults to ``None`` in the
        signature but is passed straight to ``rolling``, so supply a value.

    Returns
    -------
    Object of the same shape as the sliced prices; the leading rows, where the
    window is not yet full, are NaN.
    """
    prices = closePrice_df.loc[start:end]
    log_returns = np.log(prices / prices.shift(1))
    rolling_std = log_returns.rolling(window=window).std()
    return np.sqrt(252) * rolling_std

In [3]:
# Load the cleaned price panels (one column per ticker, one row per date) and the
# per-stock fundamentals table.
# 'Date' becomes the index but is NOT parsed, so the index labels stay plain strings
# such as '2017-02-03'; a later cell relies on that when it renames a date-labelled column.
close_panel = pd.read_csv('data_collection_cleaning/prices_collection_cleaning/Cleaned_closeP_5y.csv', index_col='Date')
AdjClose_panel = pd.read_csv('data_collection_cleaning/prices_collection_cleaning/Cleaned_AdjCloseP_5y.csv', index_col='Date')
stock_full_Data = pd.read_csv('Final_stock_data.csv')

In [4]:
# Preview the fundamentals table as loaded from Final_stock_data.csv.
stock_full_Data.head()

Unnamed: 0,ticker,name,sector,Earnings_expectation,irv_status,capm_status,esg_status,current_price,annu_return_10y,market_cap,marketcap_cat,beta,eps_ttm,growth_estimate_5y,pe_forward,pe_trailing,irv_FairValue,ESG_risk,capm_expected_return
0,AAP,Advance Auto Parts Inc.,Consumer Cyclical,Increase,Above Fair Price,UnderValued,Sustainable,214.15,0.103398,13084050000.0,Large,1.25,9.55,0.1525,13.932986,22.424082,109.188247,12.97,0.171149
1,ABT,Abbott Laboratories,Healthcare,Increase,Above Fair Price,OverValued,Average,118.29,0.181179,209171000000.0,Large,0.74,3.94,0.1318,22.92442,30.022842,51.234756,25.96,0.109154
2,ACN,Accenture plc,Technology,Increase,Above Fair Price,OverValued,Sustainable,323.905,0.208836,204708300000.0,Large,1.21,9.613,0.1135,31.025385,33.694477,121.148794,9.45,0.166287
3,ADM,Archer-Daniels-Midland Company,Consumer Defensive,Increase,Above Fair Price,OverValued,Not Sustainable,76.985,0.122815,43278430000.0,Large,0.81,4.79,0.066,14.804809,16.072025,19.449588,36.42,0.117663
4,ADP,"Automatic Data Processing, Inc.",Industrials,Increase,Above Fair Price,OverValued,Sustainable,201.01,0.182708,84433240000.0,Large,0.83,6.46,0.1371,26.48353,31.116098,90.801919,14.18,0.120094


In [5]:
# Preview the close-price panel (dates as rows, tickers as columns).
close_panel.head()

Unnamed: 0_level_0,AAP,ABT,ACN,ADM,ADP,ADSK,AEE,AEP,AFL,AJG,...,WRLD,WTFC,WTRH,WW,WWD,XOMA,XRX,ZD,ZUMZ,ESNT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-02-03,162.75,42.779999,114.489998,43.990002,96.870003,84.410004,52.689999,64.029999,34.305,54.009998,...,49.310001,72.650002,9.901,12.29,70.330002,4.45,28.6,73.565216,21.15,35.91
2017-02-06,160.270004,42.43,114.18,44.02,96.470001,82.82,52.650002,63.82,34.465,54.02,...,49.139999,71.860001,9.9,12.35,70.010002,4.3,28.280001,73.400002,20.450001,35.720001
2017-02-07,159.179993,42.889999,115.919998,44.59,96.290001,84.5,52.68,63.93,34.485001,53.93,...,49.84,71.599998,9.9,12.31,69.730003,4.21,28.280001,73.886955,20.1,36.040001
2017-02-08,162.729996,42.400002,115.43,43.459999,96.580002,82.93,53.119999,63.799999,34.82,54.060001,...,50.43,71.160004,9.9,12.28,69.400002,4.15,29.440001,73.034782,19.950001,36.290001
2017-02-09,164.830002,42.540001,116.980003,43.540001,97.650002,83.879997,52.900002,63.419998,34.950001,54.709999,...,51.259998,72.419998,10.0,12.35,70.389999,4.1,29.040001,74.417389,20.65,36.240002


In [6]:
# Preview the adjusted-close panel (same layout as close_panel).
AdjClose_panel.head()

Unnamed: 0_level_0,AAP,ABT,ACN,ADM,ADP,ADSK,AEE,AEP,AFL,AJG,...,WRLD,WTFC,WTRH,WW,WWD,XOMA,XRX,ZD,ZUMZ,ESNT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-02-03,157.890411,39.270012,105.562996,37.597412,87.912636,84.410004,45.868515,53.538227,30.290247,48.557995,...,49.310001,68.225578,9.901,12.29,68.32328,4.45,23.375423,73.565216,21.15,34.572044
2017-02-06,155.484421,38.948734,105.277184,37.623055,87.549629,82.82,45.833706,53.36264,30.431515,48.566975,...,49.139999,67.483673,9.9,12.35,68.012421,4.3,23.113874,73.400002,20.450001,34.389118
2017-02-07,154.426956,39.370998,106.8815,38.110218,87.386269,84.5,45.859814,53.454613,30.449175,48.486065,...,49.84,67.370773,9.9,12.31,67.74041,4.21,23.113874,73.886955,20.1,34.697193
2017-02-08,157.870956,38.921196,106.429733,37.144428,87.64946,82.93,46.24284,53.842827,30.744982,48.602943,...,50.43,66.956764,9.9,12.28,67.419815,4.15,24.06197,73.034782,19.950001,34.937889
2017-02-09,159.908234,39.049706,107.858849,37.212811,88.620506,83.879997,46.051334,53.522125,30.85976,49.187328,...,51.259998,68.142334,10.0,12.35,68.381561,4.1,23.735037,74.417389,20.65,34.889748


# --------- Volatility Calculation ---------

In [7]:
# Annualized historical volatility per ticker over the whole close-price panel
# (no start/end/n_days given, so all rows are used).
# NOTE(review): this uses unadjusted close prices, while the return section below uses
# adjusted closes; splits/dividends would show up as returns here -- confirm intended.
ahv_5y = annualized_historical_volatility(close_panel)

In [8]:
# Empty two-column frame that the next cell fills from the ahv_5y Series.
ahv_5y_DF = pd.DataFrame(columns=['ticker', 'AHV_5y'])

In [9]:
# ahv_5y is indexed by ticker: copy the index labels and the values into plain columns
# so the frame can be merged on 'ticker'.
ahv_5y_DF['ticker'] = ahv_5y.index
ahv_5y_DF['AHV_5y'] = ahv_5y.values

In [10]:
# Display the ticker / volatility table.
ahv_5y_DF

Unnamed: 0,ticker,AHV_5y
0,AAP,0.355467
1,ABT,0.249769
2,ACN,0.254874
3,ADM,0.252289
4,ADP,0.272415
...,...,...
874,XOMA,0.689078
875,XRX,0.405374
876,ZD,0.308007
877,ZUMZ,0.513665


In [13]:
# Attach the 5-year annualized volatility to the fundamentals table; the left join
# keeps every stock, leaving AHV_5y as NaN where no volatility was computed.
# Any existing AHV_5y column is dropped first so the cell is idempotent: without this,
# re-running it (or loading a CSV that already carries the column) would produce
# AHV_5y_x / AHV_5y_y instead of a single AHV_5y.
stock_full_Data = (
    stock_full_Data
    .drop(columns='AHV_5y', errors='ignore')
    .merge(ahv_5y_DF, how='left', on='ticker')
)

In [16]:
# Display the fundamentals table with the new AHV_5y column.
stock_full_Data

Unnamed: 0,ticker,name,sector,Earnings_expectation,irv_status,capm_status,esg_status,current_price,annu_return_10y,market_cap,marketcap_cat,beta,eps_ttm,growth_estimate_5y,pe_forward,pe_trailing,irv_FairValue,ESG_risk,capm_expected_return,AHV_5y
0,AAP,Advance Auto Parts Inc.,Consumer Cyclical,Increase,Above Fair Price,UnderValued,Sustainable,214.1500,0.103398,1.308405e+10,Large,1.25,9.550,0.1525,13.932986,22.424082,109.188247,12.97,0.171149,0.355467
1,ABT,Abbott Laboratories,Healthcare,Increase,Above Fair Price,OverValued,Average,118.2900,0.181179,2.091710e+11,Large,0.74,3.940,0.1318,22.924420,30.022842,51.234756,25.96,0.109154,0.249769
2,ACN,Accenture plc,Technology,Increase,Above Fair Price,OverValued,Sustainable,323.9050,0.208836,2.047083e+11,Large,1.21,9.613,0.1135,31.025385,33.694477,121.148794,9.45,0.166287,0.254874
3,ADM,Archer-Daniels-Midland Company,Consumer Defensive,Increase,Above Fair Price,OverValued,Not Sustainable,76.9850,0.122815,4.327843e+10,Large,0.81,4.790,0.0660,14.804809,16.072025,19.449588,36.42,0.117663,0.252289
4,ADP,"Automatic Data Processing, Inc.",Industrials,Increase,Above Fair Price,OverValued,Sustainable,201.0100,0.182708,8.443324e+10,Large,0.83,6.460,0.1371,26.483530,31.116098,90.801919,14.18,0.120094,0.272415
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
933,XRX,Xerox Holdings Corporation,Technology,Increase,Above Fair Price,UnderValued,No Data,20.6059,0.029031,3.678401e+09,Small,1.75,1.459,-0.1600,10.302950,14.123303,0.609810,999.00,0.231929,0.405374
934,ZD,"Ziff Davis, Inc.",Communication Services,Increase,Above Fair Price,OverValued,No Data,102.4800,0.145864,4.941668e+09,Small,1.01,4.120,0.1140,14.193907,24.873789,38.485339,999.00,0.141975,0.308007
935,ZUMZ,Zumiez Inc.,Consumer Cyclical,Decrease,Above Fair Price,UnderValued,No Data,42.6600,0.035214,9.777245e+08,Small,1.68,4.870,0.1500,9.233767,8.759754,21.330001,999.00,0.223419,0.513665
936,ESNT,Essent Group Ltd.,Financial Services,Increase,Above Fair Price,UnderValued,No Data,44.4800,0.082041,4.912015e+09,Small,1.33,5.569,0.1555,7.339934,7.987071,23.215807,999.00,0.180874,0.502764


In [18]:
# stock_full_Data.to_csv('Final_stock_data.csv', index=False)

# ------- Correlation Matrix -------

In [23]:
# Pairwise Pearson correlation between tickers, rounded to 2 decimals.
# The input is the CUMULATIVE return series (P_t / P_0 - 1), i.e. a rescaled price level,
# so this measures how the price paths co-trend rather than how the daily returns co-move.
# NOTE(review): correlating daily returns (close_panel.pct_change()) is the more usual
# choice for diversification analysis -- confirm which one is intended.
corr_matrix = cumm_prod(close_panel).corr().round(2)

In [24]:
# Display the ticker-by-ticker correlation matrix.
corr_matrix

Unnamed: 0,AAP,ABT,ACN,ADM,ADP,ADSK,AEE,AEP,AFL,AJG,...,WRLD,WTFC,WTRH,WW,WWD,XOMA,XRX,ZD,ZUMZ,ESNT
AAP,1.00,0.76,0.82,0.81,0.85,0.70,0.70,0.45,0.68,0.82,...,0.79,0.26,-0.47,-0.38,0.67,0.19,-0.32,0.77,0.80,0.41
ABT,0.76,1.00,0.95,0.71,0.90,0.95,0.91,0.66,0.56,0.96,...,0.71,-0.10,-0.78,-0.44,0.73,0.59,-0.52,0.69,0.89,0.36
ACN,0.82,0.95,1.00,0.83,0.93,0.91,0.84,0.56,0.64,0.99,...,0.82,0.13,-0.73,-0.38,0.72,0.54,-0.47,0.81,0.91,0.41
ADM,0.81,0.71,0.83,1.00,0.74,0.70,0.56,0.23,0.64,0.82,...,0.77,0.49,-0.40,-0.17,0.59,0.39,-0.34,0.82,0.82,0.36
ADP,0.85,0.90,0.93,0.74,1.00,0.83,0.89,0.70,0.79,0.93,...,0.84,0.16,-0.72,-0.34,0.84,0.49,-0.26,0.81,0.89,0.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XOMA,0.19,0.59,0.54,0.39,0.49,0.65,0.43,0.30,0.35,0.54,...,0.32,0.07,-0.47,0.11,0.56,1.00,-0.19,0.44,0.65,0.40
XRX,-0.32,-0.52,-0.47,-0.34,-0.26,-0.54,-0.35,-0.01,0.22,-0.47,...,-0.18,0.34,0.32,0.26,0.10,-0.19,1.00,-0.14,-0.32,0.47
ZD,0.77,0.69,0.81,0.82,0.81,0.70,0.59,0.38,0.74,0.81,...,0.79,0.40,-0.55,-0.26,0.74,0.44,-0.14,1.00,0.83,0.50
ZUMZ,0.80,0.89,0.91,0.82,0.89,0.88,0.77,0.53,0.69,0.91,...,0.75,0.20,-0.68,-0.25,0.82,0.65,-0.32,0.83,1.00,0.52


In [26]:
# Persist the correlation matrix (tickers are written as the index column).
corr_matrix.to_csv('Correlation_matrix.csv')

# ------- Annualized 5y Return -------

In [33]:
# Cumulative return of every ticker over the whole adjusted-close panel.
adjClose_return = cumm_prod(AdjClose_panel)

In [64]:
# annualized_return() returns percent, so /100 converts back to a fraction; the
# transpose turns the single-row result into one row per ticker.
# NOTE(review): n_years=5 assumes the panel spans five years -- confirm against the data.
returns_5y = (annualized_return(adjClose_return, 5)/100).transpose()
returns_5y

Date,2022-02-24
AAP,0.049267
ABT,0.247649
ACN,0.245322
ADM,0.150693
ADP,0.178939
...,...
XOMA,0.371572
XRX,-0.032851
ZD,0.055993
ZUMZ,0.157523


In [65]:
# Move the ticker labels out of the index into a regular column (named 'index').
# NOTE(review): this reassigns returns_5y from itself, so running the cell a second time
# adds a further index column; re-run the cell that builds returns_5y first.
returns_5y = returns_5y.reset_index(drop=False)
returns_5y

Date,index,2022-02-24
0,AAP,0.049267
1,ABT,0.247649
2,ACN,0.245322
3,ADM,0.150693
4,ADP,0.178939
...,...,...
874,XOMA,0.371572
875,XRX,-0.032851
876,ZD,0.055993
877,ZUMZ,0.157523


In [71]:
# Give the columns merge-friendly names. The value column is addressed by position
# (it is the last column) instead of by the hard-coded '2022-02-24' label, which is the
# last date in the price data and would silently stop matching once the data is refreshed.
returns_5y.rename(columns={returns_5y.columns[-1]: 'annu_return_5y', 'index': 'ticker'}, inplace=True)

In [72]:
# Reload the fundamentals table from disk, replacing the in-memory frame built above.
# NOTE(review): the to_csv cell that would write AHV_5y into this file is commented out,
# so on a fresh Restart & Run All the column is only present if the CSV on disk already
# contains it -- confirm the file state or merge into the in-memory frame instead.
stock_full_Data = pd.read_csv('Final_stock_data.csv')

In [73]:
# Preview the reloaded fundamentals table.
stock_full_Data.head()

Unnamed: 0,ticker,name,sector,Earnings_expectation,irv_status,capm_status,esg_status,current_price,annu_return_10y,market_cap,marketcap_cat,beta,eps_ttm,growth_estimate_5y,pe_forward,pe_trailing,irv_FairValue,ESG_risk,capm_expected_return,AHV_5y
0,AAP,Advance Auto Parts Inc.,Consumer Cyclical,Increase,Above Fair Price,UnderValued,Sustainable,214.15,0.103398,13084050000.0,Large,1.25,9.55,0.1525,13.932986,22.424082,109.188247,12.97,0.171149,0.355467
1,ABT,Abbott Laboratories,Healthcare,Increase,Above Fair Price,OverValued,Average,118.29,0.181179,209171000000.0,Large,0.74,3.94,0.1318,22.92442,30.022842,51.234756,25.96,0.109154,0.249769
2,ACN,Accenture plc,Technology,Increase,Above Fair Price,OverValued,Sustainable,323.905,0.208836,204708300000.0,Large,1.21,9.613,0.1135,31.025385,33.694477,121.148794,9.45,0.166287,0.254874
3,ADM,Archer-Daniels-Midland Company,Consumer Defensive,Increase,Above Fair Price,OverValued,Not Sustainable,76.985,0.122815,43278430000.0,Large,0.81,4.79,0.066,14.804809,16.072025,19.449588,36.42,0.117663,0.252289
4,ADP,"Automatic Data Processing, Inc.",Industrials,Increase,Above Fair Price,OverValued,Sustainable,201.01,0.182708,84433240000.0,Large,0.83,6.46,0.1371,26.48353,31.116098,90.801919,14.18,0.120094,0.272415


In [74]:
# Attach the 5-year annualized return to the fundamentals table; the left join keeps
# every stock, leaving annu_return_5y as NaN where no return was computed.
# Any existing annu_return_5y column is dropped first so the cell is idempotent: without
# this, a re-run would produce annu_return_5y_x / annu_return_5y_y.
stock_full_Data = (
    stock_full_Data
    .drop(columns='annu_return_5y', errors='ignore')
    .merge(returns_5y, how='left', on='ticker')
)

In [75]:
# Display the fundamentals table with the new annu_return_5y column.
stock_full_Data

Unnamed: 0,ticker,name,sector,Earnings_expectation,irv_status,capm_status,esg_status,current_price,annu_return_10y,market_cap,...,beta,eps_ttm,growth_estimate_5y,pe_forward,pe_trailing,irv_FairValue,ESG_risk,capm_expected_return,AHV_5y,annu_return_5y
0,AAP,Advance Auto Parts Inc.,Consumer Cyclical,Increase,Above Fair Price,UnderValued,Sustainable,214.1500,0.103398,1.308405e+10,...,1.25,9.550,0.1525,13.932986,22.424082,109.188247,12.97,0.171149,0.355467,0.049267
1,ABT,Abbott Laboratories,Healthcare,Increase,Above Fair Price,OverValued,Average,118.2900,0.181179,2.091710e+11,...,0.74,3.940,0.1318,22.924420,30.022842,51.234756,25.96,0.109154,0.249769,0.247649
2,ACN,Accenture plc,Technology,Increase,Above Fair Price,OverValued,Sustainable,323.9050,0.208836,2.047083e+11,...,1.21,9.613,0.1135,31.025385,33.694477,121.148794,9.45,0.166287,0.254874,0.245322
3,ADM,Archer-Daniels-Midland Company,Consumer Defensive,Increase,Above Fair Price,OverValued,Not Sustainable,76.9850,0.122815,4.327843e+10,...,0.81,4.790,0.0660,14.804809,16.072025,19.449588,36.42,0.117663,0.252289,0.150693
4,ADP,"Automatic Data Processing, Inc.",Industrials,Increase,Above Fair Price,OverValued,Sustainable,201.0100,0.182708,8.443324e+10,...,0.83,6.460,0.1371,26.483530,31.116098,90.801919,14.18,0.120094,0.272415,0.178939
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
933,XRX,Xerox Holdings Corporation,Technology,Increase,Above Fair Price,UnderValued,No Data,20.6059,0.029031,3.678401e+09,...,1.75,1.459,-0.1600,10.302950,14.123303,0.609810,999.00,0.231929,0.405374,-0.032851
934,ZD,"Ziff Davis, Inc.",Communication Services,Increase,Above Fair Price,OverValued,No Data,102.4800,0.145864,4.941668e+09,...,1.01,4.120,0.1140,14.193907,24.873789,38.485339,999.00,0.141975,0.308007,0.055993
935,ZUMZ,Zumiez Inc.,Consumer Cyclical,Decrease,Above Fair Price,UnderValued,No Data,42.6600,0.035214,9.777245e+08,...,1.68,4.870,0.1500,9.233767,8.759754,21.330001,999.00,0.223419,0.513665,0.157523
936,ESNT,Essent Group Ltd.,Financial Services,Increase,Above Fair Price,UnderValued,No Data,44.4800,0.082041,4.912015e+09,...,1.33,5.569,0.1555,7.339934,7.987071,23.215807,999.00,0.180874,0.502764,0.045034


In [76]:
# stock_full_Data.to_csv('Final_stock_data_V2.csv', index=False)