In [1]:
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
from linearmodels import PanelOLS
from patsy import dmatrices
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from yellowbrick.datasets import load_concrete
from yellowbrick.regressor import ResidualsPlot
from scipy import stats

In [2]:
# Read the two Excel workbooks this notebook works from: the fund panel
# (`joint`) and the table that holds the 'Fund Return' column (`table_org`).
joint, table_org = (
    pd.read_excel(workbook)
    for workbook in ('joint_snap.xlsx', 'FinalVersion.xlsx')
)

In [3]:
# Convert the month column to real datetimes so date arithmetic works.
joint['Month'] = pd.to_datetime(joint['Month'])

# Fund age in whole months since inception.
# A calendar month is not a fixed duration, so dividing by np.timedelta64(1, 'M')
# is ambiguous and newer pandas releases refuse that unit. Use the explicit
# average Gregorian month instead (365.2425 days / 12 = 30 days 10:29:06), which
# is the same factor NumPy applies when it converts the 'M' unit.
AVERAGE_MONTH = pd.Timedelta(days=30, hours=10, minutes=29, seconds=6)
age = (joint['Month'] - pd.to_datetime(joint['Inception Date'])) / AVERAGE_MONTH
# Truncate toward zero, exactly like int(); a missing date still raises here,
# as the element-wise int() conversion did.
age = np.trunc(age).astype('int64')
joint['Age'] = age

# relative_flow is the fund flow divided by the fund size.
flow = joint["Fund Flow"]
size = joint['Fund Size']
relative_flow = flow / size
joint['relative_flow'] = relative_flow

# Work on a renamed copy so the sustainability score has a short column name.
joint_new = joint.rename(columns={"Fund Sustainability": "esg_score"})

In [4]:
# Build a new column, ESG_Score(t-1): each fund's ESG score of the previous row.
# The lag is taken inside each FundId group, so a fund's first observation gets
# NaN and no value is carried over from the preceding fund. This replaces index
# arithmetic that hard-coded 3276 funds x 92 months and looked up ~300k cells
# one at a time. Like the positional version, it relies on the rows of each
# fund being in chronological order.
esg_ex = joint_new['esg_score']

esg_mod = esg_ex.groupby(joint_new['FundId']).shift(1)

joint_new['esg_mod'] = esg_mod

In [5]:
# Build a new column holding each fund's three-factor performance of the
# FOLLOWING month (t+1), stored as 'capm_Performance_mod'.
# The lead is taken inside each FundId group, so a fund's last observation gets
# NaN instead of a value from the next fund. This replaces index arithmetic
# that hard-coded 3276 funds x 92 months. Rows of each fund must be in
# chronological order, as the positional version also assumed.
capm_org = joint_new['performance_Three_Factor']

# Number of months to look ahead ("Verschiebung" is German for "shift").
verschiebung = 1

capm_nxt = capm_org.groupby(joint_new['FundId']).shift(-verschiebung)

joint_new['capm_Performance_mod'] = capm_nxt

In [None]:
# Calculate the volatility of fund returns for each 12-month window

def calc_Volatility(lst):
    """Return the sample standard deviation of ``lst`` (n - 1 denominator).

    Parameters
    ----------
    lst : sequence of numbers
        The observations (a plain list, tuple or array).

    Returns
    -------
    float
        The sample standard deviation, or NaN when fewer than two
        observations are given, because the n - 1 denominator is undefined
        there. Previously an empty input produced -0.0 and a single value
        produced NaN only via a 0/0 runtime warning.
    """
    values = np.asarray(lst, dtype=float)
    if values.size < 2:
        return np.nan
    # ddof=1 selects the unbiased (n - 1) estimator.
    return np.std(values, ddof=1)

# Sample volatility of fund returns in consecutive 12-month windows, fund by fund.
# The return column is read positionally as N_FUNDS blocks of MONTHS_PER_FUND rows.
N_FUNDS = 3276
MONTHS_PER_FUND = 92
WINDOW_MONTHS = 12

fund_return = table_org['Fund Return'].tolist()
volatility = []

for fund in range(N_FUNDS):
    fund_start = fund * MONTHS_PER_FUND
    fund_end = fund_start + MONTHS_PER_FUND

    # 92 months give eight windows per fund: seven full years plus a shorter tail.
    for window_start in range(fund_start, fund_end, WINDOW_MONTHS):
        # Take the full 12 months (the earlier slice ended one element short and
        # covered only 11) and clip the last window at the fund's final row so it
        # cannot pick up the first months of the next fund.
        window_end = min(window_start + WINDOW_MONTHS, fund_end)

        # Zero returns are skipped; presumably they mark months without data
        # -- TODO confirm against the source table.
        observed = [ret for ret in fund_return[window_start:window_end] if ret != 0]

        if len(observed) > 1:
            vol = calc_Volatility(observed)
        elif len(observed) == 1:
            # A single observation has no dispersion.
            vol = 0.0
        else:
            vol = np.nan

        volatility.append(vol)

In [6]:
# Signed distance of every observation month from the cutoff date 2015-10-12,
# expressed in average months: negative before the cutoff, positive after it.
# np.timedelta64(1, 'M') is avoided because a month is not a fixed duration and
# newer pandas releases refuse that unit; the explicit average Gregorian month
# (365.2425 days / 12 = 30 days 10:29:06) is the factor NumPy uses for 'M'.
AVERAGE_MONTH = pd.Timedelta(days=30, hours=10, minutes=29, seconds=6)
diff = (joint['Month'] - pd.Timestamp(2015, 10, 12)) / AVERAGE_MONTH
joint_new['date_diff'] = diff

In [7]:
# Split the panel at the cutoff: rows with a strictly negative date_diff form
# the "front" sample, rows with a strictly positive one the "behind" sample.
# Rows whose date_diff is exactly 0 or missing land in neither.
months_from_cutoff = joint_new['date_diff']
front_month = joint_new.loc[months_from_cutoff.lt(0)]
behind_month = joint_new.loc[months_from_cutoff.gt(0)]

In [10]:
# The sample runs from 2012/01 to 2019/08, 92 months in total.
# For each sub-sample: encode its months as a categorical, index the frame by
# (FundId, Month), and attach the categorical months again as a regular column.
month_front = pd.Categorical(front_month['Month'])
df_front = front_month.set_index(['FundId', 'Month']).assign(Month=month_front)

month_behind = pd.Categorical(behind_month['Month'])
df_behind = behind_month.set_index(['FundId', 'Month']).assign(Month=month_behind)

In [None]:
# capm performance summary on the full (unsplit) panel.
# `df` was not defined by any earlier cell, so this cell failed with a NameError
# on a fresh kernel. Build it here with the same (FundId, Month) index that the
# df_front / df_behind frames use.
# NOTE(review): assumes the full sample is the intended data for this model.
df = joint_new.set_index(['FundId', 'Month'])

exog_vars = ['esg_score', 'esg_mod']
exog = sm.add_constant(df[exog_vars])

# Two-way fixed effects: one effect per fund (entity) and one per month (time).
mod = PanelOLS(df.capm_Performance_mod, exog, entity_effects=True, time_effects=True)

res = mod.fit()

In [None]:
# Show the estimation summary of the model fitted in the previous cell.
print(res)

In [13]:
# Robustness test: estimate the two-way fixed-effects model separately on the
# "front" and the "behind" sample, adding fund-level controls to the two ESG
# regressors.
# NOTE(review): the front model controls for 'performance_Fou_Factor' while the
# behind model controls for 'performance_Three_Factor' -- confirm that this
# asymmetry is intended.
# NOTE(review): 'size_log' is not created by any cell shown here; presumably it
# comes from the workbook or from a removed cell -- verify.
def _fit_two_way_fe(panel, regressors):
    """Fit PanelOLS of capm_Performance_mod on a constant plus ``regressors``.

    Entity and time effects are both included. Returns the design matrix,
    the model object and its fit result, in that order.
    """
    design = sm.add_constant(panel[regressors])
    model = PanelOLS(panel.capm_Performance_mod, design,
                     entity_effects=True, time_effects=True)
    return design, model, model.fit()


exog_vars_front = ['esg_score', 'esg_mod', 'size_log', 'relative_flow',
                   'Age', 'performance_Fou_Factor']
exog_front, mod_front, res_front = _fit_two_way_fe(df_front, exog_vars_front)

exog_vars_behind = ['esg_score', 'esg_mod', 'size_log', 'relative_flow',
                    'Age', 'performance_Three_Factor']
exog_behind, mod_behind, res_behind = _fit_two_way_fe(df_behind, exog_vars_behind)

  return ptp(axis=axis, out=out, **kwargs)
Inputs contain missing values. Dropping rows with missing observations.


In [14]:
# Estimation summary for the "front" sample (observations with date_diff < 0).
print(res_front)

                           PanelOLS Estimation Summary                            
Dep. Variable:     capm_Performance_mod   R-squared:                        0.0052
Estimator:                     PanelOLS   R-squared (Between):             -0.3747
No. Observations:                 51326   R-squared (Within):              -0.0163
Date:                  Wed, Apr 08 2020   R-squared (Overall):             -0.0484
Time:                          21:32:15   Log-likelihood                -1.118e+05
Cov. Estimator:              Unadjusted                                           
                                          F-statistic:                      42.838
Entities:                          1823   P-value                           0.0000
Avg Obs:                         28.155   Distribution:                 F(6,49454)
Min Obs:                         1.0000                                           
Max Obs:                         44.000   F-statistic (robust):             42.838
    

In [15]:
# Estimation summary for the "behind" sample (observations with date_diff > 0).
print(res_behind)

                           PanelOLS Estimation Summary                            
Dep. Variable:     capm_Performance_mod   R-squared:                        0.0063
Estimator:                     PanelOLS   R-squared (Between):             -2.9643
No. Observations:                 90610   R-squared (Within):               0.0141
Date:                  Wed, Apr 08 2020   R-squared (Overall):             -0.0914
Time:                          21:32:16   Log-likelihood                -1.942e+05
Cov. Estimator:              Unadjusted                                           
                                          F-statistic:                      93.000
Entities:                          2871   P-value                           0.0000
Avg Obs:                         31.560   Distribution:                 F(6,87689)
Min Obs:                         1.0000                                           
Max Obs:                         45.000   F-statistic (robust):             93.000
    