In [1]:
# matplotlib inline plotting
%matplotlib inline
# make inline plotting higher resolution
%config InlineBackend.figure_format ='svg'

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import re
import statsmodels.api as sm
from typing import List
from statsmodels.tsa.ar_model import AutoReg
from itertools import dropwhile

# local imports
from helpers.sql import connect_to_db, update_database, read_db
from helpers.ar_model import transformation, transform, forecast_residual
from helpers.fama_macbeth import FamaMacbeth
from helpers.pretty_print import pretty_print_pval

In [3]:
# Connect to database (downloads if not exist)
engine = connect_to_db()

# Set this to True to update database locally
update = False

if update:
    update_database()

In [4]:
# Loading data
factors = read_db(engine=engine, statement="select * from factors", idx_col="date")
risk_free_rate = read_db(
    engine=engine, statement="select * from riskfree", idx_col="date"
)
sentiment = read_db(engine=engine, statement="select * from sentiment", idx_col="date")
returns_bm_size = read_db(
    engine=engine, statement="select * from bm_size_portfolios", idx_col="date"
)
returns_industry_10 = read_db(
    engine=engine, statement='select * from "10_industry_portfolios"', idx_col="date"
)
returns_industry_49 = read_db(
    engine=engine, statement='select * from "49_industry_portfolios"', idx_col="date"
)
sentiment_sum_ar1 = read_db(
    engine=engine, statement="select * from climate_sum_ar1", idx_col="date"
)
sentiment_mean_n_ar1 = read_db(
    engine=engine, statement="select * from climate_mean_n_ar1", idx_col="date"
)


In [5]:
# Merging into one dataframe
factors_sum = pd.merge(left=factors, right=sentiment_sum_ar1, left_index=True, right_index=True)
factors_mean_n = pd.merge(left=factors, right=sentiment_mean_n_ar1, left_index=True, right_index=True)

In [6]:
# Subtracting the risk-free rate from the returns
returns_bm_size = pd.merge(
    left=returns_bm_size, right=risk_free_rate, left_index=True, right_index=True
)
returns_bm_size = returns_bm_size.apply(lambda x: x - x["rf"], axis=1)
returns_bm_size = returns_bm_size.drop(columns=["rf"])

returns_industry_10 = pd.merge(
    left=returns_industry_10, right=risk_free_rate, left_index=True, right_index=True
)
returns_industry_10 = returns_industry_10.apply(lambda x: x - x["rf"], axis=1)
returns_industry_10 = returns_industry_10.drop(columns=["rf"])

returns_industry_49 = pd.merge(
    left=returns_industry_49, right=risk_free_rate, left_index=True, right_index=True
)
returns_industry_49 = returns_industry_49.apply(lambda x: x - x["rf"], axis=1)
returns_industry_49 = returns_industry_49.drop(columns=["rf"])


In [7]:
TOPICS = [
    "aggregate_transformed_residuals",
    "weather_extremes_transformed_residuals",
    "importance_of_human_intervantion_transformed_residuals",
    "politics_transformed_residuals",
]

SIGNIFICANT_DIGITS = 5

# setting newey west lags to 7 according to Greene
# (Econometric Analysis, 7th edition, section 20.5.2, p. 960)
NEWEY_WEST = 7

# there can be some discrepencies in the gmm standard errors when annualising
# this is not the case for the normal fm errors and may be due to some numerical optimisations
# in the gmm
YEARLY_BUSINESS_DAYS: int = 250

START_DATE = '2009-10-13' # first date we have full politics UCP
#START_DATE = "2009-03-02" # the old start date - should not be used (!)
#START_DATE = "2019-09-01"  # Be aware: Large data merge 1. september 2017
END_DATE = "2019-10-01"  # Be aware: Large data merge 1. september 2017

BASE_AP_MODEL = ["mkt-rf", "smb", "hml", "mom"]

# select return observations
returns = {
    "bm_size": returns_bm_size.loc[START_DATE:END_DATE],
    "10_industries": returns_industry_10.loc[START_DATE:END_DATE],
    "49_industries": returns_industry_49.loc[START_DATE:END_DATE],
}

# select factor observations
factors_sum = factors_sum.loc[START_DATE:END_DATE]
factors_mean_n = factors_mean_n.loc[START_DATE:END_DATE]


In [8]:
# annualise returns
for key, value in returns.items():
    returns[key] = value * YEARLY_BUSINESS_DAYS

# annualise factors
for factor in BASE_AP_MODEL:
    factors_sum[factor] = factors_sum[factor] * YEARLY_BUSINESS_DAYS
    factors_mean_n[factor] = factors_mean_n[factor] * YEARLY_BUSINESS_DAYS


In [9]:
# remove weekends and holidays from factors
factors_sum = factors_sum.reindex(returns_bm_size.index).dropna(how="all")
factors_mean_n = factors_mean_n.reindex(returns_bm_size.index).dropna(how="all")


## Fama Macbeth with sentiment residuals
This script produces the Fama Macbeth results with residuals.

In [10]:
def structure_fm_results(fm_res, variant, portfolio_type):
    variants = {
        0: f"I:{portfolio_type}",
        1: f"II:{portfolio_type}",
        2: f"III:{portfolio_type}",
        3: f"IV:{portfolio_type}",
    }

    index = []
    values = []

    for i in fm_res.index:
        # add loading
        index.append(i)
        values.append(fm_res["gamma"].loc[i])

        # add t-stat
        index.append(f"t_{i}")
        values.append(fm_res["tstat_gmm"].loc[i])

    structured = pd.DataFrame(values, index=index, columns=[variants[variant]])

    return structured


def generate_fm_results(
    fac,
    ret,
    portfolios: List[str] = None,
    prettify=True,
    split_forward=False,
    split_backward=False,
    split_date="2014-01-01",
):

    # placeholder for data
    collected = pd.DataFrame()

    # raw fm results
    fm_raw = pd.DataFrame()

    # allow for setting portfolios manually
    if portfolios is None:
        portfolios = ret.keys()

    for portfolio in tqdm(portfolios):
        # placeholder for inner loop
        one_model = pd.DataFrame()

        for i, topic in enumerate(TOPICS):
            # set base factors
            X = fac[BASE_AP_MODEL].copy()
            X[topic] = fac[topic]

            # there is some data missing in 2009 for `importance of human intervation` topic
            # We should probably look into why this is the case
            X = X.dropna()

            # reindex Y (only relevant in case of missing observations in factors
            # i.e. for some sentiment topics
            Y = ret[portfolio].copy().loc[X.index]

            # split sample if needed
            if split_forward and split_backward:
                raise ValueError("Can't split forward and backward at the same time")

            if split_forward:
                X = X.loc[split_date:]
                Y = Y.loc[split_date:]
            if split_backward:
                X = X.loc[:split_date]
                Y = Y.loc[:split_date]

            # estimate and fit FM model
            fm = FamaMacbeth(
                gmm_errors=True,
                n_west_lags=NEWEY_WEST,
                assets=Y,
                factors=X,
            )
            fm.fit()

            # capture results
            fm_fit = fm.summary.copy()
            fm_raw = pd.concat([fm_raw, fm_fit])

            # save additional values
            nobs = fm_fit["nobs"].unique()[0]
            r2 = fm_fit["cs_r2"].unique()[0]

            fm_fit = fm_fit.set_index("gammas", drop=True)
            fm_fit.index.name = None
            fm_fit = fm_fit[["gamma", "tstat_gmm"]]

            # structure results
            if one_model.shape == (0, 0):
                one_model = structure_fm_results(fm_fit, i, portfolio)
                one_model[f"sort_{i}"] = [i for i in range(one_model.shape[0])]
                one_model.loc["nobs"] = nobs
                one_model.loc["r2"] = r2
                continue
                # break

            else:
                struct = structure_fm_results(fm_fit, i, portfolio)
                struct[f"sort_{i}"] = [i for i in range(struct.shape[0])]
                struct.loc["nobs"] = nobs
                struct.loc["r2"] = r2

                one_model = pd.merge(
                    left=one_model,
                    right=struct,
                    how="outer",
                    left_index=True,
                    right_index=True,
                )

        # fix weird sorting. This is a major hack that I needed to implement
        # as "outer" joins do not preserve the order of the index
        sort_cols = [f"sort_{i}" for i in range(len(TOPICS))]
        one_model = one_model.sort_values(by=sort_cols).drop(columns=sort_cols)

        if collected.shape == (0, 0):
            collected = one_model
            continue
        else:
            collected = pd.merge(
                left=collected,
                right=one_model,
                how="left",
                left_index=True,
                right_index=True,
            )

    # yet another hack to move r2 and nobs to the bottom of dataframe
    # store r2 and nobs in separate dataframe
    r2_nobs = collected.loc[["r2", "nobs"]]
    # drop r2 and nobs from collected
    collected = collected.drop(index=["r2", "nobs"])
    # concat r2 and nobs to collected
    collected = pd.concat([collected, r2_nobs])

    return collected, fm_raw


In [11]:
fm_results_sum = generate_fm_results(
    factors_sum, returns, portfolios=["bm_size", "49_industries"]
)

fm_results_mean_n = generate_fm_results(
    factors_mean_n, returns, portfolios=["bm_size", "49_industries"]
)


# Forwards for the Sum and Mean_N measure
fm_results_forward_sum = generate_fm_results(
    factors_sum,
    returns,
    split_forward=True,
    split_backward=False,
    split_date="2014-01-01",
    portfolios=["bm_size", "49_industries"],
)

fm_results_forward_mean_n = generate_fm_results(
    factors_mean_n,
    returns,
    split_forward=True,
    split_backward=False,
    split_date="2014-01-01",
    portfolios=["bm_size", "49_industries"],
)


# Backwards for the Sum and Mean_N measure
fm_results_backward_sum = generate_fm_results(
    factors_sum,
    returns,
    split_forward=False,
    split_backward=True,
    split_date="2014-01-01",
    portfolios=["bm_size", "49_industries"],
)

fm_results_backward_mean_n = generate_fm_results(
    factors_mean_n,
    returns,
    split_forward=False,
    split_backward=True,
    split_date="2014-01-01",
    portfolios=["bm_size", "49_industries"],
)


100%|██████████| 2/2 [05:03<00:00, 151.83s/it]
100%|██████████| 2/2 [04:59<00:00, 149.60s/it]
100%|██████████| 2/2 [01:35<00:00, 47.51s/it]
100%|██████████| 2/2 [01:34<00:00, 47.20s/it]
100%|██████████| 2/2 [00:51<00:00, 25.59s/it]
100%|██████████| 2/2 [00:50<00:00, 25.40s/it]


In [12]:
print("Sum")
display(fm_results_sum[0])

print("Mean")
display(fm_results_mean_n[0])

print("Forward - Sum")
display(fm_results_forward_sum[0])

print("Forward - Mean")
display(fm_results_forward_mean_n[0])

print("Backward - Sum")
display(fm_results_backward_sum[0])

print("Backward - Mean")
display(fm_results_backward_mean_n[0])


Sum


Unnamed: 0,I:bm_size,II:bm_size,III:bm_size,IV:bm_size,I:49_industries,II:49_industries,III:49_industries,IV:49_industries
c,0.07557,0.103317,0.08047,0.085773,0.073082,0.06878,0.064487,0.063746
t_c,0.995026,1.131778,0.974608,0.95903,1.027187,1.013428,0.986403,0.941917
mkt-rf,0.059129,0.028833,0.055094,0.047197,0.065215,0.068792,0.07319,0.07391
t_mkt-rf,0.670622,0.285526,0.581904,0.472897,0.756638,0.819183,0.889437,0.881874
smb,-0.020335,-0.015913,-0.020836,-0.017434,-0.000786,-0.003219,-0.005367,-0.004817
t_smb,-0.77137,-0.608143,-0.790405,-0.65974,-0.019158,-0.08208,-0.134492,-0.125372
hml,-0.034649,-0.03475,-0.037384,-0.037039,-0.035954,-0.036583,-0.03796,-0.03819
t_hml,-1.331683,-1.323701,-1.433895,-1.433491,-1.139742,-1.166436,-1.224607,-1.209948
mom,0.175036,0.18612,0.161536,0.16667,0.296699,0.296447,0.295101,0.294765
t_mom,1.166196,1.096582,1.070426,0.993063,2.600492,2.613997,2.563009,2.698215


Mean


Unnamed: 0,I:bm_size,II:bm_size,III:bm_size,IV:bm_size,I:49_industries,II:49_industries,III:49_industries,IV:49_industries
c,0.097371,0.082629,0.079434,0.137923,0.065962,0.065234,0.063975,0.064029
t_c,1.156698,1.040098,1.049305,1.318525,0.969582,0.984727,0.978682,0.979167
mkt-rf,0.036482,0.051788,0.054989,-0.004281,0.07225,0.072121,0.073751,0.07436
t_mkt-rf,0.389535,0.568463,0.622678,-0.037551,0.863109,0.870051,0.891537,0.911145
smb,-0.017074,-0.019872,-0.019881,-0.014319,-0.003902,-0.005818,-0.005206,-0.007027
t_smb,-0.653356,-0.756714,-0.755972,-0.541153,-0.097855,-0.151507,-0.129959,-0.184878
hml,-0.034975,-0.035157,-0.033989,-0.040493,-0.036189,-0.036791,-0.038114,-0.038854
t_hml,-1.355012,-1.347833,-1.305148,-1.572006,-1.14028,-1.165031,-1.223459,-1.224063
mom,0.09101,0.16079,0.162175,0.108392,0.296471,0.296174,0.295584,0.294893
t_mom,0.532809,1.08592,1.114138,0.576421,2.617613,2.607971,2.675175,2.580788


Forward - Sum


Unnamed: 0,I:bm_size,II:bm_size,III:bm_size,IV:bm_size,I:49_industries,II:49_industries,III:49_industries,IV:49_industries
c,0.037565,0.07802,0.033346,0.045524,0.073147,0.070899,0.065185,0.090718
t_c,0.44186,0.867728,0.376683,0.532205,0.898251,0.929195,0.864259,1.081068
mkt-rf,0.077121,0.033311,0.080312,0.068513,0.04016,0.039156,0.046263,0.021622
t_mkt-rf,0.753714,0.319527,0.766897,0.685948,0.420183,0.421378,0.498522,0.218705
smb,-0.050832,-0.047713,-0.047699,-0.050798,-0.034547,-0.036581,-0.035728,-0.037231
t_smb,-1.428275,-1.354702,-1.362204,-1.426511,-0.669608,-0.722878,-0.695823,-0.725806
hml,-0.047359,-0.042701,-0.050354,-0.045414,-0.06231,-0.064959,-0.072625,-0.068926
t_hml,-1.274111,-1.141775,-1.358371,-1.228281,-1.385265,-1.446968,-1.618418,-1.516005
mom,0.179195,0.172004,0.174249,0.191381,0.174473,0.174313,0.169497,0.17856
t_mom,1.566289,1.346361,1.524439,1.670692,1.449742,1.499434,1.450605,1.536746


Forward - Mean


Unnamed: 0,I:bm_size,II:bm_size,III:bm_size,IV:bm_size,I:49_industries,II:49_industries,III:49_industries,IV:49_industries
c,0.035783,0.054456,0.033974,0.073163,0.067425,0.069499,0.073729,0.085323
t_c,0.452735,0.669746,0.435027,0.834921,0.862952,0.909068,0.939303,1.032611
mkt-rf,0.078207,0.058917,0.079926,0.039578,0.046807,0.040374,0.036996,0.028831
t_mkt-rf,0.821721,0.601361,0.847524,0.387796,0.509101,0.430971,0.38993,0.300879
smb,-0.050404,-0.050616,-0.050512,-0.048572,-0.040273,-0.037985,-0.035395,-0.043515
t_smb,-1.430326,-1.421741,-1.42297,-1.364013,-0.788135,-0.755018,-0.683828,-0.851862
hml,-0.044062,-0.044764,-0.043719,-0.047203,-0.057109,-0.067817,-0.072797,-0.067076
t_hml,-1.186021,-1.200176,-1.175645,-1.275882,-1.259885,-1.517342,-1.624501,-1.484347
mom,0.167572,0.157913,0.166759,0.175921,0.165804,0.174502,0.166789,0.1761
t_mom,1.52032,1.303081,1.504946,1.486481,1.36976,1.509831,1.516642,1.491488


Backward - Sum


Unnamed: 0,I:bm_size,II:bm_size,III:bm_size,IV:bm_size,I:49_industries,II:49_industries,III:49_industries,IV:49_industries
c,0.085208,0.096464,0.108107,0.120126,0.178269,0.143601,0.117397,0.140882
t_c,0.66513,0.805089,0.912029,0.93852,1.4997,1.336132,1.080112,1.217406
mkt-rf,0.075077,0.065102,0.052771,0.042821,-0.014682,0.020585,0.047823,0.024056
t_mkt-rf,0.461314,0.417879,0.344967,0.262764,-0.094935,0.1416,0.326667,0.157652
smb,0.025923,0.02637,0.027389,0.027355,0.040716,0.028338,0.015114,0.023911
t_smb,0.670394,0.678736,0.708143,0.707002,0.658192,0.467937,0.246398,0.390434
hml,-0.02566,-0.026755,-0.024681,-0.029552,0.022066,0.023668,0.030477,0.025193
t_hml,-0.73215,-0.768781,-0.712157,-0.848522,0.477546,0.521288,0.679167,0.536879
mom,0.144813,0.151315,0.138692,0.13238,0.448131,0.450413,0.438329,0.449077
t_mom,1.123756,1.219734,1.213755,1.053237,3.695898,3.713115,3.648089,3.660768


Backward - Mean


Unnamed: 0,I:bm_size,II:bm_size,III:bm_size,IV:bm_size,I:49_industries,II:49_industries,III:49_industries,IV:49_industries
c,0.081427,0.066194,0.098533,0.127691,0.158699,0.139098,0.097405,0.110403
t_c,0.577288,0.487421,0.840596,0.960186,1.419875,1.254158,0.893673,1.026365
mkt-rf,0.077307,0.094061,0.063465,0.03683,0.004448,0.025262,0.069204,0.05271
t_mkt-rf,0.446692,0.556847,0.415865,0.22006,0.029712,0.169946,0.474903,0.363337
smb,0.025536,0.0288,0.025519,0.026382,0.03709,0.024004,0.003782,0.020216
t_smb,0.660928,0.744081,0.660486,0.681015,0.612959,0.400269,0.060278,0.337945
hml,-0.023734,-0.027421,-0.026659,-0.028607,0.026532,0.021924,0.03515,0.020384
t_hml,-0.673554,-0.77456,-0.768859,-0.827487,0.57841,0.486092,0.768454,0.449034
mom,0.192133,0.134295,0.150322,0.132323,0.459383,0.454299,0.403737,0.454891
t_mom,1.299248,1.061789,1.235646,1.06359,3.76931,3.710974,3.420558,3.7582


In [13]:
def map_results(coef, prepend="gamma"):
    mapper = {
        "c": r"\multirow{{2}}{{*}}{{$\widehat{{\{}_{{0}}}}$}}".format(prepend),
        "t_c": np.nan,

        "mkt-rf": r"\multirow{{2}}{{*}}{{$\widehat{{\{}}}_{{mkt-rf}}$}}".format(prepend),
        "t_mkt-rf": np.nan,

        "smb": r"\multirow{{2}}{{*}}{{$\widehat{{\{}}}_{{smb}}$}}".format(prepend),
        "t_smb": np.nan,

        "hml": r"\multirow{{2}}{{*}}{{$\widehat{{\{}}}_{{hml}}$}}".format(prepend),
        "t_hml": np.nan,

        "mom": r"\multirow{{2}}{{*}}{{$\widehat{{\{}}}_{{mom}}$}}".format(prepend),
        "t_mom": np.nan,

        "aggregate": r"\multirow{{2}}{{*}}{{$\widehat{{\{}}}_{{agg}}$}}".format(prepend),
        "t_aggregate": np.nan,

        "weather_extremes": r"\multirow{{2}}{{*}}{{$\widehat{{\{}}}_{{wea}}$}}".format(prepend),
        "t_weather_extremes": np.nan,

        "importance_of_human_intervantion": r"\multirow{{2}}{{*}}{{$\widehat{{\{}}}_{{imp}}$}}".format(prepend),
        "t_importance_of_human_intervantion": np.nan,
        
        "politics": r"\multirow{{2}}{{*}}{{$\widehat{{\{}}}_{{pol}}$}}".format(prepend),
        "t_politics": np.nan,

        "r2": r"$R^2$",
        "nobs": r"$N$",
    }

    return mapper.get(coef)


def prettify_fm_results(df):
    coll_df = df.copy()

    # apply multiindex to columns and split on portfolio type
    arrays = [
        coll_df.columns.map(lambda x: x.split(":")[1]),
        coll_df.columns.map(lambda x: x.split(":")[0]),
    ]
    tuples = list(zip(*arrays))

    coll_df.columns = pd.MultiIndex.from_tuples(tuples)

    # add parenthesis to t-stat
    for row in coll_df.loc[coll_df.index.str.contains(rf"t_")].index:
        coll_df.loc[row] = (
            coll_df.loc[row]
            .apply(
                pretty_print_pval, freedom=coll_df.loc['nobs'].min(), precision=SIGNIFICANT_DIGITS
            )
            .astype(str)
            .mask(coll_df.loc[row].isna())
        )

    # map index to remove _transformed_residuals
    coll_df.index = coll_df.index.map(lambda x: x.replace("_transformed_residuals", ""))

    # pretty print results
    coll_df = coll_df.rename(columns={
        "bm_size": r"\textbf{25 Book-to-Market and Size Sorted Portfolios}",
        "49_industries": r"\textbf{49 Industry Portfolios}"
    })

    coll_df.loc['nobs'] = (
            coll_df.loc['nobs']
            .apply(
                lambda x: f"{round(x, 0):.{0}f}"
            )
            .astype(str)
            .mask(coll_df.loc['nobs'].isna())
        )

    return coll_df


def print_latex_table(df, map_rows: bool=True, prettify: bool = True, **kwargs):
    print_df = df.copy()

    if prettify:
        print_df = prettify_fm_results(print_df)

    if map_rows:
        print_df.index = print_df.index.map(map_results)

    latex = print_df.to_latex(
        index=True,
        escape=False,
        sparsify=True,
        multirow=True,
        multicolumn=True,
        na_rep="",
        multicolumn_format="c",
        float_format=f"{{:.{SIGNIFICANT_DIGITS}f}}".format,
        position="H",
        **kwargs
    )

    latex = re.sub(r"\\(mid|top|bottom)rule", "", latex)

    # remove empty lines
    latex = latex.replace("NaN", "")

    print(latex)

    return

In [14]:
print("Sum - full sample \n\n")

print_latex_table(fm_results_sum[0], bold_rows = False)

  latex = print_df.to_latex(


Sum - full sample 


\begin{table}[H]
\centering
\begin{tabular}{lllllllll}

{} & \multicolumn{4}{c}{\textbf{25 Book-to-Market and Size Sorted Portfolios}} & \multicolumn{4}{c}{\textbf{49 Industry Portfolios}} \\
{} &                                                     I &              II &             III &              IV &                               I &               II &              III &               IV \\

\multirow{2}{*}{$\widehat{\gamma_{0}}$}      &                                            0.07557 &         0.10332 &         0.08047 &         0.08577 &                         0.07308 &          0.06878 &          0.06449 &          0.06375 \\
                                          &                                          (0.99503) &       (1.13178) &       (0.97461) &       (0.95903) &                       (1.02719) &        (1.01343) &        (0.98640) &        (0.94192) \\
\multirow{2}{*}{$\widehat{\gamma}_{mkt-rf}$} &                                            

In [15]:
print("Mean N - full sample \n\n")

print_latex_table(fm_results_mean_n[0], bold_rows = False)

Mean N - full sample 


\begin{table}[H]
\centering
\begin{tabular}{lllllllll}

{} & \multicolumn{4}{c}{\textbf{25 Book-to-Market and Size Sorted Portfolios}} & \multicolumn{4}{c}{\textbf{49 Industry Portfolios}} \\
{} &                                                     I &              II &             III &               IV &                               I &               II &              III &               IV \\

\multirow{2}{*}{$\widehat{\gamma_{0}}$}      &                                            0.09737 &         0.08263 &         0.07943 &          0.13792 &                         0.06596 &          0.06523 &          0.06397 &          0.06403 \\
                                          &                                          (1.15670) &       (1.04010) &       (1.04930) &    (1.31852) (*) &                       (0.96958) &        (0.98473) &        (0.97868) &        (0.97917) \\
\multirow{2}{*}{$\widehat{\gamma}_{mkt-rf}$} &                                      

  latex = print_df.to_latex(


In [16]:
print("Sum - Forward \n\n")

print_latex_table(fm_results_forward_sum[0], bold_rows = False)

Sum - Forward 


\begin{table}[H]
\centering
\begin{tabular}{lllllllll}

{} & \multicolumn{4}{c}{\textbf{25 Book-to-Market and Size Sorted Portfolios}} & \multicolumn{4}{c}{\textbf{49 Industry Portfolios}} \\
{} &                                                     I &              II &             III &              IV &                               I &              II &             III &              IV \\

\multirow{2}{*}{$\widehat{\gamma_{0}}$}      &                                            0.03757 &         0.07802 &         0.03335 &         0.04552 &                         0.07315 &         0.07090 &         0.06518 &         0.09072 \\
                                          &                                          (0.44186) &       (0.86773) &       (0.37668) &       (0.53220) &                       (0.89825) &       (0.92919) &       (0.86426) &       (1.08107) \\
\multirow{2}{*}{$\widehat{\gamma}_{mkt-rf}$} &                                            0.07712 &    

  latex = print_df.to_latex(


In [17]:
print("Mean N - Forward \n\n")

print_latex_table(fm_results_forward_mean_n[0], bold_rows = False)

Mean N - Forward 


\begin{table}[H]
\centering
\begin{tabular}{lllllllll}

{} & \multicolumn{4}{c}{\textbf{25 Book-to-Market and Size Sorted Portfolios}} & \multicolumn{4}{c}{\textbf{49 Industry Portfolios}} \\
{} &                                                     I &              II &             III &               IV &                               I &              II &             III &              IV \\

\multirow{2}{*}{$\widehat{\gamma_{0}}$}      &                                            0.03578 &         0.05446 &         0.03397 &          0.07316 &                         0.06743 &         0.06950 &         0.07373 &         0.08532 \\
                                          &                                          (0.45274) &       (0.66975) &       (0.43503) &        (0.83492) &                       (0.86295) &       (0.90907) &       (0.93930) &       (1.03261) \\
\multirow{2}{*}{$\widehat{\gamma}_{mkt-rf}$} &                                            0.07821

  latex = print_df.to_latex(


In [18]:
print("Sum - Backward \n\n")

print_latex_table(fm_results_backward_sum[0], bold_rows = False)

Sum - Backward 


\begin{table}[H]
\centering
\begin{tabular}{lllllllll}

{} & \multicolumn{4}{c}{\textbf{25 Book-to-Market and Size Sorted Portfolios}} & \multicolumn{4}{c}{\textbf{49 Industry Portfolios}} \\
{} &                                                     I &          II &         III &          IV &                               I &               II &              III &               IV \\

\multirow{2}{*}{$\widehat{\gamma_{0}}$}      &                                            0.08521 &     0.09646 &     0.10811 &     0.12013 &                         0.17827 &          0.14360 &          0.11740 &          0.14088 \\
                                          &                                          (0.66513) &   (0.80509) &   (0.91203) &   (0.93852) &                   (1.49970) (*) &    (1.33613) (*) &        (1.08011) &        (1.21741) \\
\multirow{2}{*}{$\widehat{\gamma}_{mkt-rf}$} &                                            0.07508 &     0.06510 &     0.05277 &  

  latex = print_df.to_latex(


In [19]:
print("Mean N - Backward \n\n")

print_latex_table(fm_results_backward_mean_n[0], bold_rows = False)

Mean N - Backward 


\begin{table}[H]
\centering
\begin{tabular}{lllllllll}

{} & \multicolumn{4}{c}{\textbf{25 Book-to-Market and Size Sorted Portfolios}} & \multicolumn{4}{c}{\textbf{49 Industry Portfolios}} \\
{} &                                                     I &          II &         III &          IV &                               I &               II &              III &               IV \\

\multirow{2}{*}{$\widehat{\gamma_{0}}$}      &                                            0.08143 &     0.06619 &     0.09853 &     0.12769 &                         0.15870 &          0.13910 &          0.09740 &          0.11040 \\
                                          &                                          (0.57729) &   (0.48742) &   (0.84060) &   (0.96019) &                   (1.41988) (*) &        (1.25416) &        (0.89367) &        (1.02637) \\
\multirow{2}{*}{$\widehat{\gamma}_{mkt-rf}$} &                                            0.07731 &     0.09406 &     0.06347 

  latex = print_df.to_latex(


# Robustness check - Positive vs. Negative sentiment

# Unexpected climate change
This script generates the measure of unexpected climate change perception using AR(1) model


In [20]:
# Loading in Twitter data
df = read_db(engine=engine, statement="select * from twitter_kaggle")

# Setting datetime (potentially, we should deal with time-zones problems)
df["datetime"] = pd.to_datetime(df["created_at"])
df = df.set_index(df["datetime"], drop=True)  # Setting index
df.index.name = None

df = df.drop(columns=["created_at", "datetime"]) # dropping irrelevant colunms


In [21]:
df_pos = df.loc[df['sentiment'] > 0]
df_neg = df.loc[df['sentiment'] < 0]

df = None

In [22]:
def create_daily_sum(dataframe):

    # will be overwritten in loop
    topic = "aggregate"

    daily_sum = transform(dataframe, transform_type="sum", mapping="log", topic=topic)
    daily_sum = pd.merge(
        left=daily_sum,
        right=forecast_residual(
            daily_sum[f"{topic}_transformed"],
            return_spec_test=True,
            auto_lag=True,
        ),
        how="left",
        left_index=True,
        right_index=True,
    )


    for topic in dataframe["topic"].unique():
        daily_topic_sum = transform(dataframe, transform_type="sum", mapping="log", topic=topic)

        daily_topic_sum = pd.merge(
            left=daily_topic_sum,
            right=forecast_residual(
                daily_topic_sum[f"{topic}_transformed"],
                return_spec_test=True,
                auto_lag=True,
            ),
            left_index=True,
            right_index=True,
            how="left",
        )

        daily_sum = pd.merge(
            left=daily_sum,
            right=daily_topic_sum,
            how="left",
            left_index=True,
            right_index=True,
        )

    daily_sum.columns = daily_sum.columns.map(
        lambda x: x.lower().replace("/ ", "").replace(" ", "_")
    )

    return daily_sum

In [23]:
df_pos = create_daily_sum(df_pos)
df_neg = create_daily_sum(df_neg)

In [24]:
factors_pos = pd.merge(left=factors, right=df_pos, left_index=True, right_index=True)
factors_neg = pd.merge(left=factors, right=df_neg, left_index=True, right_index=True)

In [25]:
# annualise factors
for factor in BASE_AP_MODEL:
    factors_pos[factor] = factors_pos[factor] * YEARLY_BUSINESS_DAYS
    factors_neg[factor] = factors_neg[factor] * YEARLY_BUSINESS_DAYS

In [26]:
# remove weekends and holidays from factors
factors_pos = factors_pos.reindex(returns_bm_size.index).dropna(how="all")
factors_neg = factors_neg.reindex(returns_bm_size.index).dropna(how="all")

# select factor observations
factors_pos = factors_pos.loc[START_DATE:END_DATE]
factors_neg = factors_neg.loc[START_DATE:END_DATE]

In [27]:
fm_results_pos = generate_fm_results(
    factors_pos, returns, portfolios=["bm_size", "49_industries"]
)
fm_results_neg = generate_fm_results(
    factors_neg, returns, portfolios=["bm_size", "49_industries"]
)

100%|██████████| 2/2 [05:10<00:00, 155.45s/it]
100%|██████████| 2/2 [05:13<00:00, 156.55s/it]


In [28]:
print("Positive")
display(fm_results_pos[0])

print("Negative")
display(fm_results_neg[0])

Positive


Unnamed: 0,I:bm_size,II:bm_size,III:bm_size,IV:bm_size,I:49_industries,II:49_industries,III:49_industries,IV:49_industries
c,0.060112,0.074004,0.067651,0.067771,0.072113,0.068133,0.07979,0.067289
t_c,0.728784,0.93381,0.857178,0.82751,1.091145,1.014862,1.217983,0.972084
mkt-rf,0.072867,0.059049,0.065004,0.065282,0.065705,0.069813,0.058287,0.07092
t_mkt-rf,0.78301,0.648274,0.721289,0.70575,0.796504,0.835732,0.711899,0.830099
smb,-0.019548,-0.019078,-0.019467,-0.013289,-0.002944,-0.003578,-0.005585,0.004894
t_smb,-0.74736,-0.734313,-0.745643,-0.502943,-0.076169,-0.091638,-0.145059,0.127837
hml,-0.034215,-0.034058,-0.034528,-0.030619,-0.037615,-0.038231,-0.036617,-0.034485
t_hml,-1.320085,-1.310217,-1.330943,-1.183637,-1.191141,-1.212632,-1.148425,-1.077396
mom,0.133346,0.154697,0.126738,0.059831,0.291661,0.294993,0.276439,0.308165
t_mom,0.91071,1.059022,0.874581,0.391225,2.549332,2.590134,2.425294,2.68243


Negative


Unnamed: 0,I:bm_size,II:bm_size,III:bm_size,IV:bm_size,I:49_industries,II:49_industries,III:49_industries,IV:49_industries
c,0.066165,0.067688,0.070787,0.090983,0.072057,0.073807,0.08185,0.074677
t_c,0.807229,0.822134,0.862905,1.041051,1.084525,1.090809,1.212044,1.108364
mkt-rf,0.067792,0.064973,0.063496,0.043962,0.065945,0.064296,0.056756,0.065065
t_mkt-rf,0.735573,0.688588,0.685022,0.452129,0.796675,0.767779,0.680673,0.779423
smb,-0.020146,-0.01891,-0.020393,-0.017724,-0.002925,-0.002248,-0.001023,-0.014811
t_smb,-0.768773,-0.72401,-0.775814,-0.677647,-0.075465,-0.057398,-0.026365,-0.382045
hml,-0.034229,-0.034863,-0.034479,-0.03573,-0.036902,-0.037479,-0.036479,-0.038277
t_hml,-1.317116,-1.335795,-1.321875,-1.367807,-1.166639,-1.189271,-1.144696,-1.202531
mom,0.154819,0.155607,0.161081,0.245942,0.293561,0.293364,0.284347,0.291259
t_mom,1.096631,1.027601,1.147672,1.604677,2.573721,2.550144,2.539154,2.551646


In [29]:
print("Positive - full sample \n\n")

print_latex_table(fm_results_pos[0], bold_rows = False)

Positive - full sample 


\begin{table}[H]
\centering
\begin{tabular}{lllllllll}

{} & \multicolumn{4}{c}{\textbf{25 Book-to-Market and Size Sorted Portfolios}} & \multicolumn{4}{c}{\textbf{49 Industry Portfolios}} \\
{} &                                                     I &              II &             III &              IV &                               I &               II &              III &               IV \\

\multirow{2}{*}{$\widehat{\gamma_{0}}$}      &                                            0.06011 &         0.07400 &         0.06765 &         0.06777 &                         0.07211 &          0.06813 &          0.07979 &          0.06729 \\
                                          &                                          (0.72878) &       (0.93381) &       (0.85718) &       (0.82751) &                       (1.09114) &        (1.01486) &        (1.21798) &        (0.97208) \\
\multirow{2}{*}{$\widehat{\gamma}_{mkt-rf}$} &                                       

  latex = print_df.to_latex(


In [30]:
print("Negative - full sample \n\n")

print_latex_table(fm_results_neg[0], bold_rows = False)

Negative - full sample 


\begin{table}[H]
\centering
\begin{tabular}{lllllllll}

{} & \multicolumn{4}{c}{\textbf{25 Book-to-Market and Size Sorted Portfolios}} & \multicolumn{4}{c}{\textbf{49 Industry Portfolios}} \\
{} &                                                     I &              II &             III &              IV &                               I &               II &              III &               IV \\

\multirow{2}{*}{$\widehat{\gamma_{0}}$}      &                                            0.06616 &         0.06769 &         0.07079 &         0.09098 &                         0.07206 &          0.07381 &          0.08185 &          0.07468 \\
                                          &                                          (0.80723) &       (0.82213) &       (0.86291) &       (1.04105) &                       (1.08453) &        (1.09081) &        (1.21204) &        (1.10836) \\
\multirow{2}{*}{$\widehat{\gamma}_{mkt-rf}$} &                                       

  latex = print_df.to_latex(
