# Tables + Robustness checks ESG 

## TABLES ##

This section contains the code that generates the tables in LaTeX format.

In [3]:
import pandas as pd

# Sector-level summary statistics of the regression coefficients, one CSV per model:
# the Fama-French specification and the Fama-French + ESG-regressor specification.
df_ff = pd.read_csv("Input/1147climate_new_ff3_industrial_stats.csv")
df_esg = pd.read_csv("Input/1147climate_new_ff3esg_industrial_stats.csv")

# Coefficient statistics come as "<name>_mean" / "<name>_std" column pairs.
_coefficient_headers = (
    ("Alpha", r"$\alpha$"),
    ("Beta_SMB", r"$\beta_{SMB}$"),
    ("Beta_HML", r"$\beta_{HML}$"),
    ("Beta_MktRF", r"$\beta_{MKRf}$"),
    ("Beta_Overall_ESG_Exposure", r"$\beta_{ENV\_Score}$"),
    ("Beta_Overall_ESG_Sentiment", r"$\beta_{ENV\_Sentiment}$"),
)
# Fit statistics and the observation count have no standard-deviation column.
_summary_headers = (
    ("R_squared_mean", r"$R^2$"),
    ("Adj_R_squared_mean", r"Adj. $R^2$"),
    ("N_Companies", "Obs."),
)

# Table layout in display order: (mean_col, std_col or None, LaTeX label)
columns = [(f"{name}_mean", f"{name}_std", label) for name, label in _coefficient_headers]
columns += [(name, None, label) for name, label in _summary_headers]

# Formatting helpers
def fmt(val, digits=4):
    """Render ``val`` as a fixed-point string with ``digits`` decimals (default 4)."""
    precision_spec = f".{digits}f"
    return format(val, precision_spec)

def fmt_std(val, digits=2):
    """Render ``val`` with ``digits`` decimals (default 2), wrapped in parentheses."""
    return "(" + format(val, f".{digits}f") + ")"

# Initialize LaTeX table
# The tabular has one label column plus one column per entry in `columns`, so panel
# headers must span len(columns) + 1 columns to be centred over the whole table
# (a hard-coded 9 covered only 9 of the 10 columns).
n_table_cols = len(columns) + 1

# Factor applied to N_Companies to obtain the figure printed in the "Obs." column.
# NOTE(review): presumably the number of observations per company (the ticker-level
# results report Num_Obs = 70) -- confirm before reusing with another sample.
OBS_PER_COMPANY = 70


def _panel_header(title):
    """Return the rule / full-width bold title / rule lines that open a panel."""
    return [
        r"\hline",
        r"\multicolumn{" + str(n_table_cols) + r"}{c}{\textbf{" + title + r"}} \\",
        r"\hline",
    ]


def _panel_rows(stats, blank_missing_columns=False):
    """Return two LaTeX rows per row of ``stats``: the means, then the std devs.

    Parameters
    ----------
    stats : DataFrame with a "Macro_Sector" column and the statistic columns
        named in ``columns``.
    blank_missing_columns : when True, table columns absent from ``stats`` are
        rendered as empty cells; when False a missing column raises KeyError.
    """
    rows = []
    for _, row in stats.iterrows():
        # '&' is the tabular column separator: a sector such as "Energy & Utilities"
        # must be written "Energy \& Utilities" or it adds a spurious column.
        # An already-escaped '\&' is normalised first so it is not escaped twice.
        sector = str(row["Macro_Sector"]).replace(r"\&", "&").replace("&", r"\&")
        values, stds = [], []
        for mean_col, std_col, _label in columns:
            if blank_missing_columns and mean_col not in stats.columns:
                values.append("")
                stds.append("")
                continue
            if pd.isna(row[mean_col]):
                val = ""
            elif mean_col == "N_Companies":
                val = str(int(row[mean_col] * OBS_PER_COMPANY))
            else:
                val = fmt(row[mean_col])
            std = fmt_std(row[std_col]) if std_col and pd.notna(row[std_col]) else ""
            values.append(val)
            stds.append(std)
        rows.append(f"{sector} & " + " & ".join(values) + r" \\")
        # Second row: standard deviations, aligned under the means.
        rows.append(" " * len(sector) + " & " + " & ".join(stds) + r" \\")
    return rows


lines = [
    r"\begin{table}[ht]",
    r"\centering",
    r"\small",
    r"\renewcommand{\arraystretch}{1.2}",
    r"\setlength{\tabcolsep}{6pt}",
    r"\resizebox{\textwidth}{!}{%",
    r"\begin{tabular}{l|" + "c" * len(columns) + "}",
    r"\hline",
    " & " + " & ".join(label for _, _, label in columns) + r" \\",
]

# Panel A: Fama-French only, leave ESG fields blank
lines += _panel_header("Panel A: Fama-French Factors Only")
lines += _panel_rows(df_ff, blank_missing_columns=True)

# Panel B: Full ESG version (every column is expected to be present)
lines += _panel_header("Panel B: Fama-French + ENV Factors")
lines += _panel_rows(df_esg)

# Close table
lines += [
    r"\hline",
    r"\end{tabular}",
    r"}",
    r"\caption{Comparison of Regression Results: Fama-French vs Fama-French + ENV factors. STD in brackets}",
    r"\label{tab:ff_macro_sector_two_panels}",
    r"\end{table}"
]

# Save to LaTeX file
with open("Output/Table_FF_Sector_Regression_TwoPanels_TwoCSVs.tex", "w") as f:
    f.writelines(line + "\n" for line in lines)


In [4]:
# Ticker-level FF3 + ESG regression results.
df_second_table = pd.read_csv("Input/1147_climate_dataset_fama_french_3esg_regression_results.csv")

# Drop only the tickers for which BOTH ESG coefficients are flagged False
# (De Morgan form of "not (sentiment is False and exposure is False)").
_sentiment_not_false = ~(df_second_table["Beta_Overall_ESG_Sentiment_Significant"] == False)
_exposure_not_false = ~(df_second_table["Beta_Overall_ESG_Exposure_Significant"] == False)
df_second_table = df_second_table[_sentiment_not_false | _exposure_not_false]
df_second_table

Unnamed: 0,TICKER,Macro_Sector,Alpha,Beta_MktRF,Beta_SMB,Beta_HML,Beta_Overall_ESG_Exposure,Beta_Overall_ESG_Sentiment,T_Alpha,T_Beta_MktRF,...,P_Beta_Overall_ESG_Sentiment,Beta_MktRF_Significant,Beta_SMB_Significant,Beta_HML_Significant,Beta_Overall_ESG_Exposure_Significant,Beta_Overall_ESG_Sentiment_Significant,Alpha_Significant,Adj_R_squared,R_squared,Num_Obs
1,CDNS,Consumer,0.093805,1.132462,-0.134178,-0.779023,-3.174189,-0.149363,2.39248,8.919626,...,0.295479,True,False,True,True,False,True,0.602532,0.631334,70
10,ZTS,Industrials,0.041409,0.788725,0.082108,-0.284881,0.084789,-0.101001,1.267708,7.562706,...,0.019984,True,False,False,False,True,False,0.400351,0.443803,70
11,PEP,Industrials,0.026035,0.564194,-0.453132,0.244929,0.359271,-0.288332,1.316153,5.639964,...,0.000577,True,True,True,False,True,False,0.381166,0.426009,70
15,ANET,Industrials,0.058108,1.25634,-0.676068,-0.48409,-3.561758,0.023092,1.920144,6.946721,...,0.876813,True,False,True,True,False,False,0.309078,0.359145,70
19,JNJ,Industrials,-0.003412,0.554814,-0.246959,0.208166,2.054805,-0.102433,-0.277931,5.338949,...,0.039865,True,False,True,False,True,False,0.307053,0.357267,70
29,WMB,Energy & Utilities,-0.41641,1.100815,-0.368143,0.817257,1.023984,0.391195,-1.850865,6.170372,...,0.040107,True,False,True,False,True,False,0.526966,0.561244,70
31,CMI,Industrials,0.064092,0.98802,0.132094,0.530941,-0.381138,-0.399488,2.036062,7.776064,...,0.000382,True,False,True,False,True,True,0.581065,0.611422,70
32,SWK,Industrials,-0.041184,1.062894,0.451144,0.716936,-0.005579,0.164447,-3.694343,6.747399,...,0.023421,True,False,True,False,True,True,0.544959,0.577933,70
43,AES,Energy & Utilities,-0.056424,0.975824,0.203017,0.307283,0.333327,-0.563583,-2.026923,4.854095,...,0.007744,True,False,False,True,True,True,0.26575,0.318957,70
45,BSX,Industrials,-0.07783,0.829932,-0.473114,0.100254,3.23994,-0.063609,-1.347302,7.464493,...,0.228079,True,True,False,True,False,False,0.404036,0.447221,70


In [5]:
# The boolean "<term>_Significant" flags are not needed in the tables; drop them.
_significance_flags = [
    f"{term}_Significant"
    for term in (
        "Beta_MktRF",
        "Beta_SMB",
        "Beta_HML",
        "Beta_Overall_ESG_Exposure",
        "Beta_Overall_ESG_Sentiment",
        "Alpha",
    )
]
df_newsecond = df_second_table.copy().drop(columns=_significance_flags)
df_newsecond

Unnamed: 0,TICKER,Macro_Sector,Alpha,Beta_MktRF,Beta_SMB,Beta_HML,Beta_Overall_ESG_Exposure,Beta_Overall_ESG_Sentiment,T_Alpha,T_Beta_MktRF,...,T_Beta_Overall_ESG_Sentiment,P_Alpha,P_Beta_MktRF,P_Beta_SMB,P_Beta_HML,P_Beta_Overall_ESG_Exposure,P_Beta_Overall_ESG_Sentiment,Adj_R_squared,R_squared,Num_Obs
1,CDNS,Consumer,0.093805,1.132462,-0.134178,-0.779023,-3.174189,-0.149363,2.39248,8.919626,...,-1.046178,0.016735,4.678648999999999e-19,0.514079,1.209333e-09,0.040503,0.295479,0.602532,0.631334,70
10,ZTS,Industrials,0.041409,0.788725,0.082108,-0.284881,0.084789,-0.101001,1.267708,7.562706,...,-2.326656,0.204902,3.947677e-14,0.776015,0.08598292,0.834329,0.019984,0.400351,0.443803,70
11,PEP,Industrials,0.026035,0.564194,-0.453132,0.244929,0.359271,-0.288332,1.316153,5.639964,...,-3.442378,0.188123,1.700859e-08,0.018623,0.01324566,0.096725,0.000577,0.381166,0.426009,70
15,ANET,Industrials,0.058108,1.25634,-0.676068,-0.48409,-3.561758,0.023092,1.920144,6.946721,...,0.155011,0.05484,3.738735e-12,0.086586,0.04680593,0.00989,0.876813,0.309078,0.359145,70
19,JNJ,Industrials,-0.003412,0.554814,-0.246959,0.208166,2.054805,-0.102433,-0.277931,5.338949,...,-2.055141,0.781065,9.348698e-08,0.187666,0.04869142,0.163726,0.039865,0.307053,0.357267,70
29,WMB,Energy & Utilities,-0.41641,1.100815,-0.368143,0.817257,1.023984,0.391195,-1.850865,6.170372,...,2.052647,0.064189,6.812966e-10,0.084532,2.650357e-09,0.081921,0.040107,0.526966,0.561244,70
31,CMI,Industrials,0.064092,0.98802,0.132094,0.530941,-0.381138,-0.399488,2.036062,7.776064,...,-3.551927,0.041744,7.48161e-15,0.394079,8.92689e-05,0.078087,0.000382,0.581065,0.611422,70
32,SWK,Industrials,-0.041184,1.062894,0.451144,0.716936,-0.005579,0.164447,-3.694343,6.747399,...,2.266492,0.00022,1.505188e-11,0.283194,1.913089e-06,0.958767,0.023421,0.544959,0.577933,70
43,AES,Energy & Utilities,-0.056424,0.975824,0.203017,0.307283,0.333327,-0.563583,-2.026923,4.854095,...,-2.663023,0.04267,1.209378e-06,0.519397,0.1697982,0.010537,0.007744,0.26575,0.318957,70
45,BSX,Industrials,-0.07783,0.829932,-0.473114,0.100254,3.23994,-0.063609,-1.347302,7.464493,...,-1.205321,0.177883,8.362109e-14,0.009409,0.3087355,0.042862,0.228079,0.404036,0.447221,70


In [6]:
import pandas as pd

# Work on a copy of the ticker-level results
df = df_newsecond.copy()

# One panel per macro sector, lettered A, B, C in display order
panels = {
    sector: f"Panel {letter}: {sector}"
    for letter, sector in zip("ABC", ("Industrials", "Consumer", "Energy & Utilities"))
}

# Coefficient column -> LaTeX header; the matching t-stat column is "T_<coefficient>"
_coefficient_headers = {
    "Alpha": r"$\alpha$",
    "Beta_MktRF": r"$\beta_{MKT}$",
    "Beta_SMB": r"$\beta_{SMB}$",
    "Beta_HML": r"$\beta_{HML}$",
    "Beta_Overall_ESG_Exposure": r"$\beta_{ENV\_Score}$",
    "Beta_Overall_ESG_Sentiment": r"$\beta_{ENV\_Sentiment}$",
}
betas = [(coef, f"T_{coef}", header) for coef, header in _coefficient_headers.items()]

# Helper: format t-stat with significance stars
def format_tstat(tval):
    """Format a t-statistic as "(t)" with two decimals plus significance stars.

    Stars are based on |t|: ``***`` from 2.58, ``**`` from 1.96, ``*`` from 1.65.
    A missing value yields an empty string.
    """
    if pd.isna(tval):
        return ""
    magnitude = abs(tval)
    stars = ""
    # Cutoffs are checked from the strictest down; the first one reached wins.
    for cutoff, marker in ((2.58, "***"), (1.96, "**"), (1.65, "*")):
        if magnitude >= cutoff:
            stars = marker
            break
    return f"({tval:.2f}){stars}"

# Start LaTeX table lines
lines = [
    r"\begin{table}[ht]",
    r"\centering",
    r"\small",
    r"\renewcommand{\arraystretch}{1.2}",
    r"\setlength{\tabcolsep}{6pt}",
    r"\resizebox{\textwidth}{!}{%",
    r"\begin{tabular}{l|" + "c" * len(betas) + "|c|c|c}",
    r"\hline",
    "Ticker & " + " & ".join(label for _, _, label in betas) + r" & $R^2$ & Adj. $R^2$ & Obs. \\",
    r"\hline"
]

# Total column count: ticker + one per coefficient + R^2, Adj. R^2 and Obs.
n_table_cols = len(betas) + 4

# Loop over panels
for sector, label in panels.items():
    # '&' is the tabular column separator, so a caption such as
    # "Panel C: Energy & Utilities" must be emitted with '\&' or LaTeX fails on the
    # panel header. An already-escaped '\&' is normalised first to avoid '\\&'.
    safe_label = label.replace(r"\&", "&").replace("&", r"\&")
    lines.append(r"\hline")  # Add line before panel header
    lines.append(r"\multicolumn{" + str(n_table_cols) + r"}{c}{" + r"\textbf{" + safe_label + r"}} \\")
    lines.append(r"\hline")
    # The unescaped sector name is still what appears in the data.
    sub_df = df[df["Macro_Sector"] == sector]
    for _, row in sub_df.iterrows():
        ticker = row["TICKER"]
        beta_vals = [f"{row[beta]:.4f}" if pd.notna(row[beta]) else "" for beta, _, _ in betas]
        # A t-stat column that is absent from the data gives an empty cell.
        t_vals = [format_tstat(row[t]) if t in row else "" for _, t, _ in betas]

        # First row: beta estimates
        lines.append(f"{ticker} & " + " & ".join(beta_vals) +
                     f" & {row['R_squared']:.3f} & {row['Adj_R_squared']:.3f} & {int(row['Num_Obs'])} \\\\")

        # Second row: t-stats with stars (fit-statistic cells stay empty)
        lines.append(" " * len(ticker) + " & " + " & ".join(t_vals) + r" & & & \\")

# End table
lines += [
    r"\hline",
    r"\end{tabular}",
    r"}",
    r"\caption{Regression Results by Sector: Coefficients and T-Stats with Significance Levels}",
    r"\label{tab:regression_results_by_sector}",
    r"\end{table}"
]

# Save to file
with open("Output/Sector_Regression_Table_TickerLevel.tex", "w") as f:
    f.writelines(line + "\n" for line in lines)



In [7]:
import pandas as pd

# Work on a copy of the ticker-level results
df = df_newsecond.copy()

# Macro sectors in display order, each mapped to its lettered panel caption
_sector_order = ("Industrials", "Consumer", "Energy & Utilities")
panels = dict(
    zip(
        _sector_order,
        ("Panel " + letter + ": " + name for letter, name in zip("ABC", _sector_order)),
    )
)

# (coefficient column, t-stat column, LaTeX header); t-stat columns are "T_" + coefficient
_coefficient_headers = (
    ("Alpha", r"$\alpha$"),
    ("Beta_MktRF", r"$\beta_{MKT}$"),
    ("Beta_SMB", r"$\beta_{SMB}$"),
    ("Beta_HML", r"$\beta_{HML}$"),
    ("Beta_Overall_ESG_Exposure", r"$\beta_{ENV\_Score}$"),
    ("Beta_Overall_ESG_Sentiment", r"$\beta_{ENV\_Sentiment}$"),
)
betas = [(coef, "T_" + coef, header) for coef, header in _coefficient_headers]

# Helper: format t-stat with significance stars
def format_tstat(tval):
    """Return "(t)" with two decimals followed by one star per cutoff reached.

    The cutoffs on |t| are 1.65, 1.96 and 2.58, giving ``*``, ``**`` and ``***``.
    Missing values are rendered as an empty string.
    """
    if pd.isna(tval):
        return ""
    # Each cutoff that |t| reaches contributes one star.
    n_stars = int(sum(abs(tval) >= cutoff for cutoff in (1.65, 1.96, 2.58)))
    return "({:.2f})".format(tval) + "*" * n_stars

# Start LaTeX table lines
lines = [
    r"\begin{table}[ht]",
    r"\centering",
    r"\small",
    r"\renewcommand{\arraystretch}{1.2}",
    r"\setlength{\tabcolsep}{6pt}",
    r"\resizebox{\textwidth}{!}{%",
    r"\begin{tabular}{l|" + "c" * len(betas) + "|c}",
    r"\hline",
    "Ticker & " + " & ".join(label for _, _, label in betas) + r" & Adj. $R^2$ \\",
    r"\hline"
]

# Total column count: ticker + one per coefficient + Adj. R^2
n_table_cols = len(betas) + 2

# Loop over panels
for sector, label in panels.items():
    # '&' is the tabular column separator, so a caption such as
    # "Panel C: Energy & Utilities" must be emitted with '\&' or LaTeX fails on the
    # panel header. An already-escaped '\&' is normalised first to avoid '\\&'.
    safe_label = label.replace(r"\&", "&").replace("&", r"\&")
    lines.append(r"\hline")  # Add line before panel header
    lines.append(r"\multicolumn{" + str(n_table_cols) + r"}{c}{" + r"\textbf{" + safe_label + r"}} \\")
    lines.append(r"\hline")
    # The unescaped sector name is still what appears in the data.
    sub_df = df[df["Macro_Sector"] == sector]
    for _, row in sub_df.iterrows():
        ticker = row["TICKER"]
        beta_vals = [f"{row[beta]:.4f}" if pd.notna(row[beta]) else "" for beta, _, _ in betas]
        # A t-stat column that is absent from the data gives an empty cell.
        t_vals = [format_tstat(row[t]) if t in row else "" for _, t, _ in betas]

        # First row: beta estimates
        lines.append(f"{ticker} & " + " & ".join(beta_vals) + f" & {row['Adj_R_squared']:.3f} \\\\")

        # Second row: t-stats with stars (the Adj. R^2 cell stays empty)
        lines.append(" " * len(ticker) + " & " + " & ".join(t_vals) + r" & \\")

# End table
# NOTE(review): the \label below is the same one written to
# Output/Sector_Regression_Table_TickerLevel.tex; including both tables in one
# document would define the label twice -- confirm only one of them is used.
lines += [
    r"\hline",
    r"\end{tabular}",
    r"}",  # closes \resizebox
    r"\caption{Regression Results by Sector: Coefficients and T-Stats with Significance Levels (Adj. $R^2$ only)}",
    r"\label{tab:regression_results_by_sector}",
    r"\end{table}"
]

# Save to file
with open("Output/Sector_Regression_Table_TickerLevel_AdjR2Only.tex", "w") as f:
    f.writelines(line + "\n" for line in lines)


In [8]:
import pandas as pd

# Fama-French-only regression results, restricted to the tickers kept in df_newsecond
_ff3_results = pd.read_csv("Input/1147_climate_dataset_fama_french_3_regression_results.csv")
_in_esg_table = _ff3_results['TICKER'].isin(df_newsecond['TICKER'])
df = _ff3_results[_in_esg_table]


# Macro sectors in display order, mapped to lettered panel captions
panels = {
    sector: f"Panel {letter}: {sector}"
    for letter, sector in zip("ABC", ["Industrials", "Consumer", "Energy & Utilities"])
}

# Coefficients available in this file: (coefficient column, "T_<coefficient>", header)
betas = [
    (coef, f"T_{coef}", header)
    for coef, header in (
        ("Alpha", r"$\alpha$"),
        ("Beta_MktRF", r"$\beta_{MKT}$"),
        ("Beta_SMB", r"$\beta_{SMB}$"),
        ("Beta_HML", r"$\beta_{HML}$"),
    )
]
# The two ENV columns keep their headers but have no source column: cells stay blank
betas += [("", "", header) for header in (r"$\beta_{ENV\_Score}$", r"$\beta_{ENV\_Sentiment}$")]

# Helper: format t-stat with significance stars
def format_tstat(tval):
    """Format a t-statistic as "(t)" (two decimals) with trailing significance stars.

    |t| below 1.65 gets no star, below 1.96 one, below 2.58 two, otherwise three.
    Missing values give an empty string.
    """
    if pd.isna(tval):
        return ""
    size = abs(tval)
    if size < 1.65:
        stars = ""
    elif size < 1.96:
        stars = "*"
    elif size < 2.58:
        stars = "**"
    else:
        stars = "***"
    return "(" + format(tval, ".2f") + ")" + stars

# Start LaTeX table lines
lines = [
    r"\begin{table}[ht]",
    r"\centering",
    r"\small",
    r"\renewcommand{\arraystretch}{1.2}",
    r"\setlength{\tabcolsep}{6pt}",
    r"\resizebox{\textwidth}{!}{%",
    r"\begin{tabular}{l|" + "c" * len(betas) + "|c}",
    r"\hline",
    "Ticker & " + " & ".join(label for _, _, label in betas) + r" & Adj. $R^2$ \\",
    r"\hline"
]

# Total column count: ticker + one per entry in `betas` + Adj. R^2.
# (Spanning only len(betas) + 1 columns left the panel title off-centre.)
n_table_cols = len(betas) + 2

# Loop over panels
for sector, label in panels.items():
    # '&' is the tabular column separator, so a caption such as
    # "Panel C: Energy & Utilities" must be emitted with '\&' or LaTeX fails on the
    # panel header. An already-escaped '\&' is normalised first to avoid '\\&'.
    safe_label = label.replace(r"\&", "&").replace("&", r"\&")
    lines.append(r"\hline")
    lines.append(r"\multicolumn{" + str(n_table_cols) + r"}{c}{\textbf{" + safe_label + r"}} \\")
    lines.append(r"\hline")
    # The unescaped sector name is still what appears in the data.
    sub_df = df[df["Macro_Sector"] == sector]
    for _, row in sub_df.iterrows():
        ticker = row["TICKER"]
        beta_vals = []
        t_vals = []

        for beta_col, t_col, _ in betas:
            if beta_col:
                beta_val = f"{row[beta_col]:.4f}" if pd.notna(row[beta_col]) else ""
                t_val = format_tstat(row[t_col]) if pd.notna(row[t_col]) else ""
            else:
                # Placeholder column (no source data in this model): leave blank
                beta_val = ""
                t_val = ""
            beta_vals.append(beta_val)
            t_vals.append(t_val)

        # First row: coefficients
        lines.append(f"{ticker} & " + " & ".join(beta_vals) + f" & {row['Adj_R_squared']:.3f} \\\\")

        # Second row: t-stats (the Adj. R^2 cell stays empty)
        lines.append(" " * len(ticker) + " & " + " & ".join(t_vals) + r" & \\")

# End table
lines += [
    r"\hline",
    r"\end{tabular}",
    r"}",
    r"\caption{Fama-French Regression Results by Sector (No ENV): Coefficients and T-Stats}",
    r"\label{tab:regression_results_by_sector_no_env}",
    r"\end{table}"
]

with open("Output/Sector_FFRegression_Table_TickerLevel_AdjR2Only.tex", "w") as f:
    f.writelines(line + "\n" for line in lines)



## ESG VALUES ##

This section contains the code for the robustness checks. In particular, we test the predictive power of our text-based ESG score and ESG sentiment (computed with climate-BERT, hence focused on the environmental component).

In [9]:
import pandas as pd

# Disclosed ESG scores; the raw ticker codes are in the header-less column 'Unnamed: 1'
df_ESGdiscl = pd.read_excel("Input/ESG_disclosed.xlsx")
_raw_ticker_codes = df_ESGdiscl['Unnamed: 1']
_raw_ticker_codes.unique()

array(['MMM', 'ABT', 'ABBV.K', 'ACN', 'ADBE.O', 'AAP', 'AMD.O', 'AES',
       'A', 'APD', 'ABNB.O', 'AKAM.O', 'ALK', 'ALB', 'ARE', 'ALGN.O',
       'ALLE.K', 'LNT.O', 'GOOGL.O', 'MO', 'AMZN.O', 'AMCR.K', 'AEE',
       'AAL.O', 'AEP.O', 'AMT', 'AWK', 'AME', 'AMGN.O', 'APH', 'ADI.O',
       'ANSS.O', 'APA.O', 'AAPL.OQ', 'AMAT.O', 'APTV.K', 'ADM', 'ANET.K',
       'T', 'ATO', 'ADSK.O', 'ADP.O', 'AZO', 'AVB', 'AVY', 'AXON.O',
       'BKR.O', 'BALL.K', 'BBWI.K', 'BAX', 'BDX', 'BBY', 'BIO', 'TECH.O',
       'BIIB.O', 'BA', 'BKNG.O', 'BXP', 'BSX', 'BMY', 'AVGO.O', 'BR',
       'BFb', 'BLDR.K', 'BG', 'CHRW.O', 'CDNS.O', 'CZR.O', 'CPT', 'CPB.O',
       'KMX', 'CCL', 'CARR.K', 'CTLT.K^L24', 'CAT', 'CBRE.K', 'CDW.O',
       'CE', 'COR', 'CNC', 'CNP', 'CF', 'CRL', 'CHTR.O', 'CPK', 'CVX',
       'CMG', 'CI', 'CTAS.O', 'CSCO.O', 'CLX', 'CMS', 'KO', 'CTSH.O',
       'CL', 'CMCSA.O', 'CAG', 'COP', 'ED', 'STZ', 'CEG.O', 'COO.O',
       'CPRT.O', 'GLW', 'CTVA.K', 'CSGP.O', 'COST.O', 'CTRA.K', 'CCI',
   

In [10]:
# The guards below make this cell safe to re-run: the rename and the
# 'Update Date' -> 'year' conversion are skipped once they have been applied.

# Name the ticker column and strip everything from the first '.' onwards
# (e.g. 'ADBE.O' -> 'ADBE', 'CTLT.K^L24' -> 'CTLT').
if 'Unnamed: 1' in df_ESGdiscl.columns:
    df_ESGdiscl = df_ESGdiscl.rename(columns={'Unnamed: 1': 'ticker'})
df_ESGdiscl['ticker'] = df_ESGdiscl['ticker'].str.replace(r'\..*', '', regex=True)

# Propagate the SIC fields to every row of the same company ('Unnamed: 0'):
# forward-fill first, then back-fill rows that precede the first non-missing value.
for col in ['SIC Industry Name', 'SIC Industry Code']:
    df_ESGdiscl[col] = df_ESGdiscl.groupby('Unnamed: 0')[col].transform(lambda s: s.ffill().bfill())

# Keep only the calendar year of the update date.
if 'Update Date' in df_ESGdiscl.columns:
    df_ESGdiscl['year'] = pd.to_datetime(df_ESGdiscl['Update Date']).dt.year
    df_ESGdiscl = df_ESGdiscl.drop(columns=['Update Date'])
df_ESGdiscl

  df_ESGdiscl[col] = df_ESGdiscl.groupby('Unnamed: 0')[col].transform(lambda x: x.ffill().bfill())


Unnamed: 0.1,Unnamed: 0,ticker,ISIN,SIC Industry Name,SIC Industry Code,ESG Score,year
0,3M Company,MMM,US88579Y1010,Surgical and Medical Instruments and Apparatus,3841.0,88.786099,2023
1,3M Company,MMM,,Surgical and Medical Instruments and Apparatus,3841.0,91.163507,2022
2,3M Company,MMM,,Surgical and Medical Instruments and Apparatus,3841.0,89.653487,2021
3,3M Company,MMM,,Surgical and Medical Instruments and Apparatus,3841.0,93.415343,2020
4,3M Company,MMM,,Surgical and Medical Instruments and Apparatus,3841.0,89.862456,2019
...,...,...,...,...,...,...,...
2597,Zoetis Inc.,ZTS,,Pharmaceutical Preparations,2834.0,72.278767,2022
2598,Zoetis Inc.,ZTS,,Pharmaceutical Preparations,2834.0,65.930350,2021
2599,Zoetis Inc.,ZTS,,Pharmaceutical Preparations,2834.0,64.851895,2020
2600,Zoetis Inc.,ZTS,,Pharmaceutical Preparations,2834.0,59.349309,2019


In [11]:
import pandas as pd

# ESG controversies scores: normalise tickers, reduce dates to years, drop helper columns
df_ESGcontro = (
    pd.read_excel("Input/ESG_controv.xlsx", sheet_name='Sheet2')
    # strip everything from the first '.' onwards in the ticker code
    .assign(TICKER=lambda d: d['TICKER'].str.replace(r'\..*', '', regex=True))
    .rename(columns={'TICKER': 'ticker'})
    .drop(columns=['Unnamed: 0'])
    .assign(year=lambda d: pd.to_datetime(d['Date']).dt.year)
    .drop(columns=['Date'])
)

# Exclude the years 2016, 2017 and 2024
_excluded_years = [2024, 2016, 2017]
df_ESGcontro = df_ESGcontro[~df_ESGcontro['year'].isin(_excluded_years)]
df_ESGcontro


Unnamed: 0,ticker,Company Name,ESG Controversies Score,ESG,year
0,KO,Coca-Cola Co,4.166667,,2022
1,KO,,41.666667,,2021
2,KO,,23.529412,,2020
3,KO,,86.666667,,2019
4,KO,,5.555556,,2018
...,...,...,...,...,...
484,FTNT,,100.000000,,2022
485,FTNT,,100.000000,,2021
486,FTNT,,100.000000,,2020
487,FTNT,,100.000000,,2019


In [12]:
# Keep only (ticker, year) pairs present in both sources, then drop descriptive columns
_descriptive_columns = ["Company Name", "ESG", 'Unnamed: 0', 'SIC Industry Name', 'SIC Industry Code']
df_merged = (
    df_ESGcontro
    .merge(df_ESGdiscl, how='inner', on=['ticker', 'year'])
    .drop(columns=_descriptive_columns)
)
df_merged


Unnamed: 0,ticker,ESG Controversies Score,year,ISIN,ESG Score
0,KO,4.166667,2022,US1912161007,79.266819
1,KO,41.666667,2021,US1912161007,82.877818
2,KO,23.529412,2020,US1912161007,82.474984
3,KO,86.666667,2019,US1912161007,66.467946
4,KO,5.555556,2018,US1912161007,68.274220
...,...,...,...,...,...
410,FTNT,100.000000,2022,US34959E1091,53.983296
411,FTNT,100.000000,2021,US34959E1091,52.785179
412,FTNT,100.000000,2020,US34959E1091,56.260001
413,FTNT,100.000000,2019,US34959E1091,51.094981


In [13]:
# Keep a single row per (ticker, year) -- the first one encountered -- and count how
# many yearly rows remain for each ticker, to spot tickers with missing years.
# The explicit .copy() makes df_merged an independent frame, so adding columns to it
# later does not raise SettingWithCopyWarning.
df_merged = df_merged.drop_duplicates(subset=['ticker', 'year'], keep='first').copy()
count = df_merged['ticker'].value_counts()
count

ticker
ADP     6
PENN    6
DHR     6
RHI     6
EFX     6
       ..
KMX     5
JNJ     5
ROST    5
CDNS    5
KO      5
Name: count, Length: 69, dtype: int64

In [14]:
# Tickers that have a 2023 row, and tickers that have none
_is_2023 = df_merged['year'].eq(2023)
tickers_with_2023 = df_merged.loc[_is_2023, 'ticker'].unique()
_no_2023_row = ~df_merged['ticker'].isin(tickers_with_2023)
tickers_missing_2023 = df_merged.loc[_no_2023_row, 'ticker'].unique()

# Plain Python list of the tickers without 2023 data
tickers_missing_2023_list = tickers_missing_2023.tolist()

# Report them
print("Tickers missing data for 2023:")
print(tickers_missing_2023_list)

Tickers missing data for 2023:
['KO', 'WMT', 'AAP', 'KMX', 'ROST']


In [15]:
# Identify tickers with and without data for 2023
tickers_with_2023 = df_merged.loc[df_merged['year'] == 2023, 'ticker'].unique()
tickers_missing_2023 = df_merged.loc[~df_merged['ticker'].isin(tickers_with_2023), 'ticker'].unique()

# Rows of the tickers that lack a 2023 observation
df_missing_2023 = df_merged[df_merged['ticker'].isin(tickers_missing_2023)].copy()

# Unconditional (all available years) mean of both scores for each such ticker
mean_values = (
    df_missing_2023
    .groupby('ticker')[['ESG Controversies Score', 'ESG Score']]
    .mean()
    .reset_index()
)

# Synthetic 2023 rows built from those means; 'missing' = 1 marks them as imputed.
# Columns not set here (e.g. ISIN) end up as NaN after the concat.
df_imputed_2023 = mean_values.assign(year=2023, missing=1)

# Observed rows get 'missing' = 0. assign() returns a new frame instead of writing
# into df_merged in place, which avoids the SettingWithCopyWarning raised when
# df_merged is itself the result of a row filter.
df_merged = df_merged.assign(missing=0)
df_mergednew = pd.concat([df_merged, df_imputed_2023], ignore_index=True)

# Verify the result
df_mergednew

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_merged['missing'] = 0  # Add a 'missing' column to the original DataFrame with default value 0


Unnamed: 0,ticker,ESG Controversies Score,year,ISIN,ESG Score,missing
0,KO,4.166667,2022,US1912161007,79.266819,0
1,KO,41.666667,2021,US1912161007,82.877818,0
2,KO,23.529412,2020,US1912161007,82.474984,0
3,KO,86.666667,2019,US1912161007,66.467946,0
4,KO,5.555556,2018,US1912161007,68.274220,0
...,...,...,...,...,...,...
406,AAP,82.258065,2023,,55.641350,1
407,KMX,88.679245,2023,,50.420308,1
408,KO,32.316993,2023,,75.872357,1
409,ROST,90.487421,2023,,51.221610,1


In [16]:
# Text-based scores per company and year; only the overall scores are kept,
# so the per-item columns (and the string CIK) are dropped.
_item_score_columns = [
    f"{item}_ESG_{kind}"
    for item in ("Item_1", "Item_1A", "Item_7", "Item_8")
    for kind in ("Exposure", "Sentiment")
]
df_esgscores = pd.read_csv("Input/companies_env_scores.csv")
df_esgscores = df_esgscores.drop(columns=_item_score_columns + ['cik_str'])

df_esgscores

Unnamed: 0,year,company,cik,SIC,Overall_ESG_Exposure,Overall_ESG_Sentiment
0,2018,ABBOTT LABORATORIES,1800,2834,0.028207,0.475459
1,2019,ABBOTT LABORATORIES,1800,2834,0.036716,0.490040
2,2020,ABBOTT LABORATORIES,1800,2834,0.032571,0.559597
3,2021,ABBOTT LABORATORIES,1800,2834,0.015178,0.387513
4,2022,ABBOTT LABORATORIES,1800,2834,0.027045,0.539837
...,...,...,...,...,...,...
476,2019,"Evergy, Inc.",1711269,4931,0.222074,0.134712
477,2020,"Evergy, Inc.",1711269,4931,0.315844,0.179391
478,2021,"Evergy, Inc.",1711269,4931,0.123626,0.172710
479,2022,"Evergy, Inc.",1711269,4931,0.246674,0.266962


In [17]:
# Use the CIK to retrieve the ticker needed for merging
from sec_cik_mapper import StockMapper

# Tickers that appear in the Fama-French regression results; only these are kept
df_tickersreg = pd.read_csv("Input/1147_climate_dataset_fama_french_3_regression_results.csv")
tickers_to_keep = set(df_tickersreg['TICKER'].unique())

mapper = StockMapper()
cik_to_ticker = mapper.cik_to_tickers


def _ticker_for_cik(cik_str):
    """Pick one ticker for a zero-padded 10-digit CIK string, deterministically.

    A CIK can map to several tickers. Taking "the first" element of that collection
    is arbitrary, and the row is then dropped by the filter below whenever the
    arbitrary pick is not in the regression sample. Prefer a ticker that is in
    `tickers_to_keep`; otherwise fall back to the alphabetically smallest ticker.
    Returns None when the CIK is unknown.
    NOTE(review): assumes each value of `cik_to_ticker` is an iterable of ticker
    strings -- confirm against the sec_cik_mapper documentation.
    """
    candidates = cik_to_ticker.get(cik_str, ())
    in_sample = sorted(t for t in candidates if t in tickers_to_keep)
    if in_sample:
        return in_sample[0]
    return min(candidates, default=None)


# Zero-padded CIK string used as lookup key, then the mapped ticker.
# assign() returns new frames, so re-running the cell on the already filtered
# df_esgscores does not write into a slice.
df_esgscores = df_esgscores.assign(cik_str=df_esgscores['cik'].astype(str).str.zfill(10))
df_esgscores = df_esgscores.assign(ticker=df_esgscores['cik_str'].map(_ticker_for_cik))

# Drop rows where the ticker is not in the list of tickers to keep
df_esgscores = df_esgscores[df_esgscores['ticker'].isin(tickers_to_keep)]

df_esgscores


Unnamed: 0,year,company,cik,SIC,Overall_ESG_Exposure,Overall_ESG_Sentiment,cik_str,ticker
0,2018,ABBOTT LABORATORIES,1800,2834,0.028207,0.475459,0000001800,ABT
1,2019,ABBOTT LABORATORIES,1800,2834,0.036716,0.490040,0000001800,ABT
2,2020,ABBOTT LABORATORIES,1800,2834,0.032571,0.559597,0000001800,ABT
3,2021,ABBOTT LABORATORIES,1800,2834,0.015178,0.387513,0000001800,ABT
4,2022,ABBOTT LABORATORIES,1800,2834,0.027045,0.539837,0000001800,ABT
...,...,...,...,...,...,...,...,...
476,2019,"Evergy, Inc.",1711269,4931,0.222074,0.134712,0001711269,EVRG
477,2020,"Evergy, Inc.",1711269,4931,0.315844,0.179391,0001711269,EVRG
478,2021,"Evergy, Inc.",1711269,4931,0.123626,0.172710,0001711269,EVRG
479,2022,"Evergy, Inc.",1711269,4931,0.246674,0.266962,0001711269,EVRG


In [18]:
# Inner-join the disclosed/controversies panel (including the imputed 2023 rows)
# with the text-based scores on ticker and year
df_final = df_mergednew.merge(df_esgscores, how='inner', on=['ticker', 'year'])

# Display the merged DataFrame
df_final

Unnamed: 0,ticker,ESG Controversies Score,year,ISIN,ESG Score,missing,company,cik,SIC,Overall_ESG_Exposure,Overall_ESG_Sentiment,cik_str
0,KO,4.166667,2022,US1912161007,79.266819,0,COCA COLA CO,21344,2840,0.078912,0.229123,0000021344
1,KO,41.666667,2021,US1912161007,82.877818,0,COCA COLA CO,21344,2840,0.313117,0.205587,0000021344
2,KO,23.529412,2020,US1912161007,82.474984,0,COCA COLA CO,21344,2086,0.308487,0.215993,0000021344
3,KO,86.666667,2019,US1912161007,66.467946,0,COCA COLA CO,21344,2086,0.338436,0.174795,0000021344
4,KO,5.555556,2018,US1912161007,68.274220,0,COCA COLA CO,21344,2086,0.340771,0.133088,0000021344
...,...,...,...,...,...,...,...,...,...,...,...,...
377,AAP,82.258065,2023,,55.641350,1,ADVANCE AUTO PARTS INC NT,1158449,5531,0.071080,0.318056,0001158449
378,KMX,88.679245,2023,,50.420308,1,CARMAX INC,1170010,5521,0.054342,0.168597,0001170010
379,KO,32.316993,2023,,75.872357,1,COCA COLA CO,21344,2840,0.082674,0.254026,0000021344
380,ROST,90.487421,2023,,51.221610,1,"ROSS STORES, INC.",745732,5651,0.031641,0.629155,0000745732


In [19]:
# Distinct tickers in each input of the merge and in its result
tickers_merged, tickers_esgscores, tickers_final = (
    set(frame['ticker'].unique())
    for frame in (df_mergednew, df_esgscores, df_final)
)

# Tickers present in at least one input but absent from the merged result
tickers_left_out = tickers_merged.union(tickers_esgscores).difference(tickers_final)

# Display the tickers that were left out
print("Tickers left out in the merge:")
print(tickers_left_out)

Tickers left out in the merge:
{'T', 'ELV', 'ORCL', 'LW', 'GLW', 'EL', 'SWK', 'RCL', 'SO', 'SWKS'}


In [20]:
# Tickers present in at least one input but absent from the merged result
tickers_left_out = tickers_merged.union(tickers_esgscores).difference(tickers_final)

# Split them by the input they came from
tickers_in_merged = tickers_left_out & tickers_merged
tickers_in_esgscores = tickers_left_out & tickers_esgscores

print("Tickers left out that were present in df_merged:")
print(tickers_in_merged)

print("Tickers left out that were present in df_esgscores:")
print(tickers_in_esgscores)

# Full ticker lists of both inputs, for reference
_all_merged_tickers = df_mergednew['ticker'].unique()
_all_esgscores_tickers = df_esgscores['ticker'].unique()
print('\nTotal Tickers df_merged\n', _all_merged_tickers)
print('\nTotal Tickers df_esgscores\n', _all_esgscores_tickers)

Tickers left out that were present in df_merged:
{'ELV', 'GLW', 'ORCL', 'SO', 'SWKS'}
Tickers left out that were present in df_esgscores:
{'T', 'LW', 'EL', 'SWK', 'RCL'}

Total Tickers df_merged
 ['KO' 'CDNS' 'XRAY' 'CHTR' 'GM' 'HII' 'XYL' 'APTV' 'ABBV' 'NCLH' 'ZTS'
 'PEP' 'IQV' 'SLB' 'HLT' 'ANET' 'GLW' 'SO' 'ABT' 'EXC' 'JNJ' 'PPG' 'PPL'
 'FE' 'WEC' 'PEG' 'EVRG' 'AEP' 'PNW' 'OMC' 'WMB' 'NI' 'CMI' 'SWKS' 'ADP'
 'DHR' 'RHI' 'EFX' 'WMT' 'VZ' 'ADBE' 'TRMB' 'REGN' 'AES' 'BIIB' 'BSX'
 'MCHP' 'ORLY' 'ORCL' 'IT' 'PENN' 'DLTR' 'ELV' 'TTWO' 'APH' 'BBY' 'CTSH'
 'BKNG' 'AKAM' 'ISRG' 'MDLZ' 'AAP' 'KMX' 'WYNN' 'CF' 'UAL' 'ROST' 'PM'
 'FTNT']

Total Tickers df_esgscores
 ['ABT' 'AEP' 'ADP' 'KO' 'CMI' 'OMC' 'EFX' 'PEP' 'PPG' 'SLB' 'SWK' 'UAL'
 'WMT' 'WMB' 'JNJ' 'DHR' 'RHI' 'VZ' 'T' 'ROST' 'IT' 'BBY' 'PNW' 'WEC'
 'PEG' 'ADBE' 'CDNS' 'XRAY' 'APH' 'MCHP' 'TRMB' 'REGN' 'AES' 'BIIB' 'RCL'
 'BSX' 'ORLY' 'PENN' 'PPL' 'DLTR' 'TTWO' 'EL' 'FE' 'ISRG' 'CTSH' 'BKNG'
 'AKAM' 'CHTR' 'MDLZ' 'EXC' 'NI' 'AAP' 'KMX' 'W

#### Regressions

In [21]:
# Preview the merged panel (disclosed ESG / controversies scores joined with the
# text-based scores) at the start of the regression section.
df_final

Unnamed: 0,ticker,ESG Controversies Score,year,ISIN,ESG Score,missing,company,cik,SIC,Overall_ESG_Exposure,Overall_ESG_Sentiment,cik_str
0,KO,4.166667,2022,US1912161007,79.266819,0,COCA COLA CO,21344,2840,0.078912,0.229123,0000021344
1,KO,41.666667,2021,US1912161007,82.877818,0,COCA COLA CO,21344,2840,0.313117,0.205587,0000021344
2,KO,23.529412,2020,US1912161007,82.474984,0,COCA COLA CO,21344,2086,0.308487,0.215993,0000021344
3,KO,86.666667,2019,US1912161007,66.467946,0,COCA COLA CO,21344,2086,0.338436,0.174795,0000021344
4,KO,5.555556,2018,US1912161007,68.274220,0,COCA COLA CO,21344,2086,0.340771,0.133088,0000021344
...,...,...,...,...,...,...,...,...,...,...,...,...
377,AAP,82.258065,2023,,55.641350,1,ADVANCE AUTO PARTS INC NT,1158449,5531,0.071080,0.318056,0001158449
378,KMX,88.679245,2023,,50.420308,1,CARMAX INC,1170010,5521,0.054342,0.168597,0001170010
379,KO,32.316993,2023,,75.872357,1,COCA COLA CO,21344,2840,0.082674,0.254026,0000021344
380,ROST,90.487421,2023,,51.221610,1,"ROSS STORES, INC.",745732,5651,0.031641,0.629155,0000745732


In [22]:
# I want to check for industry effects, i.e. whether different SIC groups have better predictive power

def classify_industry(sic):
    """Map a numeric SIC code to a broad industry name.

    Returns "Missing" when the code falls in none of the listed ranges
    (including 0, the gaps between ranges, and NaN).
    """
    # (first code, last code, industry) -- inclusive, non-overlapping ranges
    sic_ranges = (
        (1, 999, "Agriculture"),
        (1000, 1499, "Mining"),
        (1500, 1799, "Construction"),
        (2000, 3999, "Manufacturing"),
        (4000, 4899, "Transportation"),
        (4900, 4999, "Utilities"),
        (5000, 5199, "Wholesale"),
        (5200, 5999, "Retail"),
        (6000, 6799, "Finance"),
        (7000, 8999, "Services"),
        (9000, 9999, "Public"),
    )
    for first, last, industry in sic_ranges:
        if first <= sic <= last:
            return industry
    return "Missing"

def assign_macro_sector(industry):
    """Collapse an industry label into one of the macro sectors.

    Returns 'Industrials', 'Consumer' or 'Energy & Utilities' for the
    industries listed below, and 'Other' for any other value.
    """
    sector_members = (
        ('Industrials', ('Manufacturing', 'Transportation')),
        ('Consumer', ('Retail', 'Services')),
        ('Energy & Utilities', ('Utilities', 'Mining')),
    )
    for macro_sector, industries in sector_members:
        if industry in industries:
            return macro_sector
    return 'Other'
    
# Derive the industry labels on a new frame so df_final itself is left unchanged.
df_finalcopy = df_final.assign(
    Industry=lambda frame: frame['SIC'].apply(classify_industry),
    Macro_Sector=lambda frame: frame['Industry'].apply(assign_macro_sector),
)
df_finalcopy

Unnamed: 0,ticker,ESG Controversies Score,year,ISIN,ESG Score,missing,company,cik,SIC,Overall_ESG_Exposure,Overall_ESG_Sentiment,cik_str,Industry,Macro_Sector
0,KO,4.166667,2022,US1912161007,79.266819,0,COCA COLA CO,21344,2840,0.078912,0.229123,0000021344,Manufacturing,Industrials
1,KO,41.666667,2021,US1912161007,82.877818,0,COCA COLA CO,21344,2840,0.313117,0.205587,0000021344,Manufacturing,Industrials
2,KO,23.529412,2020,US1912161007,82.474984,0,COCA COLA CO,21344,2086,0.308487,0.215993,0000021344,Manufacturing,Industrials
3,KO,86.666667,2019,US1912161007,66.467946,0,COCA COLA CO,21344,2086,0.338436,0.174795,0000021344,Manufacturing,Industrials
4,KO,5.555556,2018,US1912161007,68.274220,0,COCA COLA CO,21344,2086,0.340771,0.133088,0000021344,Manufacturing,Industrials
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377,AAP,82.258065,2023,,55.641350,1,ADVANCE AUTO PARTS INC NT,1158449,5531,0.071080,0.318056,0001158449,Retail,Consumer
378,KMX,88.679245,2023,,50.420308,1,CARMAX INC,1170010,5521,0.054342,0.168597,0001170010,Retail,Consumer
379,KO,32.316993,2023,,75.872357,1,COCA COLA CO,21344,2840,0.082674,0.254026,0000021344,Manufacturing,Industrials
380,ROST,90.487421,2023,,51.221610,1,"ROSS STORES, INC.",745732,5651,0.031641,0.629155,0000745732,Retail,Consumer


In [23]:
import pandas as pd
import statsmodels.api as sm
from pathlib import Path

# Prepare the dataset: order rows so that leads can be taken within each ticker
df = df_finalcopy.sort_values(by=["ticker", "year"])

# Next-year outcomes. shift(-1) returns the ticker's next *row*, which is only
# year t+1 when the panel has no gaps or repeated years, so the lead is blanked
# wherever the following row is not exactly one year ahead.
year_numeric = pd.to_numeric(df["year"], errors="coerce")
next_row_is_t1 = year_numeric.groupby(df["ticker"]).shift(-1) == year_numeric + 1
df["ESG_disclosed_t1"] = df.groupby("ticker")["ESG Score"].shift(-1).where(next_row_is_t1)
df["Controversy_t1"] = df.groupby("ticker")["ESG Controversies Score"].shift(-1).where(next_row_is_t1)
df["ESGScore_x_Sentiment"] = df["Overall_ESG_Exposure"] * df["Overall_ESG_Sentiment"]

# Define sectors (only the ones that exist in the Macro_Sector column)
sectors = ["Consumer", "Industrials", "Energy & Utilities"]

# Define model specifications (y, [x1, x2, ...])
models = {
    "(1)": ("ESG_disclosed_t1", ["Overall_ESG_Exposure"]),
    "(2)": ("Controversy_t1", ["Overall_ESG_Sentiment"]),
    "(3)": ("ESG_disclosed_t1", ["Overall_ESG_Exposure", "Overall_ESG_Sentiment"]),
    "(4)": ("ESG_disclosed_t1", ["Overall_ESG_Exposure", "Overall_ESG_Sentiment", "ESGScore_x_Sentiment"])
}


def _fit_ols(sample, y, X):
    """Fit OLS of column `y` on columns `X` plus an intercept.

    Only rows of `sample` with no missing value in `y` or `X` are used.
    Returns the fitted statsmodels results object, or None when no complete
    row is left to estimate on.
    """
    complete = sample[[y] + X].dropna()
    if complete.empty:
        return None
    return sm.OLS(complete[y], sm.add_constant(complete[X])).fit()


# Collect results as results[group][model label], with "All" being the full sample
results = {group: {} for group in ["All"] + sectors}

for label, (y, X) in models.items():
    results["All"][label] = _fit_ols(df, y, X)
    for sector in sectors:
        results[sector][label] = _fit_ols(df[df["Macro_Sector"] == sector], y, X)

def significance_stars(tval):
    """Return the significance marker for a t-statistic.

    The absolute value is compared against 2.58, 1.96 and 1.65, giving
    "***", "**" and "*" respectively. A missing value or anything below
    1.65 gives an empty string.
    """
    if pd.isna(tval):
        return ""
    magnitude = abs(tval)
    # Cutoffs are checked from strictest to loosest; the first one met wins.
    for cutoff, marker in ((2.58, "***"), (1.96, "**"), (1.65, "*")):
        if magnitude >= cutoff:
            return marker
    return ""

def format_tstat(tval):
    """Render a t-statistic as "(x.xx)"; a missing value renders as ""."""
    if pd.isna(tval):
        return ""
    return f"({tval:.2f})"


# Regressors reported in the table: (name in the fitted model, LaTeX row label)
variables = [
    ("const", r"\textbf{Constant}"),
    ("Overall_ESG_Exposure", r"\textbf{ENV Score}"),
    ("Overall_ESG_Sentiment", r"\textbf{ENV Sentiment}"),
    ("ESGScore_x_Sentiment", r"\textbf{Exposure $\times$ Sentiment}"),
]

# Column layout: four sample groups, each spanning the four model specifications
group_titles = ["All", "Consumer", "Industrials", r"Energy \& Utilities"]
spec_labels = ["(1)", "(2)", "(3)", "(4)"]

group_header = "& " + " & ".join(
    r"\multicolumn{4}{c}{\textbf{" + title + "}}" for title in group_titles
) + r" \\"
# Rule under each group header; column 1 holds the row labels, so groups start at column 2
group_rules = " ".join(
    r"\cmidrule(lr){" + f"{2 + 4 * position}-{5 + 4 * position}" + "}"
    for position in range(len(group_titles))
)
spec_header = "& " + " & ".join(spec_labels * len(group_titles)) + r" \\"

# Build LaTeX table
lines = [
    r"\begin{landscape}",
    r"\begin{table}[ht]",
    r"\centering",
    r"\small",
    r"\renewcommand{\arraystretch}{1.2}",
    r"\setlength{\tabcolsep}{6pt}",
    r"\resizebox{\linewidth}{!}{%",
    r"\begin{tabular}{lcccccccccccccccc}",
    r"\toprule",
    group_header,
    group_rules,
    spec_header,
    r"\midrule",
]

# Table columns in display order: every model specification within every sample group
column_keys = [
    (group, model_id)
    for group in ["All", "Consumer", "Industrials", "Energy & Utilities"]
    for model_id in ["(1)", "(2)", "(3)", "(4)"]
]

# Two rows per regressor: estimate with significance stars, then the t-statistic
for var, label in variables:
    coef_row, tstat_row = [label], [""]
    for group, model_id in column_keys:
        model = results[group].get(model_id)
        if not model or var not in model.params:
            # No fit for this column, or the regressor is not part of this specification
            coef_row.append("")
            tstat_row.append("")
            continue
        coef = model.params[var]
        tval = model.tvalues[var]
        coef_row.append(f"{coef:.3f}{significance_stars(tval)}")
        tstat_row.append(format_tstat(tval))
    lines += [" & ".join(coef_row) + r" \\", " & ".join(tstat_row) + r" \\"]


# Fit statistics listed under the coefficients: (attribute on the fitted results, row label)
stats = [
    ("nobs", "Observations"),
    ("rsquared", "R-squared"),
    ("rsquared_adj", "Adj. R-squared"),
    ("fvalue", "F-statistic"),
    ("f_pvalue", "p-value (F-statistic)"),
    ("aic", "AIC"),
    ("bic", "BIC"),
]


def _format_stat(model, attr):
    """Return one table cell for statistic `attr` of `model`.

    Blank when there is no fitted model or the value is missing; `nobs` is
    shown as an integer and every other statistic with three decimals.
    """
    val = getattr(model, attr, None) if model else None
    if pd.isna(val):
        return ""
    return f"{int(val)}" if attr == "nobs" else f"{val:.3f}"


for attr, label in stats:
    cells = [
        _format_stat(results[group].get(model_id), attr)
        for group in ["All", "Consumer", "Industrials", "Energy & Utilities"]
        for model_id in ["(1)", "(2)", "(3)", "(4)"]
    ]
    lines.append(" & ".join([label] + cells) + r" \\")

# Finalize table: close the tabular/resizebox, then add the caption, explanatory note and label.
# The parenthesised rows are filled from model.tvalues, so the note describes them as
# t-statistics (it previously said standard errors).
lines += [
    r"\bottomrule",
    r"\end{tabular}",
    r"} % end resizebox",
    r"\caption{Text-based variables predictive power. Regression results by model specification and macro sector.}",
    r"\vspace{2ex}",
    r"\begin{minipage}{\linewidth}",
    r"\footnotesize",
    r"\textit{Note:} Models (1), (3), and (4) employ $ESG\_Score_{t+1}$ as the dependent variable, whereas Model (2) uses $ESG\_Controversies\_Score_{t+1}$.\vspace*{0.5ex} \par In particular:\vspace*{0.5ex} \par Model (1): $ESG\_Score_{i,t+1} = \beta_0 + \beta_1ENV\_Score_{i,t} $\vspace*{0.5ex} \par Model (2): $ESG\_Controversies\_Score_{i,t+1} = \beta_0 + \beta_1ENV\_Sentiment_{i,t} $\vspace*{0.5ex} \par Model (3): $ESG\_Score_{i,t+1} = \beta_0 + \beta_1ENV\_Score_{i,t} + \beta_2ENV\_Sentiment_{i,t} $\vspace*{0.5ex} \par Model (4): $ESG\_Score_{i,t+1} = \beta_0 + \beta_1ENV\_Score_{i,t} + \beta_2ENV\_Sentiment_{i,t} + \beta_3(ENV\_Score_{i,t}\times ENV\_Sentiment_{i,t} )$\vspace*{1ex} \par Each specification is estimated on the full sample (“All”) and separately by macro sector. The ESG Score and ESG Controversies Score variables were obtained from Refinitiv for the period 2018–2023. t-statistics are shown in parentheses. $^{*}p<0.1$, $^{**}p<0.05$, $^{***}p<0.01$.",
    r"\end{minipage}",
    r"\label{tab:all_models}",
    r"\end{table}",
    r"\end{landscape}"
]

# Save to file
output_path = Path("Output/sector_model_regressions.tex")
# Create the target directory if needed so a fresh checkout does not fail on the write
output_path.parent.mkdir(parents=True, exist_ok=True)
# The table note contains non-ASCII characters (curly quotes, en dash), so the encoding
# is fixed to UTF-8 instead of depending on the platform default
with open(output_path, "w", encoding="utf-8") as f:
    f.write("\n".join(lines))
