In [1]:
# Importing necessary packages
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Creating the GMB Factors Using Different Percentiles

* The baseline case uses the 95th and 5th percentiles
* The "Stricter Case" uses the 97.5th and 2.5th percentiles
* The "Wider Case" uses the 90th and 10th percentiles

In [2]:
# Read the input panel and attach a 'monthyear' key: the year as text followed by
# the month left-padded with zeros to two characters (e.g. year 2002, Month 1 -> '200201').
data = pd.read_csv('relevantdata.csv').assign(
    monthyear=lambda raw: raw['year'].astype(str) + raw['Month'].astype(str).str.zfill(2)
)

In [3]:
# Remove rows whose sector should not enter the analysis:
#   - 'DidNotIdentify': no assumptions can be made when the sector is unknown
#   - 'Management', 'Agriculture', 'ArtsEntRec': sectors that each make up less
#     than 1% of the total dataset
data = data[~data['sector'].isin(['DidNotIdentify', 'Management', 'Agriculture', 'ArtsEntRec'])]

In [4]:
def categorize_method(method):
    """Collapse an estimation-method label into two categories.

    Returns 'Reported' when ``method`` equals 'Reported'; every other value
    (including missing values) is labelled 'Estimated'.
    """
    return 'Reported' if method == 'Reported' else 'Estimated'

# Label every row as 'Reported' or 'Estimated' based on its CO2 estimation method
data['Method'] = data['AnalyticCO2EstimationMethod'].map(categorize_method)

In [5]:
# Month-start timestamps from January 2002 through December 2023, plus the
# matching 'YYYYMM' labels held in a NumPy string array.
dates = pd.date_range(start='2002-01-01', end='2023-12-01', freq='MS')
monthyears = np.array([stamp.strftime('%Y%m') for stamp in dates])

In [6]:
# One-hot encode 'Method' as integer indicator columns, discard the raw
# estimation-method column, and give the emissions total a shorter name.
cleaned = (
    pd.get_dummies(data, columns=['Method'], dtype=int)
    .drop(columns='AnalyticCO2EstimationMethod')
    .rename(columns={'AnalyticEstimatesCO2EquivalentsEmissionTotal': 'CO2_Total'})
)

In [7]:
# Calculate CO2 Emissions Data Transformations
import numpy as np
cleaned = cleaned.assign(
    # Small positive shift applied before taking the logarithm
    ShiftedCO2=lambda frame: frame['CO2_Total'] + 1e-8,
    LogCO2=lambda frame: np.log(frame['ShiftedCO2']),
    # Emissions scaled by total assets
    CO2_Intensity=lambda frame: frame['CO2_Total'] / frame['TotalAssets'],
).drop(columns='Method_Reported')  # one indicator is enough; 'Method_Estimated' stays

In [8]:
def create_FF_factor(data, ESGmeasure, newcolumnname, lowerquant, upperquant, months=None):
    """Build a monthly green-minus-brown long/short factor from an emissions measure.

    For each month label, the rows of ``data`` whose 'monthyear' equals that
    label are ranked on ``ESGmeasure``. Rows strictly below the ``lowerquant``
    quantile form the "green" (low-emission) leg and rows strictly above the
    ``upperquant`` quantile form the "brown" (high-emission) leg. Every selected
    stock receives the same weight, 1 / (number of green + brown stocks), so the
    two legs together carry a total absolute weight of one. The factor value is
    the weighted green 'ExcessReturn' minus the weighted brown 'ExcessReturn',
    multiplied by 100.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'monthyear', 'cusip', 'sector',
        'ExcessReturn' and the column named by ``ESGmeasure``.
    ESGmeasure : str
        Column used to sort stocks into the green and brown legs.
    newcolumnname : str
        Name given to the factor column of the returned factor frame.
    lowerquant, upperquant : float
        Quantile levels (between 0 and 1) that bound the green and brown legs.
    months : sequence of str, optional
        Month labels to evaluate, compared against ``data['monthyear']``.
        When omitted, the notebook-level ``monthyears`` array is used.

    Returns
    -------
    df : pandas.DataFrame
        One row per month with columns 'monthyear' (str) and ``newcolumnname``.
        The factor is NaN for a month in which no stock was selected.
    portfolio_stocks_cusips : pandas.DataFrame
        One column per month listing the 'cusip' of each selected stock
        (green first, then brown); shorter columns are padded with NaN.
    portfolio_stocks_sectors : pandas.DataFrame
        Same layout as ``portfolio_stocks_cusips`` but holding 'sector'.
    portfolio_stocks : pandas.DataFrame
        All selected rows of ``data`` across every month, stacked vertically.
    """
    if months is None:
        # Fall back to the notebook-level month list so existing calls keep working.
        months = monthyears

    month_labels = []
    factor_values = []
    selected_frames = []
    cusip_frames = []
    sector_frames = []

    for month in months:
        month_labels.append(str(month))
        subset = data[data['monthyear'] == month]

        # Quantile cut-offs for this month's cross-section
        q_lower = subset[ESGmeasure].quantile(lowerquant)
        q_upper = subset[ESGmeasure].quantile(upperquant)

        # green stocks have low CO2 emissions, brown stocks have high CO2 emissions
        green = subset[subset[ESGmeasure] < q_lower]
        brown = subset[subset[ESGmeasure] > q_upper]
        filtered = pd.concat([green, brown], axis=0)

        cusip_frames.append(pd.DataFrame({month: filtered['cusip'].to_list()}))
        sector_frames.append(pd.DataFrame({month: filtered['sector'].to_list()}))

        n_selected = filtered.shape[0]
        if n_selected == 0:
            # No observations (or nothing strictly outside the quantiles): the
            # factor is undefined for this month, so record NaN instead of
            # dividing by zero.
            factor_values.append(np.nan)
            continue
        selected_frames.append(filtered)

        # Equal weight across every stock selected in either leg
        equalweight = 1 / n_selected
        green_sum = (green['ExcessReturn'] * equalweight).sum()
        brown_sum = (brown['ExcessReturn'] * equalweight).sum()

        # Long green stocks and short brown stocks
        factor_values.append((green_sum - brown_sum) * 100)

    df = pd.DataFrame({'monthyear': month_labels, newcolumnname: factor_values})

    # Assemble the per-month pieces once rather than re-concatenating inside the loop.
    portfolio_stocks_cusips = pd.concat(cusip_frames, axis=1) if cusip_frames else pd.DataFrame()
    portfolio_stocks_sectors = pd.concat(sector_frames, axis=1) if sector_frames else pd.DataFrame()
    portfolio_stocks = pd.concat(selected_frames, axis=0) if selected_frames else pd.DataFrame()

    return df, portfolio_stocks_cusips, portfolio_stocks_sectors, portfolio_stocks

In [9]:
# Independent deep copies of the cleaned panel, one per percentile scenario
cleaned_strict, cleaned_wide = cleaned.copy(deep=True), cleaned.copy(deep=True)

## "Stricter Case" (97.5th and 2.5th percentiles) - Factor Construction

In [10]:
# Keep only the identifier, date, return, benchmark-factor and emissions columns
# that the stricter-case output needs.
factordata_strict = cleaned_strict.loc[:, [
    'cusip', 'year', 'Month', 'monthyear',
    'Mkt-RF', 'SMB', 'HML', 'RF', 'RMW', 'CMA',
    'RET', 'ExcessReturn', 'sector',
    'CO2_Total', 'TotalAssets', 'LogCO2', 'CO2_Intensity',
    'Method_Estimated',
]]

In [11]:
# Quantile cut-offs for the stricter case (2.5th and 97.5th percentiles)
l_quant_strict, u_quant_strict = 0.025, 0.975

# Factor built by sorting on LogCO2, stored under the column name 'GMB_U'
(
    LogCO2_factors_strict,
    LogCO2_cusips_strict,
    LogCO2_sectors_strict,
    LogCO2_portfolio_strict,
) = create_FF_factor(
    data=cleaned_strict,
    ESGmeasure='LogCO2',
    newcolumnname='GMB_U',
    lowerquant=l_quant_strict,
    upperquant=u_quant_strict,
)
# Attach each month's factor value to every row of that month
factordata_strict = factordata_strict.merge(LogCO2_factors_strict, on='monthyear')

In [12]:
# Factor built by sorting on CO2_Intensity, stored under the column name 'GMB_S'
(
    CO2Intensity_factors_strict,
    CO2Intensity_cusips_strict,
    CO2Intensity_sectors_strict,
    CO2Intensity_portfolio_strict,
) = create_FF_factor(
    data=cleaned_strict,
    ESGmeasure='CO2_Intensity',
    newcolumnname='GMB_S',
    lowerquant=l_quant_strict,
    upperquant=u_quant_strict,
)
# Attach each month's factor value to every row of that month
factordata_strict = factordata_strict.merge(CO2Intensity_factors_strict, on='monthyear')

In [13]:
# Write the stricter-case panel (with both factor columns) to CSV, omitting the index
factordata_strict.to_csv(path_or_buf='factordata_strict.csv', index=False)

## "Wider Case" (90th and 10th percentiles) - Factor Construction

In [14]:
# Keep only the identifier, date, return, benchmark-factor and emissions columns
# that the wider-case output needs.
factordata_wide = cleaned_wide.loc[:, [
    'cusip', 'year', 'Month', 'monthyear',
    'Mkt-RF', 'SMB', 'HML', 'RF', 'RMW', 'CMA',
    'RET', 'ExcessReturn', 'sector',
    'CO2_Total', 'TotalAssets', 'LogCO2', 'CO2_Intensity',
    'Method_Estimated',
]]

In [15]:
# Quantile cut-offs for the wider case (10th and 90th percentiles)
l_quant_wide, u_quant_wide = 0.10, 0.90

# Factor built by sorting on LogCO2, stored under the column name 'GMB_U'
(
    LogCO2_factors_wide,
    LogCO2_cusips_wide,
    LogCO2_sectors_wide,
    LogCO2_portfolio_wide,
) = create_FF_factor(
    data=cleaned_wide,
    ESGmeasure='LogCO2',
    newcolumnname='GMB_U',
    lowerquant=l_quant_wide,
    upperquant=u_quant_wide,
)
# Attach each month's factor value to every row of that month
factordata_wide = factordata_wide.merge(LogCO2_factors_wide, on='monthyear')

In [16]:
# Factor built by sorting on CO2_Intensity, stored under the column name 'GMB_S'
(
    CO2Intensity_factors_wide,
    CO2Intensity_cusips_wide,
    CO2Intensity_sectors_wide,
    CO2Intensity_portfolio_wide,
) = create_FF_factor(
    data=cleaned_wide,
    ESGmeasure='CO2_Intensity',
    newcolumnname='GMB_S',
    lowerquant=l_quant_wide,
    upperquant=u_quant_wide,
)
# Attach each month's factor value to every row of that month
factordata_wide = factordata_wide.merge(CO2Intensity_factors_wide, on='monthyear')

In [17]:
# Write the wider-case panel (with both factor columns) to CSV, omitting the index
factordata_wide.to_csv(path_or_buf='factordata_wide.csv', index=False)