In [None]:
import os
import pandas as pd
import numpy as np

In [None]:
# Project root folder. The path is written with forward slashes, which Python
# also accepts on Windows.
working_directory = 'C:/cnolan-thesis/'

# Make the project root the current directory so that relative paths used
# afterwards are resolved against it, then report where we ended up.
os.chdir(working_directory)
print("Current working directory: {0}".format(os.getcwd()))


def createFolder(directory):
    """Create ``directory`` (and any missing parent folders) if it is absent.

    The call is idempotent: an already existing directory is left untouched
    and no message is printed.

    Parameters
    ----------
    directory : str
        Path of the folder to create, absolute or relative to the current
        working directory.

    Returns
    -------
    None
        Failures are reported on stdout instead of being raised, so a
        notebook run is not interrupted by a folder problem.
    """
    try:
        # exist_ok=True makes a single call sufficient: it creates the folder
        # when missing and is a no-op when the folder is already there.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        # Raised e.g. for permission problems or when the path exists but is
        # a regular file.
        print ('Error: Creating directory. ' +  directory)
        

# Folder where outputs will be saved (by default a folder within the working directory).
# The path is built once with os.path.join so it does not depend on
# working_directory ending in a slash and carries no redundant './' segment.
# The trailing '' keeps a trailing separator, so `output_path + filename`
# keeps working for code that concatenates file names onto it.
output_path = os.path.join(working_directory, 'output', '')

# Create exactly the folder that output_path points to, independent of the
# current working directory.
createFolder(output_path)


In [None]:
# Columns taken from each source table. 'morningstar_name' is selected from
# both tables and is not a join key, so the merged frame carries it twice as
# 'morningstar_name_x' (left table) and 'morningstar_name_y' (right table).
morningstar_columns = [
    'ticker', 'morningstar_name', 'year', 'capex', 'eoy_price', 'eps',
    'ltdebt', 'marketcap', 'ppe', 'revenue', 'roe', 'stdebt', 'assets',
    'liabilities',
]
nger_columns = [
    'year', 'nger_name', 'scope1', 'scope2', 'energy_consumption',
    'total_emissions', 'morningstar_name', 'match_score', 'ticker',
]

# Left join on firm and year: every Morningstar firm-year is kept, and the
# NGER columns are NaN where no matching NGER record exists.
main_data_determinants = pd.merge(
    morningstar_data[morningstar_columns],
    matched_ngerdata[nger_columns],
    on=['ticker', 'year'],
    how='left',
)

In [None]:
"Construct Dependent Variables"

"Construct Log(emissions)"

# Natural log of scope1, scope2, total_emissions and energy consumption.
# NOTE(review): np.log returns -inf for a value of 0 and NaN for negative
# values; -inf is not removed by dropna() later on - confirm the emission
# columns contain no zeros.
emission_columns = ['scope1', 'scope2', 'total_emissions', 'energy_consumption']
for column in emission_columns:
    main_data_determinants['log_' + column] = np.log(main_data_determinants[column])

# --- Construct year by year change in emissions ---
# Every firm is given one row per observation year (2009-2021). Years without
# data become all-NaN placeholder rows, so diff() below always compares a year
# with the calendar year directly before it instead of jumping over a gap.

# unique firm identifiers
unique_firms_main_data_determinants = main_data_determinants['ticker'].unique()

# one placeholder frame (ticker, year) per observation year
observation_years = [str(year) for year in range(2009, 2022)]
firm_years_list = [
    pd.DataFrame({'ticker': unique_firms_main_data_determinants, 'year': year})
    for year in observation_years
]

# stack the placeholder frames; ignore_index avoids repeating the labels
# 0..n-1 once per year
firms_allyears = pd.concat(firm_years_list, ignore_index=True)

# convert the year strings to yearly periods
# (assumes main_data_determinants['year'] already holds yearly periods so that
# real and placeholder rows compare equal - TODO confirm)
firms_allyears['year'] = pd.to_datetime(firms_allyears['year'], format='%Y').dt.to_period("Y")

# origin marker: 1 = real observation, 2 = placeholder. Sorting on it puts the
# real row first whenever both exist for the same firm-year.
main_data_determinants['dummy_index'] = 1
firms_allyears['dummy_index'] = 2

# append the placeholders to the main data; ignore_index gives the combined
# frame a unique index, so the column assignments below do not depend on
# duplicate labels lining up
main_data_determinants = pd.concat([main_data_determinants, firms_allyears], ignore_index=True)

# order by firm, year and origin, then keep the first row per firm-year
# (the real observation when there is one). drop_duplicates preserves row
# order, so the frame stays sorted by ticker and year for the diff() calls.
main_data_determinants = (
    main_data_determinants
    .sort_values(by=['ticker', 'year', 'dummy_index'])
    .drop_duplicates(['ticker', 'year'], keep='first')
)

# yearly change within each firm for scope1, scope2, total_emissions and
# energy consumption
for column in emission_columns:
    main_data_determinants['change_' + column] = main_data_determinants.groupby(['ticker'])[column].diff()

# --- Construct Intensity (Emissions) ---
# intensity = tons CO2e per AUD million of revenue
# NOTE(review): a revenue of 0 produces +/-inf here - confirm that cannot occur.
main_data_determinants['revenue(m)'] = main_data_determinants['revenue'] / 1000000
for column in emission_columns:
    main_data_determinants[column + '_int'] = main_data_determinants[column] / main_data_determinants['revenue(m)']

In [None]:
"Construct Control Variables"

# LOGSIZE: natural log of market capitalisation
# NOTE(review): np.log gives -inf for a market cap of 0 - confirm none exist.
main_data_determinants['logsize'] = np.log(main_data_determinants['marketcap'])

# B/M: book equity (assets minus liabilities) over market capitalisation
book_equity = main_data_determinants['assets'] - main_data_determinants['liabilities']
main_data_determinants['bm'] = book_equity / main_data_determinants['marketcap']

# ROE: already exists as column 'roe'

# LEVERAGE: total debt over total assets. Missing debt components count as 0,
# so total debt can be built from ltdebt, stdebt, or both.
# NOTE(review): when BOTH components are missing, total debt becomes 0 (not
# NaN) and leverage is 0 wherever assets is present - confirm this is intended.
short_term_debt = main_data_determinants['stdebt'].fillna(0)
long_term_debt = main_data_determinants['ltdebt'].fillna(0)
main_data_determinants['totaldebt'] = short_term_debt + long_term_debt
main_data_determinants['leverage'] = main_data_determinants['totaldebt'] / main_data_determinants['assets']

# INVEST/A: capital expenditure over total assets
main_data_determinants['investa'] = main_data_determinants['capex'] / main_data_determinants['assets']

# HHI: not included due to the absence of segmented business revenues

# LOGPPE: natural log of property, plant and equipment
main_data_determinants['logppe'] = np.log(main_data_determinants['ppe'])

# Rows must be ordered by firm and year for the two diff() calls below; one
# sort covers both because nothing in between reorders the rows.
main_data_determinants = main_data_determinants.sort_values(by=['ticker', 'year'])

# SALESGR: yearly change in revenue by firm, normalized by market capitalisation
main_data_determinants['revenue_change'] = main_data_determinants.groupby(['ticker'])['revenue'].diff()
main_data_determinants['salesgr'] = main_data_determinants['revenue_change'] / main_data_determinants['marketcap']

# EPSGR: yearly change in EPS by firm, normalized by end-of-year share price
main_data_determinants['eps_change'] = main_data_determinants.groupby(['ticker'])['eps'].diff()
main_data_determinants['epsgr'] = main_data_determinants['eps_change'] / main_data_determinants['eoy_price']

# Keep only rows that have a match_score (rows without one are dropped)
main_data_determinants = main_data_determinants.dropna(subset=['match_score'])

# Final dataset: observation years 2009-2021, analysis columns only, ordered
# by firm and year with a fresh 0..n-1 index
final_columns = [
    'ticker', 'morningstar_name_x', 'year',
    'log_scope1', 'log_scope2', 'log_total_emissions', 'log_energy_consumption',
    'change_scope1', 'change_scope2', 'change_total_emissions', 'change_energy_consumption',
    'scope1_int', 'scope2_int', 'total_emissions_int', 'energy_consumption_int',
    'logsize', 'bm', 'roe', 'leverage', 'investa', 'logppe', 'salesgr', 'epsgr',
]
in_sample_window = (main_data_determinants['year'] >= '2009') & (main_data_determinants['year'] <= '2021')
final_data = (
    main_data_determinants.loc[in_sample_window, final_columns]
    .sort_values(by=['ticker', 'year'])
    .reset_index(drop=True)
)


In [None]:
# Column groups used to define the two analysis samples
carbon_variables = [
    'log_scope1', 'log_scope2', 'log_total_emissions', 'log_energy_consumption',
    'change_scope1', 'change_scope2', 'change_total_emissions', 'change_energy_consumption',
    'scope1_int', 'scope2_int', 'total_emissions_int', 'energy_consumption_int',
]
determinant_variables = ['logsize', 'bm', 'roe', 'leverage', 'investa', 'logppe', 'salesgr', 'epsgr']

# Sample 1: firm-years where every carbon variable and every determinant is present
final_data_all_vars = (
    final_data
    .dropna(how='any', subset=carbon_variables + determinant_variables)
    .reset_index(drop=True)
)

# Sample 2: firm-years where every determinant is present and at least one
# carbon variable is present (rows with all carbon variables missing are dropped)
final_data_all_determinants = (
    final_data
    .dropna(how='any', subset=determinant_variables)
    .dropna(how='all', subset=carbon_variables)
    .reset_index(drop=True)
)

# Observations with no missing values

In [None]:
# Report the number of firm-years in the sample, then render the whole table
# with pandas' row and column display limits lifted for this cell only.
print('Number of Observations:', len(final_data_all_vars), sep='\n')

with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(final_data_all_vars)


# Observations with no missing determinant values

I.e., some carbon variables may be missing within a firm-year observation, but at least one is present.

In [None]:
# Report the number of firm-years in the sample, then render the whole table
# with pandas' row and column display limits lifted for this cell only.
print('Number of Observations:', len(final_data_all_determinants), sep='\n')

with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(final_data_all_determinants)