This notebook matches unique NGER firm names to firms that were publicly listed during the sample period and appends the corresponding ticker. This allows the NGER data to be merged with firm data for further analysis.

In [1]:
import os
import pandas as pd
import numpy as np
from fuzzywuzzy import process

In [2]:
# Project root folder. The path is written with forward slashes and keeps its
# trailing slash, because the output location is later built from it by plain
# string concatenation.
working_directory = 'C:/cnolan-thesis/'

# Make every relative path in this notebook resolve against the project root.
os.chdir(working_directory)

# Echo the resolved location so the reader can confirm where files will land.
print("Current working directory: {0}".format(os.getcwd()))


def createFolder(directory):
    """Create `directory` (including missing parent folders) if it is absent.

    Parameters
    ----------
    directory : str
        Path of the folder to create, absolute or relative to the current
        working directory.

    Returns
    -------
    None

    Notes
    -----
    Best effort: an OSError (e.g. permission denied, or a regular file already
    occupying the path) is reported with a printed message instead of being
    raised, so the notebook keeps running.
    """
    try:
        # A single call with exist_ok=True is enough. The previous version
        # called os.makedirs twice, so the second call always failed on the
        # folder it had just created and a bogus error message was printed.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print ('Error: Creating directory. ' +  directory)
        

# Outputs are saved in an "output" folder inside the working directory.
# output_path is the prefix prepended to every exported file name below.
createFolder(directory='./output/')
output_path = working_directory + './output/'

Current working directory: C:\cnolan-thesis


# Import Data

In [3]:
# Both inputs are collated and processed by notebook "1. Collate Data" and are
# read here straight from the project's GitHub repository.
nger_data = pd.read_csv(
    'https://raw.githubusercontent.com/connorpn/cnolan-thesis/main/output/nger_data.csv',
    encoding="ISO-8859-1",
)
ms_data = pd.read_csv(
    'https://raw.githubusercontent.com/connorpn/cnolan-thesis/main/output/ms_data.csv',
    encoding='latin1',
)


# Fuzzy Match NGER and Morningstar Firm Names (This Process Takes A Long Time)

In [4]:
# One row per Morningstar name; used further down to look up the ticker that
# belongs to each matched name.
ms_firms = ms_data[['ticker', 'morningstar_name']]
ms_firms = ms_firms.drop_duplicates(subset=['morningstar_name'])

# Candidate names (Morningstar) and query names (NGER). Missing names are
# dropped first because the fuzzy matcher operates on strings.
ms_firms_list = ms_data['morningstar_name'].dropna().unique().tolist()
nger_firms_list = nger_data['nger_name'].dropna().unique().tolist()

# note: in preliminary analysis a threshold of 95 provided matching without mismatching.
# The comparison below is strict, so only scores above 95 are accepted.
threshold = 95
names_response = []
for name in nger_firms_list:
    # Best-scoring Morningstar candidate for this NGER name: (candidate, score).
    resp_match = process.extractOne(name, ms_firms_list)
    # No result is returned when there is nothing to compare against
    # (e.g. an empty candidate list); skip rather than fail on None.
    if resp_match is None:
        continue
    if resp_match[1] > threshold:
        names_response.append({'nger_name': name,
                               'morningstar_name': resp_match[0],
                               'match_score': resp_match[1]})

# Naming the columns explicitly keeps this frame, and the column selection in
# the merge below, valid even when no name clears the threshold.
matched_firms = pd.DataFrame(names_response,
                             columns=['nger_name', 'morningstar_name', 'match_score'])

# Attach the ticker of the matched Morningstar name to every matched NGER name.
matched_firms_tickers = pd.merge(matched_firms[['nger_name', 'morningstar_name', 'match_score']],
                                 ms_firms[['ticker', 'morningstar_name']],
                                 on=['morningstar_name'],
                                 how='left')

# Flag column: lets later left-joins tell matched firms from unmatched ones.
matched_firms_tickers["matched"] = 1
matched_firms_tickers = matched_firms_tickers.reindex(columns=['nger_name', 'morningstar_name', 'ticker','matched','match_score'])

# Save Matched Firms File
output_filename = 'matched_firms_tickers.csv'
outputname = output_path + output_filename
matched_firms_tickers.to_csv(outputname, mode='w')
print("Exported File: " + outputname)

# Ticker list for the Datastream series search
matched_tickers = matched_firms_tickers['ticker']

# Save Matched Firms List File
output_filename = 'matched_tickers_list.csv'
outputname = output_path + output_filename
# Export the Series defined just above. The earlier code called .to_csv on
# `matched_tickers_list`, a name that is never assigned in this notebook, so
# the cell raised NameError on a fresh kernel.
matched_tickers.to_csv(outputname, mode='w')
print("Exported File: " + outputname)

Exported File: C:/cnolan-thesis/./output/matched_firms_tickers.csv
Exported File: C:/cnolan-thesis/./output/matched_tickers_list.csv


# Append Matched Data to NGER Data and Filter (Matched Only NGER Data)

In [5]:
# Left-join the match table onto the NGER observations by firm name, then keep
# only the rows that received a match flag (i.e. firms that were matched).
nger_data_matched = (
    nger_data[['year', 'nger_name', 'scope1', 'scope2', 'energy_consumption', 'total_emissions']]
    .merge(
        matched_firms_tickers[['nger_name', 'morningstar_name', 'match_score', 'ticker','matched']],
        on=['nger_name'],
        how='left',
    )
    .dropna(axis=0, how='any', subset=['matched'])
)

# Show the resulting column names as a quick sanity check.
print(nger_data_matched.columns.tolist())

['year', 'nger_name', 'scope1', 'scope2', 'energy_consumption', 'total_emissions', 'morningstar_name', 'match_score', 'ticker', 'matched']


In [9]:
# Identifier and match columns first, emissions measures last; rows are then
# renumbered from zero.
nger_data_matched = (
    nger_data_matched
    .reindex(columns=['year', 'nger_name', 'morningstar_name', 'ticker', 'match_score', 'matched', 'scope1', 'scope2', 'energy_consumption', 'total_emissions'])
    .reset_index(drop=True)
)

# Save Matched NGER Data
output_filename = 'nger_data_matched.csv'
outputname = output_path + output_filename
nger_data_matched.to_csv(outputname, index=False, mode='w')
print("Exported File: " + outputname)

Exported File: C:/cnolan-thesis/./output/nger_data_matched.csv


# Display Matched Firms

In [7]:
# Report how many NGER firm names were matched (one row per matched name).
print('Number of Matched Firms:', len(matched_firms_tickers), sep='\n')

# Lift the row/column display limits for this one call so that the complete
# match table is shown for inspection.
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None):
    display(matched_firms_tickers)

Number of Matched Firms:
91


Unnamed: 0,nger_name,morningstar_name,ticker,matched,match_score
0,abb grain,abb grain,ABB1,1,100
1,aditya birla minerals,aditya birla minerals,ABY1,1,100
2,agl energy,agl energy,AGL,1,100
3,amp,amp,AMP,1,100
4,arrow energy,arrow energy,AOE,1,100
5,asciano,asciano,AIO,1,100
6,babcock & brown infrastructure,babcock & brown infrastructure,BBI,1,100
7,bluescope steel,bluescope steel,BSL,1,100
8,boral,boral,BLD,1,100
9,bradken,bradken,BKN,1,100
