In [1]:
import pandas as pd
from pandas import Series, DataFrame
import os
import country_converter as coco

Data source

- https://www.irena.org/publications/2022/Jul/Renewable-Power-Generation-Costs-in-2021

Get working directory and file paths

In [2]:
## Resolve the folders this notebook reads from and writes to.
## All three are siblings of the current working directory
## (i.e. <cwd>/../raw_data, <cwd>/../cleaned_data, <cwd>/../inflation).
_cwd = os.getcwd()
print("Current working directory:", _cwd)

## raw_data      -> raw input files
## cleaned_data  -> cleaned csv output
## inflation     -> inflation workbook
raw_data_path, cleaned_data_path, inflation_data_path = (
    os.path.abspath(os.path.join(_cwd, os.pardir, folder))
    for folder in ('raw_data', 'cleaned_data', 'inflation')
)

Current working directory: /Users/jennagreene/Documents/GitHub/HATCH_data/reading_files


In [3]:

# File names relative to their data folders. The leading "/" is required
# because the full paths below are built by plain string concatenation.
target_inflation_file = "/A001RG3A086NBEA.xls"
target_file = "/cost data/IRENA-Datafile-RenPwrGenCosts-in-2021-v1-0.xlsx"

# Full paths to the inflation workbook and the IRENA cost workbook.
target_inflation_filepath = f"{inflation_data_path}{target_inflation_file}"
target_filepath = f"{raw_data_path}{target_file}"

In [4]:
## adjusting for inflation from 2021 USD to 2022
# Load the index series from the inflation workbook; header=10 means the
# column names sit on the 11th row of the sheet.
nipa = pd.read_excel(target_inflation_filepath, header=10)

# Key every observation by its calendar year, taken from the first four
# characters of the observation date.
year_list = [int(str(stamp)[:4]) for stamp in nipa['observation_date']]
nipa['Year'] = year_list

# Reshape so that each year becomes a column label (nipa[2021], nipa[2022], ...).
nipa = (
    nipa
    .set_index('Year', drop=True)
    .drop(columns='observation_date')
    .transpose()
)

# Ratio of the 2022 index value to the 2021 index value. Each year column is
# a one-element Series here, so pull the scalar out with .item() (which raises
# ValueError unless there is exactly one value) instead of calling float() on
# the Series itself, which pandas has deprecated.
infl_2021_2022 = float((nipa[2022] / nipa[2021]).item())

  infl_2021_2022 = float(nipa[2022]/nipa[2021])


CSP

In [5]:
# Concentrating solar power, global figures from sheet 'Fig 5.1'.
# The length-2 lists assigned to 'Metric' and 'Unit' below require that
# exactly two data rows remain after `skiprows` is applied.
csp = pd.read_excel(
    target_filepath,
    sheet_name='Fig 5.1',
    header=8,
    index_col=0,
    usecols=list(range(1, 14)),
    skiprows=[9, *range(11, 24), 25],
)

# Multiply every value column by infl_2021_2022 before any text columns exist.
csp = csp * infl_2021_2022

# Attach the descriptive columns (in this order) and index the rows by ID.
csp = (
    csp
    .assign(**{
        'Metric': ['Total Installed Cost', 'Levelized Cost of Energy'],
        'Technology Name': 'Concentrating Solar Power',
        'Country Name': 'World',
        'Country Code': 'World',
        'Spatial Scale': 'Global',
        'Data Source': 'IRENA',
        'Unit': ['2022 USD/kW', '2022 USD/kWh'],
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)

In [6]:
# Write the CSP table into the cleaned-data folder and echo the destination.
output_file = 'csp_price.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
csp.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/csp_price.csv


Geothermal Total Installed Cost

In [7]:
# Geothermal total installed cost from sheet 'Fig 7.2'.
# Two spreadsheet columns are read (label column + one value column) and the
# result is transposed so the former row labels become the column labels.
geothermal_total_install = pd.read_excel(
    target_filepath,
    sheet_name='Fig 7.2',
    header=4,
    usecols=[1, 3],
    index_col=0,
).transpose()

# Multiply every value column by infl_2021_2022 before any text columns exist.
geothermal_total_install = geothermal_total_install * infl_2021_2022

# Attach the descriptive columns (in this order) and index the rows by ID.
geothermal_total_install = (
    geothermal_total_install
    .assign(**{
        'Technology Name': 'Geothermal Energy',
        'Country Name': 'World',
        'Country Code': 'World',
        'Spatial Scale': 'Global',
        'Data Source': 'IRENA',
        'Metric': 'Total Installed Cost',
        'Unit': '2022 USD/kW',
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)

# Drop the columns-axis name carried over by the transpose.
geothermal_total_install.columns.name = None

In [8]:
# Write the geothermal installed-cost table into the cleaned-data folder.
output_file = 'geothermal_total_installed_cost.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
geothermal_total_install.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/geothermal_total_installed_cost.csv


Geothermal LCOE

In [9]:
# Geothermal levelized cost of energy from sheet 'Fig 7.4'.
# Two spreadsheet columns are read (label column + one value column) and the
# result is transposed so the former row labels become the column labels.
geothermal_lcoe = pd.read_excel(
    target_filepath,
    sheet_name='Fig 7.4',
    header=5,
    usecols=[1, 3],
    index_col=0,
).transpose()

# Multiply every value column by infl_2021_2022 before any text columns exist.
geothermal_lcoe = geothermal_lcoe * infl_2021_2022

# Attach the descriptive columns (in this order) and index the rows by ID.
geothermal_lcoe = (
    geothermal_lcoe
    .assign(**{
        'Technology Name': 'Geothermal Energy',
        'Country Name': 'World',
        'Country Code': 'World',
        'Spatial Scale': 'Global',
        'Data Source': 'IRENA',
        'Metric': 'Levelized Cost of Energy',
        'Unit': '2022 USD/kWh',
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)

# Drop the columns-axis name carried over by the transpose.
geothermal_lcoe.columns.name = None

In [10]:
# Write the geothermal LCOE table into the cleaned-data folder.
output_file = 'geothermal_lcoe.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
geothermal_lcoe.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/geothermal_lcoe.csv


Hydro

In [11]:
# Hydropower, global figures from sheet 'Fig 6.1'.
# The length-2 lists assigned to 'Metric' and 'Unit' below require that
# exactly two data rows remain after `skiprows` is applied.
hydro = pd.read_excel(
    target_filepath,
    sheet_name='Fig 6.1',
    header=6,
    index_col=0,
    usecols=list(range(1, 14)),
    skiprows=[7, *range(9, 22), 23],
)

# Multiply every value column by infl_2021_2022 before any text columns exist.
hydro = hydro * infl_2021_2022

# Attach the descriptive columns (in this order) and index the rows by ID.
hydro = (
    hydro
    .assign(**{
        'Metric': ['Total Installed Cost', 'Levelized Cost of Energy'],
        'Technology Name': 'Hydropower',
        'Country Name': 'World',
        'Country Code': 'World',
        'Spatial Scale': 'Global',
        'Data Source': 'IRENA',
        'Unit': ['2022 USD/kW', '2022 USD/kWh'],
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)


In [12]:
# Write the hydropower table into the cleaned-data folder.
output_file = 'hydro_price.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
hydro.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/hydro_price.csv


Offshore Wind Total Installed

In [13]:
# Offshore wind total installed cost from sheet 'Fig 4.6'.
# Two spreadsheet columns are read (label column + one value column) and the
# result is transposed so the former row labels become the column labels.
offshore_wind_total_installed = pd.read_excel(
    target_filepath,
    sheet_name='Fig 4.6',
    header=4,
    usecols=[1, 3],
    index_col=0,
).transpose()

# Multiply every value column by infl_2021_2022 before any text columns exist.
offshore_wind_total_installed = offshore_wind_total_installed * infl_2021_2022

# Attach the descriptive columns (in this order) and index the rows by ID.
offshore_wind_total_installed = (
    offshore_wind_total_installed
    .assign(**{
        'Metric': 'Total Installed Cost',
        'Technology Name': 'Offshore Wind Energy',
        'Country Name': 'World',
        'Country Code': 'World',
        'Spatial Scale': 'Global',
        'Data Source': 'IRENA',
        'Unit': '2022 USD/kW',
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)

# Drop the columns-axis name carried over by the transpose.
offshore_wind_total_installed.columns.name = None


In [14]:
# Write the offshore wind installed-cost table into the cleaned-data folder.
output_file = 'offshore_wind_total_installed_cost.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
offshore_wind_total_installed.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/offshore_wind_total_installed_cost.csv


Offshore Wind LCOE

In [15]:
# Offshore wind levelized cost of energy from sheet 'Fig 4.13'.
# Two spreadsheet columns are read (label column + one value column) and the
# result is transposed so the former row labels become the column labels.
offshore_wind_lcoe = pd.read_excel(
    target_filepath,
    sheet_name='Fig 4.13',
    header=3,
    usecols=[1, 3],
    index_col=0,
).transpose()

# Multiply every value column by infl_2021_2022 before any text columns exist.
offshore_wind_lcoe = offshore_wind_lcoe * infl_2021_2022

# Attach the descriptive columns (in this order) and index the rows by ID.
offshore_wind_lcoe = (
    offshore_wind_lcoe
    .assign(**{
        'Metric': 'Levelized Cost of Energy',
        'Technology Name': 'Offshore Wind Energy',
        'Country Name': 'World',
        'Country Code': 'World',
        'Spatial Scale': 'Global',
        'Data Source': 'IRENA',
        'Unit': '2022 USD/kWh',
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)

# Drop the columns-axis name carried over by the transpose.
offshore_wind_lcoe.columns.name = None

In [16]:
# Write the offshore wind LCOE table into the cleaned-data folder.
output_file = 'offshore_wind_lcoe_global.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
offshore_wind_lcoe.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/offshore_wind_lcoe_global.csv


Onshore Wind TIC Global

In [17]:
# Onshore wind total installed cost, global weighted average, sheet 'Fig 2.4'.
# Two spreadsheet columns are read (label column + one value column) and the
# result is transposed so the former row labels become the column labels.
onshore_tic_global = pd.read_excel(
    target_filepath,
    sheet_name='Fig 2.4',
    header=6,
    usecols=[1, 3],
    index_col=0,
).transpose()

# Multiply every value column by infl_2021_2022 before any text columns exist.
onshore_tic_global = onshore_tic_global * infl_2021_2022

# Attach the descriptive columns (in this order) and index the rows by ID.
onshore_tic_global = (
    onshore_tic_global
    .assign(**{
        'Metric': 'Total Installed Cost',
        'Technology Name': 'Onshore Wind Energy',
        'Country Name': 'World',
        'Country Code': 'World',
        'Spatial Scale': 'Global',
        'Data Source': 'IRENA',
        'Unit': '2022 USD/kW',
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)

# Drop the columns-axis name carried over by the transpose.
onshore_tic_global.columns.name = None


In [18]:
# Write the global onshore wind installed-cost table into the cleaned-data folder.
output_file = 'onshore_wind_tic_global.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
onshore_tic_global.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/onshore_wind_tic_global.csv


Onshore wind LCOE Global

In [19]:
# Onshore wind levelized cost of energy, global weighted average, sheet 'Fig 2.12'.
# Two spreadsheet columns are read (label column + one value column) and the
# result is transposed so the former row labels become the column labels.
onshore_lcoe_global = pd.read_excel(
    target_filepath,
    sheet_name='Fig 2.12',
    header=3,
    usecols=[1, 2],
    index_col=0,
).transpose()

# Multiply every value column by infl_2021_2022 before any text columns exist.
onshore_lcoe_global = onshore_lcoe_global * infl_2021_2022

# Attach the descriptive columns (in this order) and index the rows by ID.
onshore_lcoe_global = (
    onshore_lcoe_global
    .assign(**{
        'Metric': 'Levelized Cost of Energy',
        'Technology Name': 'Onshore Wind Energy',
        'Country Name': 'World',
        'Country Code': 'World',
        'Spatial Scale': 'Global',
        'Data Source': 'IRENA',
        'Unit': '2022 USD/kWh',
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)

# Drop the columns-axis name carried over by the transpose.
onshore_lcoe_global.columns.name = None

In [20]:
# Write the global onshore wind LCOE table into the cleaned-data folder.
# NOTE(review): this file name contains a space, unlike the other outputs in
# this notebook (e.g. 'onshore_wind_tic_global.csv'). It is left unchanged in
# case downstream code reads the file by this exact name; confirm before renaming.
output_file = 'onshore wind_lcoe.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
onshore_lcoe_global.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/onshore wind_lcoe.csv


Onshore TIC Country

In [21]:
# Onshore wind total installed cost per country, sheet 'Fig 2.5'.
# Spreadsheet column 1 supplies the row labels; columns 2-39 supply the values.
onshore_tic_country = pd.read_excel(
    target_filepath,
    sheet_name='Fig 2.5',
    header=6,
    usecols=list(range(1, 40)),
    index_col=0,
)

# Multiply every value column by infl_2021_2022 before any text columns exist.
onshore_tic_country = onshore_tic_country * infl_2021_2022

# Convert each row label to an ISO2 code, one converter call per label.
country_codes = [
    coco.convert(names=label, to='iso2')
    for label in onshore_tic_country.index
]

# Attach the descriptive columns (in this order) and index the rows by ID.
onshore_tic_country = (
    onshore_tic_country
    .assign(**{
        'Metric': 'Total Installed Cost',
        'Technology Name': 'Onshore Wind Energy',
        'Spatial Scale': 'National',
        'Data Source': 'IRENA',
        'Unit': '2022 USD/kW',
        'Country Name': onshore_tic_country.index,
        'Country Code': country_codes,
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)


In [22]:
# Write the country-level onshore wind installed-cost table into the cleaned-data folder.
output_file = 'onshore_tic_country.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
onshore_tic_country.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/onshore_tic_country.csv


Onshore Wind LCOE Country

In [23]:
# Onshore wind levelized cost of energy per country, sheet 'Fig 2.13'.
# Spreadsheet column 1 supplies the row labels; columns 2-39 supply the values.
onshore_lcoe_country = pd.read_excel(
    target_filepath,
    sheet_name='Fig 2.13',
    header=6,
    usecols=list(range(1, 40)),
    index_col=0,
)

# Multiply every value column by infl_2021_2022 before any text columns exist.
onshore_lcoe_country = onshore_lcoe_country * infl_2021_2022

# Convert each row label to an ISO2 code, one converter call per label.
country_codes = [
    coco.convert(names=label, to='iso2')
    for label in onshore_lcoe_country.index
]

# Attach the descriptive columns (in this order) and index the rows by ID.
onshore_lcoe_country = (
    onshore_lcoe_country
    .assign(**{
        'Metric': 'Levelized Cost of Energy',
        'Technology Name': 'Onshore Wind Energy',
        'Spatial Scale': 'National',
        'Data Source': 'IRENA',
        'Unit': '2022 USD/kWh',
        'Country Name': onshore_lcoe_country.index,
        'Country Code': country_codes,
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)


In [24]:
# Write the country-level onshore wind LCOE table into the cleaned-data folder.
output_file = 'onshore_lcoe_country.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
onshore_lcoe_country.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/onshore_lcoe_country.csv


Solar PV Global

In [25]:
# Solar PV, global weighted-average figures from sheet 'Fig 3.1'.
# The length-2 lists assigned to 'Metric' and 'Unit' below require that
# exactly two data rows remain after `skiprows` is applied.
solar_pv_global = pd.read_excel(
    target_filepath,
    sheet_name='Fig 3.1',
    header=8,
    usecols=list(range(1, 14)),
    index_col=0,
    skiprows=[9, *range(11, 24), 25],
)

# Multiply every value column by infl_2021_2022 before any text columns exist.
solar_pv_global = solar_pv_global * infl_2021_2022

# Attach the descriptive columns (in this order) and index the rows by ID.
solar_pv_global = (
    solar_pv_global
    .assign(**{
        'Metric': ['Total Installed Cost', 'Levelized Cost of Energy'],
        'Technology Name': 'Solar Photovoltaic',
        'Country Name': 'World',
        'Country Code': 'World',
        'Spatial Scale': 'Global',
        'Data Source': 'IRENA',
        'Unit': ['2022 USD/kW', '2022 USD/kWh'],
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)


In [26]:
# Write the global solar PV table into the cleaned-data folder.
output_file = 'solar_pv_global_price.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
solar_pv_global.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/solar_pv_global_price.csv


Solar TIC Country

In [27]:
# Solar PV total installed cost per country, sheet 'Figure 3.4'.
# Spreadsheet column 1 supplies the row labels; columns 2-13 supply the values.
solar_tic_country = pd.read_excel(
    target_filepath,
    sheet_name='Figure 3.4',
    header=3,
    usecols=list(range(1, 14)),
    index_col=0,
)

# Multiply every value column by infl_2021_2022 before any text columns exist.
solar_tic_country = solar_tic_country * infl_2021_2022

# Convert each row label to an ISO2 code, one converter call per label.
country_codes = [
    coco.convert(names=label, to='iso2')
    for label in solar_tic_country.index
]

# Attach the descriptive columns (in this order) and index the rows by ID.
solar_tic_country = (
    solar_tic_country
    .assign(**{
        'Metric': 'Total Installed Cost',
        'Technology Name': 'Solar Photovoltaic',
        'Spatial Scale': 'National',
        'Data Source': 'IRENA',
        'Unit': '2022 USD/kW',
        'Country Name': solar_tic_country.index,
        'Country Code': country_codes,
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)


In [28]:


# Write the country-level solar PV installed-cost table into the cleaned-data folder.
# NOTE(review): the file name ends in a doubled '.csv.csv' extension, which
# looks unintentional. It is left unchanged in case downstream code reads the
# file by this exact name; confirm before renaming.
output_file = 'solar_pv_tic_country.csv.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
solar_tic_country.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/solar_pv_tic_country.csv.csv


Solar LCOE Country

In [29]:
# Solar PV levelized cost of energy per country, sheet 'Fig 3.8'.
# Spreadsheet column 2 supplies the row labels; columns 3-14 supply the values.
solar_lcoe_country = pd.read_excel(
    target_filepath,
    sheet_name='Fig 3.8',
    header=5,
    usecols=list(range(2, 15)),
    index_col=0,
)

# Multiply every value column by infl_2021_2022 before any text columns exist.
solar_lcoe_country = solar_lcoe_country * infl_2021_2022

# Convert each row label to an ISO2 code, one converter call per label.
country_codes = [
    coco.convert(names=label, to='iso2')
    for label in solar_lcoe_country.index
]

# Attach the descriptive columns (in this order) and index the rows by ID.
solar_lcoe_country = (
    solar_lcoe_country
    .assign(**{
        'Metric': 'Levelized Cost of Energy',
        'Technology Name': 'Solar Photovoltaic',
        'Spatial Scale': 'National',
        'Data Source': 'IRENA',
        'Unit': '2022 USD/kWh',
        'Country Name': solar_lcoe_country.index,
        'Country Code': country_codes,
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)


In [30]:
# Write the country-level solar PV LCOE table into the cleaned-data folder.
output_file = 'solar_pv_lcoe_country.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
solar_lcoe_country.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/solar_pv_lcoe_country.csv


Bioenergy

In [31]:
# Bioenergy, global figures from sheet 'Fig 8.1'.
# The length-2 lists assigned to 'Metric' and 'Unit' below require that
# exactly two data rows remain after `skiprows` is applied.
bioenergy = pd.read_excel(
    target_filepath,
    sheet_name='Fig 8.1',
    header=6,
    usecols=list(range(1, 14)),
    index_col=0,
    skiprows=[7, *range(9, 22), 23],
)

# Multiply every value column by infl_2021_2022 before any text columns exist.
bioenergy = bioenergy * infl_2021_2022

# Attach the descriptive columns (in this order) and index the rows by ID.
bioenergy = (
    bioenergy
    .assign(**{
        'Metric': ['Total Installed Cost', 'Levelized Cost of Energy'],
        'Technology Name': 'Bioenergy',
        'Country Name': 'World',
        'Country Code': 'World',
        'Spatial Scale': 'Global',
        'Data Source': 'IRENA',
        'Unit': ['2022 USD/kW', '2022 USD/kWh'],
    })
    .assign(ID=lambda frame: frame['Technology Name'] + '_' + frame['Metric'] + '_' + frame['Country Code'])
    .set_index('ID')
)


In [32]:
# Write the bioenergy table into the cleaned-data folder.
output_file = 'bioenergy_price.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)
bioenergy.to_csv(path_or_buf=output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/bioenergy_price.csv
