In [14]:
# Third-party: pandas does all spreadsheet reading and table reshaping in this notebook
import pandas as pd
from pandas import Series, DataFrame
# Standard library: path construction and warning control
import os
import warnings
# NOTE(review): this silences every warning for the whole session, including pandas
# deprecation/FutureWarnings raised by the cells that follow — consider narrowing it.
warnings.filterwarnings("ignore")

Original data sources
- https://www.bp.com/en/global/corporate/energy-economics/statistical-review-of-world-energy.html
- Statistical Review of World Energy - all data, 1965-2021
- https://www.bp.com/content/dam/bp/business-sites/en/global/corporate/xlsx/energy-economics/statistical-review/bp-stats-review-2022-all-data.xlsx 


In [5]:
## Report the directory the notebook is running from
notebook_dir = os.getcwd()
print("Current working directory:", notebook_dir)

## All data folders are resolved one level above the working directory
parent_dir = os.path.abspath(os.path.join(notebook_dir, '..'))

## Folder holding the raw input files
raw_data_path = os.path.join(parent_dir, 'raw_data')

## Folder that receives the cleaned csv files
cleaned_data_path = os.path.join(parent_dir, 'cleaned_data')

## Folder holding the inflation series
inflation_data_path = os.path.join(parent_dir, 'inflation')

Current working directory: /Users/jennagreene/Documents/GitHub/HATCH_data/reading_files


In [13]:
## ADJUST BASED ON EACH TECHNOLOGY

# File names: the BP workbook (an .xlsx, looked up in the raw_data folder) and the
# inflation series (an .xls, looked up in the inflation folder)
target_file = 'bp-stats-review-2022-all-data.xlsx'
target_inflation_file = 'A001RG3A086NBEA.xls'

# Full paths handed to pd.read_excel in the cells below
target_file_path = os.path.join(raw_data_path, target_file)
target_inflation_path = os.path.join(inflation_data_path, target_inflation_file)

Adjust for inflation

In [15]:
## adjusting for inflation from 2021 USD to 2022
# Read the annual price-index series; header=10 means the 11th spreadsheet row holds the
# column names and everything above it is skipped.
# NOTE(review): presumably the FRED GNP implicit price deflator (series id in the file
# name) — confirm against the file's preamble.
nipa = pd.read_excel(target_inflation_path, header=10)

# The year is the first four characters of each observation date
year_list = [int(str(stamp)[:4]) for stamp in nipa['observation_date']]
nipa['Year'] = year_list

# Reshape to one column per year so a year can be looked up as nipa[<year>]
nipa = nipa.set_index('Year').drop(columns='observation_date').transpose()

# nipa[2022] / nipa[2021] is a Series with one entry per data series in the file
# (assumed to be exactly one here). Take the value positionally: calling float() directly
# on a Series is deprecated in pandas >= 2.1 and is slated to raise TypeError.
infl_ratio = nipa[2022] / nipa[2021]
infl_2021_2022 = float(infl_ratio.iloc[0])
infl_2021_2022

1.069781328847771

## Crude oil price

In [16]:
# crude oil
# NOTE(review): the factor applied below treats the selected price column as 2021 USD,
# while the note inherited from the other cells says the numbers "appear nominal" —
# confirm against the sheet's column header.
crude_oil = (
    pd.read_excel(target_file_path,
                  sheet_name='Oil crude prices since 1861',
                  header=3, usecols=[0, 2], index_col=0, skipfooter=5)
    .transpose()              # one row of prices, one column per year
    .mul(infl_2021_2022)      # rescale every year by the 2021 -> 2022 factor
)

# Descriptive columns, appended after the year columns in this order
crude_oil = crude_oil.assign(**{
    'Unit': '2022 USD/barrel',
    'Metric': 'Price',
    'Spatial Scale': 'Global',
    'Technology Name': 'Crude Oil',
    'Data Source': 'BP',
    'Country Name': 'World',
    'Country Code': 'World',
})

# Row identifier: <Technology Name>_<Metric>_<Country Code>
crude_oil['ID'] = crude_oil['Technology Name'].str.cat(
    [crude_oil['Metric'], crude_oil['Country Code']], sep='_')
crude_oil = crude_oil.set_index('ID')

# Drop the label the column axis carries over from the spreadsheet
crude_oil.columns.name = None


Save crude oil file

In [17]:

## Change file name to save
output_file = 'crude_oil_price.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

# to_csv raises OSError when the target folder does not exist (e.g. a checkout where the
# output folder was never created); creating it first is a no-op when it is already there.
os.makedirs(cleaned_data_path, exist_ok=True)

crude_oil.to_csv(output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/crude_oil_price.csv


## LNG price

In [19]:
# liquefied natural gas
# numbers appear nominal but bp doesn't say for sure
# Values are kept exactly as read: no inflation factor is applied in this cell.
# The trailing space in the sheet name is part of the string passed to read_excel.
lng = pd.read_excel(
    target_file_path,
    sheet_name='Gas Prices ',
    header=4, usecols=[0, 1], index_col=0, skipfooter=8,
).transpose()   # one row of prices, one column per year

# Descriptive columns, appended after the year columns in this order
lng = lng.assign(**{
    'Technology Name': 'Liquefied Natural Gas (LNG)',
    'Data Source': 'BP',
    'Metric': 'Price',
    'Unit': 'USD/million Btu',
    'Country Name': 'Japan',
    'Country Code': 'JP',
    'Spatial Scale': 'National',
})

# Row identifier: <Technology Name>_<Metric>_<Country Code>
lng['ID'] = lng['Technology Name'].str.cat([lng['Metric'], lng['Country Code']], sep='_')
lng = lng.set_index('ID')
lng

Unnamed: 0_level_0,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,...,2019,2020,2021,Technology Name,Data Source,Metric,Unit,Country Name,Country Code,Spatial Scale
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Liquefied Natural Gas (LNG)_Price_JP,5.1,5.234653,4.10198,3.352638,3.344109,3.279231,3.644573,3.985942,3.622635,3.522022,...,9.944666,7.781412,10.072739,Liquefied Natural Gas (LNG),BP,Price,USD/million Btu,Japan,JP,National


Save LNG File

In [20]:
## File name for the cleaned LNG price table
output_file = 'lng_price.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

# to_csv raises OSError when the target folder does not exist; creating it first is a
# no-op when it is already there.
os.makedirs(cleaned_data_path, exist_ok=True)

lng.to_csv(output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/lng_price.csv


## Natural Gas price

In [22]:
# natural gas
# numbers appear nominal but bp doesn't say for sure
# Three price columns are read; '-' cells become NaN. After the transpose each price
# series is a row and each year a column.
nat_gas = pd.read_excel(
    target_file_path,
    sheet_name='Gas Prices ',
    header=3, skiprows=[4], skipfooter=8,
    index_col=0, usecols=[0, 3, 6, 7],
    na_values='-',
).transpose()

# 'Rest of World' is the plain average of the three series. Plain addition propagates
# NaN, so a year missing from any one series is also missing from the average.
nat_gas.loc['Rest of World'] = (nat_gas.iloc[0] + nat_gas.iloc[1] + nat_gas.iloc[2]) / 3

# Descriptive columns, appended after the year columns in this order. The per-row lists
# follow the row order: the three series as read, then 'Rest of World'.
nat_gas = nat_gas.assign(**{
    'Technology Name': 'Natural Gas Production',
    'Data Source': 'BP',
    'Metric': 'Price',
    'Unit': 'USD/million Btu',
    'Country Name': ['Germany', 'US', 'Canada', 'Rest of World'],
    'Country Code': ['DE', 'US', 'CA', 'Rest of World'],
    'Spatial Scale': ['National', 'National','National','Global'],
})

# Row identifier: <Technology Name>_<Metric>_<Country Code>
nat_gas['ID'] = nat_gas['Technology Name'].str.cat(
    [nat_gas['Metric'], nat_gas['Country Code']], sep='_')
nat_gas = nat_gas.set_index('ID')
nat_gas

Unnamed: 0_level_0,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,...,2019,2020,2021,Technology Name,Data Source,Metric,Unit,Country Name,Country Code,Spatial Scale
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Natural Gas Production_Price_DE,3.995635,4.253587,3.928619,2.547503,2.220359,2.000514,2.775893,3.233289,2.698698,2.511376,...,5.029917,4.062832,8.93667,Natural Gas Production,BP,Price,USD/million Btu,Germany,DE,National
Natural Gas Production_Price_US,,,,,,1.696667,1.638333,1.486667,1.771667,2.120833,...,2.5119,1.988582,3.839473,Natural Gas Production,BP,Price,USD/million Btu,US,US,National
Natural Gas Production_Price_CA,,,,,,,1.05,0.888333,0.979167,1.6925,...,1.267677,1.581569,2.753414,Natural Gas Production,BP,Price,USD/million Btu,Canada,CA,National
Natural Gas Production_Price_Rest of World,,,,,,,1.821409,1.86943,1.81651,2.108236,...,2.936498,2.544328,5.176519,Natural Gas Production,BP,Price,USD/million Btu,Rest of World,Rest of World,Global


Save natural gas file

In [23]:
## File name for the cleaned natural gas price table
output_file = 'nat_gas_price.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

# to_csv raises OSError when the target folder does not exist; creating it first is a
# no-op when it is already there.
os.makedirs(cleaned_data_path, exist_ok=True)

nat_gas.to_csv(output_file_path)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/nat_gas_price.csv


## Coal Price

In [25]:
# coal
# numbers appear nominal but bp doesn't say for sure
# Three price columns are read; '-' cells become NaN. After the transpose each price
# series is a row and each year a column.
coal = pd.read_excel(
    target_file_path,
    sheet_name='Coal Prices',
    header=1, skiprows=[2], skipfooter=6,
    index_col=0, usecols=[0, 1, 2, 6],
    na_values='-',
).transpose()

# 'Rest of World' is the plain average of the three series. Plain addition propagates
# NaN, so a year missing from any one series is also missing from the average.
coal.loc['Rest of World'] = (coal.iloc[0] + coal.iloc[1] + coal.iloc[2]) / 3

# Descriptive columns, appended after the year columns in this order. The per-row lists
# follow the row order: the three series as read, then 'Rest of World'.
# NOTE(review): the first three rows are named after regions (Europe, North America,
# Asia) yet carry Spatial Scale 'National' — confirm this is the intended label.
coal = coal.assign(**{
    'Technology Name': 'Coal Production',
    'Data Source': 'BP',
    'Metric': 'Price',
    'Unit': 'USD/metric ton',
    'Country Name': ['Europe', 'North America', 'Asia', 'Rest of World'],
    'Country Code': ['Europe', 'North America', 'Asia', 'Rest of World'],
    'Spatial Scale': ['National', 'National','National','Global'],
})

# Row identifier: <Technology Name>_<Metric>_<Country Code>
coal['ID'] = coal['Technology Name'].str.cat([coal['Metric'], coal['Country Code']], sep='_')
coal = coal.set_index('ID')
coal

US dollars per tonne,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,...,2019,2020,2021,Technology Name,Data Source,Metric,Unit,Country Name,Country Code,Spatial Scale
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Coal Production_Price_Europe,31.3,39.94,42.08,43.48,42.8,38.53,33.68,37.18,44.5,41.25,...,60.85525,50.164269,121.699808,Coal Production,BP,Price,USD/metric ton,Europe,Europe,National
Coal Production_Price_North America,,,,31.591918,29.010532,28.534539,29.853958,31.716177,27.00635,29.862309,...,57.163984,42.76637,68.538142,Coal Production,BP,Price,USD/metric ton,North America,North America,National
Coal Production_Price_Asia,41.281667,42.465833,48.8625,50.814167,50.295833,48.454167,45.711667,43.661667,47.575,49.535833,...,108.580833,80.5,130.367266,Coal Production,BP,Price,USD/metric ton,Asia,Asia,National
Coal Production_Price_Rest of World,,,,41.962028,40.702122,38.506235,36.415208,37.519281,39.693783,40.216047,...,75.533356,57.810213,106.868405,Coal Production,BP,Price,USD/metric ton,Rest of World,Rest of World,Global


Save coal file

In [26]:
## File name for the cleaned coal price table
output_file = 'coal_price.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

# to_csv raises OSError when the target folder does not exist; creating it first is a
# no-op when it is already there.
os.makedirs(cleaned_data_path, exist_ok=True)

coal.to_csv(output_file_path)
print("Data saved to:", output_file_path)


Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/coal_price.csv
