In [1]:
import pandas as pd
from pandas import Series, DataFrame
import string
import country_converter as coco

In [2]:
# Data source: BP Statistical Review of World Energy 2022 - all data, 1965-2021
# Landing page: https://www.bp.com/en/global/corporate/energy-economics/statistical-review-of-world-energy.html
# Workbook: https://www.bp.com/content/dam/bp/business-sites/en/global/corporate/xlsx/energy-economics/statistical-review/bp-stats-review-2022-all-data.xlsx
# The code below expects the workbook at 'raw data/bp-stats-review-2022-all-data.xlsx'.


In [3]:
def read_bp(sheet, header=2, skipfooter=0):
    """Read one sheet of the BP workbook into a per-country table.

    The sheet is loaded with its first column as the index. Only columns whose
    label is an ``int`` (the year columns) are kept, rows without any value are
    dropped, and rows whose name contains one of the aggregate/region markers
    listed below are removed.

    Parameters
    ----------
    sheet : str
        Name of the worksheet to read.
    header : int, default 2
        Zero-based row that holds the column labels (passed to ``pd.read_excel``).
    skipfooter : int, default 0
        Number of rows at the end of the sheet to ignore (passed to
        ``pd.read_excel``).

    Returns
    -------
    pandas.DataFrame
        Columns 'Country Name', 'Country Code', 'Unit', 'Data Source' followed
        by the year columns, on a fresh 0..n-1 index.

    Raises
    ------
    AttributeError
        If a row label or the index name is not a string (``.strip`` is called
        on them directly).
    """
    # These placeholder symbols in the sheet are read as missing values.
    missing_values = ['-', '^', '♦']
    df = pd.read_excel('raw data/bp-stats-review-2022-all-data.xlsx', sheet_name=sheet,
                       header=header, index_col=0, na_values=missing_values,
                       skipfooter=skipfooter)

    # Keep only the year columns (integer labels), then drop rows left empty.
    non_year_cols = [col for col in df.columns if not isinstance(col, int)]
    df = df.drop(columns=non_year_cols).dropna(how='all')

    # Row labels may carry leading/trailing digits (presumably footnote marks).
    # Strip them once and use the cleaned names everywhere below, so the ISO
    # lookup sees the same text that ends up in 'Country Name'.
    names = [label.strip(string.digits) for label in df.index]
    # The unit is the header cell of the first column, minus '*' and digits.
    unit = df.index.name.rstrip('*').strip(string.digits)
    df.insert(0, 'Country Name', names)
    df.insert(1, 'Unit', unit)
    df.insert(2, 'Data Source', 'BP')

    # Remove aggregate/region rows with a single boolean mask. Dropping label by
    # label inside a loop raises KeyError when a label matches two markers or
    # appears twice in the index.
    aggregate_markers = ('Total', 'Rest of World', 'Other', 'European Union', 'OECD',
                         'Central America', 'Eastern Africa', 'Middle Africa',
                         'Western Africa', 'OPEC')
    is_country = [not any(marker in name for marker in aggregate_markers) for name in names]
    df = df.loc[is_country].copy()

    # Codes for these two names are hard-coded instead of being looked up.
    special_codes = {'USSR': 'SU', 'Netherlands Antilles': 'AN'}
    iso2 = [special_codes[name] if name in special_codes
            else coco.convert(names=name, to='iso2')
            for name in df['Country Name']]
    df.insert(1, 'Country Code', iso2)
    return df.reset_index(drop=True)

In [4]:
def tech_name(df, tech):
    """Tag a frame with a technology name and index it by a composed ID.

    Adds a 'Technology Name' column (second position) holding ``tech`` and
    replaces the index with 'ID', built per row as
    ``<Technology Name>, <Unit>: <Country Code>``.

    The frame is modified in place and the same object is returned, so it
    must already have 'Unit' and 'Country Code' columns and must not yet have
    'Technology Name' or 'ID' columns.
    """
    df.insert(loc=1, column='Technology Name', value=tech)
    tech_and_unit = df['Technology Name'] + ', ' + df['Unit']
    row_id = tech_and_unit + ': ' + df['Country Code']
    df.insert(loc=0, column='ID', value=row_id)
    # set_index consumes the 'ID' column, leaving it as the index only.
    df.set_index('ID', inplace=True)
    return df

In [5]:
# Oil production: read the sheet, label it, and write bp.csv from scratch.
# This is the only cell that writes the header row.
bp_oil_production = tech_name(read_bp('Oil Production - Tonnes'), 'Oil Production')
bp_oil_production.to_csv('cleaned data/bp.csv')

In [6]:
# Oil refining capacity: read the sheet, label it, and append its rows to bp.csv.
bp_oil_refining = tech_name(read_bp('Oil - Refining capacity'), 'Oil Refining Capacity')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_oil_refining.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [7]:
# Gas production: read the sheet, label it, and append its rows to bp.csv.
bp_gas = tech_name(read_bp('Gas Production - Bcm'), 'Gas Production')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_gas.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [8]:
# Coal production: read the sheet, label it, and append its rows to bp.csv.
bp_coal = tech_name(read_bp('Coal Production - Tonnes'), 'Coal Production')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_coal.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [9]:
# Nuclear generation: read the sheet, label it, and append its rows to bp.csv.
bp_nuclear = tech_name(read_bp('Nuclear Generation - TWh'), 'Nuclear Generation')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_nuclear.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [10]:
# Hydro generation: read the sheet, label it, and append its rows to bp.csv.
bp_hydro = tech_name(read_bp('Hydro Generation - TWh'), 'Hydro Generation')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_hydro.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [11]:
# Renewable power generation: read the sheet, label it, and append its rows to bp.csv.
bp_renewable = tech_name(read_bp('Renewable power - TWh'), 'Renewable Power Generation')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_renewable.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [12]:
# Wind generation: read the sheet, label it, and append its rows to bp.csv.
bp_wind_gen = tech_name(read_bp('Wind Generation - TWh'), 'Wind Generation')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_wind_gen.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [13]:
# Solar generation: read the sheet, label it, and append its rows to bp.csv.
bp_solar_gen = tech_name(read_bp('Solar Generation - TWh'), 'Solar Generation')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_solar_gen.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [14]:
# Electricity generation: read the sheet, label it, and append its rows to bp.csv.
bp_electricity_gen = tech_name(read_bp('Electricity Generation'), 'Electricity Generation')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_electricity_gen.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [15]:
# Lithium production: read the sheet, label it, and append its rows to bp.csv.
bp_lithium = tech_name(read_bp('Lithium Production-Reserves'), 'Lithium Production')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_lithium.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [16]:
# Cobalt production: read the sheet, label it, and append its rows to bp.csv.
bp_cobalt = tech_name(read_bp('Cobalt Production-Reserves'), 'Cobalt Production')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_cobalt.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [17]:
# Rare earth production: read the sheet, label it, and append its rows to bp.csv.
bp_rare_earth = tech_name(read_bp('Rare Earth Production-Reserves'), 'Rare Earth Production')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_rare_earth.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [18]:
# Graphite production: read the sheet, label it, and append its rows to bp.csv.
bp_graphite = tech_name(read_bp('Graphite Production-Reserves'), 'Graphite Production')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_graphite.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [19]:
def read_bp2(sheet):
    """Read one sheet of the BP workbook whose column labels sit on row index 3.

    The sheet is loaded with its first column as the index. Only columns whose
    label is an ``int`` (the year columns) are kept, rows without any value are
    dropped, and rows whose name contains one of the aggregate/region markers
    listed below are removed.

    NOTE(review): apart from ``header=3`` this repeats the logic of
    ``read_bp``; consider consolidating the readers into one function.

    Parameters
    ----------
    sheet : str
        Name of the worksheet to read.

    Returns
    -------
    pandas.DataFrame
        Columns 'Country Name', 'Country Code', 'Unit', 'Data Source' followed
        by the year columns, on a fresh 0..n-1 index.

    Raises
    ------
    AttributeError
        If a row label or the index name is not a string (``.strip`` is called
        on them directly).
    """
    # These placeholder symbols in the sheet are read as missing values.
    missing_values = ['-', '^', '♦']
    df = pd.read_excel('raw data/bp-stats-review-2022-all-data.xlsx', sheet_name=sheet,
                       header=3, index_col=0, na_values=missing_values)

    # Keep only the year columns (integer labels), then drop rows left empty.
    non_year_cols = [col for col in df.columns if not isinstance(col, int)]
    df = df.drop(columns=non_year_cols).dropna(how='all')

    # Row labels may carry leading/trailing digits (presumably footnote marks).
    # Strip them once and use the cleaned names everywhere below, so the ISO
    # lookup sees the same text that ends up in 'Country Name'.
    names = [label.strip(string.digits) for label in df.index]
    # The unit is the header cell of the first column, minus '*' and digits.
    unit = df.index.name.rstrip('*').strip(string.digits)
    df.insert(0, 'Country Name', names)
    df.insert(1, 'Unit', unit)
    df.insert(2, 'Data Source', 'BP')

    # Remove aggregate/region rows with a single boolean mask. Dropping label by
    # label inside a loop raises KeyError when a label matches two markers or
    # appears twice in the index.
    aggregate_markers = ('Total', 'Rest of World', 'Other', 'European Union', 'OECD',
                         'Central America', 'Eastern Africa', 'Middle Africa',
                         'Western Africa', 'OPEC')
    is_country = [not any(marker in name for marker in aggregate_markers) for name in names]
    df = df.loc[is_country].copy()

    # Codes for these two names are hard-coded instead of being looked up.
    special_codes = {'USSR': 'SU', 'Netherlands Antilles': 'AN'}
    iso2 = [special_codes[name] if name in special_codes
            else coco.convert(names=name, to='iso2')
            for name in df['Country Name']]
    df.insert(1, 'Country Code', iso2)
    return df.reset_index(drop=True)

In [20]:
# Solar capacity: read the sheet, label it, and append its rows to bp.csv.
bp_solar = tech_name(read_bp2('Solar Capacity'), 'Solar Capacity')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_solar.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [21]:
# Wind capacity: read the sheet, label it, and append its rows to bp.csv.
bp_wind = tech_name(read_bp2('Wind Capacity'), 'Wind Capacity')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_wind.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

In [22]:
def read_bp3(sheet):
    """Read one sheet of the BP workbook, ignoring its last 40 rows.

    The sheet is loaded with its first column as the index. Only columns whose
    label is an ``int`` (the year columns) are kept, rows without any value are
    dropped, and rows whose name contains one of the aggregate/region markers
    listed below are removed.

    NOTE(review): apart from ``skipfooter=40`` this repeats the logic of
    ``read_bp``; consider consolidating the readers into one function.

    Parameters
    ----------
    sheet : str
        Name of the worksheet to read.

    Returns
    -------
    pandas.DataFrame
        Columns 'Country Name', 'Country Code', 'Unit', 'Data Source' followed
        by the year columns, on a fresh 0..n-1 index.

    Raises
    ------
    AttributeError
        If a row label or the index name is not a string (``.strip`` is called
        on them directly).
    """
    # These placeholder symbols in the sheet are read as missing values.
    missing_values = ['-', '^', '♦']
    # skipfooter=40 discards the last 40 rows of the sheet.
    # NOTE(review): presumably this cuts off trailing content that should not
    # be mixed into the table - confirm the count against the workbook.
    df = pd.read_excel('raw data/bp-stats-review-2022-all-data.xlsx', sheet_name=sheet,
                       header=2, index_col=0, na_values=missing_values, skipfooter=40)

    # Keep only the year columns (integer labels), then drop rows left empty.
    non_year_cols = [col for col in df.columns if not isinstance(col, int)]
    df = df.drop(columns=non_year_cols).dropna(how='all')

    # Row labels may carry leading/trailing digits (presumably footnote marks).
    # Strip them once and use the cleaned names everywhere below, so the ISO
    # lookup sees the same text that ends up in 'Country Name'.
    names = [label.strip(string.digits) for label in df.index]
    # The unit is the header cell of the first column, minus '*' and digits.
    unit = df.index.name.rstrip('*').strip(string.digits)
    df.insert(0, 'Country Name', names)
    df.insert(1, 'Unit', unit)
    df.insert(2, 'Data Source', 'BP')

    # Remove aggregate/region rows with a single boolean mask. Dropping label by
    # label inside a loop raises KeyError when a label matches two markers or
    # appears twice in the index.
    aggregate_markers = ('Total', 'Rest of World', 'Other', 'European Union', 'OECD',
                         'Central America', 'Eastern Africa', 'Middle Africa',
                         'Western Africa', 'OPEC')
    is_country = [not any(marker in name for marker in aggregate_markers) for name in names]
    df = df.loc[is_country].copy()

    # Codes for these two names are hard-coded instead of being looked up.
    special_codes = {'USSR': 'SU', 'Netherlands Antilles': 'AN'}
    iso2 = [special_codes[name] if name in special_codes
            else coco.convert(names=name, to='iso2')
            for name in df['Country Name']]
    df.insert(1, 'Country Code', iso2)
    return df.reset_index(drop=True)

In [23]:
# Biofuels production: read the sheet, label it, and append its rows to bp.csv.
bp_biofuels = tech_name(read_bp3('Biofuels production - PJ'), 'Biofuels Production')
# Appending with header=False is positional, so align the columns with the header
# row already on disk first (in case this sheet's year columns differ from it);
# years this sheet does not have are written as empty fields.
bp_biofuels.rename(columns=str).reindex(
    columns=pd.read_csv('cleaned data/bp.csv', index_col=0, nrows=0).columns
).to_csv('cleaned data/bp.csv', mode='a', header=False)

NameError: name 'read_bp3' is not defined