In [1]:
import pandas as pd

# Load file
df = pd.read_excel("raw_data/natural_gas_and_electricity.xlsx")

In [2]:
df

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Period,Natural Gas,Electricity,Electricity.1
0,,Location,,MMBTU,KWH,USD Converted
1,BU7,Singapore,Jan to Dec 2021,35467,4395600,
2,BU8,Indonesia,Jan to Dec 2021,34545,6572103,
3,BU9,United Kingdom,Jan to Dec 2021,,926347,
4,BU10,Singapore,Jan to Dec 2021,83408,,26902
5,,,,,,
6,,,,,,
7,,BU9,,,,
8,,Month,kwh,,,
9,,202101,76997,,,


### Data Cleaning

In [3]:
df.dtypes

Unnamed: 0       object
Unnamed: 1       object
Period           object
Natural Gas      object
Electricity      object
Electricity.1    object
dtype: object

In [4]:
df.columns = ["Business Unit", "Location", "Period", "Natural Gas (MMBTU)", "Electricity (kWh)", "Electricity Cost"]

df["Natural Gas (MMBTU)"] = pd.to_numeric(df["Natural Gas (MMBTU)"], errors="coerce")
df["Electricity (kWh)"] = pd.to_numeric(df["Electricity (kWh)"], errors="coerce")

In [5]:
df.head()

Unnamed: 0,Business Unit,Location,Period,Natural Gas (MMBTU),Electricity (kWh),Electricity Cost
0,,Location,,,,USD Converted
1,BU7,Singapore,Jan to Dec 2021,35467.0,4395600.0,
2,BU8,Indonesia,Jan to Dec 2021,34545.0,6572103.0,
3,BU9,United Kingdom,Jan to Dec 2021,,926347.0,
4,BU10,Singapore,Jan to Dec 2021,83408.0,,26902


In [14]:
df["Natural Gas (MMBTU)"].value_counts(), df["Electricity (kWh)"].value_counts()

(Natural Gas (MMBTU)
 35467.0    1
 34545.0    1
 83408.0    1
 Name: count, dtype: int64,
 Electricity (kWh)
 4395600.0    1
 6572103.0    1
 926347.0     1
 Name: count, dtype: int64)

In [8]:
EMISSION_FACTOR_GAS = 0.05311    # tCO2e per MMBTU (natural gas)
EMISSION_FACTOR_ELEC = 0.00023314  # tCO2e per kWh (UK electricity average, 2021)

df["Emissions: Natural Gas (tCO2e)"] = df["Natural Gas (MMBTU)"] * EMISSION_FACTOR_GAS
df["Emissions: Electricity (tCO2e)"] = df["Electricity (kWh)"] * EMISSION_FACTOR_ELEC

In [15]:
df["Total Scope 2 Emissions (tCO2e)"] = (
    df["Emissions: Natural Gas (tCO2e)"].fillna(0) + 
    df["Emissions: Electricity (tCO2e)"].fillna(0)
)

scope2_summary = df[[
    "Business Unit", "Location", "Natural Gas (MMBTU)", "Electricity (kWh)",
    "Emissions: Natural Gas (tCO2e)", "Emissions: Electricity (tCO2e)", "Total Scope 2 Emissions (tCO2e)"
]]

In [16]:
scope2_summary

Unnamed: 0,Business Unit,Location,Natural Gas (MMBTU),Electricity (kWh),Emissions: Natural Gas (tCO2e),Emissions: Electricity (tCO2e),Total Scope 2 Emissions (tCO2e)
0,,Location,,,,,0.0
1,BU7,Singapore,35467.0,4395600.0,1883.65237,1024.790184,2908.442554
2,BU8,Indonesia,34545.0,6572103.0,1834.68495,1532.220093,3366.905043
3,BU9,United Kingdom,,926347.0,,215.96854,215.96854
4,BU10,Singapore,83408.0,,4429.79888,,4429.79888
5,,,,,,,0.0
6,,,,,,,0.0
7,,BU9,,,,,0.0
8,,Month,,,,,0.0
9,,202101,,,,,0.0
