# 08: Collect and transform production costs

In this script, we collect all raw data on production costs, and then combine and transform them into a consistent dataset.

We then generate a sample from that dataset.

In [1]:
%run common_definitions.py

In [2]:
import pandas as pd
import numpy as np

### Biomass production costs as input to the bioethanol production costs

In [3]:
# Load the raw biomass cost table; header=1 takes the column names from the
# second line of the file. The bare name on the last line displays the table.
biomass_costs = pd.read_csv(
    filepath_or_buffer="../data/production_costs_biomass.csv",
    header=1,
)
biomass_costs

Unnamed: 0,type,cost,currency,year,unit
0,managed forests,1.74,dollar,2018,GJ
1,grass energy crops,2.37,dollar,2018,GJ
2,wood energy crops,2.72,dollar,2018,GJ
3,forest residues,3.53,dollar,2018,GJ
4,agricultural residues,3.7,dollar,2018,GJ


Convert to 2020 euros. Here, we first convert to euros, then adjust for inflation.

In [4]:
# Year-indexed lookup series read by convert_to_euros:
# HICP and CPI are used as price-index ratios, PPP as the dollar-to-euro factor.
HICP = pd.read_csv("../data/HICPdata.csv").set_index("year")["average HICP"]
PPP = pd.read_csv("../data/PPPdata.csv").set_index("year").loc[:, "PPP"]
CPI = pd.read_csv("../data/CPI_US.csv").set_index("year").loc[:, "CPI"]

def convert_to_euros(currency, year, target_year):
    """
    Return the factor that converts an amount in ``currency`` of ``year``
    into euros of ``target_year``.

    The factor is built from the module-level, year-indexed series ``HICP``,
    ``PPP`` and ``CPI``:

    * euro: ratio of the HICP in ``target_year`` to the HICP in ``year``;
    * dollar, ``year`` covered by ``PPP``: ``PPP[year]`` times that HICP ratio;
    * dollar, ``year`` not covered by ``PPP``: the amount is first moved to the
      last year available in ``PPP`` with the CPI ratio, converted with the PPP
      of that year, and then moved to ``target_year`` with the HICP ratio.

    :param currency: currency of the amount, either "euro" or "dollar".
    :param year: year in which the amount is expressed.
    :param target_year: year of the euros to convert to.
    :return: multiplicative conversion factor.
    :raises ValueError: if ``currency`` is neither "euro" nor "dollar".
    """
    if currency == "euro":
        return HICP[target_year] / HICP[year]

    if currency == "dollar":
        if year in PPP.index:
            # convert currency then adjust for inflation
            return PPP[year] * (HICP[target_year] / HICP[year])
        # convert to last year in PPP data
        last_year = max(PPP.index)
        return (CPI[last_year] / CPI[year]) * PPP[last_year] * (HICP[target_year] / HICP[last_year])

    # Falling through would return None and silently produce NaN costs downstream.
    raise ValueError(
        "Unsupported currency {!r}: expected 'euro' or 'dollar'".format(currency)
    )

In [5]:
target_year = 2020

# One factor per row: currency conversion and inflation adjustment to
# euros of `target_year`.
biomass_costs["conversion factor"] = [
    convert_to_euros(currency, year, target_year)
    for currency, year in zip(biomass_costs["currency"], biomass_costs["year"])
]
biomass_costs["cost"] = biomass_costs["cost"] * biomass_costs["conversion factor"]
biomass_costs["year"] = target_year
biomass_costs["currency"] = "euro"

# Re-express the cost per MWh instead of per GJ (1 MWh = 3.6 GJ).
biomass_costs["unit"] = "MWh"
biomass_costs["cost"] = biomass_costs["cost"] * 3.6

The other costs are from the Advanced biofuels report:

In [6]:
# Non-feedstock cost estimates (min / central / max), labelled as 2020 euros per MWh.
other_costs = pd.DataFrame(
    [("min", 60), ("central", 70), ("max", 108)],
    columns=["estimate", "cost"],
).assign(currency="euro", year=2020, unit="MWh")

Map between biomass types and the ecoinvent technologies:

In [7]:
# Lookup from each bioethanol technology name to the biomass category whose
# cost is used as its feedstock cost (the values are entries of the "type"
# column of the biomass cost table). The insertion order of this dict is the
# order in which the technologies appear in the resulting cost table.
tech2type = {
    "bioethanol, switchgrass": "grass energy crops",
    "bioethanol, eucalyptus": "wood energy crops",
    "bioethanol, miscanthus": "grass energy crops",
    "bioethanol, forest residue": "forest residues",
    "bioethanol, willow": "wood energy crops",
    "bioethanol, wheat straw": "agricultural residues",
    "bioethanol, corn stover": "agricultural residues"
}

Putting the two together:

In [8]:
# Build one row per (technology, estimate):
#     cost = feedstock cost / 0.4 + other costs
# NOTE(review): 0.4 is presumably the biomass-to-ethanol energy conversion
# efficiency -- confirm against the data source.

# Index the two lookup tables once instead of on every loop iteration.
feedstock_cost_by_type = biomass_costs.set_index("type")["cost"]
other_cost_by_estimate = other_costs.set_index("estimate")["cost"]

cost = []
estimate = []
names = []
# The loop variable is deliberately not called `type`: at notebook-global scope
# that would shadow the builtin for every cell that runs afterwards.
for tech, biomass_type in tech2type.items():
    feedstock_cost = feedstock_cost_by_type.loc[biomass_type] / 0.4
    for est in ["min", "central", "max"]:
        other = other_cost_by_estimate.loc[est]
        cost.append(feedstock_cost + other)
        estimate.append(est)
        names.append(tech)

bioethanol_costs = pd.DataFrame(
    {
        "short name": names,
        "cost": cost,
        "estimate": estimate,
    }
)

# Metadata shared by all bioethanol rows.
bioethanol_costs["currency"] = "euro"
bioethanol_costs["sector"] = "liquids"
bioethanol_costs["year"] = 2020
bioethanol_costs["unit"] = "MWh"
bioethanol_costs["source"] = "Advanced biofuels + Domingues et al"

# Fix the column order.
bioethanol_costs = bioethanol_costs[["short name", "sector", "cost", "estimate", "currency", "unit", "year", "source"]]

### Refinery production costs

In [9]:
# Load the raw refinery cost table; the bare name on the last line displays it.
refinery_costs = pd.read_csv(
    filepath_or_buffer="../data/production_costs_refinery_end-user.csv",
)
refinery_costs

Unnamed: 0,product name,cost,currency,unit,year,source
0,kerosene-type jet fuel,1.97,dollar,gal,2019,https://www.eia.gov/dnav/pet/pet_pri_refoth_dc...
1,motor gasoline,2.245,dollar,gal,2019,https://www.eia.gov/dnav/pet/pet_pri_refoth_dc...
2,no. 2 diesel fuel,2.115,dollar,gal,2019,https://www.eia.gov/dnav/pet/pet_pri_refoth_dc...
3,residual fuel oil,1.584,dollar,gal,2019,https://www.eia.gov/dnav/pet/pet_pri_refoth_dc...


Map fuel names:

In [10]:
# Translation from the fuel names used in the refinery cost table to the
# product names used in the rest of this notebook.
fuel2ei = {
    "kerosene-type jet fuel": "kerosene",
    "motor gasoline": "petrol, unleaded",
    "no. 2 diesel fuel": "diesel",
    "residual fuel oil": "heavy fuel oil"
}

# Rename in place. Series.map with a dict yields NaN for any product name that
# is not a key of `fuel2ei`, so every fuel in the table must be listed above.
refinery_costs["product name"] = refinery_costs["product name"].map(fuel2ei)

### Convert to costs per megajoule

In [11]:
# Per-product lookup tables: density (kg/l) and net calorific value, keyed by product name.
densities = pd.read_csv("../data/fuel_densities.csv")
densities_dict = densities.set_index("product name")["density in kg/l"].to_dict()

ncvs = pd.read_csv("../data/NCVs_v2.csv")
ncvs_dict = ncvs.set_index("product name")["NCV in MJ/product"].to_dict()

In [12]:
def _megajoules_per_gallon(product):
    """
    Return the energy content of one gallon of ``product``.

    Computed as 3.7854 (litres per gallon) * density (kg/l) * NCV.
    NOTE(review): assumes the NCV table is expressed in MJ per kg -- confirm.
    """
    return 3.7854 * densities_dict[product] * ncvs_dict[product]


# Cost per gallon divided by the MJ per gallon gives the cost per MJ.
refinery_costs["conversion factor"] = 1 / refinery_costs["product name"].apply(_megajoules_per_gallon)
refinery_costs["cost"] = refinery_costs["cost"] * refinery_costs["conversion factor"]

# Short names under which the refinery products appear in the combined cost table.
product2name = {
    "kerosene": "kerosene, refinery",
    "petrol, unleaded": "petrol, refinery",
    "diesel": "diesel, refinery",
    "heavy fuel oil": "heavy fuel oil, refinery",
}
refinery_costs["short name"] = refinery_costs["product name"].map(product2name)

# Attach the constant metadata and keep only the columns of the common layout.
refinery_costs = refinery_costs.assign(
    currency="dollar",
    sector="liquids",
    unit="MJ",
    estimate="central",
)[["short name", "sector", "cost", "estimate", "currency", "unit", "year", "source"]]

### Put all costs together

In [13]:
# Stack the cost table read from file with the bioethanol and refinery tables,
# then key every row by (short name, sector, source).
df = pd.concat(
    objs=[
        pd.read_csv("../data/production_costs.csv"),
        bioethanol_costs,
        refinery_costs,
    ],
    axis="index",
)
df = df.set_index(["short name", "sector", "source"])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cost,estimate,currency,unit,year
short name,sector,source,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
PV commercial,electricity production,LAZARD LCOE v16,24.000000,min,dollar,MWh,2023
PV commercial,electricity production,LAZARD LCOE v16,96.000000,max,dollar,MWh,2023
PV residential,electricity production,LAZARD LCOE v16,117.000000,min,dollar,MWh,2023
PV residential,electricity production,LAZARD LCOE v16,282.000000,max,dollar,MWh,2023
geothermal,electricity production,LAZARD LCOE v16,61.000000,min,dollar,MWh,2023
...,...,...,...,...,...,...,...
"bioethanol, corn stover",liquids,Advanced biofuels + Domingues et al,132.087368,max,euro,MWh,2020
"kerosene, refinery",liquids,https://www.eia.gov/dnav/pet/pet_pri_refoth_dcu_nus_a.htm,0.014742,central,dollar,MJ,2019
"petrol, refinery",liquids,https://www.eia.gov/dnav/pet/pet_pri_refoth_dcu_nus_a.htm,0.018525,central,dollar,MJ,2019
"diesel, refinery",liquids,https://www.eia.gov/dnav/pet/pet_pri_refoth_dcu_nus_a.htm,0.015438,central,dollar,MJ,2019


### Convert to euro 2022 and per MJ

In [14]:
# Number of megajoules in one unit of each energy unit; costs per unit are
# divided by these values to obtain costs per MJ.
# NOTE(review): 1055 MJ corresponds to one *million* Btu (often written MMBtu);
# confirm that the source data use "MBtu" in that sense and not as thousand Btu.
unit2MJ = {
    "MWh": 3600,
    "GJ": 1e03,
    "MJ": 1.0,
    "MBtu": 1055
}

In [15]:
# Per-row factor: conversion to euros of EURO_REF_YEAR, divided by the number
# of MJ in the row's original unit.
euro_factor = df.apply(
    lambda row: convert_to_euros(row.currency, row.year, EURO_REF_YEAR),
    axis=1,
)
mj_per_unit = df["unit"].map(unit2MJ)
df["conversion factor"] = euro_factor / mj_per_unit
df["cost"] = df["cost"] * df["conversion factor"]

# All rows are now expressed in euros of the reference year per MJ.
df["unit"] = "MJ"
df["year"] = EURO_REF_YEAR
df["currency"] = "euro"

In [16]:
# Write the converted table to disk, without the helper column.
(
    df.reset_index()
    .drop(columns="conversion factor")
    .to_csv("../data/production_costs_euro_{}.csv".format(EURO_REF_YEAR), index=False)
)

## Generate a sample

In [17]:
def draw_costs_from_source(df, size, distribution_type="triangular"):
    """
    Draw ``size`` random cost values from the estimates of a single source.

    The estimate labels found in ``df`` decide how the values are drawn:

    * only "central": the central cost, repeated ``size`` times;
    * "min" and "max": triangular distribution whose mode is the midpoint;
    * "central", "min" and "max": triangular distribution whose mode is the
      central cost;
    * any other set of labels, or repeated labels: the costs are treated as
      individual samples and drawn from with replacement.

    :param df: dataframe with an "estimate" and a "cost" column. It is not modified.
    :param size: number of values to draw.
    :param distribution_type: distribution used between "min" and "max";
        only "triangular" is supported.
    :return: numpy array with ``size`` costs.
    :raises ValueError: if a min/max range has to be sampled with an
        unsupported ``distribution_type``.
    """
    # Work on a new, estimate-indexed series so that the caller's frame is left untouched.
    costs = df.set_index("estimate")["cost"]
    labels = set(costs.index)

    # With repeated labels, costs.loc[label] is no longer a scalar, so such
    # inputs are handled by the individual-samples fallback at the end.
    if costs.index.is_unique:
        if labels == {"central"}:
            return np.ones(size) * costs.loc["central"]

        if labels in ({"min", "max"}, {"central", "min", "max"}):
            if distribution_type != "triangular":
                raise ValueError(
                    "Unsupported distribution type {!r}: only 'triangular' is implemented".format(
                        distribution_type
                    )
                )
            low = costs.loc["min"]
            high = costs.loc["max"]
            mode = costs.loc["central"] if "central" in labels else (high + low) / 2
            if low == high:
                # np.random.triangular rejects a zero-width range; the draw is constant.
                return np.ones(size) * low
            return np.random.triangular(low, mode, high, size)

    # treat as individual samples
    return np.random.choice(np.array(costs), size)
    
def draw_production_cost_sample(df, size, distribution_type="triangular"):
    """
    Draw a random sample of production costs for every (short name, sector) pair.

    For each pair, a source is picked at random (with replacement) for each of
    the ``size`` draws, and the corresponding number of costs is then drawn
    from every picked source with ``draw_costs_from_source``. Within a row the
    values are grouped by source; the row is not shuffled afterwards.

    :param df: dataframe with the columns "short name", "sector", "source",
        "estimate" and "cost".
    :param size: number of samples to draw per (short name, sector) pair.
    :param distribution_type: passed on to ``draw_costs_from_source``.
    :return: dataframe indexed by (short name, sector), with one column per
        sample labelled 0 .. size - 1.
    """
    # Group once instead of re-indexing the whole frame for every pair.
    grouped = df.groupby(["short name", "sector"])
    new_idx = grouped.size().index
    samples = []

    for idx in new_idx:
        sel = grouped.get_group(idx)
        options = sel["source"].unique()
        choices = np.random.choice(options, size)
        # Number of draws that fell on each source.
        uniques, counts = np.unique(choices, return_counts=True)
        sample = []
        for m, s in zip(uniques, counts):
            # Hand over a copy so the helper can never touch `sel`.
            method_df = sel[sel["source"] == m].copy()
            sample.extend(draw_costs_from_source(method_df, s, distribution_type=distribution_type))

        samples.append(sample)

    sample_df = pd.DataFrame(
        np.array(samples),
        index=new_idx,
        columns=list(range(size))
    )

    return sample_df
    


In [18]:
# Read back the converted costs, draw the Monte Carlo sample and store it with
# one row per (short name, sector, sample index).
df_prodcosts = pd.read_csv("../data/production_costs_euro_{}.csv".format(EURO_REF_YEAR))
df_sample = draw_production_cost_sample(df_prodcosts, MC_SAMPLE_SIZE)

(
    df_sample
    .melt(var_name="sample index", value_name="cost", ignore_index=False)
    .reset_index()
    .to_csv(
        "../data/production_costs_monte_carlo_sample_euro_{}.csv".format(EURO_REF_YEAR),
        index=False,
    )
)