In [1]:
import pandas as pd
import numpy as np
import random

## Utility functions


In [7]:
# Generate TCs
def generate_tcs(countries, elements, years, processes, max_outflows=6):
    """Generate a random long-format table of transfer coefficients (TCs).

    For every (country, element, year) combination a random subset of at
    least two processes is picked as flow sources. Each source is then given
    between 2 and ``min(max_outflows, len(processes))`` distinct target
    processes. Every target but the last receives a random share of what is
    still unallocated, rounded to 3 decimals; the last target receives the
    remainder so that the outgoing TCs of each source sum to 1.

    Sources and targets are sampled from the same ``processes`` list, so a
    process may appear as its own target.

    Randomness comes from the global ``random`` module state; call
    ``random.seed(...)`` beforehand for reproducible output.

    Parameters
    ----------
    countries, elements, years : iterable
        Values combined to form the (country, material, year) keys.
    processes : list
        Candidate process names; must contain at least two entries.
    max_outflows : int, default 6
        Upper bound on the number of targets drawn per source process.

    Returns
    -------
    pandas.DataFrame
        Columns ``country``, ``material``, ``year``, ``processFrom``,
        ``processTo`` and ``tc``.

    Raises
    ------
    ValueError
        Raised by ``random.randint`` when a row has to be generated but
        fewer than two processes are available or ``max_outflows`` < 2.
    """
    # random.sample cannot return more distinct targets than there are processes.
    outflow_cap = min(max_outflows, len(processes))

    result = []
    for country in countries:
        for element in elements:
            for year in years:
                nb_process = random.randint(2, len(processes))
                processFrom = random.sample(processes, k=nb_process)
                for proc_in in processFrom:
                    nb_outflows = random.randint(2, outflow_cap)
                    processTo = random.sample(processes, k=nb_outflows)
                    outflow_sum = 0.0
                    for proc_out in processTo[:-1]:
                        # Float noise in the running sum may push the rest a
                        # hair below zero; never draw from a negative range.
                        remaining = max(0.0, 1 - outflow_sum)
                        tc = np.round(random.uniform(0, remaining), 3)
                        outflow_sum += tc
                        result.append((country, element, year, proc_in, proc_out, tc))
                    # The last target closes the balance. Round it like the
                    # other coefficients and clamp it so it is never negative.
                    residual = max(0.0, np.round(1 - outflow_sum, 3))
                    result.append(
                        (country, element, year, proc_in, processTo[-1], residual)
                    )
    return pd.DataFrame(
        result,
        columns=["country", "material", "year", "processFrom", "processTo", "tc"],
    )


# Generate inflows
def generate_inflows(countries, elements, years, processes, max_targets=3):
    """Generate a random long-format table of external inflows.

    For every (country, element, year) combination, between 1 and
    ``min(max_targets, len(processes))`` distinct processes are drawn and
    each receives an integer inflow in the range 0..1000 coming from the
    fixed source ``"EXT"``.

    Randomness comes from the global ``random`` module state; call
    ``random.seed(...)`` beforehand for reproducible output.

    Parameters
    ----------
    countries, elements, years : iterable
        Values combined to form the (country, material, year) keys.
    processes : list
        Candidate receiving processes; must contain at least one entry.
    max_targets : int, default 3
        Upper bound on the number of receiving processes per combination.

    Returns
    -------
    pandas.DataFrame
        Columns ``country``, ``material``, ``year``, ``processFrom``
        (always ``"EXT"``), ``processTo`` and ``value``.

    Raises
    ------
    ValueError
        Raised by ``random.randint`` when a row has to be generated but
        ``processes`` is empty or ``max_targets`` < 1.
    """
    # random.sample cannot return more distinct processes than exist.
    target_cap = min(max_targets, len(processes))

    result = []
    for country in countries:
        for element in elements:
            for year in years:
                nb_process = random.randint(1, target_cap)
                processTo = random.sample(processes, k=nb_process)
                for proc_out in processTo:
                    # Truncate a uniform draw on [0, 1000) to a 32-bit integer.
                    inflow = np.int32(random.uniform(0, 1) * 1000)
                    result.append((country, element, year, "EXT", proc_out, inflow))
    return pd.DataFrame(
        result,
        columns=["country", "material", "year", "processFrom", "processTo", "value"],
    )


# Sanity check
def check_TC_outflows(df, source, target, value):
    """Verify that the outgoing TCs of every source process sum to 1.

    Rows are grouped by every column of ``df`` other than ``target`` and
    ``value``, and the ``value`` column is summed within each group. Rows
    that repeat the same group and target are added together rather than
    averaged, so duplicated flows cannot hide an unbalanced process.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table of transfer coefficients.
    source : str
        Name of the source-process column. Kept for interface
        compatibility; the grouping is derived from the remaining columns,
        which already include it.
    target : str
        Name of the target-process column.
    value : str
        Name of the column holding the transfer coefficients.

    Raises
    ------
    ValueError
        If at least one group's coefficients do not sum to 1 within
        ``numpy.isclose`` default tolerances. The message lists the
        offending groups.
    """
    group_keys = list(df.columns.difference([target, value]))
    sum_TC_outflows = df.groupby(group_keys)[value].sum()
    unbalanced = ~np.isclose(sum_TC_outflows, 1)
    if unbalanced.any():
        raise ValueError(
            f"sum(TCs) != 1 for the outflows of the following process:\n{sum_TC_outflows.loc[unbalanced].index}"
        )

## Generate synthetic data


In [None]:
# Process names; the generators draw both flow sources and flow targets from this list.
processes = [
    "collection", "Sorting", "Shredding", "Melting",
    "Fabrication", "Recycling", "Reuse", "Pyrolysis",
    "Gasification", "Inciniration", "Leaching", "Landfilling",
]

# Dimensions of the synthetic dataset: country codes, materials and years.
countries = "NL DE FR IT ES UK".split()
elements = "Nickel Aluminium Cobalt Zinc Copper Iron".split()
years = list(range(2023, 2019, -1))  # 2023 down to 2020, newest first

In [None]:
# Seed the global `random` generator (the only randomness source used by the
# generators) so that re-running the notebook reproduces the same dataset.
SEED = 42
random.seed(SEED)

tcs = generate_tcs(countries, elements, years, processes)
inflow = generate_inflows(countries, elements, years, processes)

# Fail fast if the outgoing TCs of any process do not sum to 1.
# `tcs` is already a DataFrame, so it is passed as-is.
check_TC_outflows(tcs, "processFrom", "processTo", "tc")

## Save data


In [None]:
# Write both tables to one workbook. The context manager closes the writer
# (saving the file) even if writing one of the sheets raises.
with pd.ExcelWriter("../data/dummy_long.xlsx") as writer:
    inflow.to_excel(writer, sheet_name="inputs", index=False)
    tcs.to_excel(writer, sheet_name="trans_coeff", index=False)