In [16]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
rng = np.random.default_rng(42)  # reproducible randomness


## Create Semi-Annual Supply Chain Snapshots for the years 2020-2024

In [17]:
# Define oil, gas, electricity NACE4 lists
oil_nace4_list = [4671, 4730, 610, 1920]
gas_nace4_list = [3521, 3522, 3521]
electricity_nace4_list = [3511, 3512, 3513, 3514]

# Number of firms
n = 100

# Create firm list (100 firms with random NACE4 codes from all categories + filler industries)
all_nace4 = oil_nace4_list + gas_nace4_list + electricity_nace4_list + list(range(100, 2000, 37))
firm_list = pd.DataFrame({
    "id": range(1, n + 1),
    "nace4": rng.choice(all_nace4, size=n, replace=True),
})
firm_list["nace2"] = firm_list["nace4"] // 100

# Function to create energy purchases snapshot for a given semester
def create_snapshot(year, semester):
    G = nx.DiGraph()
    # Add nodes with firm information
    for _, row in firm_list.iterrows():
        G.add_node(row["id"], nace4=row["nace4"], nace2=row["nace2"])

    edges = []

    # General energy purchases: every firm buys some gas + electricity
    for firm_id in firm_list["id"]:
        if firm_id > 3:  # skip the suppliers
            edges.append((2, firm_id, np.random.randint(10**5, 5*10**5)))  # gas
            edges.append((3, firm_id, np.random.randint(10**5, 5*10**5)))  # electricity

    # Oil-dependent firms (say 20 firms)
    oil_firms = rng.choice(firm_list["id"], size=20, replace=False)
    for firm_id in oil_firms:
        edges.append((1, firm_id, np.random.randint(6*10**6, 9*10**6)))

    # Gas-dependent firms (20 firms) with growth
    gas_firms = rng.choice(list(set(firm_list["id"]) - set(oil_firms)), size=20, replace=False)
    for firm_id in gas_firms:
        initial_link = np.random.randint(6*10**6, 9*10**6)
        increment = np.random.randint(5, 10) / 100
        edges.append((2, firm_id, int(initial_link * (1 + increment))))

    # Electricity-dependent firms (20 firms) with growth
    elec_firms = rng.choice(list(set(firm_list["id"]) - set(oil_firms) - set(gas_firms)), size=20, replace=False)
    for firm_id in elec_firms:
        initial_link = np.random.randint(6*10**6, 9*10**6)
        increment = np.random.randint(5, 10) / 100
        edges.append((3, firm_id, int(initial_link * (1 + increment))))

    # Add edges to the graph
    for edge in edges:
        G.add_edge(edge[0], edge[1], weight=edge[2])

    return G

# Generate semi-annual snapshots from 2020_1 to 2024_2
snapshots = {}
for year in range(2020, 2025):
    for semester in [1, 2]:
        label = f"{year}_{semester}"
        snapshots[label] = create_snapshot(year, semester)

print("Created snapshots:", list(snapshots.keys()))
print("Example snapshot has", snapshots["2020_1"].number_of_nodes(), "nodes and", snapshots["2020_1"].number_of_edges(), "edges")


Created snapshots: ['2020_1', '2020_2', '2021_1', '2021_2', '2022_1', '2022_2', '2023_1', '2023_2', '2024_1', '2024_2']
Example snapshot has 100 nodes and 214 edges


## Plot Supply Chain Snapshots

## Aggregate Energy Purchases of Firms

In [14]:
# ------------------------------
# Aggregate purchases (semi-annual)
# ------------------------------

def aggregate_purchases():
    semesters = [f"{year}_{sem}" for year in range(2020, 2025) for sem in [1, 2]]
    
    purchases_dict = {"firm_id": list(range(1, n + 1))}
    
    for sem in semesters:
        G = snapshots[sem]

        oil_vals, gas_vals, elec_vals = [], [], []

        for firm_id in range(1, n + 1):
            oil_purchases = 0
            gas_purchases = 0
            electricity_purchases = 0

            for provider in G.predecessors(firm_id):
                link_weight = G[provider][firm_id]["weight"]

                if provider == 1:  # Oil provider
                    oil_purchases += link_weight
                elif provider == 2:  # Gas provider
                    gas_purchases += link_weight
                elif provider == 3:  # Electricity provider
                    electricity_purchases += link_weight

            oil_vals.append(oil_purchases)
            gas_vals.append(gas_purchases)
            elec_vals.append(electricity_purchases)

        purchases_dict[f"oil_purchases_{sem}"] = oil_vals
        purchases_dict[f"gas_purchases_{sem}"] = gas_vals
        purchases_dict[f"electricity_purchases_{sem}"] = elec_vals
    
    purchases_df = pd.DataFrame(purchases_dict)
    return purchases_df

df_purchases = aggregate_purchases()


# ------------------------------
# Add REVENUE and EMPLOYMENT (annual only)
# ------------------------------

years = range(2020, 2025)

### REVENUE
base_revenue = np.random.randint(10**8, 10**11, size=len(df_purchases), dtype=np.int64)
revenues = {"revenue_2020": base_revenue}
prev = base_revenue.astype(float)

for year in years[1:]:
    prev = prev * np.random.uniform(0.95, 1.05, len(df_purchases))  # ±5% yearly change
    revenues[f"revenue_{year}"] = prev.astype(np.int64)

for col, vals in revenues.items():
    df_purchases[col] = vals


### EMPLOYMENT
base_employment = np.random.randint(5, 501, size=len(df_purchases))
employments = {"employment_2020": base_employment}
prev = base_employment.astype(float)

for year in years[1:]:
    prev = prev * np.random.uniform(0.95, 1.05, len(df_purchases))  # ±5% yearly change
    employments[f"employment_{year}"] = np.round(prev).astype(int)

for col, vals in employments.items():
    df_purchases[col] = vals


### ETS Flag
# Initialize with zeros
df_purchases["ets_flag"] = 0
# Randomly choose 5 firm IDs
ets_firms = np.random.choice(df_purchases["firm_id"], size=5, replace=False)
# Mark them with flag = 1
df_purchases.loc[df_purchases["firm_id"].isin(ets_firms), "ets_flag"] = 1


# ------------------------------
# Add NACE hierarchy info
# ------------------------------
df_purchases["nace2"] = firm_list["nace2"]
df_purchases["nace4"] = firm_list["nace4"]

df_parent_nace = pd.read_csv("data/nace_parent.csv")
nace_parent_dict = dict(zip(df_parent_nace["nace"], df_parent_nace["parent_nace"]))
df_purchases["parent_nace"] = df_purchases["nace2"].map(nace_parent_dict)
df_purchases = df_purchases[df_purchases.parent_nace.notna()]


## Add up fossil purchases
df_purchases["fossil_purchases_2020"] = df_purchases["gas_purchases_2020_1"] + df_purchases["gas_purchases_2020_2"] + df_purchases["oil_purchases_2020_1"] + df_purchases["oil_purchases_2020_2"]
df_purchases["fossil_purchases_2021"] = df_purchases["gas_purchases_2021_1"] + df_purchases["gas_purchases_2021_2"] + df_purchases["oil_purchases_2021_1"] + df_purchases["oil_purchases_2021_2"]
df_purchases["fossil_purchases_2022"] = df_purchases["gas_purchases_2022_1"] + df_purchases["gas_purchases_2022_2"] + df_purchases["oil_purchases_2022_1"] + df_purchases["oil_purchases_2022_2"]
df_purchases["fossil_purchases_2023"] = df_purchases["gas_purchases_2023_1"] + df_purchases["gas_purchases_2023_2"] + df_purchases["oil_purchases_2023_1"] + df_purchases["oil_purchases_2023_2"]
df_purchases["fossil_purchases_2024"] = df_purchases["gas_purchases_2024_1"] + df_purchases["gas_purchases_2024_2"] + df_purchases["oil_purchases_2024_1"] + df_purchases["oil_purchases_2024_2"]

df_purchases


Unnamed: 0,firm_id,oil_purchases_2020_1,gas_purchases_2020_1,electricity_purchases_2020_1,oil_purchases_2020_2,gas_purchases_2020_2,electricity_purchases_2020_2,oil_purchases_2021_1,gas_purchases_2021_1,electricity_purchases_2021_1,...,employment_2024,ets_flag,nace2,nace4,parent_nace,fossil_purchases_2020,fossil_purchases_2021,fossil_purchases_2022,fossil_purchases_2023,fossil_purchases_2024
0,1,6204097,0,0,0,0,7043641,0,6578775,0,...,228,0,35,3522,D,6204097,13047488,6609505,8105922,0
1,2,0,0,0,6850213,0,0,0,0,6863650,...,495,0,14,1469,C,6850213,0,0,0,8981952
2,3,0,0,0,0,9167965,0,0,6906602,0,...,211,0,12,1210,C,9167965,15729501,0,14942532,6966002
3,4,7449272,439993,499757,0,489835,287877,0,380148,7679711,...,449,0,6,692,B,8379100,728681,811406,7056730,561392
4,5,0,8321647,496203,0,317022,199286,8577523,398794,317298,...,277,0,6,692,B,8638669,9377673,15148719,9096390,768168
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,0,306805,430165,0,321893,7008354,0,284588,9088257,...,175,0,3,359,A,628698,443796,15261170,551910,14473736
96,97,0,226693,364978,0,7276247,461820,0,404524,122416,...,226,0,2,248,A,7502940,578589,335758,7244240,7327941
97,98,0,442195,299610,0,392816,307560,0,360891,311973,...,399,0,12,1247,C,835011,851993,9857225,6875139,7275589
98,99,0,247358,7876308,0,481485,6864687,0,146945,460926,...,83,0,11,1173,C,728843,9170117,8833915,8318500,7020702


In [15]:
df_purchases.to_csv("data/energy_purchases_revenue_firms_2020-2024.csv", index = False)