# Preprocessing IEA Electricity Statistics Data

In [None]:

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
    

## Load the Dataset

In [None]:

# Read the raw CSV export into a DataFrame.
# skiprows=7 discards the first seven lines of the file before parsing
# (presumably a metadata preamble -- confirm against the raw file).
file_path = "MES_0824.csv"  # Update with your file path
data = pd.read_csv(
    file_path,
    skiprows=7,
    encoding="latin1",
)
    

## Clean and Structure Data

In [None]:

# Drop the first parsed row and give the frame fixed, well-known column names.
#
# NOTE(review): read_csv has already consumed one line as the header after
# skipping 7 lines, so the row removed here is the first remaining row. This
# presumably strips a second header line in the export -- confirm against the
# raw file, otherwise a genuine observation is being discarded.
#
# .copy() detaches the result from the originally loaded frame, so the later
# in-place assignment to data["Value"] operates on an independent object and
# does not trigger pandas' SettingWithCopyWarning.
data = data.iloc[1:].copy()

# Assign proper column names (replaces whatever header was parsed from the file).
# Raises ValueError if the file does not have exactly six columns.
data.columns = ["Country", "Time", "Balance", "Product", "Value", "Unit"]
    

## Convert Data Types

In [None]:

# Parse 'Value' as numbers; entries that cannot be parsed become NaN
numeric_values = pd.to_numeric(data["Value"], errors="coerce")
data["Value"] = numeric_values

# Discard every row whose 'Value' is NaN
data = data.dropna(subset=["Value"])
    

## Extract Relevant Data

In [None]:

# Keep only the rows whose 'Balance' is exactly "Net Electricity Production"
is_net_production = data["Balance"] == "Net Electricity Production"
net_electricity = data[is_net_production]
    

## Aggregate Data by Country and Product

In [None]:

# Total 'Value' for each (Country, Product) pair, returned as a flat table
# with the columns Country, Product and Value.
group_keys = ["Country", "Product"]
summed_values = net_electricity.groupby(group_keys)["Value"].sum()
country_product_summary = summed_values.reset_index()
    

## Plot Net Production by Country and Product

In [None]:

# Grouped bar chart of the aggregated table: countries along the x-axis,
# one coloured bar per product within each country.
plt.figure(figsize=(12, 8))
sns.barplot(data=country_product_summary, x="Country", y="Value", hue="Product")

# Rotate the country labels and anchor them by their right edge so the end of
# each name sits under its own tick instead of drifting between ticks.
plt.xticks(rotation=45, ha="right")

plt.title("Net Electricity Production by Country and Product")
plt.xlabel("Country")
# NOTE(review): the label assumes every value is in GWh; the 'Unit' column is
# never checked -- confirm against the data.
plt.ylabel("Electricity Production (GWh)")
plt.legend(title="Product")

# Re-fit the layout so the rotated labels and legend are not clipped
plt.tight_layout()
plt.show()
    

## Save Preprocessed Data

In [None]:

# Write the cleaned frame (all balances, rows with a numeric 'Value') to CSV.
# index=False leaves the DataFrame index out of the file.
output_path = "preprocessed_iea_data.csv"
data.to_csv(path_or_buf=output_path, index=False)

# Report where the file was written
print(f"Preprocessed data saved to {output_path}")
    