In [33]:
# 📦 Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [34]:
# Show the column names of the frame currently bound to `df`.
# NOTE(review): `df` is not assigned in any cell visible here — confirm an
# earlier cell loads it, otherwise this fails on Restart Kernel → Run All.
print(df.columns.tolist())


['countryCode', 'country', 'pollutantName', 'sectorCode', 'year', 'value', 'unit', 'notation', 'parentSectorCode', 'sectorName']


In [35]:
# Normalise the header in place: trim stray whitespace first so the alias
# lookup below sees clean names, then map alternative header spellings onto
# the names the rest of the notebook uses. Aliases that are absent from the
# frame are simply ignored by `rename`.
df.columns = df.columns.str.strip()
df.rename(
    columns=dict(
        Country='country',
        Pollutant='pollutantName',
        Sector='sectorName',
        Year='year',
        EmissionValue='value',
    ),
    inplace=True,
)


In [36]:
# Re-print the column names to verify the result of the strip/rename cell.
# NOTE(review): relies on `df` from kernel state; it is not assigned in any
# cell visible here.
print(df.columns.tolist())


['countryCode', 'country', 'pollutantName', 'sectorCode', 'year', 'value', 'unit', 'notation', 'parentSectorCode', 'sectorName']


In [None]:


# 📁 Constants
# Tunable inputs for the analysis; all of them are read by run_emissions_analysis().
DATA_PATH = 'emissions_data.csv'  # Must exist in the working directory; load_dataset() reads it as tab-separated by default
COUNTRY = 'Austria'  # Country label handed to filter_emissions() and shown in the plot title
POLLUTANT = 'As'  # Pollutant label handed to filter_emissions() and shown in the plot title
SECTOR_KEYWORD = 'manure'  # Sector search term handed to filter_emissions()
RESTORATION_YEAR = 2025  # First year (inclusive) whose yearly total is scaled in the restoration scenario
REDUCTION_FACTOR = 0.7  # Multiplier applied to those yearly totals (0.7 keeps 70 % of the original value)

# 📊 Load Dataset
def load_dataset(path, sep='\t'):
    """Read a delimited emissions file into a DataFrame with trimmed column names.

    Parameters
    ----------
    path : str
        Location of the file on disk.
    sep : str, default '\\t'
        Field delimiter forwarded to ``pandas.read_csv``. The default keeps
        the previous behaviour (tab-separated input), while other exports
        (e.g. comma-separated) can now be loaded without editing this function.

    Returns
    -------
    pandas.DataFrame
        The parsed file; leading/trailing whitespace is stripped from every
        column name.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"❌ Dataset not found at: {path}")
    # low_memory=False makes pandas infer each column's dtype from the whole
    # file instead of chunk by chunk.
    df = pd.read_csv(path, sep=sep, low_memory=False)
    df.columns = df.columns.str.strip()
    return df

# 🔍 Validate Columns
def validate_columns(df, required_cols):
    """Ensure every name in ``required_cols`` is a column of ``df``.

    Returns ``None`` when all columns are present. Otherwise raises
    ``ValueError`` listing the absent names in the order they were requested.
    """
    present = df.columns
    absent = []
    for name in required_cols:
        if name in present:
            continue
        absent.append(name)

    if not absent:
        return
    raise ValueError(f"❌ Missing columns in dataset: {absent}")

# 🔬 Filter Dataset
def _label_mask(labels, query):
    """Return a boolean mask selecting the rows of ``labels`` that match ``query``.

    Matching ignores case and surrounding whitespace. Exact matches win: if
    at least one label equals the query, only those rows are selected.
    Otherwise the query is treated as a literal (non-regex) substring.
    Missing labels never match.
    """
    normalized = labels.str.strip().str.lower()
    needle = query.strip().lower()
    exact = normalized == needle
    if exact.any():
        return exact
    return normalized.str.contains(needle, regex=False, na=False)


def filter_emissions(df, country, pollutant, sector):
    """Select the rows for one country / pollutant / sector and coerce numerics.

    Each filter prefers an exact, case-insensitive match on its column and
    only falls back to a literal substring search when nothing matches
    exactly. This keeps short labels from picking up unrelated rows (a plain
    substring search for ``'As'`` also hits ``'Biomass'`` and
    ``'Gaseous Fuels'``) and lets labels containing regex metacharacters,
    such as ``'Benzo(a) Pyrene'``, match themselves. Queries are never
    interpreted as regular expressions.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``country``, ``pollutantName``, ``sectorName``, ``year``
        and ``value`` columns; the three label columns must be string-typed.
    country, pollutant, sector : str
        Full label or keyword to look for in the respective column.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows with ``year`` and ``value`` converted to
        numbers; rows where either conversion fails are dropped. ``df``
        itself is not modified.
    """
    # Diagnostic listing so an empty result can be debugged from the output.
    print("🔎 Available countries:", df['country'].unique())
    print("🔎 Available pollutants:", df['pollutantName'].unique())
    print("🔎 Available sectors:", df['sectorName'].unique())

    row_mask = (
        _label_mask(df['country'], country)
        & _label_mask(df['pollutantName'], pollutant)
        & _label_mask(df['sectorName'], sector)
    )
    df_filtered = df[row_mask].copy()

    # Unparseable entries become NaN and are removed together with the row.
    df_filtered['year'] = pd.to_numeric(df_filtered['year'], errors='coerce')
    df_filtered['value'] = pd.to_numeric(df_filtered['value'], errors='coerce')
    df_filtered = df_filtered.dropna(subset=['year', 'value'])

    return df_filtered

# 📈 Model Restoration Scenario
def model_restoration(df, year_threshold, reduction_factor):
    """Aggregate emissions per year and add a scaled "restoration" series.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs numeric ``year`` and ``value`` columns.
    year_threshold : int or float
        First year (inclusive) to which the reduction applies.
    reduction_factor : float
        Multiplier applied to the yearly total from ``year_threshold`` onward.

    Returns
    -------
    pandas.DataFrame
        One row per year with columns ``year``, ``value`` (sum over all input
        rows of that year) and ``restored`` (``value * reduction_factor`` for
        years at or after the threshold, ``value`` otherwise). If no year in
        the data reaches the threshold, ``restored`` equals ``value``.
    """
    df_yearly = df.groupby('year')['value'].sum().reset_index()

    # Vectorised replacement for a row-wise apply: only rows at or after the
    # threshold are swapped for the scaled value, all others are kept as-is.
    in_scenario = df_yearly['year'] >= year_threshold
    df_yearly['restored'] = df_yearly['value'].mask(
        in_scenario, df_yearly['value'] * reduction_factor
    )
    return df_yearly

# 📊 Plot Emissions
def plot_emissions(df_yearly, country, pollutant, sector_label='Manure Management', unit='kg'):
    """Draw the original and restoration-scenario emission series on one chart.

    Parameters
    ----------
    df_yearly : pandas.DataFrame
        Needs ``year``, ``value`` and ``restored`` columns.
    country, pollutant : str
        Used in the figure title only.
    sector_label : str, default 'Manure Management'
        Sector name shown in the title. Exposed as a parameter so the title
        can follow the sector filter that was actually applied; the default
        reproduces the previously hard-coded wording.
    unit : str, default 'kg'
        Unit shown on the y-axis label.
        NOTE(review): the default is the previously hard-coded label; confirm
        it against the dataset's ``unit`` column.

    Returns
    -------
    None
        Prints a notice and returns without plotting when ``df_yearly`` is
        empty; otherwise shows the figure.
    """
    if df_yearly.empty:
        print("⚠️ No data to plot.")
        return

    # Explicit figure/axes objects instead of the implicit pyplot state.
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.lineplot(data=df_yearly, x='year', y='value', label='Original',
                 marker='o', color='darkred', ax=ax)
    sns.lineplot(data=df_yearly, x='year', y='restored', label='Restoration Scenario',
                 marker='o', color='forestgreen', ax=ax)
    ax.set_title(f'{pollutant} Emissions from {sector_label} in {country} with Restoration Scenario')
    ax.set_xlabel('Year')
    ax.set_ylabel(f'Emissions ({unit})')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plt.show()

# 🚀 Run Analysis
def run_emissions_analysis():
    """Run the whole pipeline using the module-level constants.

    Steps: load the file at ``DATA_PATH``, check the required columns,
    filter by ``COUNTRY`` / ``POLLUTANT`` / ``SECTOR_KEYWORD``, build the
    yearly restoration scenario, plot it and print the yearly table.
    Prints a hint and stops early when the filter selects no rows.
    """
    emissions = load_dataset(DATA_PATH)
    required = ['country', 'pollutantName', 'sectorName', 'year', 'value']
    validate_columns(emissions, required)

    subset = filter_emissions(emissions, COUNTRY, POLLUTANT, SECTOR_KEYWORD)
    if not subset.empty:
        yearly = model_restoration(subset, RESTORATION_YEAR, REDUCTION_FACTOR)
        plot_emissions(yearly, COUNTRY, POLLUTANT)

        print("📊 Grouped emissions data:")
        print(yearly)
        return

    print("⚠️ No matching data found. Try adjusting filters or inspecting the dataset.")

# 🧠 Execute
# Run the pipeline only when this code executes as the top-level module
# (`__name__` is "__main__"), not when it is imported from elsewhere.
if __name__ == "__main__":
    run_emissions_analysis()



🔎 Available countries: ['Austria' 'Belgium' 'Bulgaria' 'Cyprus' 'Czechia' 'Germany' 'Denmark'
 'Estonia' 'Spain' 'Finland' 'France' 'Greece' 'Croatia' 'Hungary'
 'Ireland' 'Italy' 'Lithuania' 'Luxembourg' 'Latvia' 'Malta' 'Netherlands'
 'Poland' 'Portugal' 'Romania' 'Sweden' 'Slovenia' 'Slovakia'
 'Switzerland' 'Iceland' 'Liechtenstein' 'Norway' 'Türkiye' 'EEA32' 'EU27']
🔎 Available pollutants: ['As' 'BC' 'Benzo(a) Pyrene' 'Benzo(b) Fluoranthene'
 'benzo(k) Fluoranthene' 'Cd' 'CO' 'Cr' 'Cu' 'HCB' 'Ni' 'NMVOC'
 'PCDD/PCDF (dioxins/furans)' 'Total PAHs' 'Hg' 'PM10' 'NOx'
 'Indeno (1,2,3-cd) Pyrene' 'TSP' 'PM2.5' 'Pb' 'Zn' 'NH3' 'Se' 'PCBs'
 'SOx' 'Biomass' 'Gaseous Fuels' 'Liquid Fuels' 'Other Fuels'
 'Solid Fuels']
🔎 Available sectors: ['Manure management - Non-dairy cattle' 'Manure management - Sheep'
 'Volcanoes' 'Forest fires'
 'Other natural emissions (please specify in the IIR)'
 'Manure management - Swine' 'Manure management - Buffalo'
 'Public electricity and heat production' 'Ma