In [5]:
import pandas as pd

# CO₂ Total Emissions laden
df_co2 = pd.read_csv('../data/raw/co2_total.csv')

# Nur Jahre ab 1950
df_co2 = df_co2[df_co2['Year'] >= 1950]

# Länder ohne ISO-Code ausschließen
df_co2 = df_co2[df_co2['ISO 3166-1 alpha-3'].notna()]

# Zeilen ohne 'Total'-Wert entfernen
df_co2 = df_co2[df_co2['Total'].notna()]

# Index zurücksetzen
df_co2.reset_index(drop=True, inplace=True)

# Spalten umbenennen
df_co2.rename(columns={
    'ISO 3166-1 alpha-3': 'ISO3',
    'Per Capita': 'PerCapita'
}, inplace=True)

# Fehlende Unterwerte durch 0 ersetzen
cols_to_fill = ['Coal', 'Oil', 'Gas', 'Cement', 'Flaring', 'Other', 'PerCapita']
df_co2[cols_to_fill] = df_co2[cols_to_fill].fillna(0)

# Speichern
df_co2.to_csv('../data/processed/co2_cleaned_total.csv', index=False)
print("Datei gespeichert: co2_cleaned_total.csv")

# Ergebnis anzeigen
display(df_co2.head())

Datei gespeichert: co2_cleaned_total.csv


Unnamed: 0,Country,ISO3,Year,Total,Coal,Oil,Gas,Cement,Flaring,Other,PerCapita
0,Afghanistan,AFG,1950,0.084272,0.021068,0.063204,0.0,0.0,0.0,0.0,0.011266
1,Afghanistan,AFG,1951,0.0916,0.025648,0.065952,0.0,0.0,0.0,0.0,0.012098
2,Afghanistan,AFG,1952,0.0916,0.031708,0.059892,0.0,0.0,0.0,0.0,0.011946
3,Afghanistan,AFG,1953,0.106256,0.037949,0.068307,0.0,0.0,0.0,0.0,0.013685
4,Afghanistan,AFG,1954,0.106256,0.042502,0.063754,0.0,0.0,0.0,0.0,0.013511


In [2]:
# WEC-Datensatz laden
df_wec = pd.read_csv('../data/raw/World Energy Consumption.csv')

# Filter: ab 1965, gültiger ISO-Code, gültige Gesamtenergie
df_wec = df_wec[
    (df_wec['year'] >= 1965) &
    (df_wec['iso_code'].notna()) &
    (df_wec['primary_energy_consumption'].notna())
]

# Spalten umbenennen
df_wec.rename(columns={
    'country': 'Country',
    'iso_code': 'ISO3',
    'year': 'Year',
    'primary_energy_consumption': 'PrimaryEnergy',
    'fossil_fuel_consumption': 'FossilEnergy',
    'renewables_consumption': 'RenewablesEnergy',
    'nuclear_consumption': 'NuclearEnergy',
    'electricity_generation': 'ElectricityGenTotal',
    'population': 'Population',
    'gdp': 'GDP'
}, inplace=True)

# Nur relevante Spalten extrahieren
cols = ['Country', 'ISO3', 'Year', 'PrimaryEnergy', 'FossilEnergy',
        'RenewablesEnergy', 'NuclearEnergy', 'ElectricityGenTotal', 'Population', 'GDP']
df_wec_cleaned = df_wec[cols].copy()

# Fehlende Energiearten durch 0 ersetzen
cols_to_fill = ['FossilEnergy', 'RenewablesEnergy', 'NuclearEnergy', 'ElectricityGenTotal']
df_wec_cleaned[cols_to_fill] = df_wec_cleaned[cols_to_fill].fillna(0)

# Speichern
df_wec_cleaned.to_csv('../data/processed/wec_cleaned.csv', index=False)
print("Datei gespeichert: wec_cleaned.csv")

# Ergebnis anzeigen
display(df_wec_cleaned.head())

Datei gespeichert: wec_cleaned.csv


Unnamed: 0,Country,ISO3,Year,PrimaryEnergy,FossilEnergy,RenewablesEnergy,NuclearEnergy,ElectricityGenTotal,Population,GDP
103,Afghanistan,AFG,1980,7.791,0.0,0.0,0.0,0.0,12486640.0,15329840000.0
104,Afghanistan,AFG,1981,8.777,0.0,0.0,0.0,0.0,11155196.0,15645340000.0
105,Afghanistan,AFG,1982,9.348,0.0,0.0,0.0,0.0,10088290.0,15980410000.0
106,Afghanistan,AFG,1983,11.436,0.0,0.0,0.0,0.0,9951447.0,16755330000.0
107,Afghanistan,AFG,1984,11.489,0.0,0.0,0.0,0.0,10243689.0,17072150000.0


In [None]:
# Merge Bereich: Zusammenführung bereinigter Datensätze

In [5]:
import os
import pandas as pd

# Merge co2-total und World Energy Consumption
# Bereinigte Datensätze laden
df_co2 = pd.read_csv('../data/processed/co2_cleaned_total.csv')
df_wec = pd.read_csv('../data/processed/wec_cleaned.csv')

# Merge nach ISO3 und Year
df_merged = pd.merge(df_co2, df_wec, on=['ISO3', 'Year'], how='inner')

# Ergebnis anzeigen
print("Merge erfolgreich:", df_merged.shape)
display(df_merged.head())

# Datei speichern
df_merged.to_csv('../data/merge/co2_wec_merged.csv', index=False)
print("Datei gespeichert: co2_wec_merged.csv im Ordner data/merge/")

Merge erfolgreich: (9540, 19)


Unnamed: 0,Country_x,ISO3,Year,Total,Coal,Oil,Gas,Cement,Flaring,Other,PerCapita,Country_y,PrimaryEnergy,FossilEnergy,RenewablesEnergy,NuclearEnergy,ElectricityGenTotal,Population,GDP
0,Afghanistan,AFG,1980,1.756302,0.315762,0.925256,0.187254,0.023284,0.304747,0.0,0.140655,Afghanistan,7.791,0.0,0.0,0.0,0.0,12486640.0,15329840000.0
1,Afghanistan,AFG,1981,1.978463,0.333424,1.014928,0.304112,0.032879,0.29312,0.0,0.177358,Afghanistan,8.777,0.0,0.0,0.0,0.0,11155196.0,15645340000.0
2,Afghanistan,AFG,1982,2.094581,0.38472,0.992944,0.395712,0.039077,0.282128,0.0,0.207625,Afghanistan,9.348,0.0,0.0,0.0,0.0,10088290.0,15980410000.0
3,Afghanistan,AFG,1983,2.519954,0.38472,1.220112,0.615552,0.00645,0.29312,0.0,0.253225,Afghanistan,11.436,0.0,0.0,0.0,0.0,9951447.0,16755330000.0
4,Afghanistan,AFG,1984,2.82154,0.392556,1.133644,0.931863,0.047963,0.315513,0.0,0.275442,Afghanistan,11.489,0.0,0.0,0.0,0.0,10243689.0,17072150000.0


Datei gespeichert: co2_wec_merged.csv im Ordner data/merge/


In [None]:
# World Development Indicators Data Preparation

In [1]:
import pandas as pd

# 1. Rohdaten einlesen
df_wdi       = pd.read_csv('data/raw/WDIData.csv')      # alle Indikatoren, Länder & Jahre
df_countries = pd.read_csv('data/raw/WDICountry.csv')   # Metadaten zu Länder-Codes
df_series    = pd.read_csv('data/raw/WDISeries.csv')    # Metadaten zu Serien-Codes

# 2. Erste Übersicht
print("WDIData shape:", df_wdi.shape)
print("WDIData columns:", df_wdi.columns.tolist())
print("Countries head:\n", df_countries.head())
print("Series head:\n", df_series.head())



FileNotFoundError: [Errno 2] No such file or directory: 'data/raw/WDIData.csv'