In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import warnings
warnings.filterwarnings('ignore')

# Country Configuration laden
import sys
sys.path.append('./utils')
from country_config import country_colors, g7, eu_core, extra_countries, get_selected_countries

# Plotting configuration: start from matplotlib's default style, then apply
# the notebook-wide figure size and font size in a single update.
plt.style.use('default')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 10,
})

print("Bibliotheken und Country Configuration geladen.")

Bibliotheken und Country Configuration geladen.


In [2]:
# CO2 & ENERGY DATA ANALYSIS

In [7]:
# Load the processed OWID CO2 & energy dataset and report its basic extent
df_co2_energy = pd.read_csv('../data/processed/co2_energy_data.csv')

print(f"CO2 & Energy Dataset analysiert: {df_co2_energy.shape}")
co2_years = df_co2_energy['Year']
print(f"Zeitraum: {co2_years.min()} - {co2_years.max()}")
print(f"Länder: {df_co2_energy['Country'].nunique()}")

# Descriptive statistics for the headline variables
key_vars = ['CO2_total', 'CO2_per_capita', 'Primary_energy', 'Population', 'GDP']
print("\nDESKRIPTIVE STATISTIK")
desc_stats = df_co2_energy.loc[:, key_vars].describe()
display(desc_stats)

# Temporal coverage: number of rows present for each year
# (equals the number of countries only if there is one row per country-year)
year_coverage = df_co2_energy.groupby('Year').size()
fewest_rows, most_rows = year_coverage.min(), year_coverage.max()
print("\nZeitliche Datenverfügbarkeit:")
print(f"Durchschnittliche Länder pro Jahr: {year_coverage.mean():.1f}")
print(f"Datenverfügbarkeit: {fewest_rows} - {most_rows} Länder pro Jahr")

# Top CO2 emitters: work on a copy of the rows for the most recent year
latest_year = df_co2_energy['Year'].max()
latest_data = df_co2_energy.loc[df_co2_energy['Year'] == latest_year].copy()

print(f"\nCO2-EMISSIONEN ANALYSE ({latest_year})")

# Apply the strategic country selection provided by country_config
selected_countries = get_selected_countries(df_co2_energy, 'CO2_total')
print(f"Strategisch ausgewählte Länder für Analyse: {len(selected_countries)}")
print(f"G7: {g7}")
print(f"EU Core: {eu_core}")
print(f"Extra Countries: {extra_countries}")

# Columns shown in both emission tables below
emission_columns = ['Country', 'CO2_total', 'CO2_per_capita']

# Global top 15 by total emissions in the latest year
top_emitters_global = latest_data.nlargest(15, 'CO2_total')[emission_columns]
print("\nTop 15 CO2-Emittenten global:")
display(top_emitters_global)

# Check how many of the strategic countries are present in the latest-year rows
is_selected = latest_data['Country'].isin(selected_countries)
selected_data = latest_data[is_selected]
print(f"\nStrategische Länder in den Daten: {len(selected_data)} von {len(selected_countries)}")
if not selected_data.empty:
    selected_emissions = selected_data[emission_columns].sort_values('CO2_total', ascending=False)
    print("CO2-Emissionen der strategischen Länder:")
    display(selected_emissions)

# CO2 by source: keep only the source columns that exist in the dataset
co2_sources = ['CO2_coal', 'CO2_oil', 'CO2_gas', 'CO2_cement', 'CO2_flaring', 'CO2_other']
available_sources = [name for name in co2_sources if name in df_co2_energy.columns]

if available_sources:
    print("\nCO2-Emissionen nach Quellen (globale Summen):")
    # Sum each source over all latest-year rows, largest first
    source_totals = latest_data[available_sources].sum().sort_values(ascending=False)
    # Denominator for the shares; computed once rather than per iteration
    all_sources_total = source_totals.sum()
    for source, total in source_totals.items():
        percentage = (total / all_sources_total) * 100
        print(f"  {source}: {total:.1f} Mt ({percentage:.1f}%)")

# Energy analysis: rows (any year) that carry a primary-energy value
energy_countries = df_co2_energy.dropna(subset=['Primary_energy'])
print("\nENERGIE-ANALYSE")
print(f"Länder mit Energiedaten: {energy_countries['Country'].nunique()}")

if not energy_countries.empty:
    # The ranking itself is taken from the latest-year rows, not from all years
    energy_columns = ['Country', 'Primary_energy', 'Energy_per_capita']
    top_energy = latest_data.nlargest(10, 'Primary_energy')[energy_columns]
    print("Top 10 Primärenergie-Verbraucher:")
    display(top_energy)

# Temporal trends
print("\nZEITLICHE ENTWICKLUNG")
# Global totals are approximated by summing all rows per year.
# min_count=1 keeps a year without any reported CO2_total as NaN instead of a
# misleading 0.0; such years are dropped so they cannot become the start or
# end point of the comparison.
# NOTE(review): CO2_total is not reported for every row, so year-to-year totals
# may partly reflect changes in reporting coverage — confirm before citing.
global_trends = (
    df_co2_energy.groupby('Year')['CO2_total']
    .sum(min_count=1)
    .dropna()
)
if len(global_trends) > 0:
    co2_start = global_trends.iloc[0]
    co2_end = global_trends.iloc[-1]

    print("Globale CO2-Entwicklung (aggregiert):")
    print(f"  {global_trends.index[0]}: {co2_start:.1f} Mt")
    print(f"  {global_trends.index[-1]}: {co2_end:.1f} Mt")
    if co2_start > 0:
        growth_rate = ((co2_end / co2_start) - 1) * 100
        print(f"  Gesamtwachstum: {growth_rate:.1f}%")
    else:
        # Warnings are suppressed notebook-wide, so a zero start value would
        # otherwise print inf/nan without any hint of the division by zero.
        print("  Gesamtwachstum: nicht berechenbar (Startwert ist 0)")

CO2 & Energy Dataset analysiert: (12862, 20)
Zeitraum: 1960 - 2018
Länder: 218

DESKRIPTIVE STATISTIK


Unnamed: 0,CO2_total,CO2_per_capita,Primary_energy,Population,GDP
count,12173.0,12102.0,8673.0,12744.0,9343.0
mean,106.05964,5.322128,589.358992,24347080.0,313115900000.0
std,512.499997,12.651802,2404.597315,101894600.0,1175574000000.0
min,0.0,0.0,0.0,552.0,70560000.0
25%,0.566,0.46225,5.683,456010.2,10055940000.0
50%,5.164,2.1185,50.25,4234795.0,35770130000.0
75%,42.162,6.74775,300.531,13494640.0,166059600000.0
max,10332.992,376.469,38409.645,1419009000.0,22294290000000.0



Zeitliche Datenverfügbarkeit:
Durchschnittliche Länder pro Jahr: 218.0
Datenverfügbarkeit: 218 - 218 Länder pro Jahr

CO2-EMISSIONEN ANALYSE (2018)
Strategisch ausgewählte Länder für Analyse: 19
G7: ['United States', 'Germany', 'France', 'Canada', 'Japan', 'United Kingdom']
EU Core: ['Spain', 'Poland', 'Netherlands', 'Sweden']
Extra Countries: ['China', 'India', 'Russia', 'Brazil', 'Vietnam', 'Malaysia', 'Singapore', 'United Arab Emirates']

Top 15 CO2-Emittenten global:


Unnamed: 0,Country,CO2_total,CO2_per_capita
2477,China,10332.992,7.282
12271,United States,5377.797,16.05
5368,India,2593.058,1.886
9498,Russia,1712.494,11.695
5840,Japan,1142.481,9.002
4542,Germany,760.046,9.117
5486,Iran,689.783,8.01
10796,South Korea,670.169,12.977
10088,Saudi Arabia,637.487,20.994
5427,Indonesia,594.101,2.201



Strategische Länder in den Daten: 19 von 19
CO2-Emissionen der strategischen Länder:


Unnamed: 0,Country,CO2_total,CO2_per_capita
2477,China,10332.992,7.282
12271,United States,5377.797,16.05
5368,India,2593.058,1.886
9498,Russia,1712.494,11.695
5840,Japan,1142.481,9.002
4542,Germany,760.046,9.117
2182,Canada,579.079,15.527
1710,Brazil,478.379,2.321
12212,United Kingdom,379.73,5.689
9262,Poland,336.069,8.788



CO2-Emissionen nach Quellen (globale Summen):
  CO2_coal: 14740.6 Mt (41.6%)
  CO2_oil: 11000.3 Mt (31.0%)
  CO2_gas: 7469.9 Mt (21.1%)
  CO2_cement: 1554.3 Mt (4.4%)
  CO2_flaring: 407.1 Mt (1.1%)
  CO2_other: 303.2 Mt (0.9%)

ENERGIE-ANALYSE
Länder mit Energiedaten: 205
Top 10 Primärenergie-Verbraucher:


Unnamed: 0,Country,Primary_energy,Energy_per_capita
2477,China,38409.645,27104.984
12271,United States,26768.986,80595.484
5368,India,9088.107,6638.485
9498,Russia,8432.208,57892.727
5840,Japan,5316.556,42109.379
2182,Canada,4090.168,110439.844
4542,Germany,3787.682,45691.598
10796,South Korea,3503.051,67787.555
1710,Brazil,3475.968,16539.109
10088,Saudi Arabia,3101.677,88573.469



ZEITLICHE ENTWICKLUNG
Globale CO2-Entwicklung (aggregiert):
  1960: 9148.8 Mt
  2018: 35475.3 Mt
  Gesamtwachstum: 287.8%


In [4]:
# ECONOMIC INDICATORS DATA ANALYSIS

In [9]:
# Load the processed WDI economic indicators and report their basic extent
df_economic = pd.read_csv('../data/processed/economic_indicators.csv')

print(f"Economic Indicators Dataset analysiert: {df_economic.shape}")
econ_years = df_economic['Year']
print(f"Zeitraum: {econ_years.min()} - {econ_years.max()}")
print(f"Länder: {df_economic['Country'].nunique()}")

# Economic development: rows belonging to the most recent year in the data
econ_latest_year = econ_years.max()
econ_latest = df_economic.loc[econ_years == econ_latest_year]

print(f"\nWIRTSCHAFTSENTWICKLUNG ({econ_latest_year})")

# GDP analyses: list every column whose name contains 'GDP'
# (this substring match also picks up the "*_share_GDP" columns)
gdp_cols = [col for col in df_economic.columns if 'GDP' in col]
print(f"Verfügbare GDP-Indikatoren: {gdp_cols}")

if 'GDP_current_USD' in df_economic.columns:
    # Coverage check for the latest year. This sits inside the column guard so
    # that a missing column reaches the else-branch instead of raising KeyError.
    print(f"\nDebugging GDP-Daten für {econ_latest_year}:")
    print(f"Gesamt Länder: {len(econ_latest)}")
    print(f"Länder mit GDP-Daten: {econ_latest['GDP_current_USD'].notna().sum()}")

    # Number of non-null GDP values per year
    gdp_by_year = df_economic.groupby('Year')['GDP_current_USD'].count()
    print("GDP-Verfügbarkeit nach Jahren (letzte 5):")
    for year in sorted(gdp_by_year.index)[-5:]:
        print(f"  {year}: {gdp_by_year[year]} Länder")

    # Use the year with the most non-null GDP values for the ranking; the
    # latest year may have little or no GDP data.
    best_gdp_year = gdp_by_year.idxmax()
    best_year_data = df_economic[df_economic['Year'] == best_gdp_year]

    print(f"\nBestes Jahr für GDP-Analyse: {best_gdp_year} ({gdp_by_year[best_gdp_year]} Länder)")

    # Top 10 economies in the best-coverage year
    top_economies = best_year_data.nlargest(10, 'GDP_current_USD')[['Country', 'GDP_current_USD', 'GDP_per_capita_USD']]
    if len(top_economies) > 0:
        print(f"Top 10 Volkswirtschaften ({best_gdp_year}):")
        display(top_economies)
    else:
        print("Keine GDP-Daten verfügbar in WDI-Dataset")
else:
    print("GDP_current_USD Spalte nicht gefunden")

# Economic structure: sector shares of GDP, restricted to columns that exist
structure_cols = ['Industry_share_GDP', 'Manufacturing_share_GDP', 'Services_share_GDP']
available_structure = [name for name in structure_cols if name in df_economic.columns]

print("\nWIRTSCHAFTSSTRUKTUR")
if available_structure:
    # Summary statistics over all rows (all years and countries)
    structure_stats = df_economic.loc[:, available_structure].describe()
    print("Wirtschaftsstruktur-Statistik (% des BIP):")
    display(structure_stats)

# Urbanisation
if 'Urban_population_pct' in df_economic.columns:
    print("\nURBANISIERUNG")
    urban_stats = df_economic['Urban_population_pct'].describe()
    print("Urbanisierung - Deskriptive Statistik:")
    print(f"  Median: {urban_stats.loc['50%']:.1f}%")
    print(f"  Durchschnitt: {urban_stats.loc['mean']:.1f}%")
    print(f"  Spannweite: {urban_stats.loc['min']:.1f}% - {urban_stats.loc['max']:.1f}%")

    # Pick the year with the most non-null urbanisation values
    urban_by_year = df_economic.groupby('Year')['Urban_population_pct'].count()
    best_urban_year = urban_by_year.idxmax()
    print(f"Bestes Jahr für Urbanisierungs-Analyse: {best_urban_year} ({urban_by_year[best_urban_year]} Länder)")

    # Extremes for that year; despite its name, latest_urban holds the
    # best-coverage year's rows, not the latest year's.
    best_urban_data = df_economic[df_economic['Year'] == best_urban_year]
    latest_urban = best_urban_data.dropna(subset=['Urban_population_pct'])

    if latest_urban.empty:
        print("Keine Urbanisierungsdaten verfügbar")
    else:
        urban_columns = ['Country', 'Urban_population_pct']
        print(f"Extremwerte Urbanisierung ({best_urban_year}):")
        print("Höchste:")
        display(latest_urban.nlargest(5, 'Urban_population_pct')[urban_columns])
        print("Niedrigste:")
        display(latest_urban.nsmallest(5, 'Urban_population_pct')[urban_columns])

# Trade and globalisation
if 'Trade_share_GDP' in df_economic.columns:
    print("\nHANDEL UND GLOBALISIERUNG")
    trade_stats = df_economic['Trade_share_GDP'].describe()
    # Pull out the three summary figures that are reported below
    trade_median, trade_mean, trade_max = trade_stats.loc[['50%', 'mean', 'max']]
    print("Handelsintensität (% des BIP):")
    print(f"  Median: {trade_median:.1f}%")
    print(f"  Durchschnitt: {trade_mean:.1f}%")
    print(f"  Maximum: {trade_max:.1f}%")

Economic Indicators Dataset analysiert: (12695, 12)
Zeitraum: 1960 - 2018
Länder: 220

WIRTSCHAFTSENTWICKLUNG (2018)
Verfügbare GDP-Indikatoren: ['GDP_current_USD', 'GDP_growth_annual', 'GDP_per_capita_USD', 'Industry_share_GDP', 'Services_share_GDP', 'Trade_share_GDP']

Debugging GDP-Daten für 2018:
Gesamt Länder: 1
Länder mit GDP-Daten: 0
GDP-Verfügbarkeit nach Jahren (letzte 5):
  2014: 206 Länder
  2015: 205 Länder
  2016: 203 Länder
  2017: 197 Länder
  2018: 0 Länder

Bestes Jahr für GDP-Analyse: 2005 (208 Länder)
Top 10 Volkswirtschaften (2005):


Unnamed: 0,Country,GDP_current_USD,GDP_per_capita_USD
12132,United States,13093730000000.0,44307.920585
5840,Japan,4755411000000.0,37217.648728
4332,Germany,2861410000000.0,34696.620917
12074,United Kingdom,2520702000000.0,41732.64054
2424,China,2285966000000.0,1753.417829
4042,France,2196126000000.0,34760.187766
5724,Italy,1852662000000.0,31959.262952
2076,Canada,1169358000000.0,36189.588384
10624,Spain,1157276000000.0,26510.717453
6188,"Korea, Rep.",898137200000.0,18639.522205



WIRTSCHAFTSSTRUKTUR
Wirtschaftsstruktur-Statistik (% des BIP):


Unnamed: 0,Industry_share_GDP,Services_share_GDP
count,7502.0,6839.0
mean,26.705643,50.030741
std,12.787149,13.399808
min,2.073173,4.791639
25%,18.108612,40.779807
50%,24.882311,49.878316
75%,32.064024,59.123874
max,90.512958,99.971188



URBANISIERUNG
Urbanisierung - Deskriptive Statistik:
  Median: 49.1%
  Durchschnitt: 50.5%
  Spannweite: 2.1% - 100.0%
Bestes Jahr für Urbanisierungs-Analyse: 1990 (218 Länder)
Extremwerte Urbanisierung (1990):
Höchste:


Unnamed: 0,Country,Urban_population_pct
1248,Bermuda,100.0
2119,Cayman Islands,100.0
4433,Gibraltar,100.0
7681,Monaco,100.0
8087,Nauru,100.0


Niedrigste:


Unnamed: 0,Country,Urban_population_pct
9595,Rwanda,5.416
1829,Burundi,6.271
8145,Nepal,8.854
11885,Uganda,11.076
7043,Malawi,11.56



HANDEL UND GLOBALISIERUNG
Handelsintensität (% des BIP):
  Median: 68.1%
  Durchschnitt: 78.0%
  Maximum: 860.8%
