In [2]:
!pip install influxdb_client

Collecting influxdb_client
  Downloading influxdb_client-1.49.0-py3-none-any.whl.metadata (65 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/65.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.5/65.5 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting reactivex>=4.0.4 (from influxdb_client)
  Downloading reactivex-4.0.4-py3-none-any.whl.metadata (5.5 kB)
Downloading influxdb_client-1.49.0-py3-none-any.whl (746 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m746.3/746.3 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading reactivex-4.0.4-py3-none-any.whl (217 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m217.8/217.8 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: reactivex, influxdb_client
Successfully installed influxdb_client-1.49.0 reactivex-4.0.4


In [2]:
#!pip install ydata_profiling # Optional for user. It generates HTML based report of the data.

Collecting ydata_profiling
  Downloading ydata_profiling-4.16.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting visions<0.8.2,>=0.7.5 (from visions[type_image_path]<0.8.2,>=0.7.5->ydata_profiling)
  Downloading visions-0.8.1-py3-none-any.whl.metadata (11 kB)
Collecting htmlmin==0.1.12 (from ydata_profiling)
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting phik<0.13,>=0.11.1 (from ydata_profiling)
  Downloading phik-0.12.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting multimethod<2,>=1.4 (from ydata_profiling)
  Downloading multimethod-1.12-py3-none-any.whl.metadata (9.6 kB)
Collecting imagehash==4.3.1 (from ydata_profiling)
  Downloading ImageHash-4.3.1-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting dacite>=1.8 (from ydata_profiling)
  Downloading dacite-1.9.2-py3-none-any.whl.metadata (17 kB)
Collecting puremagic (from visions<0.8.2,>=0.7.5->visions[type_image_path]<0.8.2,>=0.7.5->

In [3]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from influxdb_client import InfluxDBClient
import warnings
#from ydata_profiling import ProfileReport # Keep if user wants to generate reports
warnings.filterwarnings('ignore')

In [4]:
# Define InfluxDB connection parameters
url = "http://kammeyer.uk:8086"
token = "CPSJ6xw1U72IcJjfLgzaukP24o1CL3grIQuvaw-Zq1MK9htUYNPwFUdKEalwl2-xMHFrVKOgG8tRFLgIkoneBw=="
org = "591d9e9c3fc5e3ee"

In [5]:
# --- Establish InfluxDB Connection ---
try:
    client = InfluxDBClient(url=url, token=token, org=org)
    print("✅ Successfully connected to InfluxDB")
except Exception as e:
    print(f"❌ Error Connecting to InfluxDB: {e}")
    # It's better to raise an error or exit here if connection is critical
    raise

query_api = client.query_api()

✅ Successfully connected to InfluxDB


In [6]:
# --- Helper Function: Query and Pivot Data ---
def fetch_pivoted_data(bucket, start="-6mo"):
    """
    Fetches data from InfluxDB and pivots it into a DataFrame.
    Returns a list of DataFrames as query_data_frame can return multiple tables.
    """
    query = f'''
    from(bucket:"{bucket}")
    |> range(start: {start})
    |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
    '''
    try:
        df = query_api.query_data_frame(query)
        if isinstance(df, list) and len(df) > 0:
            # Ensure _time is datetime and sort for all returned DFs
            for d in df:
                if '_time' in d.columns:
                    d['_time'] = pd.to_datetime(d['_time']).dt.floor('min') # Keep flooring for initial precision
                    d.sort_values('_time', inplace=True)
            return df
        elif not isinstance(df, list) and '_time' in df.columns:
            # Handle case where a single DataFrame is returned directly
            df['_time'] = pd.to_datetime(df['_time']).dt.floor('min') # Keep flooring for initial precision
            df.sort_values('_time', inplace=True)
            return [df]
        else:
            print(f"❌ No data or unexpected format from bucket '{bucket}'.")
            return None
    except Exception as e:
        print(f"❌ Error querying bucket '{bucket}': {e}")
        return None

In [7]:
# --- Load Data ---
# Fetch CO2 data. Based on Carbon_Intensity_Forecast_Success.ipynb, 'carbonIntensity' and
# 'fossilFuelPercentage' are expected in the first DataFrame returned from the 'co2' bucket.
co2_dfs = fetch_pivoted_data("co2")
if not co2_dfs or len(co2_dfs) == 0:
    print("❌ CO₂ data incomplete or not available.")
    raise ValueError("CO₂ data is essential and not available.")
co2_data_df = co2_dfs[0] # Assuming this DataFrame contains 'carbonIntensity' and 'fossilFuelPercentage'

energy_df_list = fetch_pivoted_data("energy")
price_df_list = fetch_pivoted_data("price")

if not energy_df_list or energy_df_list[0].empty:
    print("⚠️ Energy data not available. Exiting.")
    raise ValueError("Energy data is essential and not available.")
if not price_df_list or price_df_list[0].empty:
    print("⚠️ Price data not available. Exiting.")
    raise ValueError("Price data is essential and not available.")

energy_df = energy_df_list[0]
price_df = price_df_list[0]

In [8]:
# --- Preprocessing Function ---
def preprocess_energy_data_for_foundation_model(energy_df, price_df, co2_data_df):
    """
    Preprocess energy data for time series foundation models like LagLlama.

    Args:
        energy_df: Pandas DataFrame with energy production data.
        price_df: Pandas DataFrame with price data.
        co2_data_df: Pandas DataFrame with CO2 metrics (e.g., 'carbonIntensity', 'fossilFuelPercentage').

    Returns:
        dict: Contains processed datasets and metadata.
    """

    print("Step 1: Data Cleaning and Preparation")
    # Merge energy and price data
    merged_energy_price = pd.merge_asof(
        energy_df, price_df, on='_time', direction='nearest', tolerance=pd.Timedelta('2min')
    ).dropna(subset=['_time'])
    merged_energy_price.ffill(inplace=True) # Use ffill() instead of fillna(method='ffill')

    required_co2_cols = []
    if 'carbonIntensity' in co2_data_df.columns:
        required_co2_cols.append('carbonIntensity')
    else:
        raise ValueError("Column 'carbonIntensity' not found in CO2 data. This is a required target.")

    if 'fossilFuelPercentage' in co2_data_df.columns:
        required_co2_cols.append('fossilFuelPercentage')
    else:
        print("❗ Warning: 'fossilFuelPercentage' not found in CO2 data. Renewable percentage will be calculated, not derived.")

    # Merge the combined energy_price data with the CO2 data
    # Use 'inner' merge to ensure only matching timestamps are kept
    # df_raw_merged will now contain all columns from merged_energy_price + required_co2_cols
    df_raw_merged = pd.merge(merged_energy_price, co2_data_df[['_time'] + required_co2_cols],
                            on='_time', how='inner').set_index('_time')
    df_raw_merged.index.name = None


    # Define energy sources and ensure all expected columns are present
    energy_sources = [
        'Biomasse', 'Braunkohle', 'Erdgas', 'Photovoltaik', 'Pumpspeicher',
        'Sonstige Erneuerbare', 'Sonstige Konventionelle', 'Steinkohle',
        'Wasserkraft', 'Wind Offshore', 'Wind Onshore'
    ]
    # Ensure all energy_sources are numerical and available in df_raw_merged
    energy_sources = [col for col in energy_sources if col in df_raw_merged.columns]

    # Define the primary target columns as per meeting remarks (price and carbonIntensity)
    primary_target_columns = ['price.day_ahead_auction.price', 'carbonIntensity']
    # Ensure primary_target_columns are available in df_raw_merged
    primary_target_columns = [col for col in primary_target_columns if col in df_raw_merged.columns]


    # Identify all features that will be used by LagLlama (targets + dynamic real features)
    # This list should define the *final set of columns* we want to have *before* resampling.
    all_numerical_features_to_keep = list(set(energy_sources + primary_target_columns + required_co2_cols)) # required_co2_cols includes fossilFuelPercentage if present


    # Filter df_raw_merged to only include these intended numerical features
    # Ensure numerical conversion for these features before proceeding, especially for price
    df_raw_merged_numeric = df_raw_merged.copy() # Create a copy to avoid SettingWithCopyWarning
    for col in all_numerical_features_to_keep:
        if col in df_raw_merged_numeric.columns:
            # Convert to numeric, coercing errors (non-numeric values become NaN)
            df_raw_merged_numeric[col] = pd.to_numeric(df_raw_merged_numeric[col], errors='coerce')
        else:
            print(f"❗ Warning: Expected numerical feature '{col}' not found in df_raw_merged. It will be excluded from the model input.")

    # Drop columns that are not numerical after conversion, or are no longer in our `all_numerical_features_to_keep` list
    # Use select_dtypes again after potential conversions to ensure only numbers are kept.
    df_raw_merged_numeric = df_raw_merged_numeric[
        [col for col in all_numerical_features_to_keep if col in df_raw_merged_numeric.columns]
    ].select_dtypes(include=np.number)


    # --- Resample to hourly data as per meeting remarks (In [7]) ---
    # Apply resampling to the DataFrame containing only numerical columns
    df_processed = df_raw_merged_numeric.resample('H').mean()
    print("Data successfully resampled to hourly mean data.")

    print(f"Data shape after cleaning, merging and hourly resampling: {df_processed.shape}")
    print(f"Date range: {df_processed.index.min()} to {df_processed.index.max()}")
    print(f"Columns after selection for model and resampling: {list(df_processed.columns)}")

    # 2. Handle Missing Values (post-resampling)
    print("\nStep 2: Handling Missing Values (post-resampling)")
    missing_before_post_resample = df_processed.isnull().sum().sum()

    for col in df_processed.columns:
        df_processed[col] = df_processed[col].ffill().bfill()

    missing_after_post_resample = df_processed.isnull().sum().sum()
    print(f"Missing values - Before: {missing_before_post_resample}, After: {missing_after_post_resample}")

    if df_processed.isnull().sum().sum() > 0:
        print("❗ Warning: Some NaN values still exist after ffill/bfill. Filling with 0 as a last resort.")
        df_processed.fillna(0, inplace=True)

    # 3. Create Time-based Features
    print("\nStep 3: Creating Time-based Features")
    df_processed['year'] = df_processed.index.year
    df_processed['month'] = df_processed.index.month
    df_processed['day'] = df_processed.index.day
    df_processed['hour'] = df_processed.index.hour
    df_processed['dayofweek'] = df_processed.index.dayofweek
    df_processed['dayofyear'] = df_processed.index.dayofyear
    df_processed['quarter'] = df_processed.index.quarter

    # Cyclical encoding for time features (important for ML models)
    df_processed['month_sin'] = np.sin(2 * np.pi * df_processed['month'] / 12)
    df_processed['month_cos'] = np.cos(2 * np.pi * df_processed['month'] / 12)
    df_processed['hour_sin'] = np.sin(2 * np.pi * df_processed['hour'] / 24)
    df_processed['hour_cos'] = np.cos(2 * np.pi * df_processed['hour'] / 24)
    df_processed['dayofweek_sin'] = np.sin(2 * np.pi * df_processed['dayofweek'] / 7)
    df_processed['dayofweek_cos'] = np.cos(2 * np.pi * df_processed['dayofweek'] / 7)


    # 4. Handle Renewable Percentage and CO2 Intensity as per meeting remarks (In [11])
    print("\nStep 4: Handling Renewable Percentage and CO2 Intensity")

    # If 'fossilFuelPercentage' is available, derive 'renewable_percentage' directly
    if 'fossilFuelPercentage' in df_processed.columns: # Check if fossilFuelPercentage is in the already processed df
        df_processed['renewable_percentage'] = 100 - df_processed['fossilFuelPercentage']
        print("Using 'fossilFuelPercentage' to derive 'renewable_percentage'.")
    else:
        # Calculate if 'fossilFuelPercentage' not available (original method, less preferred now)
        # Total renewable energy (needed for calculation if not derived)
        renewable_sources_calc = [
            'Biomasse', 'Photovoltaik', 'Sonstige Erneuerbare',
            'Wasserkraft', 'Wind Offshore', 'Wind Onshore'
        ]
        renewable_sources_calc = [col for col in renewable_sources_calc if col in df_processed.columns]
        df_processed['total_renewable'] = df_processed[renewable_sources_calc].sum(axis=1)

        conventional_sources = ['Braunkohle', 'Erdgas', 'Sonstige Konventionelle', 'Steinkohle']
        conventional_sources = [col for col in conventional_sources if col in df_processed.columns]
        df_processed['total_conventional'] = df_processed[conventional_sources].sum(axis=1)

        df_processed['total_energy'] = df_processed['total_renewable'] + df_processed['total_conventional']
        if 'Pumpspeicher' in df_processed.columns:
            df_processed['total_energy'] += df_processed['Pumpspeicher'] # Add Pumpspeicher if available

        df_processed['renewable_percentage'] = (df_processed['total_renewable'] / df_processed['total_energy']) * 100
        df_processed['renewable_percentage'].fillna(0, inplace=True)
        print("Calculating 'renewable_percentage' as 'fossilFuelPercentage' not found.")

    # 'co2_intensity' is explicitly stated to be equivalent to 'carbonIntensity' (In [11] remarks)
    if 'carbonIntensity' in df_processed.columns:
        df_processed['co2_intensity'] = df_processed['carbonIntensity']
        print("Using 'carbonIntensity' directly for 'co2_intensity' as per remarks.")
    else:
        # Fallback calculation if carbonIntensity is missing (unlikely if required as target)
        print("❗ Error: 'carbonIntensity' not in processed DataFrame for co2_intensity calculation. This should not happen if it's a required target.")
        # If 'carbonIntensity' is truly missing and not in df_processed, then 'co2_intensity' won't be created.
        # This branch indicates a deeper data availability issue from the InfluxDB fetch or earlier merges.


    # 5. Create Different Dataset Formats for Foundation Models
    print("\nStep 5: Creating Foundation Model Ready Formats")

    # model_input_features now also includes 'renewable_percentage' and 'co2_intensity' if they were created
    # dynamically add them to model_input_features
    # Ensure this list is derived *after* the calculation steps in step 4.
    final_model_input_features = [f for f in df_processed.select_dtypes(include=np.number).columns if f not in ['year', 'month', 'day', 'hour',
                                                                        'dayofweek', 'dayofyear', 'quarter',
                                                                        'month_sin', 'month_cos', 'hour_sin', 'hour_cos',
                                                                        'dayofweek_sin', 'dayofweek_cos', 'fossilFuelPercentage',
                                                                        'total_renewable', 'total_conventional', 'total_energy' # Exclude intermediate sums
                                                                        ]]


    wide_format = df_processed[final_model_input_features].copy()

    # 6. Prepare Data for Specific Foundation Models (LagLlama Compatible Format)
    print("\nStep 6: Preparing LagLlama Compatible Format")

    # Frequency is now explicitly hourly ('H') as per meeting remarks (In [7])
    most_common_freq = 'H'

    # Create regular time index (already done by resample('H'))
    # Reindex to regular frequency (interpolate missing values linearly)
    lagllama_format = df_processed[final_model_input_features].copy() # This is the main DF for train/val/test splits

    # 7. Split data for training/validation/testing
    print("\nStep 7: Creating Train/Val/Test Splits")

    total_len = len(lagllama_format)
    # Keep 70% for train, 15% for validation, 15% for test
    train_size = int(0.70 * total_len)
    val_size = int(0.15 * total_len)

    train_data = lagllama_format.iloc[:train_size]
    val_data = lagllama_format.iloc[train_size : train_size + val_size]
    test_data = lagllama_format.iloc[train_size + val_size :]

    print(f"Train size: {len(train_data)}, Val size: {len(val_data)}, Test size: {len(test_data)}")

    # 8. Create metadata for foundation model
    print("\nStep 8: Creating Metadata for Foundation Model")
    metadata = {
        'num_series': len(lagllama_format.columns), # Use columns from the final lagllama_format
        'series_names': lagllama_format.columns.tolist(), # Ensure this is exactly what's in the CSV
        'frequency': str(most_common_freq),
        'start_date': str(df_processed.index.min()),
        'end_date': str(df_processed.index.max()),
        'train_start': str(train_data.index[0]),
        'train_end': str(train_data.index[-1]),
        'val_start': str(val_data.index[0]),
        'val_end': str(val_data.index[-1]),
        'test_start': str(test_data.index[0]),
        'test_end': str(test_data.index[-1]),
        # Ensure these lists only contain columns actually present in the final lagllama_format
        'renewable_sources': [col for col in energy_sources if col in lagllama_format.columns],
        'conventional_sources': [col for col in ['Braunkohle', 'Erdgas', 'Sonstige Konventionelle', 'Steinkohle'] if col in lagllama_format.columns],
        'co2_column_name': 'carbonIntensity', # Explicitly use 'carbonIntensity'
        'target_variables': [col for col in primary_target_columns if col in lagllama_format.columns] # Ensure targets are in final data
    }
    # Add prediction_length and context_length as per original LagLlama demos and user's processing notebook
    metadata['prediction_length'] = 24
    metadata['context_length'] = 504


    return {
        'processed_df': df_processed,
        'wide_format': wide_format,
        'lagllama_format': lagllama_format, # This is the main DF for train/val/test splits
        'train_data': train_data,
        'val_data': val_data,
        'test_data': test_data,
        'metadata': metadata
    }

In [9]:
# --- Execute Preprocessing ---
processed_data = preprocess_energy_data_for_foundation_model(energy_df, price_df, co2_data_df)

Step 1: Data Cleaning and Preparation
Data successfully resampled to hourly mean data.
Data shape after cleaning, merging and hourly resampling: (4382, 13)
Date range: 2024-12-10 15:00:00+00:00 to 2025-06-11 04:00:00+00:00
Columns after selection for model and resampling: ['Erdgas', 'Sonstige Konventionelle', 'Biomasse', 'Braunkohle', 'Pumpspeicher', 'Steinkohle', 'Sonstige Erneuerbare', 'Photovoltaik', 'Wind Offshore', 'Wind Onshore', 'carbonIntensity', 'fossilFuelPercentage', 'Wasserkraft']

Step 2: Handling Missing Values (post-resampling)
Missing values - Before: 0, After: 0

Step 3: Creating Time-based Features

Step 4: Handling Renewable Percentage and CO2 Intensity
Using 'fossilFuelPercentage' to derive 'renewable_percentage'.
Using 'carbonIntensity' directly for 'co2_intensity' as per remarks.

Step 5: Creating Foundation Model Ready Formats

Step 6: Preparing LagLlama Compatible Format

Step 7: Creating Train/Val/Test Splits
Train size: 3067, Val size: 657, Test size: 658

Ste

In [10]:
# --- Save Processed Data to CSVs ---
def save_processed_data(processed_data, output_dir='./processed_energy_data/'):
    """Save all processed datasets to CSV files"""
    import os

    os.makedirs(output_dir, exist_ok=True)

    # Save main datasets
    processed_data['processed_df'].to_csv(f'{output_dir}processed_energy_data.csv') # Index is now timestamp
    processed_data['wide_format'].to_csv(f'{output_dir}wide_format.csv')
    processed_data['lagllama_format'].to_csv(f'{output_dir}lagllama_format.csv')

    # Save train/val/test splits
    processed_data['train_data'].to_csv(f'{output_dir}train_data.csv')
    processed_data['val_data'].to_csv(f'{output_dir}val_data.csv')
    processed_data['test_data'].to_csv(f'{output_dir}test_data.csv')

    # Save metadata
    import json
    with open(f'{output_dir}metadata.json', 'w') as f:
        json.dump(processed_data['metadata'], f, indent=2)

    print(f"\nAll processed data saved to {output_dir}")
    print("Generated files:")
    for file in sorted(os.listdir(output_dir)):
        print(f"  - {file}")

save_processed_data(processed_data)

# --- Optional: Generate Profiling Report ---
# df_for_profiling = processed_data['lagllama_format']
# profile = ProfileReport(df_for_profiling, title="Processed Data Profiling Report")
# profile.to_file("processed_data_profiling_report.html")
# print("\nProfiling report saved to processed_data_profiling_report.html")

# --- Quick Data Exploration Function ---
def explore_processed_data(processed_data):
    """Quick exploration of the processed data"""

    print("\n=== PROCESSED DATA SUMMARY ===")
    print(f"Original data shape (from InfluxDB fetch, then resampled hourly): {processed_data['processed_df'].shape}")
    print(f"LagLlama format shape: {processed_data['lagllama_format'].shape}")
    print(f"Time range: {processed_data['metadata']['start_date']} to {processed_data['metadata']['end_date']}")
    print(f"Frequency: {processed_data['metadata']['frequency']}")

    print("\n=== LAGLLAMA FORMAT DATA STATISTICS ===")
    stats_df = processed_data['lagllama_format']
    print(stats_df.describe())

    print("\n=== MISSING VALUES CHECK (LagLlama Format) ===")
    missing_vals = processed_data['lagllama_format'].isnull().sum()
    print(missing_vals[missing_vals > 0])

    co2_col_name = processed_data['metadata']['co2_column_name']
    if co2_col_name in stats_df.columns:
        print(f"\n=== CORRELATION WITH {co2_col_name.upper()} ===")
        co2_corr = stats_df.corrwith(stats_df[co2_col_name])
        print(co2_corr.sort_values(key=abs, ascending=False))


All processed data saved to ./processed_energy_data/
Generated files:
  - lagllama_format.csv
  - metadata.json
  - processed_energy_data.csv
  - test_data.csv
  - train_data.csv
  - val_data.csv
  - wide_format.csv


In [11]:
save_processed_data(processed_data)


All processed data saved to ./processed_energy_data/
Generated files:
  - lagllama_format.csv
  - metadata.json
  - processed_energy_data.csv
  - test_data.csv
  - train_data.csv
  - val_data.csv
  - wide_format.csv


In [12]:
# --- Quick Data Exploration Function ---
def explore_processed_data(processed_data):
    """Quick exploration of the processed data"""

    print("\n=== PROCESSED DATA SUMMARY ===")
    print(f"Original data shape (from InfluxDB fetch, then resampled hourly): {processed_data['processed_df'].shape}")
    print(f"LagLlama format shape: {processed_data['lagllama_format'].shape}")
    print(f"Time range: {processed_data['metadata']['start_date']} to {processed_data['metadata']['end_date']}")
    print(f"Frequency: {processed_data['metadata']['frequency']}")

    print("\n=== LAGLLAMA FORMAT DATA STATISTICS ===")
    stats_df = processed_data['lagllama_format']
    print(stats_df.describe())

    print("\n=== MISSING VALUES CHECK (LagLlama Format) ===")
    missing_vals = processed_data['lagllama_format'].isnull().sum()
    print(missing_vals[missing_vals > 0])

    co2_col_name = processed_data['metadata']['co2_column_name']
    if co2_col_name in stats_df.columns:
        print(f"\n=== CORRELATION WITH {co2_col_name.upper()} ===")
        co2_corr = stats_df.corrwith(stats_df[co2_col_name])
        print(co2_corr.sort_values(key=abs, ascending=False))

In [17]:
explore_processed_data(processed_data)


=== PROCESSED DATA SUMMARY ===
Original data shape (from InfluxDB fetch, then resampled hourly): (4382, 28)
LagLlama format shape: (4382, 14)
Time range: 2024-12-10 05:00:00+00:00 to 2025-06-10 18:00:00+00:00
Frequency: H

=== LAGLLAMA FORMAT DATA STATISTICS ===
       Pumpspeicher  Wind Onshore       Erdgas     Biomasse   Braunkohle  \
count   4382.000000   4382.000000  4382.000000  4382.000000  4382.000000   
mean     315.943348   3059.025901  1911.341397  1050.053400  2047.987335   
std      424.090081   2471.525370  1096.469193    77.714545   770.396582   
min        0.000000     29.250000   384.750000   889.000000   486.000000   
25%       20.750000   1131.187500  1029.500000   988.500000  1373.312500   
50%       91.875000   2305.125000  1692.500000  1027.250000  2197.250000   
75%      472.750000   4433.375000  2573.750000  1101.937500  2712.250000   
max     1713.000000  11325.500000  5040.250000  1284.000000  3294.750000   

       Wind Offshore  carbonIntensity  Photovoltaik