In [60]:
# Enable IPython's autoreload extension (mode 2) so that edits to imported
# project modules are picked up live, without restarting the kernel.
# Re-running this cell in a live kernel only prints an "already loaded" notice.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [61]:
import sys
from pathlib import Path
import pandas as pd
import os
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler

In [62]:
# Make the `src` package importable: the parent of the current working
# directory is treated as the project root and appended to the import path
# (only once, so re-running the cell does not add duplicates).
project_root = str(Path.cwd().parent)
if project_root not in sys.path:
    sys.path.append(project_root)

# Project-local feature-engineering helpers. This import has to come after the
# path tweak above, which is why it is not part of the main import cell.
from src.utils.feature_engineering import (
    add_net_export_import_grid_feature,
    add_time_features,
    create_cyclical_features,
    add_time_of_day_features,
    create_lag_and_rolling_features_for_columns,
    add_weather_severity_feature,
    add_weather_intensity_feature,
)

# Feature Engineering

#### Read Merged Hourly Data

In [63]:
# Load the merged hourly dataset; `timestamp` is parsed as datetimes and
# becomes the DataFrame index.
hourly_df = pd.read_csv(
    '../data/processed/merged_hourly_data.csv',
    index_col='timestamp',
    parse_dates=['timestamp'],
)
hourly_df.head()


Unnamed: 0_level_0,Studer Output Frequency - L1,Studer Output Frequency - L2,Studer Output Frequency - L3,Grid Input Frequency - L1,Grid Input Frequency - L2,Grid Input Frequency - L3,Grid Input Voltage - L2,Grid Input Voltage - L3,Studer Grid Status - L1,Studer Grid Status - L2,...,wind_deg,clouds_all,weather_id,weather_main,weather_description,weather_icon,Energy Produced (Wh),Energy Consumed (Wh),Exported to Grid (Wh),Imported from Grid (Wh)
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-06 00:00:00,50.036,50.035,50.0355,50.0355,50.035,50.0355,250.25,250.35,1.0,1.0,...,25,100,500,Rain,light rain,10n,23.0,93.25,0.0,70.25
2023-09-06 01:00:00,50.0135,50.0135,50.0135,50.014,50.0135,50.0125,251.916667,252.433333,1.0,1.0,...,10,99,500,Rain,light rain,10d,23.0,91.0,0.0,68.0
2023-09-06 02:00:00,49.9675,49.967,49.967,49.9675,49.967,49.967,252.383333,253.3,1.0,1.0,...,21,96,500,Rain,light rain,10d,23.0,96.0,0.0,73.0
2023-09-06 03:00:00,49.9975,49.9975,49.9975,49.9975,49.9975,49.997,254.65,253.283333,1.0,1.0,...,19,86,500,Rain,light rain,10d,23.0,90.5,0.0,67.5
2023-09-06 04:00:00,50.0385,50.0385,50.037,50.0385,50.0385,50.037,256.266667,253.15,1.0,1.0,...,42,50,500,Rain,light rain,10d,23.0,96.25,0.0,73.25


In [64]:
# Run the project helper over the frame; the preview below shows the
# `net_export_import_grid` column it appends.
hourly_df = hourly_df.pipe(add_net_export_import_grid_feature)
hourly_df.head()

Unnamed: 0_level_0,Studer Output Frequency - L1,Studer Output Frequency - L2,Studer Output Frequency - L3,Grid Input Frequency - L1,Grid Input Frequency - L2,Grid Input Frequency - L3,Grid Input Voltage - L2,Grid Input Voltage - L3,Studer Grid Status - L1,Studer Grid Status - L2,...,clouds_all,weather_id,weather_main,weather_description,weather_icon,Energy Produced (Wh),Energy Consumed (Wh),Exported to Grid (Wh),Imported from Grid (Wh),net_export_import_grid
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-06 00:00:00,50.036,50.035,50.0355,50.0355,50.035,50.0355,250.25,250.35,1.0,1.0,...,100,500,Rain,light rain,10n,23.0,93.25,0.0,70.25,0.332667
2023-09-06 01:00:00,50.0135,50.0135,50.0135,50.014,50.0135,50.0125,251.916667,252.433333,1.0,1.0,...,99,500,Rain,light rain,10d,23.0,91.0,0.0,68.0,0.362333
2023-09-06 02:00:00,49.9675,49.967,49.967,49.9675,49.967,49.967,252.383333,253.3,1.0,1.0,...,96,500,Rain,light rain,10d,23.0,96.0,0.0,73.0,0.537
2023-09-06 03:00:00,49.9975,49.9975,49.9975,49.9975,49.9975,49.997,254.65,253.283333,1.0,1.0,...,86,500,Rain,light rain,10d,23.0,90.5,0.0,67.5,0.437667
2023-09-06 04:00:00,50.0385,50.0385,50.037,50.0385,50.0385,50.037,256.266667,253.15,1.0,1.0,...,50,500,Rain,light rain,10d,23.0,96.25,0.0,73.25,0.327167


#### Time-based Feature Extraction

In [65]:
# The helper is handed `timestamp` as a regular column, so the index is reset
# before the call and restored afterwards. The preview shows the added
# `hour`, `day_of_week`, `is_weekend` and `month` columns.
hourly_df = (
    add_time_features(hourly_df.reset_index(), 'timestamp')
    .set_index('timestamp')
)

hourly_df.head()

Unnamed: 0_level_0,Studer Output Frequency - L1,Studer Output Frequency - L2,Studer Output Frequency - L3,Grid Input Frequency - L1,Grid Input Frequency - L2,Grid Input Frequency - L3,Grid Input Voltage - L2,Grid Input Voltage - L3,Studer Grid Status - L1,Studer Grid Status - L2,...,weather_icon,Energy Produced (Wh),Energy Consumed (Wh),Exported to Grid (Wh),Imported from Grid (Wh),net_export_import_grid,hour,day_of_week,is_weekend,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-06 00:00:00,50.036,50.035,50.0355,50.0355,50.035,50.0355,250.25,250.35,1.0,1.0,...,10n,23.0,93.25,0.0,70.25,0.332667,0,2,0,9
2023-09-06 01:00:00,50.0135,50.0135,50.0135,50.014,50.0135,50.0125,251.916667,252.433333,1.0,1.0,...,10d,23.0,91.0,0.0,68.0,0.362333,1,2,0,9
2023-09-06 02:00:00,49.9675,49.967,49.967,49.9675,49.967,49.967,252.383333,253.3,1.0,1.0,...,10d,23.0,96.0,0.0,73.0,0.537,2,2,0,9
2023-09-06 03:00:00,49.9975,49.9975,49.9975,49.9975,49.9975,49.997,254.65,253.283333,1.0,1.0,...,10d,23.0,90.5,0.0,67.5,0.437667,3,2,0,9
2023-09-06 04:00:00,50.0385,50.0385,50.037,50.0385,50.0385,50.037,256.266667,253.15,1.0,1.0,...,10d,23.0,96.25,0.0,73.25,0.327167,4,2,0,9


### Cyclical Encoding of Time Features

In [66]:
# Encode each periodic time column as a sin/cos pair (see the `*_sin` / `*_cos`
# columns in the preview). The second value is the period passed to the helper:
# 24 for hour, 7 for day of week, 12 for month.
for column, period in (('hour', 24), ('day_of_week', 7), ('month', 12)):
    hourly_df = create_cyclical_features(hourly_df, column, period)

hourly_df.head()

Unnamed: 0_level_0,Studer Output Frequency - L1,Studer Output Frequency - L2,Studer Output Frequency - L3,Grid Input Frequency - L1,Grid Input Frequency - L2,Grid Input Frequency - L3,Grid Input Voltage - L2,Grid Input Voltage - L3,Studer Grid Status - L1,Studer Grid Status - L2,...,hour,day_of_week,is_weekend,month,hour_sin,hour_cos,day_of_week_sin,day_of_week_cos,month_sin,month_cos
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-06 00:00:00,50.036,50.035,50.0355,50.0355,50.035,50.0355,250.25,250.35,1.0,1.0,...,0,2,0,9,0.0,1.0,0.974928,-0.222521,-1.0,-1.83697e-16
2023-09-06 01:00:00,50.0135,50.0135,50.0135,50.014,50.0135,50.0125,251.916667,252.433333,1.0,1.0,...,1,2,0,9,0.258819,0.965926,0.974928,-0.222521,-1.0,-1.83697e-16
2023-09-06 02:00:00,49.9675,49.967,49.967,49.9675,49.967,49.967,252.383333,253.3,1.0,1.0,...,2,2,0,9,0.5,0.866025,0.974928,-0.222521,-1.0,-1.83697e-16
2023-09-06 03:00:00,49.9975,49.9975,49.9975,49.9975,49.9975,49.997,254.65,253.283333,1.0,1.0,...,3,2,0,9,0.707107,0.707107,0.974928,-0.222521,-1.0,-1.83697e-16
2023-09-06 04:00:00,50.0385,50.0385,50.037,50.0385,50.0385,50.037,256.266667,253.15,1.0,1.0,...,4,2,0,9,0.866025,0.5,0.974928,-0.222521,-1.0,-1.83697e-16


### Time-of-day Categories

In [67]:
# Derive the `time_of_day_*` indicator columns (night / morning / afternoon /
# evening, as shown in the preview) from the `hour` column.
hourly_df = hourly_df.pipe(add_time_of_day_features, 'hour')

hourly_df.head()

Unnamed: 0_level_0,Studer Output Frequency - L1,Studer Output Frequency - L2,Studer Output Frequency - L3,Grid Input Frequency - L1,Grid Input Frequency - L2,Grid Input Frequency - L3,Grid Input Voltage - L2,Grid Input Voltage - L3,Studer Grid Status - L1,Studer Grid Status - L2,...,hour_sin,hour_cos,day_of_week_sin,day_of_week_cos,month_sin,month_cos,time_of_day_night,time_of_day_morning,time_of_day_afternoon,time_of_day_evening
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-06 00:00:00,50.036,50.035,50.0355,50.0355,50.035,50.0355,250.25,250.35,1.0,1.0,...,0.0,1.0,0.974928,-0.222521,-1.0,-1.83697e-16,1,0,0,0
2023-09-06 01:00:00,50.0135,50.0135,50.0135,50.014,50.0135,50.0125,251.916667,252.433333,1.0,1.0,...,0.258819,0.965926,0.974928,-0.222521,-1.0,-1.83697e-16,1,0,0,0
2023-09-06 02:00:00,49.9675,49.967,49.967,49.9675,49.967,49.967,252.383333,253.3,1.0,1.0,...,0.5,0.866025,0.974928,-0.222521,-1.0,-1.83697e-16,1,0,0,0
2023-09-06 03:00:00,49.9975,49.9975,49.9975,49.9975,49.9975,49.997,254.65,253.283333,1.0,1.0,...,0.707107,0.707107,0.974928,-0.222521,-1.0,-1.83697e-16,1,0,0,0
2023-09-06 04:00:00,50.0385,50.0385,50.037,50.0385,50.0385,50.037,256.266667,253.15,1.0,1.0,...,0.866025,0.5,0.974928,-0.222521,-1.0,-1.83697e-16,1,0,0,0


### Lag & Rolling Features



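Create lag and rolling-window (mean and standard deviation) features for the hourly DataFrame. The earliest rows contain NaN values until enough history is available.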

In [68]:
# Lag offsets handed to the lag/rolling helper: a few short-term offsets plus
# 24, 48 and 72 (one, two and three days if offsets are counted in hourly
# rows — TODO confirm against the helper).
hourly_lags = [1, 2, 3, 6, 12] + [24 * days for days in (1, 2, 3)]

# Rolling-window sizes used for the rolling mean / std features.
hourly_windows = [3, 6, 24]

# Source columns that receive lag and rolling features.
features = [
    # Battery
    'Battery State of Charge',
    'Battery Internal Temperature',
    # Grid net export/import, one column per L1-L3 channel
    'Studer Grid Net Export/Import - L1-1',
    'Studer Grid Net Export/Import - L2-2',
    'Studer Grid Net Export/Import - L3-3',
    # Weather
    'clouds_all',
    'temp',
    'humidity',
    # Hourly energy totals
    'Energy Produced (Wh)',
    'Energy Consumed (Wh)',
    'Exported to Grid (Wh)',
    'Imported from Grid (Wh)',
]

In [69]:
# Add `<column>_lag<k>`, `<column>_roll_mean_<w>` and `<column>_roll_std_<w>`
# columns for every configured feature. The earliest rows are NaN until enough
# history exists (visible in the preview).
hourly_df = create_lag_and_rolling_features_for_columns(
    hourly_df,
    features,
    hourly_lags,
    hourly_windows,
)

hourly_df.head()

Unnamed: 0_level_0,Studer Output Frequency - L1,Studer Output Frequency - L2,Studer Output Frequency - L3,Grid Input Frequency - L1,Grid Input Frequency - L2,Grid Input Frequency - L3,Grid Input Voltage - L2,Grid Input Voltage - L3,Studer Grid Status - L1,Studer Grid Status - L2,...,Imported from Grid (Wh)_lag12,Imported from Grid (Wh)_lag24,Imported from Grid (Wh)_lag48,Imported from Grid (Wh)_lag72,Imported from Grid (Wh)_roll_mean_3,Imported from Grid (Wh)_roll_std_3,Imported from Grid (Wh)_roll_mean_6,Imported from Grid (Wh)_roll_std_6,Imported from Grid (Wh)_roll_mean_24,Imported from Grid (Wh)_roll_std_24
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-06 00:00:00,50.036,50.035,50.0355,50.0355,50.035,50.0355,250.25,250.35,1.0,1.0,...,,,,,,,,,,
2023-09-06 01:00:00,50.0135,50.0135,50.0135,50.014,50.0135,50.0125,251.916667,252.433333,1.0,1.0,...,,,,,,,,,,
2023-09-06 02:00:00,49.9675,49.967,49.967,49.9675,49.967,49.967,252.383333,253.3,1.0,1.0,...,,,,,70.416667,2.504163,,,,
2023-09-06 03:00:00,49.9975,49.9975,49.9975,49.9975,49.9975,49.997,254.65,253.283333,1.0,1.0,...,,,,,69.5,3.041381,,,,
2023-09-06 04:00:00,50.0385,50.0385,50.037,50.0385,50.0385,50.037,256.266667,253.15,1.0,1.0,...,,,,,71.25,3.25,,,,


## Encoding Categorical Variables

### Weather severity mapping

In [70]:
# Map the categorical `weather_main` column to the numeric `weather_severity`
# column shown in the preview.
hourly_df = hourly_df.pipe(add_weather_severity_feature, 'weather_main')

hourly_df.head()

Unnamed: 0_level_0,Studer Output Frequency - L1,Studer Output Frequency - L2,Studer Output Frequency - L3,Grid Input Frequency - L1,Grid Input Frequency - L2,Grid Input Frequency - L3,Grid Input Voltage - L2,Grid Input Voltage - L3,Studer Grid Status - L1,Studer Grid Status - L2,...,Imported from Grid (Wh)_lag24,Imported from Grid (Wh)_lag48,Imported from Grid (Wh)_lag72,Imported from Grid (Wh)_roll_mean_3,Imported from Grid (Wh)_roll_std_3,Imported from Grid (Wh)_roll_mean_6,Imported from Grid (Wh)_roll_std_6,Imported from Grid (Wh)_roll_mean_24,Imported from Grid (Wh)_roll_std_24,weather_severity
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-06 00:00:00,50.036,50.035,50.0355,50.0355,50.035,50.0355,250.25,250.35,1.0,1.0,...,,,,,,,,,,5
2023-09-06 01:00:00,50.0135,50.0135,50.0135,50.014,50.0135,50.0125,251.916667,252.433333,1.0,1.0,...,,,,,,,,,,5
2023-09-06 02:00:00,49.9675,49.967,49.967,49.9675,49.967,49.967,252.383333,253.3,1.0,1.0,...,,,,70.416667,2.504163,,,,,5
2023-09-06 03:00:00,49.9975,49.9975,49.9975,49.9975,49.9975,49.997,254.65,253.283333,1.0,1.0,...,,,,69.5,3.041381,,,,,5
2023-09-06 04:00:00,50.0385,50.0385,50.037,50.0385,50.0385,50.037,256.266667,253.15,1.0,1.0,...,,,,71.25,3.25,,,,,5


### Weather Intensity

Derive a numeric intensity score from the free-text `weather_description` column.

In [71]:
# Derive the numeric `weather_intensity` column from the text in
# `weather_description`.
hourly_df = hourly_df.pipe(add_weather_intensity_feature, 'weather_description')

hourly_df.head()

Unnamed: 0_level_0,Studer Output Frequency - L1,Studer Output Frequency - L2,Studer Output Frequency - L3,Grid Input Frequency - L1,Grid Input Frequency - L2,Grid Input Frequency - L3,Grid Input Voltage - L2,Grid Input Voltage - L3,Studer Grid Status - L1,Studer Grid Status - L2,...,Imported from Grid (Wh)_lag48,Imported from Grid (Wh)_lag72,Imported from Grid (Wh)_roll_mean_3,Imported from Grid (Wh)_roll_std_3,Imported from Grid (Wh)_roll_mean_6,Imported from Grid (Wh)_roll_std_6,Imported from Grid (Wh)_roll_mean_24,Imported from Grid (Wh)_roll_std_24,weather_severity,weather_intensity
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-06 00:00:00,50.036,50.035,50.0355,50.0355,50.035,50.0355,250.25,250.35,1.0,1.0,...,,,,,,,,,5,0.5
2023-09-06 01:00:00,50.0135,50.0135,50.0135,50.014,50.0135,50.0125,251.916667,252.433333,1.0,1.0,...,,,,,,,,,5,0.5
2023-09-06 02:00:00,49.9675,49.967,49.967,49.9675,49.967,49.967,252.383333,253.3,1.0,1.0,...,,,70.416667,2.504163,,,,,5,0.5
2023-09-06 03:00:00,49.9975,49.9975,49.9975,49.9975,49.9975,49.997,254.65,253.283333,1.0,1.0,...,,,69.5,3.041381,,,,,5,0.5
2023-09-06 04:00:00,50.0385,50.0385,50.037,50.0385,50.0385,50.037,256.266667,253.15,1.0,1.0,...,,,71.25,3.25,,,,,5,0.5


### Weather Impact Score

Combine the two weather features into a single impact score: `weather_impact = weather_severity × weather_intensity`.

In [72]:

# Combined weather score: element-wise product of severity and intensity.
hourly_df = hourly_df.assign(
    weather_impact=hourly_df['weather_severity'] * hourly_df['weather_intensity']
)
hourly_df.head()

Unnamed: 0_level_0,Studer Output Frequency - L1,Studer Output Frequency - L2,Studer Output Frequency - L3,Grid Input Frequency - L1,Grid Input Frequency - L2,Grid Input Frequency - L3,Grid Input Voltage - L2,Grid Input Voltage - L3,Studer Grid Status - L1,Studer Grid Status - L2,...,Imported from Grid (Wh)_lag72,Imported from Grid (Wh)_roll_mean_3,Imported from Grid (Wh)_roll_std_3,Imported from Grid (Wh)_roll_mean_6,Imported from Grid (Wh)_roll_std_6,Imported from Grid (Wh)_roll_mean_24,Imported from Grid (Wh)_roll_std_24,weather_severity,weather_intensity,weather_impact
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-06 00:00:00,50.036,50.035,50.0355,50.0355,50.035,50.0355,250.25,250.35,1.0,1.0,...,,,,,,,,5,0.5,2.5
2023-09-06 01:00:00,50.0135,50.0135,50.0135,50.014,50.0135,50.0125,251.916667,252.433333,1.0,1.0,...,,,,,,,,5,0.5,2.5
2023-09-06 02:00:00,49.9675,49.967,49.967,49.9675,49.967,49.967,252.383333,253.3,1.0,1.0,...,,70.416667,2.504163,,,,,5,0.5,2.5
2023-09-06 03:00:00,49.9975,49.9975,49.9975,49.9975,49.9975,49.997,254.65,253.283333,1.0,1.0,...,,69.5,3.041381,,,,,5,0.5,2.5
2023-09-06 04:00:00,50.0385,50.0385,50.037,50.0385,50.0385,50.037,256.266667,253.15,1.0,1.0,...,,71.25,3.25,,,,,5,0.5,2.5


### Save to CSV

In [75]:
# Make sure the output directory exists. `exist_ok=True` replaces the separate
# exists()-then-create check, so there is no window in which the directory can
# appear between the check and the call.
os.makedirs('../data/processed', exist_ok=True)

# Write only complete rows: dropna() returns a filtered copy for the export,
# so `hourly_df` itself keeps its NaN warm-up rows.
hourly_df.dropna().to_csv("../data/processed/hourly_features_data.csv")

In [76]:
# Preview of the in-memory frame. NaNs are still present here because the
# dropna() used for the CSV export was not assigned back to `hourly_df`.
hourly_df.head()

Unnamed: 0_level_0,Studer Output Frequency - L1,Studer Output Frequency - L2,Studer Output Frequency - L3,Grid Input Frequency - L1,Grid Input Frequency - L2,Grid Input Frequency - L3,Grid Input Voltage - L2,Grid Input Voltage - L3,Studer Grid Status - L1,Studer Grid Status - L2,...,Imported from Grid (Wh)_lag72,Imported from Grid (Wh)_roll_mean_3,Imported from Grid (Wh)_roll_std_3,Imported from Grid (Wh)_roll_mean_6,Imported from Grid (Wh)_roll_std_6,Imported from Grid (Wh)_roll_mean_24,Imported from Grid (Wh)_roll_std_24,weather_severity,weather_intensity,weather_impact
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-06 00:00:00,50.036,50.035,50.0355,50.0355,50.035,50.0355,250.25,250.35,1.0,1.0,...,,,,,,,,5,0.5,2.5
2023-09-06 01:00:00,50.0135,50.0135,50.0135,50.014,50.0135,50.0125,251.916667,252.433333,1.0,1.0,...,,,,,,,,5,0.5,2.5
2023-09-06 02:00:00,49.9675,49.967,49.967,49.9675,49.967,49.967,252.383333,253.3,1.0,1.0,...,,70.416667,2.504163,,,,,5,0.5,2.5
2023-09-06 03:00:00,49.9975,49.9975,49.9975,49.9975,49.9975,49.997,254.65,253.283333,1.0,1.0,...,,69.5,3.041381,,,,,5,0.5,2.5
2023-09-06 04:00:00,50.0385,50.0385,50.037,50.0385,50.0385,50.037,256.266667,253.15,1.0,1.0,...,,71.25,3.25,,,,,5,0.5,2.5


In [77]:
# Total number of columns in the engineered frame.
print("Final features:", len(hourly_df.columns))

# Mean and standard deviation per column, shown for the first five columns
# that describe() reports.
hourly_df.describe().loc[['mean', 'std']].T.head()

Final features: 224


Unnamed: 0,mean,std
Studer Output Frequency - L1,48.679341,7.987676
Studer Output Frequency - L2,48.677981,7.995877
Studer Output Frequency - L3,49.998775,0.434713
Grid Input Frequency - L1,48.966959,6.717676
Grid Input Frequency - L2,47.376393,10.972552


In [78]:
# Export a 100-row sample of the feature table.
# Incomplete rows are dropped *before* taking the first 100: the lag/rolling
# columns are NaN for the earliest rows (the largest configured lag is 72), so
# slicing first and dropping afterwards would leave far fewer than 100 rows.
hourly_df.dropna().head(100).to_csv("../data/processed/hourly_features_data_100.csv")