# Feature Engineering Electricity

In [1]:
import sys
import os

# Make the parent of the working directory importable. The notebook is
# assumed to be started from a sub-directory of the project (e.g.
# 'feature_engin'), so the parent directory is the project root that holds
# the local packages imported below.
current_dir = os.getcwd()
main_dir = os.path.dirname(current_dir)

# Add the path only once: re-running this cell must not keep appending
# duplicate entries to sys.path.
if main_dir not in sys.path:
    sys.path.append(main_dir)

from preproc.data import clean_data_electricity
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


In [2]:
# Load the electricity dataset through the project's cleaning helper. The call
# itself prints a "Loaded ... successfully" message and an info() summary of
# the raw CSV (see the cell output); the returned frame is shown afterwards.
electricity_df = clean_data_electricity()
electricity_df


Loaded germany_electricity_generation_2018-2023.csv successfully.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207223 entries, 0 to 207222
Data columns (total 25 columns):
 #   Column                            Non-Null Count   Dtype 
---  ------                            --------------   ----- 
 0   Date (GMT+1)                      207217 non-null  object
 1   Hydro pumped storage consumption  206839 non-null  object
 2   Cross border electricity trading  207223 non-null  object
 3   Nuclear                           204639 non-null  object
 4   Hydro Run-of-River                207223 non-null  object
 5   Biomass                           207223 non-null  object
 6   Fossil brown coal / lignite       202615 non-null  object
 7   Fossil hard coal                  207223 non-null  object
 8   Fossil oil                        207223 non-null  object
 9   Fossil coal-derived gas           55966 non-null   object
 10  Fossil gas                        207223 non-null  object
 11 

Unnamed: 0,date_gmt+1,hydro_storage_in,cross_border,nuclear,hydro,biomass,lignite,hard_coal,oil,coal_gas,...,others,waste,wind_offshore,wind_onshore,solar,load,residual_load,renewable_share_gen,renewable_share_load,day_ahead_price
0,2017-12-31 23:00:00+00:00,-755.4,-14932.6,6347.5,2096.3,4955,8059.2,1849.9,302.3,390,...,1531,1221.3,2986.1,27633.3,0,49692.7,19073.3,61.8,77,-5.27
1,2017-12-31 23:15:00+00:00,-955.4,-14932.6,5957.7,2075.8,4947.7,8108.8,1812.6,302.3,504.7,...,1531.2,1212.2,3005.6,28215.1,0,49135,17914.4,62.5,79.1,-5.27
2,2017-12-31 23:30:00+00:00,-1004.8,-14932.6,5755.1,2068.3,4942.7,8142,1774.4,302.3,504.7,...,1531.3,1221.3,2933.6,28588.8,0,48754.6,17232.3,62.9,80.3,-5.27
3,2017-12-31 23:45:00+00:00,-1047.8,-14932.6,5363.2,2008.7,4944.1,8183.3,1760.3,302,504.7,...,1531.5,1221.3,2799.1,28861.6,0,48560.8,16900.1,63.2,80.8,-5.27
4,2018-01-01 00:00:00+00:00,-1063.4,-15844.4,4943.3,1936.5,4935.1,8248.9,1692.5,303.2,504.7,...,1531.2,1219.7,2912.2,29198.3,0,48239.3,16128.8,63.6,82.1,-29.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207216,2023-11-29 09:45:00+00:00,-7.4,-7640.4,0.0,2514.7,4964.9,15629.6,11570.9,897.9,0,...,1472.4,977.7,1919.2,20001.2,2676.3,75245.6,50649,36.1,43.6,133.72
207217,2023-11-29 10:00:00+00:00,-784.5,-4959,0.0,2512.9,4983,15647.4,11554,896,0,...,1472.5,984.5,1760.7,19946.4,3177.1,75877.4,50993.1,36.5,43.6,130
207218,2023-11-29 10:15:00+00:00,-1030.9,-4962.3,0.0,2514.4,4989.6,15774.6,11633.1,811.2,0,...,1470.9,990.7,1595,19669.4,3323.9,75744.2,51156,36.3,43.3,130
207219,2023-11-29 10:30:00+00:00,-842.3,-4962.3,0.0,2513,5010.6,15818.4,12020.4,807.9,0,...,1470.9,989.2,1497.1,19455.6,3848.6,76131.1,51329.8,36.3,43.4,130


In [3]:
# Tally the rows that have a missing value in one or more columns:
# isna() flags each cell, any() collapses the flags per row, sum() counts them.
num_rows_with_nan = (
    electricity_df
    .isna()
    .any(axis="columns")
    .sum()
)

print(f"Number of rows with at least one NaN: {num_rows_with_nan}")


Number of rows with at least one NaN: 5


In [4]:
# Drop every row that contains at least one missing value. The explicit
# .copy() makes the result an independent frame, so the column assignments in
# the following cells do not raise pandas' SettingWithCopyWarning.
electricity_df = electricity_df.dropna().copy()
electricity_df


Unnamed: 0,date_gmt+1,hydro_storage_in,cross_border,nuclear,hydro,biomass,lignite,hard_coal,oil,coal_gas,...,others,waste,wind_offshore,wind_onshore,solar,load,residual_load,renewable_share_gen,renewable_share_load,day_ahead_price
0,2017-12-31 23:00:00+00:00,-755.4,-14932.6,6347.5,2096.3,4955,8059.2,1849.9,302.3,390,...,1531,1221.3,2986.1,27633.3,0,49692.7,19073.3,61.8,77,-5.27
1,2017-12-31 23:15:00+00:00,-955.4,-14932.6,5957.7,2075.8,4947.7,8108.8,1812.6,302.3,504.7,...,1531.2,1212.2,3005.6,28215.1,0,49135,17914.4,62.5,79.1,-5.27
2,2017-12-31 23:30:00+00:00,-1004.8,-14932.6,5755.1,2068.3,4942.7,8142,1774.4,302.3,504.7,...,1531.3,1221.3,2933.6,28588.8,0,48754.6,17232.3,62.9,80.3,-5.27
3,2017-12-31 23:45:00+00:00,-1047.8,-14932.6,5363.2,2008.7,4944.1,8183.3,1760.3,302,504.7,...,1531.5,1221.3,2799.1,28861.6,0,48560.8,16900.1,63.2,80.8,-5.27
4,2018-01-01 00:00:00+00:00,-1063.4,-15844.4,4943.3,1936.5,4935.1,8248.9,1692.5,303.2,504.7,...,1531.2,1219.7,2912.2,29198.3,0,48239.3,16128.8,63.6,82.1,-29.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207216,2023-11-29 09:45:00+00:00,-7.4,-7640.4,0.0,2514.7,4964.9,15629.6,11570.9,897.9,0,...,1472.4,977.7,1919.2,20001.2,2676.3,75245.6,50649,36.1,43.6,133.72
207217,2023-11-29 10:00:00+00:00,-784.5,-4959,0.0,2512.9,4983,15647.4,11554,896,0,...,1472.5,984.5,1760.7,19946.4,3177.1,75877.4,50993.1,36.5,43.6,130
207218,2023-11-29 10:15:00+00:00,-1030.9,-4962.3,0.0,2514.4,4989.6,15774.6,11633.1,811.2,0,...,1470.9,990.7,1595,19669.4,3323.9,75744.2,51156,36.3,43.3,130
207219,2023-11-29 10:30:00+00:00,-842.3,-4962.3,0.0,2513,5010.6,15818.4,12020.4,807.9,0,...,1470.9,989.2,1497.1,19455.6,3848.6,76131.1,51329.8,36.3,43.4,130


In [5]:
# Parse the 'date_gmt+1' column as timezone-aware UTC datetimes, promote it to
# the index and name that index 'Date'.
#
# A new frame is built instead of assigning a column and using inplace=True:
# this avoids the SettingWithCopyWarning on the frame returned by dropna().
# The guard keeps the cell re-runnable: once the column has become the index
# there is nothing left to convert, and a second run would otherwise fail with
# a KeyError.
if 'date_gmt+1' in electricity_df.columns:
    electricity_df = (
        electricity_df
        .assign(**{'date_gmt+1': pd.to_datetime(electricity_df['date_gmt+1'], utc=True)})
        .set_index('date_gmt+1')
        .rename_axis('Date')
    )

# Display the DataFrame
electricity_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  electricity_df['date_gmt+1'] = pd.to_datetime(electricity_df['date_gmt+1'], utc=True)


Unnamed: 0_level_0,hydro_storage_in,cross_border,nuclear,hydro,biomass,lignite,hard_coal,oil,coal_gas,nat_gas,...,others,waste,wind_offshore,wind_onshore,solar,load,residual_load,renewable_share_gen,renewable_share_load,day_ahead_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-12-31 23:00:00+00:00,-755.4,-14932.6,6347.5,2096.3,4955,8059.2,1849.9,302.3,390,4209.3,...,1531,1221.3,2986.1,27633.3,0,49692.7,19073.3,61.8,77,-5.27
2017-12-31 23:15:00+00:00,-955.4,-14932.6,5957.7,2075.8,4947.7,8108.8,1812.6,302.3,504.7,4215.7,...,1531.2,1212.2,3005.6,28215.1,0,49135,17914.4,62.5,79.1,-5.27
2017-12-31 23:30:00+00:00,-1004.8,-14932.6,5755.1,2068.3,4942.7,8142,1774.4,302.3,504.7,4214.4,...,1531.3,1221.3,2933.6,28588.8,0,48754.6,17232.3,62.9,80.3,-5.27
2017-12-31 23:45:00+00:00,-1047.8,-14932.6,5363.2,2008.7,4944.1,8183.3,1760.3,302,504.7,4187.4,...,1531.5,1221.3,2799.1,28861.6,0,48560.8,16900.1,63.2,80.8,-5.27
2018-01-01 00:00:00+00:00,-1063.4,-15844.4,4943.3,1936.5,4935.1,8248.9,1692.5,303.2,504.7,4173.8,...,1531.2,1219.7,2912.2,29198.3,0,48239.3,16128.8,63.6,82.1,-29.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-29 09:45:00+00:00,-7.4,-7640.4,0.0,2514.7,4964.9,15629.6,11570.9,897.9,0,27204.6,...,1472.4,977.7,1919.2,20001.2,2676.3,75245.6,50649,36.1,43.6,133.72
2023-11-29 10:00:00+00:00,-784.5,-4959,0.0,2512.9,4983,15647.4,11554,896,0,27137.5,...,1472.5,984.5,1760.7,19946.4,3177.1,75877.4,50993.1,36.5,43.6,130
2023-11-29 10:15:00+00:00,-1030.9,-4962.3,0.0,2514.4,4989.6,15774.6,11633.1,811.2,0,27078.8,...,1470.9,990.7,1595,19669.4,3323.9,75744.2,51156,36.3,43.3,130
2023-11-29 10:30:00+00:00,-842.3,-4962.3,0.0,2513,5010.6,15818.4,12020.4,807.9,0,27039.5,...,1470.9,989.2,1497.1,19455.6,3848.6,76131.1,51329.8,36.3,43.4,130


In [6]:
# Inspect dtypes and non-null counts after indexing by date; most columns are
# still of dtype object at this point (see output).
electricity_df.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 202608 entries, 2017-12-31 23:00:00+00:00 to 2023-11-29 10:45:00+00:00
Data columns (total 23 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   hydro_storage_in      202608 non-null  object 
 1   cross_border          202608 non-null  object 
 2   nuclear               202608 non-null  float64
 3   hydro                 202608 non-null  object 
 4   biomass               202608 non-null  object 
 5   lignite               202608 non-null  object 
 6   hard_coal             202608 non-null  object 
 7   oil                   202608 non-null  object 
 8   coal_gas              202608 non-null  object 
 9   nat_gas               202608 non-null  object 
 10  geothermal            202608 non-null  object 
 11  hydro_reservoir       202608 non-null  object 
 12  hydro_storage_out     202608 non-null  object 
 13  others                202608 non-null  object 
 14  waste 

In [7]:
# Remove the limit on displayed columns so wide frames are rendered without
# the '...' column elision. This is a global pandas option and therefore also
# affects every cell executed after this one.
pd.options.display.max_columns = None
electricity_df


Unnamed: 0_level_0,hydro_storage_in,cross_border,nuclear,hydro,biomass,lignite,hard_coal,oil,coal_gas,nat_gas,geothermal,hydro_reservoir,hydro_storage_out,others,waste,wind_offshore,wind_onshore,solar,load,residual_load,renewable_share_gen,renewable_share_load,day_ahead_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2017-12-31 23:00:00+00:00,-755.4,-14932.6,6347.5,2096.3,4955,8059.2,1849.9,302.3,390,4209.3,18.5,44.4,295.6,1531,1221.3,2986.1,27633.3,0,49692.7,19073.3,61.8,77,-5.27
2017-12-31 23:15:00+00:00,-955.4,-14932.6,5957.7,2075.8,4947.7,8108.8,1812.6,302.3,504.7,4215.7,18.6,45,180.7,1531.2,1212.2,3005.6,28215.1,0,49135,17914.4,62.5,79.1,-5.27
2017-12-31 23:30:00+00:00,-1004.8,-14932.6,5755.1,2068.3,4942.7,8142,1774.4,302.3,504.7,4214.4,18.6,41.9,145.1,1531.3,1221.3,2933.6,28588.8,0,48754.6,17232.3,62.9,80.3,-5.27
2017-12-31 23:45:00+00:00,-1047.8,-14932.6,5363.2,2008.7,4944.1,8183.3,1760.3,302,504.7,4187.4,18.5,48.5,295.5,1531.5,1221.3,2799.1,28861.6,0,48560.8,16900.1,63.2,80.8,-5.27
2018-01-01 00:00:00+00:00,-1063.4,-15844.4,4943.3,1936.5,4935.1,8248.9,1692.5,303.2,504.7,4173.8,18.4,46.9,624.8,1531.2,1219.7,2912.2,29198.3,0,48239.3,16128.8,63.6,82.1,-29.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-29 09:45:00+00:00,-7.4,-7640.4,0.0,2514.7,4964.9,15629.6,11570.9,897.9,0,27204.6,15.9,265.3,672.6,1472.4,977.7,1919.2,20001.2,2676.3,75245.6,50649,36.1,43.6,133.72
2023-11-29 10:00:00+00:00,-784.5,-4959,0.0,2512.9,4983,15647.4,11554,896,0,27137.5,16.3,209,269.4,1472.5,984.5,1760.7,19946.4,3177.1,75877.4,50993.1,36.5,43.6,130
2023-11-29 10:15:00+00:00,-1030.9,-4962.3,0.0,2514.4,4989.6,15774.6,11633.1,811.2,0,27078.8,16.3,219,323.3,1470.9,990.7,1595,19669.4,3323.9,75744.2,51156,36.3,43.3,130
2023-11-29 10:30:00+00:00,-842.3,-4962.3,0.0,2513,5010.6,15818.4,12020.4,807.9,0,27039.5,16.3,204,256.5,1470.9,989.2,1497.1,19455.6,3848.6,76131.1,51329.8,36.3,43.4,130


In [8]:
# The date now lives in the index, so every remaining column is a measurement.
non_date_columns = electricity_df.columns

# Convert each column to a numeric dtype in one pass. errors='coerce' turns
# values that cannot be parsed into NaN instead of raising. Rebinding the name
# to the new frame (rather than assigning column by column) avoids the
# SettingWithCopyWarning and keeps the cell safe to re-run.
electricity_df = electricity_df[non_date_columns].apply(pd.to_numeric, errors='coerce')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  electricity_df[column] = pd.to_numeric(electricity_df[column], errors='coerce')


In [9]:
# Verify the conversion: the columns listed in the output are now float64 and
# their non-null counts still equal the number of rows.
electricity_df.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 202608 entries, 2017-12-31 23:00:00+00:00 to 2023-11-29 10:45:00+00:00
Data columns (total 23 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   hydro_storage_in      202608 non-null  float64
 1   cross_border          202608 non-null  float64
 2   nuclear               202608 non-null  float64
 3   hydro                 202608 non-null  float64
 4   biomass               202608 non-null  float64
 5   lignite               202608 non-null  float64
 6   hard_coal             202608 non-null  float64
 7   oil                   202608 non-null  float64
 8   coal_gas              202608 non-null  float64
 9   nat_gas               202608 non-null  float64
 10  geothermal            202608 non-null  float64
 11  hydro_reservoir       202608 non-null  float64
 12  hydro_storage_out     202608 non-null  float64
 13  others                202608 non-null  float64
 14  waste 

In [10]:
# Show the frame after the numeric conversion.
electricity_df


Unnamed: 0_level_0,hydro_storage_in,cross_border,nuclear,hydro,biomass,lignite,hard_coal,oil,coal_gas,nat_gas,geothermal,hydro_reservoir,hydro_storage_out,others,waste,wind_offshore,wind_onshore,solar,load,residual_load,renewable_share_gen,renewable_share_load,day_ahead_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2017-12-31 23:00:00+00:00,-755.4,-14932.6,6347.5,2096.3,4955.0,8059.2,1849.9,302.3,390.0,4209.3,18.5,44.4,295.6,1531.0,1221.3,2986.1,27633.3,0.0,49692.7,19073.3,61.8,77.0,-5.27
2017-12-31 23:15:00+00:00,-955.4,-14932.6,5957.7,2075.8,4947.7,8108.8,1812.6,302.3,504.7,4215.7,18.6,45.0,180.7,1531.2,1212.2,3005.6,28215.1,0.0,49135.0,17914.4,62.5,79.1,-5.27
2017-12-31 23:30:00+00:00,-1004.8,-14932.6,5755.1,2068.3,4942.7,8142.0,1774.4,302.3,504.7,4214.4,18.6,41.9,145.1,1531.3,1221.3,2933.6,28588.8,0.0,48754.6,17232.3,62.9,80.3,-5.27
2017-12-31 23:45:00+00:00,-1047.8,-14932.6,5363.2,2008.7,4944.1,8183.3,1760.3,302.0,504.7,4187.4,18.5,48.5,295.5,1531.5,1221.3,2799.1,28861.6,0.0,48560.8,16900.1,63.2,80.8,-5.27
2018-01-01 00:00:00+00:00,-1063.4,-15844.4,4943.3,1936.5,4935.1,8248.9,1692.5,303.2,504.7,4173.8,18.4,46.9,624.8,1531.2,1219.7,2912.2,29198.3,0.0,48239.3,16128.8,63.6,82.1,-29.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-29 09:45:00+00:00,-7.4,-7640.4,0.0,2514.7,4964.9,15629.6,11570.9,897.9,0.0,27204.6,15.9,265.3,672.6,1472.4,977.7,1919.2,20001.2,2676.3,75245.6,50649.0,36.1,43.6,133.72
2023-11-29 10:00:00+00:00,-784.5,-4959.0,0.0,2512.9,4983.0,15647.4,11554.0,896.0,0.0,27137.5,16.3,209.0,269.4,1472.5,984.5,1760.7,19946.4,3177.1,75877.4,50993.1,36.5,43.6,130.00
2023-11-29 10:15:00+00:00,-1030.9,-4962.3,0.0,2514.4,4989.6,15774.6,11633.1,811.2,0.0,27078.8,16.3,219.0,323.3,1470.9,990.7,1595.0,19669.4,3323.9,75744.2,51156.0,36.3,43.3,130.00
2023-11-29 10:30:00+00:00,-842.3,-4962.3,0.0,2513.0,5010.6,15818.4,12020.4,807.9,0.0,27039.5,16.3,204.0,256.5,1470.9,989.2,1497.1,19455.6,3848.6,76131.1,51329.8,36.3,43.4,130.00


In [11]:
from sklearn.preprocessing import StandardScaler

# Standardise every column with scikit-learn's StandardScaler. The scaler is
# kept in its own variable so the fitted parameters remain accessible.
scaler = StandardScaler()
electricity_scaled = scaler.fit_transform(electricity_df)

# fit_transform hands back a bare array, so re-attach the original row index
# and column labels to get a DataFrame again.
electricity_scaled_df = pd.DataFrame(
    data=electricity_scaled,
    index=electricity_df.index,
    columns=electricity_df.columns,
)


In [12]:
# Show the standardised frame produced by the StandardScaler cell.
electricity_scaled_df


Unnamed: 0_level_0,hydro_storage_in,cross_border,nuclear,hydro,biomass,lignite,hard_coal,oil,coal_gas,nat_gas,geothermal,hydro_reservoir,hydro_storage_out,others,waste,wind_offshore,wind_onshore,solar,load,residual_load,renewable_share_gen,renewable_share_load,day_ahead_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2017-12-31 23:00:00+00:00,0.163187,-2.082295,0.150787,0.165885,0.405941,-0.917972,-1.032740,-1.401554,1.643448,-1.228916,0.230595,-0.856752,-0.483863,0.737850,0.382114,0.144950,1.834043,-0.645187,-0.950979,-1.591867,1.248237,1.628670,-0.965173
2017-12-31 23:15:00+00:00,-0.032721,-2.082295,0.013749,0.116759,0.380302,-0.906409,-1.042255,-1.401554,2.292131,-1.227543,0.253495,-0.850605,-0.622823,0.738390,0.343481,0.155485,1.898964,-0.645187,-1.003612,-1.679690,1.292751,1.738180,-0.965173
2017-12-31 23:30:00+00:00,-0.081111,-2.082295,-0.057477,0.098786,0.362740,-0.898670,-1.051999,-1.401554,2.292131,-1.227822,0.253495,-0.882363,-0.665878,0.738660,0.382114,0.116587,1.940664,-0.645187,-1.039512,-1.731380,1.318187,1.800757,-0.965173
2017-12-31 23:45:00+00:00,-0.123231,-2.082295,-0.195254,-0.044039,0.367657,-0.889042,-1.055595,-1.403613,2.292131,-1.233612,0.230595,-0.814748,-0.483983,0.739200,0.382114,0.043923,1.971104,-0.645187,-1.057802,-1.756554,1.337264,1.826830,-0.965173
2018-01-01 00:00:00+00:00,-0.138512,-2.236547,-0.342874,-0.217058,0.336047,-0.873749,-1.072890,-1.395378,2.292131,-1.236529,0.207695,-0.831140,-0.085726,0.738390,0.375321,0.105025,2.008675,-0.645187,-1.088143,-1.815003,1.362701,1.894622,-1.212148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-29 09:45:00+00:00,0.895882,-0.848653,-2.080745,1.168536,0.440713,0.846845,1.446882,2.685958,-0.562186,3.702497,-0.364800,1.406300,-0.027917,0.579646,-0.652070,-0.431445,0.982409,-0.346181,1.460566,0.800959,-0.386051,-0.113055,0.423462
2023-11-29 10:00:00+00:00,0.134682,-0.395032,-2.080745,1.164222,0.504285,0.850995,1.442571,2.672918,-0.562186,3.688107,-0.273201,0.829524,-0.515549,0.579916,-0.623201,-0.517075,0.976294,-0.290230,1.520191,0.827035,-0.360615,-0.113055,0.386296
2023-11-29 10:15:00+00:00,-0.106677,-0.395591,-2.080745,1.167817,0.527466,0.880648,1.462748,2.090949,-0.562186,3.675519,-0.273201,0.931971,-0.450362,0.575596,-0.596880,-0.606594,0.945384,-0.273828,1.507621,0.839380,-0.373333,-0.128699,0.386296
2023-11-29 10:30:00+00:00,0.078065,-0.395591,-2.080745,1.164462,0.601224,0.890858,1.561540,2.068301,-0.562186,3.667091,-0.273201,0.778301,-0.531150,0.575596,-0.603248,-0.659485,0.921527,-0.215207,1.544134,0.852550,-0.373333,-0.123484,0.386296


In [13]:
from feature_engin_electrcity import scale_electricity_data


In [14]:
# Run the packaged pipeline with scaling_method='minmax'. The call reloads the
# CSV itself (see the "Loaded ..." message in the output); the displayed values
# all lie between 0 and 1.
electricity1 = scale_electricity_data(scaling_method='minmax')
electricity1


Loaded germany_electricity_generation_2018-2023.csv successfully.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207223 entries, 0 to 207222
Data columns (total 25 columns):
 #   Column                            Non-Null Count   Dtype 
---  ------                            --------------   ----- 
 0   Date (GMT+1)                      207217 non-null  object
 1   Hydro pumped storage consumption  206839 non-null  object
 2   Cross border electricity trading  207223 non-null  object
 3   Nuclear                           204639 non-null  object
 4   Hydro Run-of-River                207223 non-null  object
 5   Biomass                           207223 non-null  object
 6   Fossil brown coal / lignite       202615 non-null  object
 7   Fossil hard coal                  207223 non-null  object
 8   Fossil oil                        207223 non-null  object
 9   Fossil coal-derived gas           55966 non-null   object
 10  Fossil gas                        207223 non-null  object
 11 

Unnamed: 0_level_0,hydro_storage_in,cross_border,nuclear,hydro,biomass,lignite,hard_coal,oil,coal_gas,nat_gas,geothermal,hydro_reservoir,hydro_storage_out,others,waste,wind_offshore,wind_onshore,solar,load,residual_load,renewable_share_gen,renewable_share_load,day_ahead_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2017-12-31 23:00:00+00:00,0.854173,0.142562,0.666691,0.440771,0.612131,0.275608,0.068905,0.139807,0.633837,0.079402,0.583673,0.067943,0.054390,0.567601,0.652988,0.390800,0.624730,0.000000,0.345158,0.277050,0.684280,0.577158,0.360853
2017-12-31 23:15:00+00:00,0.815563,0.142562,0.625750,0.432105,0.608407,0.278133,0.066799,0.139807,0.820250,0.079618,0.587755,0.068884,0.033249,0.567714,0.647489,0.393352,0.637926,0.000000,0.335453,0.263494,0.693527,0.595466,0.360853
2017-12-31 23:30:00+00:00,0.806027,0.142562,0.604470,0.428934,0.605857,0.279824,0.064643,0.139807,0.820250,0.079574,0.587755,0.064020,0.026698,0.567770,0.652988,0.383929,0.646403,0.000000,0.328834,0.255516,0.698811,0.605929,0.360853
2017-12-31 23:45:00+00:00,0.797726,0.142562,0.563308,0.403737,0.606571,0.281926,0.063847,0.139645,0.820250,0.078662,0.583673,0.074376,0.054372,0.567882,0.652988,0.366326,0.652591,0.000000,0.325462,0.251630,0.702774,0.610288,0.360853
2018-01-01 00:00:00+00:00,0.794714,0.117971,0.519205,0.373214,0.601979,0.285266,0.060019,0.140295,0.820250,0.078202,0.579592,0.071866,0.114963,0.567714,0.652022,0.381128,0.660228,0.000000,0.319868,0.242608,0.708058,0.621622,0.342823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-29 09:45:00+00:00,0.998571,0.339230,0.000000,0.617655,0.617182,0.661041,0.617670,0.462556,0.000000,0.856567,0.477551,0.414561,0.123758,0.534617,0.505771,0.251171,0.451613,0.061432,0.789795,0.646390,0.344782,0.285963,0.462232
2023-11-29 10:00:00+00:00,0.848555,0.411546,0.000000,0.616894,0.626416,0.661947,0.616716,0.461526,0.000000,0.854299,0.493878,0.326220,0.049569,0.534673,0.509881,0.230428,0.450370,0.072927,0.800789,0.650415,0.350066,0.285963,0.459519
2023-11-29 10:15:00+00:00,0.800988,0.411457,0.000000,0.617528,0.629783,0.668423,0.621182,0.415574,0.000000,0.852315,0.493878,0.341911,0.059487,0.533772,0.513628,0.208742,0.444087,0.076297,0.798471,0.652320,0.347424,0.283348,0.459519
2023-11-29 10:30:00+00:00,0.837397,0.411457,0.000000,0.616936,0.640496,0.670653,0.643045,0.413786,0.000000,0.850987,0.493878,0.318374,0.047196,0.533772,0.512721,0.195930,0.439238,0.088341,0.805203,0.654353,0.347424,0.284220,0.459519


In [15]:
# Run the packaged pipeline with its default scaling method. The displayed
# values match the manual StandardScaler result shown earlier in this
# notebook, so the default appears to be standard scaling
# (NOTE(review): confirm against the helper's signature).
electricity2 = scale_electricity_data()
electricity2


Loaded germany_electricity_generation_2018-2023.csv successfully.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207223 entries, 0 to 207222
Data columns (total 25 columns):
 #   Column                            Non-Null Count   Dtype 
---  ------                            --------------   ----- 
 0   Date (GMT+1)                      207217 non-null  object
 1   Hydro pumped storage consumption  206839 non-null  object
 2   Cross border electricity trading  207223 non-null  object
 3   Nuclear                           204639 non-null  object
 4   Hydro Run-of-River                207223 non-null  object
 5   Biomass                           207223 non-null  object
 6   Fossil brown coal / lignite       202615 non-null  object
 7   Fossil hard coal                  207223 non-null  object
 8   Fossil oil                        207223 non-null  object
 9   Fossil coal-derived gas           55966 non-null   object
 10  Fossil gas                        207223 non-null  object
 11 

Unnamed: 0_level_0,hydro_storage_in,cross_border,nuclear,hydro,biomass,lignite,hard_coal,oil,coal_gas,nat_gas,geothermal,hydro_reservoir,hydro_storage_out,others,waste,wind_offshore,wind_onshore,solar,load,residual_load,renewable_share_gen,renewable_share_load,day_ahead_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2017-12-31 23:00:00+00:00,0.163187,-2.082295,0.150787,0.165885,0.405941,-0.917972,-1.032740,-1.401554,1.643448,-1.228916,0.230595,-0.856752,-0.483863,0.737850,0.382114,0.144950,1.834043,-0.645187,-0.950979,-1.591867,1.248237,1.628670,-0.965173
2017-12-31 23:15:00+00:00,-0.032721,-2.082295,0.013749,0.116759,0.380302,-0.906409,-1.042255,-1.401554,2.292131,-1.227543,0.253495,-0.850605,-0.622823,0.738390,0.343481,0.155485,1.898964,-0.645187,-1.003612,-1.679690,1.292751,1.738180,-0.965173
2017-12-31 23:30:00+00:00,-0.081111,-2.082295,-0.057477,0.098786,0.362740,-0.898670,-1.051999,-1.401554,2.292131,-1.227822,0.253495,-0.882363,-0.665878,0.738660,0.382114,0.116587,1.940664,-0.645187,-1.039512,-1.731380,1.318187,1.800757,-0.965173
2017-12-31 23:45:00+00:00,-0.123231,-2.082295,-0.195254,-0.044039,0.367657,-0.889042,-1.055595,-1.403613,2.292131,-1.233612,0.230595,-0.814748,-0.483983,0.739200,0.382114,0.043923,1.971104,-0.645187,-1.057802,-1.756554,1.337264,1.826830,-0.965173
2018-01-01 00:00:00+00:00,-0.138512,-2.236547,-0.342874,-0.217058,0.336047,-0.873749,-1.072890,-1.395378,2.292131,-1.236529,0.207695,-0.831140,-0.085726,0.738390,0.375321,0.105025,2.008675,-0.645187,-1.088143,-1.815003,1.362701,1.894622,-1.212148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-29 09:45:00+00:00,0.895882,-0.848653,-2.080745,1.168536,0.440713,0.846845,1.446882,2.685958,-0.562186,3.702497,-0.364800,1.406300,-0.027917,0.579646,-0.652070,-0.431445,0.982409,-0.346181,1.460566,0.800959,-0.386051,-0.113055,0.423462
2023-11-29 10:00:00+00:00,0.134682,-0.395032,-2.080745,1.164222,0.504285,0.850995,1.442571,2.672918,-0.562186,3.688107,-0.273201,0.829524,-0.515549,0.579916,-0.623201,-0.517075,0.976294,-0.290230,1.520191,0.827035,-0.360615,-0.113055,0.386296
2023-11-29 10:15:00+00:00,-0.106677,-0.395591,-2.080745,1.167817,0.527466,0.880648,1.462748,2.090949,-0.562186,3.675519,-0.273201,0.931971,-0.450362,0.575596,-0.596880,-0.606594,0.945384,-0.273828,1.507621,0.839380,-0.373333,-0.128699,0.386296
2023-11-29 10:30:00+00:00,0.078065,-0.395591,-2.080745,1.164462,0.601224,0.890858,1.561540,2.068301,-0.562186,3.667091,-0.273201,0.778301,-0.531150,0.575596,-0.603248,-0.659485,0.921527,-0.215207,1.544134,0.852550,-0.373333,-0.123484,0.386296


In [16]:
# Run the packaged pipeline with scaling_method='robust'
# (presumably scikit-learn's RobustScaler -- verify in the helper module).
electricity3 = scale_electricity_data(scaling_method='robust')
electricity3


Loaded germany_electricity_generation_2018-2023.csv successfully.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207223 entries, 0 to 207222
Data columns (total 25 columns):
 #   Column                            Non-Null Count   Dtype 
---  ------                            --------------   ----- 
 0   Date (GMT+1)                      207217 non-null  object
 1   Hydro pumped storage consumption  206839 non-null  object
 2   Cross border electricity trading  207223 non-null  object
 3   Nuclear                           204639 non-null  object
 4   Hydro Run-of-River                207223 non-null  object
 5   Biomass                           207223 non-null  object
 6   Fossil brown coal / lignite       202615 non-null  object
 7   Fossil hard coal                  207223 non-null  object
 8   Fossil oil                        207223 non-null  object
 9   Fossil coal-derived gas           55966 non-null   object
 10  Fossil gas                        207223 non-null  object
 11 

Unnamed: 0_level_0,hydro_storage_in,cross_border,nuclear,hydro,biomass,lignite,hard_coal,oil,coal_gas,nat_gas,geothermal,hydro_reservoir,hydro_storage_out,others,waste,wind_offshore,wind_onshore,solar,load,residual_load,renewable_share_gen,renewable_share_load,day_ahead_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2017-12-31 23:00:00+00:00,-0.173720,-1.410196,-0.085495,0.174175,0.231435,-0.757308,-0.539529,-1.851107,1.789812,-0.766098,0.177419,-0.438443,-0.052455,0.571341,0.232488,0.137383,1.607406,-0.010545,-0.575615,-1.179170,0.861224,1.209220,-0.801786
2017-12-31 23:15:00+00:00,-0.316935,-1.410196,-0.183397,0.141656,0.213500,-0.748704,-0.546026,-1.851107,2.316200,-0.765125,0.193548,-0.433678,-0.172040,0.571691,0.198031,0.143382,1.656337,-0.010545,-0.608288,-1.243483,0.889796,1.283688,-0.801786
2017-12-31 23:30:00+00:00,-0.352309,-1.410196,-0.234282,0.129759,0.201216,-0.742946,-0.552680,-1.851107,2.316200,-0.765323,0.193548,-0.458300,-0.209091,0.571865,0.232488,0.121231,1.687767,-0.010545,-0.630574,-1.281335,0.906122,1.326241,-0.801786
2017-12-31 23:45:00+00:00,-0.383101,-1.410196,-0.332712,0.035216,0.204656,-0.735782,-0.555137,-1.854125,2.316200,-0.769424,0.177419,-0.405878,-0.052559,0.572214,0.232488,0.079852,1.710710,-0.010545,-0.641928,-1.299770,0.918367,1.343972,-0.801786
2018-01-01 00:00:00+00:00,-0.394271,-1.517962,-0.438174,-0.079315,0.182544,-0.724403,-0.566947,-1.842052,2.316200,-0.771490,0.161290,-0.418586,0.290167,0.571691,0.226429,0.114647,1.739028,-0.010545,-0.660763,-1.342573,0.934694,1.390071,-1.141405
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-29 09:45:00+00:00,0.361905,-0.548331,-1.679733,0.837881,0.255758,0.555852,1.153804,4.140845,0.000000,2.726877,-0.241935,1.316124,0.339916,0.469001,-0.689890,-0.190853,0.965522,0.280394,0.921416,0.573101,-0.187755,0.024823,1.107745
2023-11-29 10:00:00+00:00,-0.194558,-0.231416,-1.679733,0.835025,0.300227,0.558939,1.150860,4.121730,0.000000,2.716685,-0.177419,0.868944,-0.079723,0.469176,-0.664142,-0.239617,0.960913,0.334835,0.958431,0.592196,-0.171429,0.024823,1.056637
2023-11-29 10:15:00+00:00,-0.370999,-0.231806,-1.679733,0.837405,0.316442,0.581004,1.164639,3.268612,0.000000,2.707768,-0.177419,0.948372,-0.023626,0.466381,-0.640666,-0.290595,0.937616,0.350794,0.950627,0.601236,-0.179592,0.014184,1.056637
2023-11-29 10:30:00+00:00,-0.235947,-0.231806,-1.679733,0.835184,0.368036,0.588601,1.232104,3.235412,0.000000,2.701798,-0.177419,0.829230,-0.093149,0.466381,-0.646346,-0.320714,0.919635,0.407834,0.973294,0.610881,-0.179592,0.017730,1.056637
