In [12]:
# Standard library
import os

# Third-party libraries
import pandas as pd

# Machine learning libraries
from sklearn.preprocessing import StandardScaler
import joblib

# Enable auto-reload for modules during development
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Relative data directory and the full path of the merged input CSV
file_path = "../data"
merged_csv_name = 'full_data_uk_merged_with_price.csv'
file = os.path.join(file_path, merged_csv_name)

In [4]:
# Read the data and drop the price columns for now.
# `dataframe` keeps the CSV contents exactly as read; `drop` (non-inplace)
# already returns a new frame, so no explicit `.copy()` is needed first.
dataframe = pd.read_csv(file)
price_columns = [col for col in dataframe.columns if 'price' in col.lower()]
df = dataframe.drop(columns=price_columns)

In [5]:
# Parse the settlement timestamps and use them as the row index
settlement_times = pd.to_datetime(df['settlement_date'], format='%Y-%m-%d %H:%M:%S')
df = df.assign(settlement_date=settlement_times).set_index('settlement_date')

In [6]:
# Preview the first five rows
df.head(n=5)

Unnamed: 0_level_0,settlement_period,nd,tsd,is_bank_holiday,gas,coal,nuclear,wind,wind_emb,hydro,...,wind_speed_100m_km/h_lag_1_weeks,carbon_intensity_lag_1_weeks,year,day,month,day_of_week,season_Autumn,season_Spring,season_Summer,season_Winter
settlement_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-08 00:00:00,1,24585,26016,0,4566.0,0.0,6551.0,11618.0,2424.0,398,...,41.9,94.0,2019,8,1,2,0,0,0,1
2019-01-08 00:30:00,2,25066,26342,0,4899.0,0.0,6547.0,11572.0,2440.0,397,...,41.9,99.0,2019,8,1,2,0,0,0,1
2019-01-08 01:00:00,3,24841,26312,0,5034.0,0.0,6555.0,11409.0,2444.0,396,...,43.4,97.0,2019,8,1,2,0,0,0,1
2019-01-08 01:30:00,4,24193,25802,0,4455.0,0.0,6571.0,11463.0,2444.0,404,...,43.4,90.0,2019,8,1,2,0,0,0,1
2019-01-08 02:00:00,5,23792,25895,0,4589.0,0.0,6582.0,11487.0,2443.0,403,...,41.8,91.0,2019,8,1,2,0,0,0,1


In [13]:
# Standardise every column of `df`, keeping the original column labels and index.
# NOTE(review): the scaler is fitted on all rows of `df`; if a train/test split
# is made later, confirm that fitting on the full frame is intended.
scaler = StandardScaler()
scaled_values = scaler.fit(df).transform(df)
df_scaled = pd.DataFrame(scaled_values, index=df.index, columns=df.columns)

# Write the scaled frame (index included) to the data directory
scaled_csv = os.path.join(file_path, 'full_data_uk_scaled.csv')
df_scaled.to_csv(scaled_csv, index=True)

# Persist the fitted scaler for later use
scaler_file = os.path.join(file_path, 'scaler.pkl')
joblib.dump(scaler, scaler_file)

df_scaled.head()

Unnamed: 0_level_0,settlement_period,nd,tsd,is_bank_holiday,gas,coal,nuclear,wind,wind_emb,hydro,...,wind_speed_100m_km/h_lag_1_weeks,carbon_intensity_lag_1_weeks,year,day,month,day_of_week,season_Autumn,season_Spring,season_Summer,season_Winter
settlement_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-08 00:00:00,-1.696331,-0.445172,-0.524362,-0.148368,-1.170435,-0.618874,1.666398,1.268298,0.742915,-0.038494,...,1.664755,-1.103419,-1.466864,-0.883241,-1.535988,-0.999984,-0.561958,-0.586974,-0.56602,1.68223
2019-01-08 00:30:00,-1.624146,-0.369646,-0.470695,-0.148368,-1.111619,-0.618874,1.662102,1.256762,0.75856,-0.042601,...,1.664755,-1.026098,-1.466864,-0.883241,-1.535988,-0.999984,-0.561958,-0.586974,-0.56602,1.68223
2019-01-08 01:00:00,-1.55196,-0.404975,-0.475633,-0.148368,-1.087775,-0.618874,1.670695,1.215886,0.762472,-0.046708,...,1.795315,-1.057026,-1.466864,-0.883241,-1.535988,-0.999984,-0.561958,-0.586974,-0.56602,1.68223
2019-01-08 01:30:00,-1.479775,-0.506724,-0.559592,-0.148368,-1.190041,-0.618874,1.687882,1.229428,0.762472,-0.013852,...,1.795315,-1.165275,-1.466864,-0.883241,-1.535988,-0.999984,-0.561958,-0.586974,-0.56602,1.68223
2019-01-08 02:00:00,-1.407589,-0.569689,-0.544282,-0.148368,-1.166373,-0.618874,1.699697,1.235446,0.761494,-0.017959,...,1.656051,-1.149811,-1.466864,-0.883241,-1.535988,-0.999984,-0.561958,-0.586974,-0.56602,1.68223
