In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [14]:
# Load the raw dataset from the CSV file into a DataFrame named `data`.
data = pd.read_csv(filepath_or_buffer='data/Renewable_raw.csv')

In [15]:
# Quick overview: show the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,Time,Energy delta[Wh],GHI,temp,pressure,humidity,wind_speed,rain_1h,snow_1h,clouds_all,isSun,sunlightTime,dayLength,SunlightTime/daylength,weather_type,hour,month
0,2017-01-01 00:00:00,0,0.0,1.6,1021,100,4.9,0.0,0.0,100,0,0,450,0.0,4,0,1
1,2017-01-01 00:15:00,0,0.0,1.6,1021,100,4.9,0.0,0.0,100,0,0,450,0.0,4,0,1
2,2017-01-01 00:30:00,0,0.0,1.6,1021,100,4.9,0.0,0.0,100,0,0,450,0.0,4,0,1
3,2017-01-01 00:45:00,0,0.0,1.6,1021,100,4.9,0.0,0.0,100,0,0,450,0.0,4,0,1
4,2017-01-01 01:00:00,0,0.0,1.7,1020,100,5.2,0.0,0.0,100,0,0,450,0.0,4,1,1


In [16]:
# Show the frame through Jupyter's rich display instead of print():
# the same truncated head/tail view and shape footer, but rendered as a
# table without the text repr's wrapped column groups.
data

                       Time  Energy delta[Wh]   GHI  temp  pressure  humidity  \
0       2017-01-01 00:00:00                 0   0.0   1.6      1021       100   
1       2017-01-01 00:15:00                 0   0.0   1.6      1021       100   
2       2017-01-01 00:30:00                 0   0.0   1.6      1021       100   
3       2017-01-01 00:45:00                 0   0.0   1.6      1021       100   
4       2017-01-01 01:00:00                 0   0.0   1.7      1020       100   
...                     ...               ...   ...   ...       ...       ...   
196771  2022-08-31 16:45:00               118  23.7  18.6      1023        57   
196772  2022-08-31 17:00:00                82  15.6  18.5      1023        61   
196773  2022-08-31 17:15:00                51   8.0  18.5      1023        61   
196774  2022-08-31 17:30:00                24   2.1  18.5      1023        61   
196775  2022-08-31 17:45:00                 0   0.0  18.5      1023        61   

        wind_speed  rain_1h

In [6]:
#Column explanation

#Energy delta - The difference in energy consumption in Watt-hours (Wh) from the previous timestamp to the current timestamp.
#GHI - Global Horizontal Irradiance in Watts per square meter (W/m²) measured by a pyranometer
#rain_1h - The amount of precipitation in millimeters (mm) measured over the past hour.
#snow_1h - The amount of snowfall in millimeters (mm) measured over the past hour.

In [7]:
# Check the dtype pandas inferred for each column; the output below shows
# that Time was loaded as a generic object column rather than a datetime.
data.dtypes

Time                       object
Energy delta[Wh]            int64
GHI                       float64
temp                      float64
pressure                    int64
humidity                    int64
wind_speed                float64
rain_1h                   float64
snow_1h                   float64
clouds_all                  int64
isSun                       int64
sunlightTime                int64
dayLength                   int64
SunlightTime/daylength    float64
weather_type                int64
hour                        int64
month                       int64
dtype: object

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns, used to sanity-check value ranges.
data.describe()

Unnamed: 0,Energy delta[Wh],GHI,temp,pressure,humidity,wind_speed,rain_1h,snow_1h,clouds_all,isSun,sunlightTime,dayLength,SunlightTime/daylength,weather_type,hour,month
count,196776.0,196776.0,196776.0,196776.0,196776.0,196776.0,196776.0,196776.0,196776.0,196776.0,196776.0,196776.0,196776.0,196776.0,196776.0,196776.0
mean,573.008228,32.596538,9.790521,1015.29278,79.810566,3.937746,0.066035,0.007148,65.974387,0.519962,211.721094,748.644347,0.265187,3.198398,11.498902,6.298329
std,1044.824047,52.172018,7.995428,9.585773,15.604459,1.821694,0.278913,0.06971,36.628593,0.499603,273.902186,194.870208,0.329023,1.289939,6.921887,3.376066
min,0.0,0.0,-16.6,977.0,22.0,0.0,0.0,0.0,0.0,0.0,0.0,450.0,0.0,1.0,0.0,1.0
25%,0.0,0.0,3.6,1010.0,70.0,2.6,0.0,0.0,34.0,0.0,0.0,570.0,0.0,2.0,5.0,3.0
50%,0.0,1.6,9.3,1016.0,84.0,3.7,0.0,0.0,82.0,1.0,30.0,765.0,0.05,4.0,11.0,6.0
75%,577.0,46.8,15.7,1021.0,92.0,5.0,0.0,0.0,100.0,1.0,390.0,930.0,0.53,4.0,17.0,9.0
max,5020.0,229.2,35.8,1047.0,100.0,14.3,8.09,2.82,100.0,1.0,1020.0,1020.0,1.0,5.0,23.0,12.0


In [17]:
# Missing-value handling: discard every row that has at least one missing
# value in any column.
data = data.dropna(axis=0, how='any')

In [18]:
# Count the missing values left in each column.
data.isna().sum()

Time                      0
Energy delta[Wh]          0
GHI                       0
temp                      0
pressure                  0
humidity                  0
wind_speed                0
rain_1h                   0
snow_1h                   0
clouds_all                0
isSun                     0
sunlightTime              0
dayLength                 0
SunlightTime/daylength    0
weather_type              0
hour                      0
month                     0
dtype: int64

In [19]:
# Re-inspect the frame after dropping rows with missing values. The rich
# display (rather than print()) renders a table whose footer reports the
# row and column counts, so any removed rows are visible at a glance.
data

                       Time  Energy delta[Wh]   GHI  temp  pressure  humidity  \
0       2017-01-01 00:00:00                 0   0.0   1.6      1021       100   
1       2017-01-01 00:15:00                 0   0.0   1.6      1021       100   
2       2017-01-01 00:30:00                 0   0.0   1.6      1021       100   
3       2017-01-01 00:45:00                 0   0.0   1.6      1021       100   
4       2017-01-01 01:00:00                 0   0.0   1.7      1020       100   
...                     ...               ...   ...   ...       ...       ...   
196771  2022-08-31 16:45:00               118  23.7  18.6      1023        57   
196772  2022-08-31 17:00:00                82  15.6  18.5      1023        61   
196773  2022-08-31 17:15:00                51   8.0  18.5      1023        61   
196774  2022-08-31 17:30:00                24   2.1  18.5      1023        61   
196775  2022-08-31 17:45:00                 0   0.0  18.5      1023        61   

        wind_speed  rain_1h

In [21]:
# Feature engineering: parse the Time column into pandas datetime values
# so it can be used for time-based operations.
data['Time'] = data['Time'].pipe(pd.to_datetime)

In [22]:
# Preview the first five rows again after the Time conversion.
data.head(n=5)

Unnamed: 0,Time,Energy delta[Wh],GHI,temp,pressure,humidity,wind_speed,rain_1h,snow_1h,clouds_all,isSun,sunlightTime,dayLength,SunlightTime/daylength,weather_type,hour,month
0,2017-01-01 00:00:00,0,0.0,1.6,1021,100,4.9,0.0,0.0,100,0,0,450,0.0,4,0,1
1,2017-01-01 00:15:00,0,0.0,1.6,1021,100,4.9,0.0,0.0,100,0,0,450,0.0,4,0,1
2,2017-01-01 00:30:00,0,0.0,1.6,1021,100,4.9,0.0,0.0,100,0,0,450,0.0,4,0,1
3,2017-01-01 00:45:00,0,0.0,1.6,1021,100,4.9,0.0,0.0,100,0,0,450,0.0,4,0,1
4,2017-01-01 01:00:00,0,0.0,1.7,1020,100,5.2,0.0,0.0,100,0,0,450,0.0,4,1,1


In [23]:
# Verify the conversion: the output below shows Time as datetime64[ns],
# with the other column dtypes unchanged.
data.dtypes

Time                      datetime64[ns]
Energy delta[Wh]                   int64
GHI                              float64
temp                             float64
pressure                           int64
humidity                           int64
wind_speed                       float64
rain_1h                          float64
snow_1h                          float64
clouds_all                         int64
isSun                              int64
sunlightTime                       int64
dayLength                          int64
SunlightTime/daylength           float64
weather_type                       int64
hour                               int64
month                              int64
dtype: object