## WIND POWER GENERATION FORECASTING

In [1]:
# importing all the necessary libraries
import pandas as pd # Data Manipulation
import numpy as np # Numerical Python - Linear Algebra
import matplotlib.pyplot as plt # Visualization Lib
import seaborn as sns
from sklearn.model_selection import train_test_split # sklearn - ML
from sklearn.preprocessing import StandardScaler # Scaling

In [2]:
# Read the raw CSV export of each of the four sites into its own DataFrame
location1, location2, location3, location4 = map(
    pd.read_csv,
    ('Location1.csv', 'Location2.csv', 'Location3.csv', 'Location4.csv'),
)

In [3]:
# Preview the first five rows of the Location 1 data
location1.head(n=5)

Unnamed: 0,Time,temperature_2m,relativehumidity_2m,dewpoint_2m,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,Power
0,2017-01-02 00:00:00,28.5,85,24.5,1.44,1.26,146,162,1.4,0.1635
1,2017-01-02 01:00:00,28.4,86,24.7,2.06,3.99,151,158,4.4,0.1424
2,2017-01-02 02:00:00,26.8,91,24.5,1.3,2.78,148,150,3.2,0.1214
3,2017-01-02 03:00:00,27.4,88,24.3,1.3,2.69,58,105,1.6,0.1003
4,2017-01-02 04:00:00,27.3,88,24.1,2.47,4.43,58,84,4.0,0.0793


In [4]:
# Tag every site's rows with its site label so the origin of each row
# stays identifiable once the frames are stacked together
for site_label, site_frame in (
    ('Location1', location1),
    ('Location2', location2),
    ('Location3', location3),
    ('Location4', location4),
):
    site_frame['Location'] = site_label

# Stack the four labelled frames vertically under a fresh 0..n-1 index
site_frames = [location1, location2, location3, location4]
merged_data = pd.concat(site_frames, ignore_index=True)

merged_data.head()

Unnamed: 0,Time,temperature_2m,relativehumidity_2m,dewpoint_2m,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,Power,Location
0,2017-01-02 00:00:00,28.5,85,24.5,1.44,1.26,146,162,1.4,0.1635,Location1
1,2017-01-02 01:00:00,28.4,86,24.7,2.06,3.99,151,158,4.4,0.1424,Location1
2,2017-01-02 02:00:00,26.8,91,24.5,1.3,2.78,148,150,3.2,0.1214,Location1
3,2017-01-02 03:00:00,27.4,88,24.3,1.3,2.69,58,105,1.6,0.1003,Location1
4,2017-01-02 04:00:00,27.3,88,24.1,2.47,4.43,58,84,4.0,0.0793,Location1


In [5]:
# Save the merged dataset to a new CSV file, leaving the row index out
merged_data.to_csv(path_or_buf='merged_locations.csv', index=False)

In [6]:
# Print a structural summary of the merged dataset:
# column names, non-null counts, dtypes and memory usage
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175200 entries, 0 to 175199
Data columns (total 11 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   Time                 175200 non-null  object 
 1   temperature_2m       175200 non-null  float64
 2   relativehumidity_2m  175200 non-null  int64  
 3   dewpoint_2m          175200 non-null  float64
 4   windspeed_10m        175200 non-null  float64
 5   windspeed_100m       175200 non-null  float64
 6   winddirection_10m    175200 non-null  int64  
 7   winddirection_100m   175200 non-null  int64  
 8   windgusts_10m        175200 non-null  float64
 9   Power                175200 non-null  float64
 10  Location             175200 non-null  object 
dtypes: float64(6), int64(3), object(2)
memory usage: 14.7+ MB


In [7]:
# Summary statistics of the numeric columns, transposed so that
# each feature is a row and each statistic is a column
merged_data.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
temperature_2m,175200.0,46.655994,22.012738,-31.2,30.2,46.9,65.3,101.7
relativehumidity_2m,175200.0,70.155291,17.326678,9.0,57.0,72.0,84.0,100.0
dewpoint_2m,175200.0,36.516241,20.933153,-36.2,21.9,36.2,54.6,78.7
windspeed_10m,175200.0,4.157639,2.012682,0.0,2.63,3.85,5.38,18.53
windspeed_100m,175200.0,6.879334,3.043964,0.0,4.7,6.68,8.8,24.59
winddirection_10m,175200.0,201.589446,99.63904,1.0,130.0,211.0,288.0,360.0
winddirection_100m,175200.0,201.53907,100.683777,0.0,129.0,211.0,290.0,360.0
windgusts_10m,175200.0,8.038205,3.617488,0.5,5.3,7.6,10.3,29.2
Power,175200.0,0.303429,0.257325,0.0,0.0877,0.2331,0.4742,0.9994


In [8]:
# Count the missing values in each column
merged_data.isna().sum()

Time                   0
temperature_2m         0
relativehumidity_2m    0
dewpoint_2m            0
windspeed_10m          0
windspeed_100m         0
winddirection_10m      0
winddirection_100m     0
windgusts_10m          0
Power                  0
Location               0
dtype: int64

In [9]:
# Count the rows that exactly repeat an earlier row
merged_data.duplicated(keep='first').sum()

np.int64(0)

In [10]:
# One-hot encode the categorical 'Location' column. drop_first=True leaves out
# the first category (Location1), so a row whose three dummy columns are all
# False belongs to Location1.
# The guard keeps this cell safe to re-run: once encoded, 'Location' no longer
# exists and an unguarded second run would raise a KeyError.
if 'Location' in merged_data.columns:
    merged_data = pd.get_dummies(merged_data, columns=['Location'], drop_first=True)
merged_data.head()

Unnamed: 0,Time,temperature_2m,relativehumidity_2m,dewpoint_2m,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,Power,Location_Location2,Location_Location3,Location_Location4
0,2017-01-02 00:00:00,28.5,85,24.5,1.44,1.26,146,162,1.4,0.1635,False,False,False
1,2017-01-02 01:00:00,28.4,86,24.7,2.06,3.99,151,158,4.4,0.1424,False,False,False
2,2017-01-02 02:00:00,26.8,91,24.5,1.3,2.78,148,150,3.2,0.1214,False,False,False
3,2017-01-02 03:00:00,27.4,88,24.3,1.3,2.69,58,105,1.6,0.1003,False,False,False
4,2017-01-02 04:00:00,27.3,88,24.1,2.47,4.43,58,84,4.0,0.0793,False,False,False


In [11]:
# List the column labels to confirm the dummy columns created by the encoding
merged_data.columns

Index(['Time', 'temperature_2m', 'relativehumidity_2m', 'dewpoint_2m',
       'windspeed_10m', 'windspeed_100m', 'winddirection_10m',
       'winddirection_100m', 'windgusts_10m', 'Power', 'Location_Location2',
       'Location_Location3', 'Location_Location4'],
      dtype='object')

In [12]:
# Remove the raw 'Time' string column from the feature table.
# Rebinding the result (instead of inplace=True) together with errors='ignore'
# keeps this cell idempotent: re-running it after 'Time' is already gone is a
# no-op rather than a KeyError.
# NOTE(review): this discards the only timestamp information in the frame;
# confirm no later step needs time-based features or a chronological split.
merged_data = merged_data.drop(columns='Time', errors='ignore')

In [13]:
# Preview the first five rows to confirm the 'Time' column is gone
merged_data.head(n=5)

Unnamed: 0,temperature_2m,relativehumidity_2m,dewpoint_2m,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,Power,Location_Location2,Location_Location3,Location_Location4
0,28.5,85,24.5,1.44,1.26,146,162,1.4,0.1635,False,False,False
1,28.4,86,24.7,2.06,3.99,151,158,4.4,0.1424,False,False,False
2,26.8,91,24.5,1.3,2.78,148,150,3.2,0.1214,False,False,False
3,27.4,88,24.3,1.3,2.69,58,105,1.6,0.1003,False,False,False
4,27.3,88,24.1,2.47,4.43,58,84,4.0,0.0793,False,False,False


In [14]:
# Preview the first five rows of the Location 2 data
location2.head(n=5)

Unnamed: 0,Time,temperature_2m,relativehumidity_2m,dewpoint_2m,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,Power,Location
0,2017-01-02 00:00:00,14.5,91,12.4,6.37,9.58,68,72,9.9,0.2574,Location2
1,2017-01-02 01:00:00,14.5,92,12.7,6.46,9.23,68,72,9.5,0.2535,Location2
2,2017-01-02 02:00:00,14.7,92,12.7,6.18,9.27,67,74,9.4,0.2497,Location2
3,2017-01-02 03:00:00,14.7,92,12.8,5.91,9.08,62,69,9.1,0.2458,Location2
4,2017-01-02 04:00:00,14.8,92,12.9,5.88,8.72,58,65,8.7,0.2419,Location2


In [15]:
# Preview the first five rows of the Location 3 data
location3.head(n=5)

Unnamed: 0,Time,temperature_2m,relativehumidity_2m,dewpoint_2m,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,Power,Location
0,2017-01-02 00:00:00,29.7,55,15.4,4.96,8.46,124,129,8.8,0.3438,Location3
1,2017-01-02 01:00:00,29.6,56,15.6,4.97,8.43,120,124,8.5,0.3954,Location3
2,2017-01-02 02:00:00,29.8,56,16.1,5.69,9.18,121,123,9.7,0.4471,Location3
3,2017-01-02 03:00:00,29.8,58,16.8,5.49,8.88,123,126,9.9,0.4988,Location3
4,2017-01-02 04:00:00,30.2,59,17.8,5.19,8.36,124,127,9.4,0.5504,Location3


In [16]:
# Preview the first five rows of the Location 4 data
location4.head(n=5)

Unnamed: 0,Time,temperature_2m,relativehumidity_2m,dewpoint_2m,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,Power,Location
0,2017-01-02 00:00:00,22.7,82,18.0,3.21,7.6,86,90,5.3,0.3047,Location4
1,2017-01-02 01:00:00,22.0,82,17.4,3.4,7.92,88,94,5.7,0.3516,Location4
2,2017-01-02 02:00:00,21.7,82,17.1,3.81,8.41,87,93,6.4,0.3985,Location4
3,2017-01-02 03:00:00,21.7,85,17.8,3.7,8.26,90,97,6.4,0.4454,Location4
4,2017-01-02 04:00:00,22.4,88,19.5,3.85,8.5,81,91,6.6,0.4922,Location4


In [18]:
# Preview the first five rows of the merged feature table
merged_data.head(n=5)

Unnamed: 0,temperature_2m,relativehumidity_2m,dewpoint_2m,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,Power,Location_Location2,Location_Location3,Location_Location4
0,28.5,85,24.5,1.44,1.26,146,162,1.4,0.1635,False,False,False
1,28.4,86,24.7,2.06,3.99,151,158,4.4,0.1424,False,False,False
2,26.8,91,24.5,1.3,2.78,148,150,3.2,0.1214,False,False,False
3,27.4,88,24.3,1.3,2.69,58,105,1.6,0.1003,False,False,False
4,27.3,88,24.1,2.47,4.43,58,84,4.0,0.0793,False,False,False


In [19]:
# Preview the last five rows of the merged feature table
merged_data.tail(n=5)

Unnamed: 0,temperature_2m,relativehumidity_2m,dewpoint_2m,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,Power,Location_Location2,Location_Location3,Location_Location4
175195,25.5,94,24.0,4.01,6.66,4,8,8.2,0.3417,False,False,True
175196,25.3,92,23.2,4.7,7.52,1,5,7.9,0.3342,False,False,True
175197,24.9,90,22.5,4.61,7.57,4,8,7.9,0.3267,False,False,True
175198,24.9,89,22.1,4.6,7.44,2,6,7.8,0.3192,False,False,True
175199,24.9,88,21.8,4.65,7.77,9,12,7.8,0.3117,False,False,True


In [20]:
# Preview the first ten rows of the merged feature table
merged_data.head(n=10)

Unnamed: 0,temperature_2m,relativehumidity_2m,dewpoint_2m,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,Power,Location_Location2,Location_Location3,Location_Location4
0,28.5,85,24.5,1.44,1.26,146,162,1.4,0.1635,False,False,False
1,28.4,86,24.7,2.06,3.99,151,158,4.4,0.1424,False,False,False
2,26.8,91,24.5,1.3,2.78,148,150,3.2,0.1214,False,False,False
3,27.4,88,24.3,1.3,2.69,58,105,1.6,0.1003,False,False,False
4,27.3,88,24.1,2.47,4.43,58,84,4.0,0.0793,False,False,False
5,27.6,90,25.2,2.73,5.59,66,80,4.5,0.0778,False,False,False
6,26.7,92,24.9,2.1,5.82,65,78,4.7,0.0959,False,False,False
7,28.4,91,26.1,3.1,6.24,69,78,4.8,0.1141,False,False,False
8,30.2,88,27.0,3.54,6.89,74,81,5.8,0.1322,False,False,False
9,34.0,82,29.1,3.44,6.01,82,86,6.5,0.1503,False,False,False
