In [1]:
import catboost as cb
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
df=pd.read_csv("Bruska_data.csv")
df.head()

Unnamed: 0,time stamp,Bruska_wind speed,Bruska_wind direction,Bruska_air density,Zelengrad_wind speed,Zelengrad_wind direction,Zelengrad_air density,Bruska_active power output,Bruska_normally operating number
0,2018-01-01 00:00:00,9.363334,172.79999,1.21,10.635238,165.28897,1.207,21.608133,14.0
1,2018-01-01 01:00:00,9.266666,174.9999,1.21,10.261664,166.09566,1.207,21.05086,14.0
2,2018-01-01 02:00:00,9.586667,174.19984,1.209,13.286666,161.92586,1.207,21.25793,14.0
3,2018-01-01 03:00:00,9.32,167.53835,1.208,14.777143,159.17044,1.206,20.700977,14.0
4,2018-01-01 04:00:00,9.267742,160.9163,1.207,11.447466,151.64941,1.205,20.331597,14.0


In [3]:
df.shape

(17352, 9)

In [4]:
df.isnull().sum()

time stamp                            0
Bruska_wind speed                     0
Bruska_wind direction                 0
Bruska_air density                    0
Zelengrad_wind speed                  0
Zelengrad_wind direction              0
Zelengrad_air density                 0
Bruska_active power output          163
Bruska_normally operating number    163
dtype: int64

In [5]:
df.dropna(inplace=True)

In [6]:
df.shape

(17189, 9)

In [7]:
df.drop(['time stamp'], axis = 1, inplace = True) 

In [8]:
df.drop(['Bruska_normally operating number'], axis = 1, inplace = True) 

In [9]:
df.describe()

Unnamed: 0,Bruska_wind speed,Bruska_wind direction,Bruska_air density,Zelengrad_wind speed,Zelengrad_wind direction,Zelengrad_air density,Bruska_active power output
count,17189.0,17189.0,17189.0,17189.0,17189.0,17189.0,17189.0
mean,7.406829,124.526991,1.175567,7.484516,133.140676,1.173686,12.986623
std,4.659973,106.650245,0.035333,5.126521,92.257722,0.036053,12.696268
min,0.503352,0.0,1.093001,0.572682,0.420131,1.091,0.0
25%,3.754839,32.92855,1.147,3.405714,45.913185,1.145,1.23161
50%,6.476667,87.79958,1.174,6.075128,111.267845,1.172,8.279612
75%,10.194118,201.17917,1.2,10.486428,208.01765,1.198,24.654873
max,27.91893,359.94537,1.291999,30.919079,359.07263,1.29,35.967899


In [10]:
s1= MinMaxScaler(feature_range=(0,1))
X= s1.fit_transform(df)

In [11]:
s2=MinMaxScaler(feature_range=(0,1))
y= s2.fit_transform(df[['Bruska_active power output']])

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2,random_state=42,shuffle=False)

In [13]:
train_dataset = cb.Pool(X_train, y_train) 
test_dataset = cb.Pool(X_test, y_test)

In [14]:
model = cb.CatBoostRegressor(learning_rate=0.3,max_depth=2,l2_leaf_reg=0.2,iterations=1000)

In [15]:
import time
start=time.time()
model.fit(X_train,y_train)
end=time.time()
continental=end-start
print("convergence time:",continental)

0:	learn: 0.2591969	total: 144ms	remaining: 2m 23s
1:	learn: 0.1909529	total: 149ms	remaining: 1m 14s
2:	learn: 0.1405046	total: 153ms	remaining: 50.9s
3:	learn: 0.1050689	total: 156ms	remaining: 38.9s
4:	learn: 0.0792801	total: 159ms	remaining: 31.6s
5:	learn: 0.0625684	total: 162ms	remaining: 26.8s
6:	learn: 0.0486842	total: 164ms	remaining: 23.3s
7:	learn: 0.0392170	total: 167ms	remaining: 20.7s
8:	learn: 0.0328966	total: 170ms	remaining: 18.7s
9:	learn: 0.0293823	total: 174ms	remaining: 17.2s
10:	learn: 0.0263694	total: 176ms	remaining: 15.9s
11:	learn: 0.0249192	total: 179ms	remaining: 14.7s
12:	learn: 0.0238612	total: 181ms	remaining: 13.8s
13:	learn: 0.0232934	total: 184ms	remaining: 12.9s
14:	learn: 0.0227567	total: 187ms	remaining: 12.3s
15:	learn: 0.0217880	total: 190ms	remaining: 11.7s
16:	learn: 0.0215537	total: 192ms	remaining: 11.1s
17:	learn: 0.0209642	total: 195ms	remaining: 10.6s
18:	learn: 0.0208063	total: 197ms	remaining: 10.2s
19:	learn: 0.0203436	total: 200ms	remai

In [16]:

import time
start=time.time()
pred = model.predict(X_test)
end=time.time()
continental=end-start
print("inference time:",continental)

inference time: 0.05224347114562988


In [17]:
pred

array([ 1.71866481e-03, -1.35170642e-04, -6.66846762e-05, ...,
        3.00671644e-02,  4.95145354e-02,  4.79059763e-03])

In [18]:
prediction = pred.reshape(pred.shape[0], 1)

In [19]:
prediction.shape

(3438, 1)

In [20]:
Pred= s2.inverse_transform(prediction)

In [21]:
Actual= s2.inverse_transform(y_test)

In [22]:
x=Pred
z=Actual

In [23]:
from sklearn.metrics import mean_squared_error
import math

 
MSE = mean_squared_error(z,x)
 
RMSE = math.sqrt(MSE)
from sklearn.metrics import mean_absolute_error
MAE=mean_absolute_error(z,x)
MAE
from sklearn.metrics import mean_absolute_percentage_error
MAPE=mean_absolute_percentage_error(z,x)
MAPE

print("Root Mean Square Error:",RMSE)
print("Mean Square Error:", MSE)
print("Mean Absolute Error:",MAE)
print("Mean Absolute Percentage Error:",MAPE)

Root Mean Square Error: 0.05201533639621567
Mean Square Error: 0.002705595220411478
Mean Absolute Error: 0.03488376387147053
Mean Absolute Percentage Error: 2310436714625.806


In [24]:
max= x.max()
max

35.834639699046264

In [25]:
min=x.min()
min

-0.13623523771200496

In [26]:
NRMSE= (RMSE/(max-min))*100
print("Normalized Root Mean Square Error:",NRMSE)

Normalized Root Mean Square Error: 0.1446040344797444


In [27]:
Calculated = pd.DataFrame(Pred, columns = ['Prediction'])

In [28]:
Actual = pd.DataFrame(Actual, columns = ['Actual'])

In [29]:
combined_df = pd.concat([Calculated, Actual], axis=1)

In [31]:
combined_df.to_csv(r'C:\Users\sunrise\Desktop\Wind power forecasting\Bruska\CatBoost\CatBoost_output.csv', index = False)