In [1]:
import catboost as cb
import numpy as np
import pandas as pd
import seaborn as sns
import shap
from sklearn.model_selection import train_test_split
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import MinMaxScaler
import tqdm as nootbook_tqdm


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df=pd.read_csv("sunny_cluster.csv")
df.head()

Unnamed: 0,Clearsky DHI,GHI,cluster,Temperature,Clearsky DNI,Clearsky GHI,Cloud Type,DHI,DNI,Solar Zenith Angle,Relative Humidity,Wind Speed
0,113,398,1,16.2,816,562,2,240,287,56.64,67.65,7.1
1,105,362,1,16.3,846,578,2,258,186,55.96,68.41,7.1
2,97,528,1,16.4,833,528,0,97,833,58.86,68.73,7.1
3,84,421,1,16.4,792,421,0,84,792,64.84,68.6,6.9
4,105,498,1,16.0,804,498,1,105,804,60.73,57.23,8.1


In [3]:
df.shape

(2472, 12)

In [4]:
df.isnull().sum()

Clearsky DHI          0
GHI                   0
cluster               0
Temperature           0
Clearsky DNI          0
Clearsky GHI          0
Cloud Type            0
DHI                   0
DNI                   0
Solar Zenith Angle    0
Relative Humidity     0
Wind Speed            0
dtype: int64

In [5]:
df.drop(['cluster'], axis = 1, inplace = True) 

In [6]:
df.describe()

Unnamed: 0,Clearsky DHI,GHI,Temperature,Clearsky DNI,Clearsky GHI,Cloud Type,DHI,DNI,Solar Zenith Angle,Relative Humidity,Wind Speed
count,2472.0,2472.0,2472.0,2472.0,2472.0,2472.0,2472.0,2472.0,2472.0,2472.0,2472.0
mean,159.983414,662.377427,24.368487,746.915453,696.968042,1.055825,182.382686,667.106796,42.631667,55.488612,3.898867
std,69.310638,195.74336,5.992083,140.269572,182.697742,2.011732,89.443809,229.13432,14.978986,11.649103,1.944037
min,69.0,300.0,12.2,122.0,321.0,0.0,69.0,16.0,10.48,18.48,0.2
25%,115.0,490.0,19.2,681.0,554.0,0.0,118.0,549.0,31.9975,48.39,2.5
50%,142.0,647.0,25.4,775.0,691.0,0.0,152.0,745.0,45.035,56.24,3.7
75%,180.25,829.0,29.425,844.0,850.0,1.0,225.25,836.0,55.2625,63.64,5.0
max,555.0,1042.0,38.0,1001.0,1042.0,8.0,555.0,1001.0,68.46,81.62,12.8


In [7]:
s1= MinMaxScaler(feature_range=(0,1))
X= s1.fit_transform(df)

In [8]:
s2=MinMaxScaler(feature_range=(0,1))
y= s2.fit_transform(df[['GHI']])

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42,shuffle=False)

In [10]:
train_dataset = cb.Pool(X_train, y_train) 
test_dataset = cb.Pool(X_test, y_test)

In [11]:
model = cb.CatBoostRegressor(learning_rate=0.1,depth=6,l2_leaf_reg=0.2,iterations=400)

In [12]:
import time
start=time.time()
model.fit(X_train,y_train)
end=time.time()
continental=end-start
print(continental)

0:	learn: 0.2420965	total: 138ms	remaining: 54.9s
1:	learn: 0.2191958	total: 145ms	remaining: 28.8s
2:	learn: 0.1987500	total: 151ms	remaining: 20s
3:	learn: 0.1803372	total: 156ms	remaining: 15.5s
4:	learn: 0.1636129	total: 162ms	remaining: 12.8s
5:	learn: 0.1484527	total: 168ms	remaining: 11s
6:	learn: 0.1342815	total: 173ms	remaining: 9.72s
7:	learn: 0.1217034	total: 181ms	remaining: 8.85s
8:	learn: 0.1108306	total: 185ms	remaining: 8.04s
9:	learn: 0.1008144	total: 189ms	remaining: 7.38s
10:	learn: 0.0919128	total: 193ms	remaining: 6.83s
11:	learn: 0.0835670	total: 198ms	remaining: 6.4s
12:	learn: 0.0758620	total: 202ms	remaining: 6.01s
13:	learn: 0.0691786	total: 206ms	remaining: 5.67s
14:	learn: 0.0630584	total: 212ms	remaining: 5.45s
15:	learn: 0.0575435	total: 216ms	remaining: 5.19s
16:	learn: 0.0525625	total: 220ms	remaining: 4.96s
17:	learn: 0.0480459	total: 227ms	remaining: 4.81s
18:	learn: 0.0439246	total: 231ms	remaining: 4.62s
19:	learn: 0.0400838	total: 235ms	remaining: 4

In [13]:

import time
start=time.time()
pred = model.predict(X_test)
end=time.time()
continental=end-start
print("Inference time:",continental)

Inference time: 0.007997751235961914


In [14]:
pred

array([0.18200792, 0.29970938, 0.41760971, 0.6553479 , 0.65669191,
       0.56243058, 0.35021082, 0.05203289, 0.1580431 , 0.38113898,
       0.58934268, 0.6769372 , 0.66775311, 0.57208615, 0.41205042,
       0.17930508, 0.40161258, 0.48667901, 0.59412351, 0.64872209,
       0.55395202, 0.24879981, 0.14181934, 0.17826569, 0.40371211,
       0.59754049, 0.68198457, 0.6757251 , 0.5990309 , 0.43194764,
       0.18551721, 0.20756553, 0.44030367, 0.61886146, 0.70738321,
       0.63691409, 0.60862518, 0.43482209, 0.20143993, 0.23409011,
       0.61779154, 0.69893622, 0.69926018, 0.61790319, 0.44030841,
       0.20157685, 0.21338311, 0.37637217, 0.42204026, 0.16723375,
       0.17403504, 0.09550315, 0.31434427, 0.60397453, 0.68671347,
       0.68634748, 0.60081142, 0.42112573, 0.18362889, 0.19945641,
       0.43796875, 0.62031018, 0.698843  , 0.69523721, 0.60073494,
       0.4231369 , 0.18215838, 0.21687233, 0.35102788, 0.62566397,
       0.71452935, 0.71274636, 0.62493227, 0.44203492, 0.20041

In [15]:
prediction = pred.reshape(pred.shape[0], 1)

In [16]:
prediction.shape

(495, 1)

In [17]:
Pred= s2.inverse_transform(prediction)

In [18]:
Actual= s2.inverse_transform(y_test)

In [19]:
x= Pred
z= Actual

In [20]:
from sklearn.metrics import mean_squared_error
import math

 
MSE = mean_squared_error(z,x)
 
RMSE = math.sqrt(MSE)
from sklearn.metrics import mean_absolute_error
MAE=mean_absolute_error(z,x)
MAE
print("Root Mean Square Error:",RMSE)
print("Mean Square Error:", MSE)
print("Mean Absolute Error:",MAE)


Root Mean Square Error: 2.9839782593294744
Mean Square Error: 8.90412625215096
Mean Absolute Error: 2.2557328120699958


In [21]:
max= x.max()
min=x.min()
NRMSE= (RMSE/(max-min))*100
print("max:",max)
print("min:",min)
print("Normalized Root Mean Square Error:",NRMSE)

max: 830.1807755650299
min: 338.60840560511417
Normalized Root Mean Square Error: 0.6070272541096638


In [22]:
Calculated = pd.DataFrame(Pred, columns = ['Prediction'])

In [23]:
Calculated.to_csv(r'E:\CatBoost_1\literature_comparison\Libya\catboost\sunny hours\preddiction.csv', index = False)

In [24]:
Actual = pd.DataFrame(Actual, columns = ['Actual'])

In [25]:
Actual.to_csv(r'E:\CatBoost_1\literature_comparison\Libya\catboost\sunny hours\Actual.csv', index = False)