In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Install a blanket "ignore" filter so no Python warnings are shown for the rest of the session.
warnings.filterwarnings(action='ignore')


In [2]:
from sklearn import metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost.sklearn import XGBRegressor
from sklearn.ensemble import AdaBoostRegressor

In [3]:
# Read df.csv (resolved relative to the working directory) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="df.csv")
df.head(n=5)

Unnamed: 0,power_kw,wind_speed_ms,theoretical_power_kw,wind_direction_deg,year,month,day,hour
0,380.047791,5.311336,416.328908,259.994904,2018,1,1,0
1,453.769196,5.672167,519.917511,268.641113,2018,1,1,0
2,306.376587,5.216037,390.900016,272.564789,2018,1,1,0
3,419.645905,5.659674,516.127569,271.258087,2018,1,1,0
4,380.650696,5.577941,491.702972,265.674286,2018,1,1,0


In [4]:
# Columns taken from df: the target ('power_kw') first, followed by the five predictors.
features = ['power_kw', 'wind_speed_ms', 'wind_direction_deg', 'month', 'day', 'hour']
training_data = df[features]

# Target vector and predictor matrix as plain NumPy arrays.
Y = training_data['power_kw'].to_numpy()
X = training_data.drop(columns='power_kw').to_numpy()

# 80/20 split; the fixed random_state makes the partition repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=1234,
)

# Report the shape of each partition.
print(
    "Dimensions of train set:",
    f"X_train: {x_train.shape}, Y_train: {y_train.shape}",
    sep="\n",
)
print(
    "Dimensions of test set:",
    f"X_test: {x_test.shape}, Y_test: {y_test.shape}",
    sep="\n",
)

Dimensions of train set:
X_train: (40424, 5), Y_train: (40424,)
Dimensions of test set:
X_test: (10106, 5), Y_test: (10106,)


In [5]:
import pickle

# Restore the estimator stored in etree_tuned.pickle.
# NOTE(review): pickle.load can execute arbitrary code; only open files from a trusted source.
with open('etree_tuned.pickle', 'rb') as f:
    model = pickle.load(f)

# Predictions for the full matrix and for each partition of the split.
y_total_prediction = model.predict(X)
y_pred_train = model.predict(x_train)
y_pred_test = model.predict(x_test)

# The test-set MSE is needed twice (as-is and under a square root), so compute it once.
test_mse = metrics.mean_squared_error(y_test, y_pred_test)

# NOTE(review): the two "Accuracy" rows print metrics.r2_score (R^2), not a classification accuracy.
print("\t\tError Table")
print('Mean Absolute Error      : ', metrics.mean_absolute_error(y_test, y_pred_test))
print('Mean Squared  Error      : ', test_mse)
print('Root Mean Squared  Error : ', np.sqrt(test_mse))
print('Accuracy on Training set   : ', metrics.r2_score(y_train, y_pred_train))
print('Accuracy on Testing set  : ', metrics.r2_score(y_test, y_pred_test))

		Error Table
Mean Absolute Error      :  51.646818753259495
Mean Squared  Error      :  17258.780104670965
Root Mean Squared  Error :  131.37267640065406
Accuracy on Training set   :  0.9999077946091646
Accuracy on Testing set  :  0.9900887605692937


In [6]:
# R^2 and MAE of y_total_prediction against Y, i.e. over every row of X (train and test together).
# NOTE(review): the first label reads "Testing set" although the score covers the whole dataset.
full_r2 = metrics.r2_score(Y, y_total_prediction)
full_mae = metrics.mean_absolute_error(Y, y_total_prediction)
print('Accuracy on Testing set  : ', full_r2)
print('Mean Absolute Error      : ', full_mae)

Accuracy on Testing set  :  0.9979225334845138
Mean Absolute Error      :  14.813175152006036


INFERENCE WITH COMPRESSED PICKLE

In [9]:
import bz2file as bz2
import pickle
import numpy as np
from sklearn.ensemble import RandomForestRegressor

In [10]:
# Define a function to load the model from a compressed pickle file
def decompress_pickle(file):
    """Load and return the object stored in a bz2-compressed pickle file.

    Args:
        file: Path of the compressed pickle to read (any value accepted by
            ``bz2.BZ2File`` as its first argument).

    Returns:
        The object reconstructed by ``pickle.load``.

    Warning:
        Unpickling can execute arbitrary code; only pass files that come
        from a trusted source.
    """
    # The context manager closes the compressed stream even when unpickling
    # raises, so the file handle is never left open.
    with bz2.BZ2File(file, 'rb') as compressed:
        return pickle.load(compressed)

# Restore the second estimator from the bz2-compressed pickle best_rf.pbz2.
model2 = decompress_pickle('best_rf.pbz2')

# NOTE: these three names previously held the predictions of `model`;
# from here on they hold the predictions of `model2`.
y_total_prediction = model2.predict(X)
y_pred_train = model2.predict(x_train)
y_pred_test = model2.predict(x_test)

# The test-set MSE is needed twice (as-is and under a square root), so compute it once.
test_mse = metrics.mean_squared_error(y_test, y_pred_test)

# NOTE(review): the two "Accuracy" rows print metrics.r2_score (R^2), not a classification accuracy.
print("\t\tError Table")
print('Mean Absolute Error      : ', metrics.mean_absolute_error(y_test, y_pred_test))
print('Mean Squared  Error      : ', test_mse)
print('Root Mean Squared  Error : ', np.sqrt(test_mse))
print('Accuracy on Training set   : ', metrics.r2_score(y_train, y_pred_train))
print('Accuracy on Testing set  : ', metrics.r2_score(y_test, y_pred_test))

		Error Table
Mean Absolute Error      :  59.34199220281618
Mean Squared  Error      :  22640.90121970747
Root Mean Squared  Error :  150.4689377237291
Accuracy on Training set   :  0.9977891265399955
Accuracy on Testing set  :  0.9869979574712376
