In [1]:

import numpy as np # 
import pandas as pd 
import requests, io 
import glob
from pprint import pprint
import matplotlib.pyplot as plt 


# scikit-learn modules
from sklearn.model_selection import train_test_split 
from sklearn.metrics import mean_squared_error 
from sklearn.ensemble import RandomForestRegressor 
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.model_selection import RandomizedSearchCV
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'
def Evolution_Metrics(model,x_test,y_test):
    y_pred = model.predict(x_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(color.BOLD)
    print('MAE: ',mae)
    print('MSE: ',mse)
    print('RMSE: ',rmse)
    print('R2: ',r2)
    print(color.END)



# M0000_FAU

**Train Datasets**

In [2]:
df=pd.read_csv(r'E:\Havelsan\Datasets\M0000_train_sensors.csv', engine='c')
SeaTemp='SW20'
WindSpeed='WC0'
Load='FAU'
df=df[df['SeaTemp']==SeaTemp]
df=df[df['WindSpeed']==WindSpeed]
df=df[df['Load']==Load]
result=df
result.dropna(inplace=True)
result.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
df_sample=result[abr_sensor_list]
x_train = df_sample.drop('Z02013', axis = 1) # Features
y_train = df_sample['Z02013']  # Target

**Test Datasets**

In [3]:
dft=pd.read_csv(r'E:\Havelsan\Datasets\M0000_test_sensors.csv', engine='c')
dft=dft[dft['SeaTemp']==SeaTemp]
dft=dft[dft['WindSpeed']==WindSpeed]
dft=dft[dft['Load']==Load]
dft.dropna(inplace=True)
dft.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
dft=dft[abr_sensor_list]
x_test = dft.drop('Z02013', axis = 1) # Features
y_test = dft['Z02013']  # Target

**Base Model**

In [4]:
# Initializing the Random Forest Regression model with 10 decision trees
model = RandomForestRegressor(n_estimators = 10, random_state = 0)

# Fitting the Random Forest Regression model to the data
model.fit(x_train, y_train) 

RandomForestRegressor(n_estimators=10, random_state=0)

In [5]:
Evolution_Metrics(model,x_test,y_test)

[1m
MAE:  0.0019692701676392375
MSE:  2.608539627674345e-05
RMSE:  0.005107386442863263
R2:  0.9822460873770426
[0m


**Randomizied Search Best Parameters**

In [6]:
# Create a RandomForestRegressor with the best parameters
model_3 = RandomForestRegressor(
   n_estimators= 400, 
    min_samples_split=5, 
    min_samples_leaf= 1, 
    max_features="log2", 
    max_depth=30, 
    bootstrap=False,
    random_state=42,  # Seed
    n_jobs=-1  # Use all processors
)

model_3.fit(x_train, y_train)

RandomForestRegressor(bootstrap=False, max_depth=30, max_features='log2',
                      min_samples_split=5, n_estimators=400, n_jobs=-1,
                      random_state=42)

In [7]:
Evolution_Metrics(model_3,x_test,y_test)

[1m
MAE:  0.0016682301246703681
MSE:  1.786738069870059e-05
RMSE:  0.00422698245781794
R2:  0.9878393292415243
[0m


**Optuna Best Parameters**

In [8]:
#Best parameters: {'n_estimators': 1800, 'max_features': 'sqrt', 'max_depth': 120, 'min_samples_split': 5, 'min_samples_leaf': 2, 'bootstrap': False}

In [9]:

model_3 = RandomForestRegressor(
    n_estimators= 1800, 
    min_samples_split=5, 
    min_samples_leaf= 2, 
    max_features="sqrt", 
    max_depth=120, 
    bootstrap=False,
    random_state=42,  # Seed
    n_jobs=-1  # Use all processors
)
# Train the model
model_3.fit(x_train, y_train)

RandomForestRegressor(bootstrap=False, max_depth=120, max_features='sqrt',
                      min_samples_leaf=2, min_samples_split=5,
                      n_estimators=1800, n_jobs=-1, random_state=42)

In [10]:
Evolution_Metrics(model_3,x_test,y_test)

[1m
MAE:  0.0017023170709286129
MSE:  1.7812054355918185e-05
RMSE:  0.004220432958348964
R2:  0.9878769847574722
[0m


# M0000_FAL

**Train Datasets**

In [11]:
df=pd.read_csv(r'E:\Havelsan\Datasets\M0000_train_sensors.csv', engine='c')
SeaTemp='SW20'
WindSpeed='WC0'
Load='FAL'
df=df[df['SeaTemp']==SeaTemp]
df=df[df['WindSpeed']==WindSpeed]
df=df[df['Load']==Load]
result=df
result.dropna(inplace=True)
result.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
df_sample=result[abr_sensor_list]
x_train = df_sample.drop('Z02013', axis = 1) # Features
y_train = df_sample['Z02013']  # Target

**Test Datasets**

In [12]:
dft=pd.read_csv(r'E:\Havelsan\Datasets\M0000_test_sensors.csv', engine='c')
dft=dft[dft['SeaTemp']==SeaTemp]
dft=dft[dft['WindSpeed']==WindSpeed]
dft=dft[dft['Load']==Load]
dft.dropna(inplace=True)
dft.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
dft=dft[abr_sensor_list]
x_test = dft.drop('Z02013', axis = 1) # Features
y_test = dft['Z02013']  # Target

**Base Model**

In [13]:
# Initializing the Random Forest Regression model with 10 decision trees
model = RandomForestRegressor(n_estimators = 10, random_state = 0)

# Fitting the Random Forest Regression model to the data
model.fit(x_train, y_train) 

RandomForestRegressor(n_estimators=10, random_state=0)

In [14]:
Evolution_Metrics(model,x_test,y_test)

[1m
MAE:  0.0024314444158959557
MSE:  7.624301802415181e-05
RMSE:  0.008731724802360173
R2:  0.9635959370154766
[0m


**Optuna Best Parameters**

In [15]:
#Best parameters: {'n_estimators': 2000, 'max_features': 'log2', 'max_depth': 450, 'min_samples_split': 2, 'min_samples_leaf': 4, 'bootstrap': False}

In [16]:
model_3 = RandomForestRegressor(
    n_estimators= 2000, 
    min_samples_split=2, 
    min_samples_leaf= 4, 
    max_features="log2", 
    max_depth=450, 
    bootstrap=False,
    random_state=42,  # Seed
    n_jobs=-1  # Use all processors
)
# Train the model
model_3.fit(x_train, y_train)

RandomForestRegressor(bootstrap=False, max_depth=450, max_features='log2',
                      min_samples_leaf=4, n_estimators=2000, n_jobs=-1,
                      random_state=42)

In [17]:
Evolution_Metrics(model_3,x_test,y_test)

[1m
MAE:  0.0023376507384050005
MSE:  6.000278011230617e-05
RMSE:  0.007746146145813811
R2:  0.9713502292660692
[0m


# M2503_FAU

**Train Datasets**

In [18]:
df=pd.read_csv(r'E:\Havelsan\Datasets\M2503_train_sensors.csv', engine='c')
SeaTemp='SW20'
WindSpeed='WC0'
Load='FAU'
df=df[df['SeaTemp']==SeaTemp]
df=df[df['WindSpeed']==WindSpeed]
df=df[df['Load']==Load]
result=df
result.dropna(inplace=True)
result.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
df_sample=result[abr_sensor_list]
x_train = df_sample.drop('Z02013', axis = 1) # Features
y_train = df_sample['Z02013']  # Target

**Test Datasets**

In [19]:
dft=pd.read_csv(r'E:\Havelsan\Datasets\M2503_test_sensors.csv', engine='c')
dft=dft[dft['SeaTemp']==SeaTemp]
dft=dft[dft['WindSpeed']==WindSpeed]
dft=dft[dft['Load']==Load]
dft.dropna(inplace=True)
dft.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
dft=dft[abr_sensor_list]
x_test = dft.drop('Z02013', axis = 1) # Features
y_test = dft['Z02013']  # Target

**Base Model**

In [20]:
# Initializing the Random Forest Regression model with 10 decision trees
model = RandomForestRegressor(n_estimators = 10, random_state = 0)

# Fitting the Random Forest Regression model to the data
model.fit(x_train, y_train) 

RandomForestRegressor(n_estimators=10, random_state=0)

In [21]:
Evolution_Metrics(model,x_test,y_test)

[1m
MAE:  0.02007961363842738
MSE:  0.0007937516518977585
RMSE:  0.028173598490390937
R2:  0.9998980855088034
[0m


**Optuna Best Parameters**

In [22]:
#Best parameters: {'n_estimators': 1800, 'max_features': 'sqrt', 'max_depth': 230, 'min_samples_split': 2, 'min_samples_leaf': 1, 'bootstrap': True}

In [23]:
model_3 = RandomForestRegressor(
    n_estimators= 1800, 
    min_samples_split=2, 
    min_samples_leaf= 1, 
    max_features="sqrt", 
    max_depth=230, 
    bootstrap=True,
    random_state=42,  # Seed
    n_jobs=-1  # Use all processors
)
# Train the model
model_3.fit(x_train, y_train)

RandomForestRegressor(max_depth=230, max_features='sqrt', n_estimators=1800,
                      n_jobs=-1, random_state=42)

In [24]:
Evolution_Metrics(model_3,x_test,y_test)

[1m
MAE:  0.01759698350182122
MSE:  0.000609739432631712
RMSE:  0.02469290247483499
R2:  0.9999217119310674
[0m


# M2503_FAL

**Train Datasets**

In [25]:
df=pd.read_csv(r'E:\Havelsan\Datasets\M2503_train_sensors.csv', engine='c')
SeaTemp='SW20'
WindSpeed='WC0'
Load='FAL'
df=df[df['SeaTemp']==SeaTemp]
df=df[df['WindSpeed']==WindSpeed]
df=df[df['Load']==Load]
result=df
result.dropna(inplace=True)
result.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
df_sample=result[abr_sensor_list]
x_train = df_sample.drop('Z02013', axis = 1) # Features
y_train = df_sample['Z02013']  # Target

**Test Datasets**

In [26]:
dft=pd.read_csv(r'E:\Havelsan\Datasets\M2503_test_sensors.csv', engine='c')
dft=dft[dft['SeaTemp']==SeaTemp]
dft=dft[dft['WindSpeed']==WindSpeed]
dft=dft[dft['Load']==Load]
dft.dropna(inplace=True)
dft.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
dft=dft[abr_sensor_list]
x_test = dft.drop('Z02013', axis = 1) # Features
y_test = dft['Z02013']  # Target

**Base Model**

In [27]:
# Initializing the Random Forest Regression model with 10 decision trees
model = RandomForestRegressor(n_estimators = 10, random_state = 0)

# Fitting the Random Forest Regression model to the data
model.fit(x_train, y_train) 

RandomForestRegressor(n_estimators=10, random_state=0)

In [28]:
Evolution_Metrics(model,x_test,y_test)

[1m
MAE:  0.028728945281169264
MSE:  0.0020164132038811185
RMSE:  0.04490448979646822
R2:  0.9998494688693416
[0m


**Optuna Best Parameters**

In [29]:
#Best parameters: {'n_estimators': 1400, 'max_features': 'sqrt', 'max_depth': 340, 'min_samples_split': 2, 'min_samples_leaf': 2, 'bootstrap': False}

In [30]:
model_3 = RandomForestRegressor(
    n_estimators= 1400, 
    min_samples_split=2, 
    min_samples_leaf= 2, 
    max_features="sqrt", 
    max_depth=340, 
    bootstrap=False,
    random_state=42,  # Seed
    n_jobs=-1  # Use all processors
)
# Train the model
model_3.fit(x_train, y_train)

RandomForestRegressor(bootstrap=False, max_depth=340, max_features='sqrt',
                      min_samples_leaf=2, n_estimators=1400, n_jobs=-1,
                      random_state=42)

In [31]:
Evolution_Metrics(model_3,x_test,y_test)

[1m
MAE:  0.02570917407155803
MSE:  0.0016620767001564198
RMSE:  0.04076857490956018
R2:  0.999875921123491
[0m


#  M2508_FAU

**Train Datasets**

In [2]:
df=pd.read_csv(r'E:\Havelsan\Datasets\M2508_train_sensors.csv', engine='c')
SeaTemp='SW20'
WindSpeed='WC0'
Load='FAU'
df=df[df['SeaTemp']==SeaTemp]
df=df[df['WindSpeed']==WindSpeed]
df=df[df['Load']==Load]
result=df
result.dropna(inplace=True)
result.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
df_sample=result[abr_sensor_list]
x_train = df_sample.drop('Z02013', axis = 1) # Features
y_train = df_sample['Z02013']  # Target

**Test Datasets**

In [3]:
dft=pd.read_csv(r'E:\Havelsan\Datasets\M2508_test_sensors.csv', engine='c')
dft=dft[dft['SeaTemp']==SeaTemp]
dft=dft[dft['WindSpeed']==WindSpeed]
dft=dft[dft['Load']==Load]
dft.dropna(inplace=True)
dft.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
dft=dft[abr_sensor_list]
x_test = dft.drop('Z02013', axis = 1) # Features
y_test = dft['Z02013']  # Target

**Base Model**

In [4]:
# Initializing the Random Forest Regression model with 10 decision trees
model = RandomForestRegressor(n_estimators = 10, random_state = 0)

# Fitting the Random Forest Regression model to the data
model.fit(x_train, y_train) 

RandomForestRegressor(n_estimators=10, random_state=0)

In [5]:
Evolution_Metrics(model,x_test,y_test)

[1m
MAE:  0.010011885653072556
MSE:  0.006170643619405303
RMSE:  0.07855344435099777
R2:  0.9897059310328041
[0m


**Optuna Best Parameters**

In [6]:
#Best parameters: {'n_estimators': 1200, 'max_features': 'log2', 'max_depth': 230, 'min_samples_split': 2, 'min_samples_leaf': 1, 'bootstrap': False}

In [16]:
model_3 = RandomForestRegressor(
    n_estimators= 1000, 
    min_samples_split=2, 
    min_samples_leaf= 1, 
    max_features="log2", 
    max_depth=230, 
    bootstrap=False,
    random_state=42,  # Seed
    n_jobs=-1  # Use all processors
)
# Train the model
model_3.fit(x_train, y_train)

RandomForestRegressor(bootstrap=False, max_depth=230, max_features='log2',
                      n_estimators=1000, n_jobs=-1, random_state=42)

In [17]:
Evolution_Metrics(model_3,x_test,y_test)

[1m
MAE:  0.01015857674119153
MSE:  0.007305963230256938
RMSE:  0.08547492749489138
R2:  0.9878119538247925
[0m


#  M2508_FAL

**Train Datasets**

In [39]:
df=pd.read_csv(r'E:\Havelsan\Datasets\M2508_train_sensors.csv', engine='c')
SeaTemp='SW20'
WindSpeed='WC0'
Load='FAL'
df=df[df['SeaTemp']==SeaTemp]
df=df[df['WindSpeed']==WindSpeed]
df=df[df['Load']==Load]
result=df
result.dropna(inplace=True)
result.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
df_sample=result[abr_sensor_list]
x_train = df_sample.drop('Z02013', axis = 1) # Features
y_train = df_sample['Z02013']  # Target

**Test Datasets**

In [40]:
dft=pd.read_csv(r'E:\Havelsan\Datasets\M2508_test_sensors.csv', engine='c')
dft=dft[dft['SeaTemp']==SeaTemp]
dft=dft[dft['WindSpeed']==WindSpeed]
dft=dft[dft['Load']==Load]
dft.dropna(inplace=True)
dft.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
dft=dft[abr_sensor_list]
x_test = dft.drop('Z02013', axis = 1) # Features
y_test = dft['Z02013']  # Target

**Base Model**

In [41]:
# Initializing the Random Forest Regression model with 10 decision trees
model = RandomForestRegressor(n_estimators = 10, random_state = 0)

# Fitting the Random Forest Regression model to the data
model.fit(x_train, y_train) 

RandomForestRegressor(n_estimators=10, random_state=0)

In [42]:
Evolution_Metrics(model,x_test,y_test)

[1m
MAE:  0.01207136862770884
MSE:  0.01745337770959034
RMSE:  0.13211123233696043
R2:  0.938434542534289
[0m


**Optuna Best Parameters**

In [43]:
#Best parameters: {'n_estimators': 1400, 'max_features': 'auto', 'max_depth': 1000, 'min_samples_split': 2, 'min_samples_leaf': 1, 'bootstrap': True}

In [44]:
model_3 = RandomForestRegressor(
    n_estimators= 1400, 
    min_samples_split=2, 
    min_samples_leaf= 1, 
    max_features="auto", 
    max_depth=1000, 
    bootstrap=True,
    random_state=42,  # Seed
    n_jobs=-1  # Use all processors
)
# Train the model
model_3.fit(x_train, y_train)

RandomForestRegressor(max_depth=1000, n_estimators=1400, n_jobs=-1,
                      random_state=42)

In [45]:
Evolution_Metrics(model_3,x_test,y_test)

[1m
MAE:  0.01202899728282533
MSE:  0.02272356845129992
RMSE:  0.1507433860947137
R2:  0.9198443470235016
[0m


#  ALL_FAL

**Train Datasets**

In [2]:
df1=pd.read_csv(r'E:\Havelsan\Datasets\M2508_train_sensors.csv', engine='c')
df2=pd.read_csv(r'E:\Havelsan\Datasets\M2503_train_sensors.csv', engine='c')
df3=pd.read_csv(r'E:\Havelsan\Datasets\M0000_train_sensors.csv', engine='c')
df = pd.concat([df1,df2,df3], axis=0)
SeaTemp='SW20'
WindSpeed='WC0'
Load='FAL'
df=df[df['SeaTemp']==SeaTemp]
df=df[df['WindSpeed']==WindSpeed]
df=df[df['Load']==Load]
result=df
result.dropna(inplace=True)
result.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
df_sample=result[abr_sensor_list]
x_train = df_sample.drop('Z02013', axis = 1) # Features
y_train = df_sample['Z02013']  # Target

**Test Datasets**

In [3]:
dft1=pd.read_csv(r'E:\Havelsan\Datasets\M2508_test_sensors.csv', engine='c')
dft2=pd.read_csv(r'E:\Havelsan\Datasets\M2503_test_sensors.csv', engine='c')
dft3=pd.read_csv(r'E:\Havelsan\Datasets\M0000_test_sensors.csv', engine='c')
dft = pd.concat([dft1,dft2,dft3], axis=0)
dft=dft[dft['SeaTemp']==SeaTemp]
dft=dft[dft['WindSpeed']==WindSpeed]
dft=dft[dft['Load']==Load]
dft.dropna(inplace=True)
dft.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
dft=dft[abr_sensor_list]
x_test = dft.drop('Z02013', axis = 1) # Features
y_test = dft['Z02013']  # Target

**Base Model**

In [4]:
# Initializing the Random Forest Regression model with 10 decision trees
model = RandomForestRegressor(n_estimators = 10, random_state = 0)

# Fitting the Random Forest Regression model to the data
model.fit(x_train, y_train) 

RandomForestRegressor(n_estimators=10, random_state=0)

In [5]:
Evolution_Metrics(model,x_test,y_test)

[1m
MAE:  0.013976220081759189
MSE:  0.006779769861085962
RMSE:  0.08233935791033327
R2:  0.9993671344555162
[0m


**Optuna Best Parameters**

In [6]:
#Best parameters: {'n_estimators': 800, 'max_features': 'auto', 'max_depth': 1000, 'min_samples_split': 10, 'min_samples_leaf': 6, 'bootstrap': False}

In [7]:
model_3 = RandomForestRegressor(
    n_estimators= 800, 
    min_samples_split=10, 
    min_samples_leaf= 6, 
    max_features="auto", 
    max_depth=1000, 
    bootstrap=False,
    random_state=42,  # Seed
    n_jobs=-1  # Use all processors
)
# Train the model
model_3.fit(x_train, y_train)

RandomForestRegressor(bootstrap=False, max_depth=1000, min_samples_leaf=6,
                      min_samples_split=10, n_estimators=800, n_jobs=-1,
                      random_state=42)

In [9]:
Evolution_Metrics(model_3,x_test,y_test)

[1m
MAE:  0.01791308829630532
MSE:  0.010924023244669314
RMSE:  0.10451805224299443
R2:  0.9989802842780294
[0m


#  ALL_FAU

**Train Datasets**

In [10]:
df1=pd.read_csv(r'E:\Havelsan\Datasets\M2508_train_sensors.csv', engine='c')
df2=pd.read_csv(r'E:\Havelsan\Datasets\M2503_train_sensors.csv', engine='c')
df3=pd.read_csv(r'E:\Havelsan\Datasets\M0000_train_sensors.csv', engine='c')
df = pd.concat([df1,df2,df3], axis=0)
SeaTemp='SW20'
WindSpeed='WC0'
Load='FAU'
df=df[df['SeaTemp']==SeaTemp]
df=df[df['WindSpeed']==WindSpeed]
df=df[df['Load']==Load]
result=df
result.dropna(inplace=True)
result.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
df_sample=result[abr_sensor_list]
x_train = df_sample.drop('Z02013', axis = 1) # Features
y_train = df_sample['Z02013']  # Target

**Test Datasets**

In [11]:
dft1=pd.read_csv(r'E:\Havelsan\Datasets\M2508_test_sensors.csv', engine='c')
dft2=pd.read_csv(r'E:\Havelsan\Datasets\M2503_test_sensors.csv', engine='c')
dft3=pd.read_csv(r'E:\Havelsan\Datasets\M0000_test_sensors.csv', engine='c')
dft = pd.concat([dft1,dft2,dft3], axis=0)
dft=dft[dft['SeaTemp']==SeaTemp]
dft=dft[dft['WindSpeed']==WindSpeed]
dft=dft[dft['Load']==Load]
dft.dropna(inplace=True)
dft.reset_index(drop=True,inplace=True)
abr_sensor_list = ['E02005', 'E02006', 'E02056', 'E03760', 'G00027', 'G00108', 'G02011', 'N02015', 'P00023', 'P01005', 'P01302', 'P01303', 'P01600', 'P01602', 'P02055', 'P02065', 'P02066', 'P02071', 'P02072', 'Q02004', 'T00002', 'T01010', 'T01011', 'T01350', 'T01351', 'T01601', 'T01603', 'T02014', 'T02040', 'T02041', 'T02042', 'T02044', 'T04600', 'Z00518', 'Z01970', 'Z02013']
dft=dft[abr_sensor_list]
x_test = dft.drop('Z02013', axis = 1) # Features
y_test = dft['Z02013']  # Target

**Base Model**

In [12]:
# Initializing the Random Forest Regression model with 10 decision trees
model = RandomForestRegressor(n_estimators = 10, random_state = 0)

# Fitting the Random Forest Regression model to the data
model.fit(x_train, y_train) 

RandomForestRegressor(n_estimators=10, random_state=0)

In [13]:
Evolution_Metrics(model,x_test,y_test)

[1m
MAE:  0.010522175017728648
MSE:  0.0021606878282932275
RMSE:  0.04648319941971752
R2:  0.9996876048256604
[0m


**Optuna Best Parameters**

In [14]:
#{'n_estimators': 1000, 'max_features': 'auto', 'max_depth': 780, 'min_samples_split': 14, 'min_samples_leaf': 8, 'bootstrap': True}

In [15]:
model_3 = RandomForestRegressor(
    n_estimators= 1000, 
    min_samples_split=14, 
    min_samples_leaf= 8, 
    max_features="auto", 
    max_depth=True, 
    bootstrap=False,
    random_state=42,  # Seed
    n_jobs=-1  # Use all processors
)
# Train the model
model_3.fit(x_train, y_train)

RandomForestRegressor(bootstrap=False, max_depth=True, min_samples_leaf=8,
                      min_samples_split=14, n_estimators=1000, n_jobs=-1,
                      random_state=42)

In [17]:
Evolution_Metrics(model_3,x_test,y_test)

[1m
MAE:  0.6251985014097601
MSE:  1.117086071954556
RMSE:  1.0569229262129551
R2:  0.838490181862064
[0m


In [20]:
from IPython.display import Image
from IPython.core.display import HTML 
Image(url= "deneme.png")