In [1]:
# Import necessary libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import math

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics

In [3]:
# Load the propeller dataset from the Excel workbook into a DataFrame
data = pd.read_excel(io='Dataset.xlsx')

In [4]:
# Preview the first five rows to check the column layout
data.head(n=5)

Unnamed: 0,Blades,RPM,Diameter,Thrust,Ship_Speed,Hub_Diameter,Efficiency
0,3,200,2.0,25000,5,0.4,0.012197
1,3,250,2.0,25000,5,0.4,0.013144
2,3,300,2.0,25000,5,0.4,0.01419
3,3,350,2.0,25000,5,0.4,0.015349
4,3,400,2.0,25000,5,0.4,0.016637


In [5]:
# Separate the independent variables from the dependent variable:
# every column except the last is a feature, the last column is the target.
X = data.iloc[:, :-1].to_numpy()
# The slice -1: (rather than -1) keeps y two-dimensional, one column wide
y = data.iloc[:, -1:].to_numpy()

In [6]:
# Hold out 30% of the samples as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=21111404,
)

In [7]:
# Build a random forest of 5 trees that splits on mean absolute error.
# scikit-learn 1.0 renamed this criterion from "mae" to "absolute_error" and
# release 1.2 removed the old spelling, so the current name is used here.
RFReg = RandomForestRegressor(
    n_estimators=5,
    criterion="absolute_error",
    random_state=21111404,
)

# Fit on the training split; ravel() flattens the single-column target to 1-D
RFReg.fit(X_train, y_train.ravel())

RandomForestRegressor(criterion='mae', n_estimators=5, random_state=21111404)

In [8]:
# Predict the target (the Efficiency column) for the held-out test samples
y_predict_rfr = RFReg.predict(X=X_test)

# Evaluate the random forest on the test split with the R-squared score
r_square = metrics.r2_score(y_true=y_test, y_pred=y_predict_rfr)
print('R-Square Error associated with Random Forest Regression is:', r_square)

R-Square Error associated with Random Forest Regression is: 0.9782550305533042


In [9]:
# Positions of the three largest predictions: sort the negated values ascending, keep the first three
idx = np.argsort(-y_predict_rfr)[:3]

In [10]:
# Display the test-set positions of the three highest predictions
idx

array([742, 228, 385], dtype=int64)

In [11]:
# Show the feature rows of the test samples behind the three highest predictions
for top_row in X_test[idx]:
    print(top_row)

[4.0e+00 2.0e+03 2.0e+00 2.5e+04 5.0e+00 4.0e-01]
[5.0e+00 2.0e+03 2.0e+00 2.5e+04 5.0e+00 4.0e-01]
[4.00e+00 1.95e+03 2.00e+00 2.50e+04 5.00e+00 4.00e-01]


In [12]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares linear model on the training split
MLR = LinearRegression()
MLR.fit(X=X_train, y=y_train)

LinearRegression()

In [13]:
# Print the fitted intercept (constant) and the per-feature coefficients of the linear model
print(f'constant = {MLR.intercept_}')
print(f'coefficients = {MLR.coef_}')

constant = [0.05286604]
coefficients = [[-3.86388186e-03  4.99410639e-05 -1.15444769e-02  0.00000000e+00
   0.00000000e+00  2.51432406e-30]]


In [14]:
# Run the fitted linear model on the held-out test features
y_predict = MLR.predict(X=X_test)

In [15]:
# Put the true and predicted values in adjacent columns for a row-by-row comparison
df = pd.DataFrame({
    'y_test': np.ravel(y_test),
    'y_predict': np.ravel(y_predict),
})
df

Unnamed: 0,y_test,y_predict
0,0.000003,-0.064419
1,0.000014,-0.031930
2,0.000023,-0.016007
3,0.000027,-0.014830
4,0.002370,0.045579
...,...,...
750,0.000117,0.008422
751,0.000517,0.009254
752,0.004145,0.047623
753,0.002771,0.020798


In [16]:
# Score the linear model on the test split with the R-squared metric
r_square = metrics.r2_score(y_true=y_test, y_pred=y_predict)
print('R-Square Error:', r_square)

R-Square Error: 0.21111618738313676


In [17]:
# Second experiment: reload the data and repeat the random-forest fit with a
# different split seed, 4 trees and the squared-error split criterion.
# This cell rebinds data, X, y and the train/test arrays used by later cells.
data = pd.read_excel('Dataset.xlsx')

# Features are all columns but the last; the target is the last column.
# Indexing with -1 (not -1:) makes y one-dimensional here, so no ravel() is needed.
X = data.iloc[:, 0:-1].values
y = data.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# "squared_error" is the current name of the criterion formerly spelled "mse"
# (renamed in scikit-learn 1.0, old spelling removed in 1.2).
RFReg = RandomForestRegressor(criterion='squared_error', n_estimators=4, random_state=21111003)

# Fit the random forest regressor on the training split
RFReg.fit(X_train, y_train)

y_predict_rfr = RFReg.predict(X_test)

# metrics comes from the scikit-learn import cell near the top of the notebook
r_square = metrics.r2_score(y_test, y_predict_rfr)
print('R-Square Error:', r_square)

R-Square Error: 0.9865456311119819


In [18]:
# Highest predicted value on the test split; ndarray.max() is computed inside NumPy
# instead of iterating over the array element by element with the builtin max()
eff = y_predict_rfr.max()

In [19]:
# Feature row of the test sample that received the highest prediction
best_index = np.argmax(y_predict_rfr)
param = X_test[best_index]

In [20]:
# eff is the largest random-forest prediction on the test split (a predicted value, not a measured one)
print("Maximum Efficiency in Test Dataset =",eff)

Maximum Efficiency in Test Dataset = 0.7173886386330197


In [21]:
# Report the first three features of the best-predicted test sample
n_blades, rpm, diameter = param[:3]
print("Optimal Number of Blades in Test Dataset =", n_blades)
print("Optimal Rotations Per Minute of Propeller in Test Dataset =", rpm)
print("Optimal Propeller Diameter in Test Dataset =", diameter, "metres")

Optimal Number of Blades in Test Dataset = 4.0
Optimal Rotations Per Minute of Propeller in Test Dataset = 2000.0
Optimal Propeller Diameter in Test Dataset = 2.0 metres


In [22]:
# Polynomial (degree-2) regression on the current train/test split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Plain linear fit on the raw features.
# NOTE(review): LinReg is not used by the polynomial model below — confirm whether it is still needed.
LinReg = LinearRegression()
LinReg.fit(X_train, y_train)

# Expand the training features into all polynomial terms up to degree 2
polynom = PolynomialFeatures(degree=2)
X_polynom = polynom.fit_transform(X_train)

# Linear regression on the expanded features gives the polynomial model
PolyReg = LinearRegression()
PolyReg.fit(X_polynom, y_train)

# Reuse the transformer fitted on the training data: the test split is only
# transformed, never fitted on
y_predict_pr = PolyReg.predict(polynom.transform(X_test))

r_square = metrics.r2_score(y_test, y_predict_pr)
print('R-Square Error:', r_square)

R-Square Error: 0.5755558907640888


In [23]:
# Multiple linear regression on the current train/test split
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and training can be chained
MLR = LinearRegression().fit(X_train, y_train)

# Predict the test split and score the predictions with R-squared
y_predict = MLR.predict(X=X_test)
r_square = metrics.r2_score(y_true=y_test, y_pred=y_predict)
print('R-Square Error:', r_square)

R-Square Error: 0.23789679893866778
