In [3]:
import json
import pandas as pd
import numpy as np
import re
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.linear_model import LinearRegression

In [4]:
with open('file.json') as f:
    data = json.load(f)

In [5]:
target = data['design_state_data']['target']
models = data['design_state_data']['algorithms']

In [6]:
target

{'prediction_type': 'Regression',
 'target': 'petal_width',
 'type': 'regression',
 'partitioning': True}

In [7]:
models 

{'RandomForestClassifier': {'model_name': 'Random Forest Classifier',
  'is_selected': False,
  'min_trees': 10,
  'max_trees': 30,
  'feature_sampling_statergy': 'Default',
  'min_depth': 20,
  'max_depth': 30,
  'min_samples_per_leaf_min_value': 5,
  'min_samples_per_leaf_max_value': 50,
  'parallelism': 0},
 'RandomForestRegressor': {'model_name': 'Random Forest Regressor',
  'is_selected': True,
  'min_trees': 10,
  'max_trees': 20,
  'feature_sampling_statergy': 'Default',
  'min_depth': 20,
  'max_depth': 25,
  'min_samples_per_leaf_min_value': 5,
  'min_samples_per_leaf_max_value': 10,
  'parallelism': 0},
 'GBTClassifier': {'model_name': 'Gradient Boosted Trees',
  'is_selected': False,
  'num_of_BoostingStages': [67, 89],
  'feature_sampling_statergy': 'Fixed number',
  'learningRate': [],
  'use_deviance': True,
  'use_exponential': False,
  'fixed_number': 22,
  'min_subsample': 1,
  'max_subsample': 2,
  'min_stepsize': 0.1,
  'max_stepsize': 0.5,
  'min_iter': 20,
  'max_i

In [9]:
# Load the dataset

In [8]:
iris = pd.read_csv('iris.csv')

In [9]:
iris.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [10]:
iris['species'].value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: species, dtype: int64

In [11]:
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [None]:
# checking out if any values of petal length or sepal width are null

In [12]:
(iris['sepal_length'] == 0).sum()

0

In [13]:
(iris['petal_width'] == 0).sum()

0

In [14]:
(iris['sepal_width'] == 0).sum()

0

In [17]:
# Training the Model
# One Hot Encoding on Dataset

In [15]:
iris = pd.get_dummies(iris, columns = ['species'], drop_first = True)

In [16]:
X = iris.drop(columns = ['petal_width'], axis = 1)
Y = iris.petal_width

In [17]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size = 0.25, random_state = 2)

In [20]:
from sklearn.ensemble import RandomForestRegressor
ranfor = RandomForestRegressor(n_estimators = 20 ,max_depth =40, min_samples_leaf = 10)
ranfor.fit(X_train, y_train)
print('Random Forest Regression Training set Accuracy : ', ranfor.score(X_train, y_train))

Random Forest Regression Training set Accuracy :  0.9555296424595066


In [21]:
pred = ranfor.predict(X_test)
mse = mean_squared_error(y_test, pred)
rmse = mean_squared_error(y_test, pred, squared = False)
mae = mean_absolute_error(y_test, pred)
print(f"Mean Squared Error for Random_Forest_Regressor : {mse}")
print(f"Root Mean Squared Error for Random_ Forest_Regressor: {rmse}")
print(f"Mean Absolute Error for Random_Forest_Regressor: {mae}")

Mean Squared Error for Random_Forest_Regressor : 0.028663415699880224
Root Mean Squared Error for Random_ Forest_Regressor: 0.16930273388188458
Mean Absolute Error for Random_Forest_Regressor: 0.12982821673573505


In [22]:
# Using Gradient Boost 
from sklearn.ensemble import GradientBoostingRegressor
gbtr = GradientBoostingRegressor(n_estimators = 89, max_depth=7)
gbtr.fit(X_train, y_train)
print('Gradient Boosting Regressor Training Set Accuracy :', gbtr.score(X_train, y_train))

Gradient Boosting Regressor Training Set Accuracy : 0.9999211129448211


In [23]:
pred = gbtr.predict(X_test)
mse = mean_squared_error(y_test, pred)
rmse = mean_squared_error(y_test, pred, squared = False)
mae = mean_absolute_error(y_test, pred)
print(f"Mean Squared Error for Gradient_Boosting_Regressor : {mse}")
print(f"Root Mean Squared Error for Gradient_Boosting_Regressor: {rmse}")
print(f"Mean Absolute Error for Gradient_Boosting_Regressor: {mae}")

Mean Squared Error for Gradient_Boosting_Regressor : 0.05161986462356195
Root Mean Squared Error for Gradient_Boosting_Regressor: 0.22720005418917036
Mean Absolute Error for Gradient_Boosting_Regressor: 0.16139841544468794


In [22]:
# Using Ridge Regression
from sklearn.linear_model import Ridge
rdr = Ridge(max_iter = 50)
rdr.fit(X_train, y_train)
print('Ridge Regression training Accuracy:', rdr.score(X_train, y_train))

Ridge Regression training Accuracy: 0.9481347763745945


In [23]:
pred = rdr.predict(X_test)
mse = mean_squared_error(y_test, pred)
rmse = mean_squared_error(y_test, pred, squared = False)
mae = mean_absolute_error(y_test, pred)
print(f"Mean Squared Error for Ridge_Regressor : {mse}")
print(f"Root Mean Squared Error for Ridge_Regressor: {rmse}")
print(f"Mean Absolute Error for Ridge_Regressor: {mae}")

Mean Squared Error for Ridge_Regressor : 0.028977679660194447
Root Mean Squared Error for Ridge_Regressor: 0.17022831627022117
Mean Absolute Error for Ridge_Regressor: 0.12603910246238026


In [24]:
# Using Lasso Regression
from sklearn.linear_model import Lasso
lsr = Lasso(max_iter = 50)
lsr.fit(X_train, y_train)
print('Lasso Regression Training Accuracy:', lsr.score(X_train, y_train))

Lasso Regression Training Accuracy: 0.3347690867815504


In [25]:
pred = lsr.predict(X_test)
mse = mean_squared_error(y_test, pred)
rmse = mean_squared_error(y_test, pred, squared = False)
mae = mean_absolute_error(y_test, pred)
print(f"Mean Squared Error for Lasso_Regressor : {mse}")
print(f"Root Mean Squared Error for Lasso_Regressor: {rmse}")
print(f"Mean Absolute Error for Lasso_Regressor: {mae}")

Mean Squared Error for Lasso_Regressor : 0.40209874248376387
Root Mean Squared Error for Lasso_Regressor: 0.6341125629442803
Mean Absolute Error for Lasso_Regressor: 0.5489472957115965


In [26]:
# Using Elastic Net Regression
from sklearn.linear_model import ElasticNet 
enr = ElasticNet(max_iter = 50)
enr.fit(X_train, y_train)
print('Elastic Net Regression Training Accuracy:', enr.score(X_train, y_train))

Elastic Net Regression Training Accuracy: 0.7049468277182213


In [27]:
pred = enr.predict(X_test)
mse = mean_squared_error(y_test, pred)
rmse = mean_squared_error(y_test, pred, squared = False)
mae = mean_absolute_error(y_test, pred)
print(f"Mean Squared Error for ElasticNet_Regressor : {mse}")
print(f"Root Mean Squared Error for ElasticNet_Regressor: {rmse}")
print(f"Mean Absolute Error for ELasticNet_Regressor: {mae}")


Mean Squared Error for ElasticNet_Regressor : 0.16845726715576168
Root Mean Squared Error for ElasticNet_Regressor: 0.4104354604024385
Mean Absolute Error for ELasticNet_Regressor: 0.34328015138135004


In [28]:
# Using XGBoost Regression
import xgboost as xg
xgb = xg.XGBRegressor()
xgb.fit(X_train, y_train)
print('XGBoost Regression Training Accuracy:', xgb.score(X_train, y_train))

XGBoost Regression Training Accuracy: 0.9999047540693478


In [29]:
pred = xgb.predict(X_test)
mse = mean_squared_error(y_test,pred)
rmse = mean_squared_error(y_test,pred,squared=False)
mae = mean_absolute_error(y_test,pred)
print(f"Mean Squared Error for Xgboost_Regression :- {mse}")
print(f"Root Mean Squared Error Xgboost_Regression :- {rmse}")
print(f"Mean Absolute Error for Xgboost_Regression :-{mae}")

Mean Squared Error for Xgboost_Regression :- 0.049315047100074005
Root Mean Squared Error Xgboost_Regression :- 0.22206991489185113
Mean Absolute Error for Xgboost_Regression :-0.16168331703857372


In [31]:
# Using Decision Tree 
from sklearn.tree import DecisionTreeRegressor
tree = DecisionTreeRegressor(min_samples_leaf = 12)
tree.fit(X_train, y_train)
print('Decision Tree Regression Training Accuracy:', tree.score(X_train, y_train))

Decision Tree Regression Training Accuracy: 0.9491586412992195


In [32]:
pred = tree.predict(X_test)
mse =mean_squared_error(y_test,pred)
rmse = mean_squared_error(y_test,pred,squared=False)
mae = mean_absolute_error(y_test,pred)
print(f"Mean Squared Error for DecisionTree_Regression :- {mse}")
print(f"Root Mean Squared Error DecisionTree_Regression :- {rmse}")
print(f"Mean Absolute Error for DecisionTree_Regression :-{mae}")

Mean Squared Error for DecisionTree_Regression :- 0.03445087883001875
Root Mean Squared Error DecisionTree_Regression :- 0.18560947936465624
Mean Absolute Error for DecisionTree_Regression :-0.13697315648747538


In [30]:
# using SVM Regression
from sklearn.svm import SVR 
svr = SVR(kernel ='rbf', gamma = 7, max_iter=7)
svr.fit(X_train, y_train)
print('SVM Regression Training Accuracy:', svr.score(X_train, y_train))

SVM Regression Training Accuracy: 0.8548465776588885




In [31]:
pred = svr.predict(X_test)
mse =mean_squared_error(y_test,pred)
rmse = mean_squared_error(y_test,pred,squared=False)
mae = mean_absolute_error(y_test,pred)
print(f"Mean Squared Error for SVR_Regression :- {mse}")
print(f"Root Mean Squared Error SVR_Regression :- {rmse}")
print(f"Mean Absolute Error for SVR_Regression :-{mae}")


Mean Squared Error for SVR_Regression :- 0.1729406507953265
Root Mean Squared Error SVR_Regression :- 0.4158613360187822
Mean Absolute Error for SVR_Regression :-0.31810623277315275


In [32]:
#Using Linear Regression
from sklearn.linear_model import LinearRegression
lregr = LinearRegression()
  
lregr.fit(X_train, y_train)
print(lregr.score(X_test, y_test))


0.95122985883765


In [33]:
pred = lregr.predict(X_test)
mse = mean_squared_error(y_test,pred)
rmse = mean_squared_error(y_test,pred,squared=False)
mae = mean_absolute_error(y_test,pred)
print(f"Mean Squared Error for Linear_Regression :-  {mse}")
print(f"Root Mean Squared Error Linear_Regression :- {rmse}")
print(f"Mean Absolute Error for Linear_Regression :- {mae}")

Mean Squared Error for Linear_Regression :-  0.029069233238935586
Root Mean Squared Error Linear_Regression :- 0.1704970182699263
Mean Absolute Error for Linear_Regression :- 0.1263536659671673
