In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")


In [None]:
df = pd.read_csv('insurance.csv')

In [None]:
df.isnull().sum().sort_values(ascending= False)


age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

In [None]:
df['sex'] = pd.factorize(df['sex'])[0] + 1
df['region'] = pd.factorize(df['region'])[0] + 1
df['smoker'] = pd.factorize(df['smoker'])[0] + 1
corr = df.corr()
corr['charges'].sort_values(ascending=False)

charges     1.000000
age         0.299008
bmi         0.198341
children    0.067998
sex         0.057292
region      0.006208
smoker     -0.787251
Name: charges, dtype: float64

In [None]:
X = df.drop('charges', axis = 1)
y = df['charges']
X_train, X_test, y_train, y_test= train_test_split(X, y, test_size=0.3, random_state=101)

In [None]:
scaler= StandardScaler()
scaler.fit(X_train)
X_train_scaled= scaler.transform(X_train)
X_test_scaled= scaler.transform(X_test)

# Linear Regression

In [None]:
linear_reg_model= LinearRegression()
linear_reg_model.fit(X_train_scaled, y_train)

In [None]:
y_pred = linear_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)
MAE_li_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_li_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_li_reg =np.sqrt(MSE_li_reg)
pd.DataFrame([MAE_li_reg, MSE_li_reg, RMSE_li_reg], index=['MAE_li_reg', 'MSE_li_reg', 'RMSE_li_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_li_reg,3990.25
MSE_li_reg,33530130.0
RMSE_li_reg,5790.521


In [None]:
scores = cross_val_score(linear_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.88791389 0.85653048 0.84404195 0.87198372 0.84417492]


# Gradient Boost



In [None]:
Gradient_model = GradientBoostingRegressor()
Gradient_model.fit(X_train_scaled, y_train)

In [None]:
y_pred = Gradient_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)
MAE_gradient= metrics.mean_absolute_error(y_test, y_pred)
MSE_gradient = metrics.mean_squared_error(y_test, y_pred)
RMSE_gradient =np.sqrt(MSE_gradient)
pd.DataFrame([MAE_gradient, MSE_gradient, RMSE_gradient], index=['MAE_gradient', 'MSE_gradient', 'RMSE_gradient'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_gradient,2526.184
MSE_gradient,21100720.0
RMSE_gradient,4593.552


In [None]:
scores = cross_val_score(Gradient_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.9459288  0.9145374  0.91686777 0.9245836  0.91745967]


# XGBoost Regression

In [None]:
XGB_model =XGBRegressor()
XGB_model.fit(X_train_scaled, y_train);

In [None]:
y_pred = XGB_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)
MAE_XGB= metrics.mean_absolute_error(y_test, y_pred)
MSE_XGB = metrics.mean_squared_error(y_test, y_pred)
RMSE_XGB =np.sqrt(MSE_XGB)
pd.DataFrame([MAE_XGB, MSE_XGB, RMSE_XGB], index=['MAE_XGB', 'MSE_XGB', 'RMSE_XGB'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_XGB,3142.59
MSE_XGB,29765290.0
RMSE_XGB,5455.757


In [None]:
scores = cross_val_score(XGB_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.91782241 0.88730325 0.90410079 0.90440364 0.88383429]


# Decision Tree Regressor

In [None]:
tree_reg_model =DecisionTreeRegressor()
tree_reg_model.fit(X_train_scaled, y_train);

In [None]:
y_pred = tree_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)
MAE_tree_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_tree_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_tree_reg =np.sqrt(MSE_tree_reg)
pd.DataFrame([MAE_tree_reg, MSE_tree_reg, RMSE_tree_reg], index=['MAE_tree_reg', 'MSE_tree_reg', 'RMSE_tree_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_tree_reg,3533.16
MSE_tree_reg,50263850.0
RMSE_tree_reg,7089.7


In [None]:
scores = cross_val_score(tree_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.84365571 0.79897329 0.85711102 0.85617752 0.84559314]


In [None]:
r2_score(y_test, tree_reg_model.predict(X_test_scaled))


0.6421920507336811

# Random Forest Regressor

In [None]:
forest_reg_model =RandomForestRegressor()
forest_reg_model.fit(X_train_scaled, y_train);

In [None]:
y_pred = forest_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)
MAE_forest_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_forest_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_forest_reg =np.sqrt(MSE_forest_reg)
pd.DataFrame([MAE_forest_reg, MSE_forest_reg, RMSE_forest_reg], index=['MAE_forest_reg', 'MSE_forest_reg', 'RMSE_forest_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_forest_reg,2881.751
MSE_forest_reg,25554630.0
RMSE_forest_reg,5055.158


In [None]:
scores = cross_val_score(forest_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.94269486 0.90634129 0.92126266 0.91531059 0.90681894]


In [None]:
r2_score(y_test, forest_reg_model.predict(X_test_scaled))


0.8180869768973499

# SVM Regressor

In [None]:
 from sklearn.svm import SVR


In [None]:
svm_reg_model =SVR()
svm_reg_model.fit(X_train_scaled, y_train);

In [None]:
y_pred = svm_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)
MAE_svm_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_svm_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_svm_reg =np.sqrt(MSE_svm_reg)
pd.DataFrame([MAE_svm_reg, MSE_svm_reg, RMSE_svm_reg], index=['MAE_svm_reg', 'MSE_svm_reg', 'RMSE_svm_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_svm_reg,7850.719
MSE_svm_reg,151400900.0
RMSE_svm_reg,12304.51


In [None]:
scores = cross_val_score(svm_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[nan nan nan nan nan]


In [None]:
r2_score(y_test, svm_reg_model.predict(X_test_scaled))


-0.07776159498069157

In [None]:

svm_reg_model = SVR()
svm_reg_model.fit(X_train_scaled, y_train)


In [None]:
y_pred = svm_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)

In [None]:
MAE_svm_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_svm_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_svm_reg =np.sqrt(MSE_svm_reg)
pd.DataFrame([MAE_svm_reg, MSE_svm_reg, RMSE_svm_reg], index=['MAE_svm_reg', 'MSE_svm_reg', 'RMSE_svm_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_svm_reg,7850.719
MSE_svm_reg,151400900.0
RMSE_svm_reg,12304.51


In [None]:
scores = cross_val_score(svm_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[nan nan nan nan nan]


In [None]:

import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(X_train_scaled, y_train, epochs=100)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7c9fef608160>

In [None]:
y_pred = model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)



In [None]:
!pip install tensorflow-addons

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
Successfully installed tensorflow-addons-0.23.0 typeguard-2.13.3


In [None]:
scores = cross_val_score(keras.wrappers.scikit_learn.KerasClassifier(model), X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

AttributeError: module 'keras.api._v2.keras' has no attribute 'wrappers'

In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score
import numpy as np
import pandas as pd

mlp_reg_model = MLPRegressor(random_state=42)

mlp_reg_model.fit(X_train_scaled, y_train)

y_pred_mlp = mlp_reg_model.predict(X_test_scaled)

MAE_mlp_reg = mean_absolute_error(y_test, y_pred_mlp)
MSE_mlp_reg = mean_squared_error(y_test, y_pred_mlp)
RMSE_mlp_reg = np.sqrt(MSE_mlp_reg)

metrics_mlp_reg = pd.DataFrame([MAE_mlp_reg, MSE_mlp_reg, RMSE_mlp_reg],
                               index=['MAE_mlp_reg', 'MSE_mlp_reg', 'RMSE_mlp_reg'],
                               columns=['Metrics'])

print(metrics_mlp_reg)

scores_mlp_reg = cross_val_score(mlp_reg_model, X_train_scaled, y_train, cv=5, scoring='neg_mean_squared_error')
print(np.sqrt(-scores_mlp_reg))

r2_mlp_reg = r2_score(y_test, y_pred_mlp)
print("R-squared score:", r2_mlp_reg)


                   Metrics
MAE_mlp_reg   1.255025e+04
MSE_mlp_reg   2.938602e+08
RMSE_mlp_reg  1.714235e+04
[17168.84474787 17041.67077343 19370.50183615 18396.6791381
 17746.7079636 ]
R-squared score: -1.0918717945708512


In [None]:
MAE_mlp_reg = mean_absolute_error(y_test, y_pred_mlp)
MSE_mlp_reg = mean_squared_error(y_test, y_pred_mlp)
RMSE_mlp_reg = np.sqrt(MSE_mlp_reg)

metrics_mlp_reg = pd.DataFrame([MAE_mlp_reg, MSE_mlp_reg, RMSE_mlp_reg],
                               index=['MAE_mlp_reg', 'MSE_mlp_reg', 'RMSE_mlp_reg'],
                               columns=['Metrics'])

print("Evaluation Metrics for MLP Regressor:")
print(metrics_mlp_reg)

scores_mlp_reg = cross_val_score(mlp_reg_model, X_train_scaled, y_train, cv=5, scoring='neg_mean_squared_error')
print("Cross-Validation RMSE Scores:")
print(np.sqrt(-scores_mlp_reg))

r2_mlp_reg = r2_score(y_test, y_pred_mlp)
print("R-squared score:", r2_mlp_reg)

Evaluation Metrics for MLP Regressor:
                   Metrics
MAE_mlp_reg   1.255025e+04
MSE_mlp_reg   2.938602e+08
RMSE_mlp_reg  1.714235e+04
Cross-Validation RMSE Scores:
[17168.84474787 17041.67077343 19370.50183615 18396.6791381
 17746.7079636 ]
R-squared score: -1.0918717945708512


In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score
import numpy as np
import pandas as pd

mlp_reg_model = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', random_state=42)

mlp_reg_model.fit(X_train_scaled, y_train)

y_pred_mlp = mlp_reg_model.predict(X_test_scaled)

MAE_mlp_reg = mean_absolute_error(y_test, y_pred_mlp)
MSE_mlp_reg = mean_squared_error(y_test, y_pred_mlp)
RMSE_mlp_reg = np.sqrt(MSE_mlp_reg)
r2_mlp_reg = r2_score(y_test, y_pred_mlp)

print("Evaluation Metrics for MLP Regressor:")
print("MAE:", MAE_mlp_reg)
print("MSE:", MSE_mlp_reg)
print("RMSE:", RMSE_mlp_reg)
print("R-squared score:", r2_mlp_reg)

scores_mlp_reg = cross_val_score(mlp_reg_model, X_train_scaled, y_train, cv=5, scoring='neg_mean_squared_error')
print("Cross-Validation RMSE Scores:")
print(np.sqrt(-scores_mlp_reg))


Evaluation Metrics for MLP Regressor:
MAE: 4287.75273695574
MSE: 39925325.836424015
RMSE: 6318.649051531824
R-squared score: 0.7157878053864326
Cross-Validation RMSE Scores:
[6849.48983979 8137.86372953 9408.60437645 7835.25036552 8509.6937565 ]


# CatBoost Regression

In [None]:

!pip install catboost
from catboost import CatBoostRegressor

cat_reg_model = CatBoostRegressor(random_state=42)

cat_reg_model.fit(X_train_scaled, y_train)


Learning rate set to 0.040517
0:	learn: 11841.1472117	total: 929us	remaining: 928ms
1:	learn: 11494.4901114	total: 2ms	remaining: 996ms
2:	learn: 11148.0587171	total: 3.65ms	remaining: 1.21s
3:	learn: 10837.3853223	total: 4.44ms	remaining: 1.11s
4:	learn: 10561.4791540	total: 5.35ms	remaining: 1.06s
5:	learn: 10260.1133264	total: 6.16ms	remaining: 1.02s
6:	learn: 9976.6752823	total: 7.02ms	remaining: 996ms
7:	learn: 9702.2782990	total: 7.92ms	remaining: 982ms
8:	learn: 9439.3669799	total: 8.76ms	remaining: 965ms
9:	learn: 9195.1059498	total: 9.35ms	remaining: 926ms
10:	learn: 8973.7282125	total: 10.6ms	remaining: 950ms
11:	learn: 8734.3641507	total: 11.5ms	remaining: 946ms
12:	learn: 8512.3834736	total: 12.4ms	remaining: 944ms
13:	learn: 8304.7923180	total: 13.3ms	remaining: 936ms
14:	learn: 8096.7513151	total: 14.2ms	remaining: 932ms
15:	learn: 7944.6905297	total: 14.7ms	remaining: 903ms
16:	learn: 7760.7422284	total: 15.6ms	remaining: 904ms
17:	learn: 7586.9047449	total: 16.3ms	remai

<catboost.core.CatBoostRegressor at 0x7edc0636c7c0>

In [None]:
y_pred = cat_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)


In [None]:
MAE_cat_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_cat_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_cat_reg =np.sqrt(MSE_cat_reg)
pd.DataFrame([MAE_cat_reg, MSE_cat_reg, RMSE_cat_reg], index=['MAE_cat_reg', 'MSE_cat_reg', 'RMSE_cat_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_cat_reg,2767.633
MSE_cat_reg,23786850.0
RMSE_cat_reg,4877.176


In [None]:
scores = cross_val_score(cat_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
5:	learn: 10404.9346680	total: 29.6ms	remaining: 4.9s
6:	learn: 10132.3707999	total: 30.6ms	remaining: 4.34s
7:	learn: 9883.5885239	total: 39.7ms	remaining: 4.93s
8:	learn: 9630.2427553	total: 41.2ms	remaining: 4.53s
9:	learn: 9391.5934728	total: 41.7ms	remaining: 4.13s
10:	learn: 9193.8669223	total: 42.5ms	remaining: 3.82s
11:	learn: 8962.0173618	total: 47.1ms	remaining: 3.88s
12:	learn: 8746.6583619	total: 49.1ms	remaining: 3.73s
13:	learn: 8546.7379436	total: 51.8ms	remaining: 3.65s
14:	learn: 8361.8561262	total: 56.8ms	remaining: 3.73s
15:	learn: 8211.3981648	total: 57.4ms	remaining: 3.53s
16:	learn: 8033.8146547	total: 65.1ms	remaining: 3.76s
17:	learn: 7864.4408160	total: 65.9ms	remaining: 3.59s
18:	learn: 7697.0617892	total: 67.2ms	remaining: 3.47s
19:	learn: 7547.9270192	total: 71ms	remaining: 3.48s
20:	learn: 7397.2696078	total: 73.3ms	remaining: 3.42s
21:	learn: 7267.0877085	total: 78.1ms	remaining: 3.47s
22:	le

In [None]:
r2_score(y_test, cat_reg_model.predict(X_test_scaled))

0.8306710953653351

# Huber Regressor

In [None]:

from sklearn.linear_model import HuberRegressor

huber_reg_model = HuberRegressor()
huber_reg_model.fit(X_train_scaled, y_train)


In [None]:
y_pred = huber_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)

In [None]:
MAE_huber_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_huber_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_huber_reg =np.sqrt(MSE_huber_reg)
pd.DataFrame([MAE_huber_reg, MSE_huber_reg, RMSE_huber_reg], index=['MAE_huber_reg', 'MSE_huber_reg', 'RMSE_huber_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_huber_reg,2953.231
MSE_huber_reg,38661090.0
RMSE_huber_reg,6217.804


In [None]:
scores = cross_val_score(huber_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.84405615 0.8060616  0.80818189 0.82158808 0.77469955]


In [None]:
r2_score(y_test, huber_reg_model.predict(X_test_scaled))

0.7247874130898331

# Light Gradient Boosting Machine

> Add blockquote



In [None]:

import lightgbm as lgb

lgb_reg_model = lgb.LGBMRegressor(random_state=42)

lgb_reg_model.fit(X_train_scaled, y_train)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000421 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 321
[LightGBM] [Info] Number of data points in the train set: 936, number of used features: 6
[LightGBM] [Info] Start training from score 13463.722542


In [None]:
y_pred = lgb_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)

In [None]:
MAE_lgb_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_lgb_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_lgb_reg =np.sqrt(MSE_lgb_reg)
pd.DataFrame([MAE_lgb_reg, MSE_lgb_reg, RMSE_lgb_reg], index=['MAE_lgb_reg', 'MSE_lgb_reg', 'RMSE_lgb_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_lgb_reg,2872.124
MSE_lgb_reg,24176500.0
RMSE_lgb_reg,4916.961


In [None]:
scores = cross_val_score(lgb_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000040 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 311
[LightGBM] [Info] Number of data points in the train set: 748, number of used features: 6
[LightGBM] [Info] Start training from score 13668.696329
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000026 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 309
[LightGBM] [Info] Number of data points in the train set: 749, number of used features: 6
[LightGBM] [Info] Start training from score 13641.002188
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000026 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enoug

In [None]:
r2_score(y_test, lgb_reg_model.predict(X_test_scaled))

0.8278972956552948

# Bayesian Ridge **Regressor**

In [None]:

from sklearn.linear_model import BayesianRidge

bayesian_reg_model = BayesianRidge()

bayesian_reg_model.fit(X_train_scaled, y_train)


In [None]:
y_pred = bayesian_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)

In [None]:
MAE_bayesian_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_bayesian_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_bayesian_reg =np.sqrt(MSE_bayesian_reg)
pd.DataFrame([MAE_bayesian_reg, MSE_bayesian_reg, RMSE_bayesian_reg], index=['MAE_bayesian_reg', 'MSE_bayesian_reg', 'RMSE_bayesian_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_bayesian_reg,3993.719
MSE_bayesian_reg,33555360.0
RMSE_bayesian_reg,5792.699


In [None]:
scores = cross_val_score(bayesian_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.88788039 0.85654165 0.84399707 0.87194725 0.84428607]


In [None]:
r2_score(y_test, bayesian_reg_model.predict(X_test_scaled))

0.7611329772340062

# Lasso Regression


In [None]:
from sklearn.linear_model import Lasso
lasso_reg_model = Lasso(alpha=0.05, random_state=90)

lasso_reg_model.fit(X_train_scaled, y_train)


In [None]:
y_pred = lasso_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)

In [None]:
MAE_lasso_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_lasso_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_lasso_reg =np.sqrt(MSE_lasso_reg)
pd.DataFrame([MAE_lasso_reg, MSE_lasso_reg, RMSE_lasso_reg], index=['MAE_lasso_reg', 'MSE_lasso_reg', 'RMSE_lasso_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_lasso_reg,3990.249
MSE_lasso_reg,33530150.0
RMSE_lasso_reg,5790.522


In [None]:
scores = cross_val_score(lasso_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.887914   0.85653092 0.84404188 0.87198371 0.84417531]


In [None]:
r2_score(y_test, lasso_reg_model.predict(X_test_scaled))

0.7613124772957979

# Least Angle Regression

In [None]:
from sklearn.linear_model import Lars
lars_reg_model = Lars(n_nonzero_coefs=1)

lars_reg_model.fit(X_train_scaled, y_train)


In [None]:
y_pred = lars_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)

In [None]:
MAE_lars_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_lars_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_lars_reg =np.sqrt(MSE_lars_reg)
pd.DataFrame([MAE_lars_reg, MSE_lars_reg, RMSE_lars_reg], index=['MAE_lars_reg', 'MSE_lars_reg', 'RMSE_lars_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_lars_reg,6187.347
MSE_lars_reg,67942530.0
RMSE_lars_reg,8242.726


In [None]:
scores = cross_val_score(lars_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.741348   0.7284067  0.71678763 0.72873124 0.73358299]


In [None]:
r2_score(y_test, lars_reg_model.predict(X_test_scaled))

0.5163446794045099

# AdaBoost Regressor

In [None]:
from sklearn.ensemble import AdaBoostRegressor

ada_reg_model =AdaBoostRegressor(random_state=42, learning_rate=0.01)
ada_reg_model.fit(X_train_scaled, y_train)


In [None]:
y_pred = ada_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)

In [None]:
MAE_ada_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_ada_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_ada_reg =np.sqrt(MSE_ada_reg)
pd.DataFrame([MAE_ada_reg, MSE_ada_reg, RMSE_ada_reg], index=['MAE_ada_reg', 'MSE_ada_reg', 'RMSE_ada_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_ada_reg,3007.376
MSE_ada_reg,22666320.0
RMSE_ada_reg,4760.916


In [None]:
scores = cross_val_score(ada_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.94529202 0.90773528 0.91442819 0.92973771 0.91062266]


In [None]:
r2_score(y_test, ada_reg_model.predict(X_test_scaled))

0.8386476521651605

# Orthogonal Matching Pursuit	Regressor


In [None]:
from sklearn.linear_model import OrthogonalMatchingPursuit
orth_reg_model = OrthogonalMatchingPursuit(n_nonzero_coefs=1)
orth_reg_model.fit(X_train_scaled, y_train)


In [None]:
y_pred = orth_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)

In [None]:
MAE_orth_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_orth_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_orth_reg =np.sqrt(MSE_orth_reg)
pd.DataFrame([MAE_orth_reg, MSE_orth_reg, RMSE_orth_reg], index=['MAE_orth_reg', 'MSE_orth_reg', 'RMSE_orth_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_orth_reg,5513.935
MSE_orth_reg,54473270.0
RMSE_orth_reg,7380.601


In [None]:
scores = cross_val_score(orth_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.80064687 0.7738914  0.78738299 0.79020527 0.78286924]


In [None]:
r2_score(y_test, orth_reg_model.predict(X_test_scaled))

0.6122268811542722

# Elastic Net Regressor

In [None]:
from sklearn.linear_model import ElasticNet

elastic_reg_model = ElasticNet(alpha=0.05, l1_ratio=0.5, random_state=42)

elastic_reg_model.fit(X_train_scaled, y_train)


In [None]:
y_pred = elastic_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)

In [None]:
MAE_elastic_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_elastic_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_elastic_reg =np.sqrt(MSE_elastic_reg)
pd.DataFrame([MAE_elastic_reg, MSE_elastic_reg, RMSE_elastic_reg], index=['MAE_elastic_reg', 'MSE_elastic_reg', 'RMSE_elastic_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_elastic_reg,4032.087
MSE_elastic_reg,33858310.0
RMSE_elastic_reg,5818.789


In [None]:
scores = cross_val_score(elastic_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.88740559 0.85640921 0.84339717 0.87143716 0.84493835]


In [None]:
r2_score(y_test, elastic_reg_model.predict(X_test_scaled))

0.7589764531451498

# Ridge Regressor

In [None]:
from sklearn.linear_model import Ridge

ridge_reg_model = Ridge(alpha=0.05, random_state=42)
ridge_reg_model.fit(X_train_scaled, y_train)


In [None]:
y_pred = ridge_reg_model.predict(X_test_scaled)
y_pred = pd.DataFrame(y_pred)

In [None]:
MAE_ridge_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_ridge_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_ridge_reg =np.sqrt(MSE_ridge_reg)
pd.DataFrame([MAE_ridge_reg, MSE_ridge_reg, RMSE_ridge_reg], index=['MAE_ridge_reg', 'MSE_ridge_reg', 'RMSE_ridge_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
MAE_ridge_reg,3990.332
MSE_ridge_reg,33530730.0
RMSE_ridge_reg,5790.572


In [None]:
scores = cross_val_score(ridge_reg_model, X_train_scaled, y_train, cv=5)
print(np.sqrt(scores))

[0.88791321 0.85653082 0.84404088 0.87198293 0.84417776]


In [None]:
r2_score(y_test, ridge_reg_model.predict(X_test_scaled))

0.7613083624094129

# OLC Regression

In [None]:
lm=sm.OLS(y_train,X_train)
model=lm.fit()
model.summary()


NameError: name 'sm' is not defined

# SVR

In [None]:
from sklearn.svm import SVR

svr_reg = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)



In [None]:
svr_reg= metrics.mean_absolute_error(y_test, y_pred)
MSE_svr_reg = metrics.mean_squared_error(y_test, y_pred)
RMSE_svr_reg =np.sqrt(MSE_svr_reg)
pd.DataFrame([svr_reg, MSE_svr_reg, RMSE_svr_reg], index=['svr_reg', 'MSE_svr_reg', 'RMSE_svr_reg'], columns=['Metrics'])

Unnamed: 0,Metrics
svr_reg,3990.332
MSE_svr_reg,33530730.0
RMSE_svr_reg,5790.572


In [None]:
scores = cross_val_score(svr_reg, X_train_scaled, y_train, cv=5)

print(np.sqrt(scores))

TypeError: estimator should be an estimator implementing 'fit' method, 3990.332292888588 was passed

In [None]:
print(type(svr_reg))

<class 'numpy.float64'>


In [None]:
hasattr(svr_reg, 'fit')

False