In [20]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from sklearn.datasets import load_diabetes
# Fetch the scikit-learn diabetes dataset and assemble a single frame:
# one column per feature name, with the regression target appended as the
# last column (later cells rely on `target` being last).
diabetes = load_diabetes()
df = pd.DataFrame(
    diabetes.data,
    columns=diabetes.feature_names,
).assign(target=diabetes.target)
# Bare last expression so the notebook renders the first five rows.
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [22]:
from sklearn.model_selection import train_test_split
# Every column except the last is a feature; the last column is `target`.
feature_matrix = df.iloc[:, :-1].values
target_vector = df['target'].values

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    feature_matrix,
    target_vector,
    test_size=0.1,
    random_state=2023,
)

In [9]:
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.linear_model import LinearRegression
# Linear regression with default settings, fitted on the training split
# (fit() returns the estimator itself, so the call can be chained).
lr = LinearRegression().fit(X_train, y_train)

# Predict the held-out split and keep both metrics for the comparison cells.
pred_lr = lr.predict(X_test)
r2_lr, mse_lr = (
    r2_score(y_test, pred_lr),
    mean_squared_error(y_test, pred_lr),
)

In [10]:
from sklearn.linear_model import Ridge
# Ridge regression with default settings; the seed matches the one used
# for the train/test split.
ridge = Ridge(random_state=2023).fit(X_train, y_train)

# Predict the held-out split and keep both metrics for the comparison cells.
pred_rg = ridge.predict(X_test)
r2_rg, mse_rg = (
    r2_score(y_test, pred_rg),
    mean_squared_error(y_test, pred_rg),
)

In [11]:
from sklearn.linear_model import Lasso
# Lasso regression with default settings; the seed matches the one used
# for the train/test split.
lasso = Lasso(random_state=2023).fit(X_train, y_train)

# Predict the held-out split and keep both metrics for the comparison cells.
pred_ls = lasso.predict(X_test)
r2_ls, mse_ls = (
    r2_score(y_test, pred_ls),
    mean_squared_error(y_test, pred_ls),
)

In [12]:
from sklearn.svm import SVR
# Support-vector regression with the estimator's default hyperparameters.
svr = SVR().fit(X_train, y_train)

# Predict the held-out split and keep both metrics for the comparison cells.
pred_sv = svr.predict(X_test)
r2_sv, mse_sv = (
    r2_score(y_test, pred_sv),
    mean_squared_error(y_test, pred_sv),
)

In [13]:
from sklearn.ensemble import RandomForestRegressor
# Random-forest regressor with default settings; seeded so the fitted
# forest is the same on every run.
rfr = RandomForestRegressor(random_state=2023).fit(X_train, y_train)

# Predict the held-out split and keep both metrics for the comparison cells.
pred_rf = rfr.predict(X_test)
r2_rf, mse_rf = (
    r2_score(y_test, pred_rf),
    mean_squared_error(y_test, pred_rf),
)

In [14]:
from xgboost import XGBRegressor
# Gradient-boosted trees (XGBoost) with default hyperparameters.
# The seed is pinned to the same value as the split and the other seeded
# estimators in this notebook, so the run stays reproducible if subsampling
# options are enabled later.
# NOTE(review): with the defaults (no row/column subsampling) the seed is not
# expected to change the fitted model or the scores shown below - confirm on re-run.
xgr = XGBRegressor(random_state=2023)
xgr.fit(X_train, y_train)

# Predict the held-out split and keep both metrics for the comparison cells.
pred_xg = xgr.predict(X_test)
r2_xg = r2_score(y_test, pred_xg)
mse_xg = mean_squared_error(y_test, pred_xg)

In [16]:
# Tab-separated comparison table: one column per model, one row per metric.
# The column order must stay aligned across the three tuples below.
model_labels = ('LR', 'Ridge', 'Lasso', 'SV', 'RF', 'XG')
r2_values = (r2_lr, r2_rg, r2_ls, r2_sv, r2_rf, r2_xg)
mse_values = (mse_lr, mse_rg, mse_ls, mse_sv, mse_rf, mse_xg)

# Two leading tabs leave room for the row labels in the first column.
print('\t\t' + '\t'.join(model_labels))
print('R squared\t' + '\t'.join(f'{score:.4f}' for score in r2_values))
print('Mean squared E\t' + '\t'.join(f'{error:.2f}' for error in mse_values))

		LR	Ridge	Lasso	SV	RF	XG
R squared	0.5474	0.4885	0.3414	0.1736	0.4374	0.3298
Mean squared E	2934.28	3316.07	4269.57	5357.84	3647.41	4345.29


In [17]:
# Side-by-side view of the held-out targets (`y`) and each model's
# predictions for the same rows.
# The table gets its own name instead of rebinding `df`: overwriting `df`
# would discard the diabetes frame, and re-running the train/test split cell
# would then fail because the frame no longer has a `target` column.
pred_df = pd.DataFrame({
    'y': y_test,
    'LR': pred_lr,
    'Ridge': pred_rg,
    'Lasso': pred_ls,
    'SV': pred_sv,
    'RF': pred_rf,
    'XG': pred_xg,
})
# Bare last expression so the notebook renders the first ten rows.
pred_df.head(10)

Unnamed: 0,y,LR,Ridge,Lasso,SV,RF,XG
0,265.0,164.438865,160.151435,163.228322,144.190589,196.66,240.086166
1,261.0,230.141881,202.152287,185.709959,157.247371,257.69,294.357269
2,160.0,112.861523,130.893089,136.437283,126.542081,82.91,69.451904
3,249.0,205.771219,193.404176,172.572808,155.329271,162.19,135.287415
4,102.0,105.694296,130.014978,125.594654,129.916467,89.69,63.781631
5,200.0,181.051091,168.462653,171.165806,144.85424,164.64,148.613083
6,42.0,82.067894,104.536203,109.488925,119.731117,96.46,97.405067
7,55.0,77.253222,94.52976,111.518618,129.977129,141.39,97.169762
8,178.0,128.891822,131.573181,138.565391,131.056091,145.14,157.598083
9,317.0,226.235038,208.643264,182.521455,150.626285,193.59,207.042252
