# **Gradient Desendent**

In [79]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OrdinalEncoder,OneHotEncoder
from sklearn.pipeline import Pipeline,make_pipeline
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor

In [80]:
df = pd.read_csv('insurance.csv')
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [81]:
df.isnull().sum()

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

In [82]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns='charges'),df['charges'],test_size=0.2)

In [83]:
numerical_columns = [0,2,3]
cat_nominal = [5]
cat_ordinal = [1,4]

In [84]:
impute_num = Pipeline(steps=[
    ('impute_num',SimpleImputer(strategy='mean')),
    ('scale_num', MinMaxScaler())
])

In [85]:
encode_nominal = Pipeline(steps=[
    ('encode_nominal',OneHotEncoder(drop='first', handle_unknown='ignore')),
    ('impute_nominal',SimpleImputer(strategy='most_frequent'))
])

In [86]:
encode_ordinal = Pipeline(steps=[
    ('encode_ordinal',OrdinalEncoder()),
    ('impute_ordinal',SimpleImputer(strategy='most_frequent'))
])

In [87]:
preprocessing = ColumnTransformer(transformers=[
    ('handle_numerical',impute_num,numerical_columns),
    ('handle_nominal',encode_nominal,cat_nominal),
    ('handle_ordinal',encode_ordinal,cat_ordinal)
],remainder='passthrough')

In [88]:
model = LinearRegression()

In [89]:
pipe = make_pipeline(preprocessing,model)

In [90]:
pipe.fit(X_train,y_train)

In [91]:
ypred = pipe.predict(X_test)

In [92]:
accuracy_score = r2_score(y_test,ypred)

In [93]:
modelGD = SGDRegressor()

In [94]:
pipee = make_pipeline(preprocessing, modelGD)

In [95]:
pipee.fit(X_train, y_train)

In [96]:
y_pred_GD = pipee.predict(X_test)

In [97]:
accuracy_score_GD = r2_score(y_test,y_pred_GD)

In [98]:
print('R2 Square with Linear Regression ', accuracy_score)
print('R2 Square with Gradient Descendent ', accuracy_score_GD)

print()
if accuracy_score > accuracy_score_GD:
    print('Which one is performing better in this dataset? Linear Regression')
else:
    print('Which one is performing better in this dataset? Gradient Descendent')

R2 Square with Linear Regression  0.7139534989581794
R2 Square with Gradient Descendent  0.7134146025249959

Which one is performing better in this dataset? Linear Regression
