# LinearRegression with GridSearchCV

## Dataset: California Housing

### Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import fetch_california_housing #oad the California Housing dataset.
from sklearn.linear_model import LinearRegression, Ridge  #Ridge is a regularized linear model that can help prevent overfitting.
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error

### Loading the dataset

In [None]:
# Fetch the California Housing dataset and separate it into the
# feature array (x) and the target array (y).
data = fetch_california_housing()
x = data.data
y = data.target

### Splitting the Data into Train and Test Sets

In [None]:
# Hold out 20% of the samples as a test set; the fixed random_state
# makes the split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

### Define Parameter Grid for Ridge Regression

In [None]:
# Candidate values for the `alpha` hyperparameter that the search will try.
params = dict(alpha=[0.1, 1.0, 10.0, 100.0])

### Setup GridSearchCV

In [None]:

# 5-fold cross-validated search over `params`, scored by negated MSE.
grid = GridSearchCV(
    estimator=Ridge(),
    param_grid=params,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid.fit(x_train, y_train)  # run the search on the training split only

### Print the Best Hyperparameters

In [None]:
# Show the parameter combination that the search selected.
print('Best Hyperparameters:', grid.best_params_)


Best Hyperparameters: {'alpha': 10.0}


### Train Final Model With Best Alpha

In [None]:
# Build a fresh Ridge model with the alpha chosen by the grid search
# and fit it on the training split.
best_alpha = grid.best_params_['alpha']
final_model = Ridge(alpha=best_alpha)
final_model.fit(x_train, y_train)

### Evaluate Model on Test Data

In [None]:
# Compare the untuned baseline with the tuned Ridge model on the held-out test split.

# Baseline: plain LinearRegression fitted on the same training data.
model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error Before Tuning:', mse)

# Tuned model: score final_model's own predictions. Scoring y_pred here would
# only repeat the baseline MSE and hide any effect of the tuning.
y_pred_final = final_model.predict(x_test)
mse = mean_squared_error(y_test, y_pred_final)
print('Mean Squared Error After Tuning:', mse)

Mean Squared Error Before Tuning: 0.5558915986952422
Mean Squared Error After Tuning: 0.5558915986952422


## LogisticRegression with GridSearchCV

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

### Load the Iris dataset and turn it into a yes/no question (setosa or not setosa)

In [None]:
# Load the Iris dataset and preparing it for a binary classification task.
data=load_iris()
x,y=data.data,(data.target==0).astype(int)

### Splitting dataset into train and test sets

In [None]:
# Hold out 20% of the samples for testing; random_state fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

### Setup the Model

In [None]:
# Base estimator for the search, explicitly using the 'liblinear' solver.
model1 = LogisticRegression(solver='liblinear')

### List parameters

In [None]:
# Search space: seven values of C crossed with both penalty types.
params1 = dict(
    C=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
    penalty=['l1', 'l2'],
)


### Setup GridSearchCV

In [None]:
# 5-fold cross-validated search over `params1`, scored by accuracy.
grid = GridSearchCV(
    estimator=model1,
    param_grid=params1,
    cv=5,
    scoring='accuracy',
)

### Train the Model

In [None]:
# Run the grid search on the training split only.
grid.fit(x_train, y_train)

### Best parameter settings

In [None]:
# Show the parameter combination that the search selected.
print("Best Setting:", grid.best_params_)

Best Setting: {'C': 0.1, 'penalty': 'l1'}


### Evaluating the model before and after tuning

In [None]:
# Compare an untuned LogisticRegression (library defaults) with the
# grid-searched model on the held-out test split.

# Before tuning: default-constructed model fitted on the training data.
model2 = LogisticRegression()
model2.fit(x_train, y_train)
y_pred = model2.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Before Tuning:", accuracy)

# After tuning: predict through the fitted GridSearchCV object.
y_pred_final = grid.predict(x_test)
accuracy = accuracy_score(y_test, y_pred_final)
print("Accuracy After Tuning:", accuracy)

Accuracy Before Tuning: 1.0
Accuracy After Tuning: 1.0
