In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
#Logistic Regression
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer

In [None]:
# Load the breast-cancer dataset that ships with scikit-learn.
breast_cancer = load_breast_cancer()

# One column per feature, with the class label appended as the final
# 'target' column.
df = pd.DataFrame(
    data=breast_cancer.data,
    columns=breast_cancer.feature_names,
).assign(target=breast_cancer.target)
df.head()

In [None]:
# Independent features: every column of df except the 'target' label,
# which the loading cell appends as the last column.
X = df.loc[:, df.columns != 'target']
X.head()

In [None]:
# Dependent feature: the 'target' label, which is the last column of df.
Y = df['target']
Y.head()

In [None]:
# Check whether the data is imbalanced by counting the rows in each target class
df['target'].value_counts()

In [None]:
#Train test split
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation and fit on the remaining 67%.
# (Passing 0.33 as train_size would instead train on only a third of the
# data and evaluate on the other two thirds.)
# random_state pins the shuffle so the split is reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.33, random_state=42
)

In [None]:
# Single grid: the search evaluates every (C, max_iter) combination.
# A list of separate dicts would be searched as independent grids, so C and
# max_iter would never be tuned jointly and the max_iter candidates would only
# be tried with the base estimator's own C.
params = {'C': [1, 5, 10], 'max_iter': [100, 150]}

In [None]:
# Base estimator handed to the grid search below. The C and max_iter given
# here are used for any candidate whose grid does not set that parameter.
model1 = LogisticRegression(
    C=100,
    max_iter=100,
)

In [None]:
"""GridSearchCV is a technique to search through the best parameter values from the given set of the grid of parameters.
It is basically a cross-validation method. The model and the parameters are required to be fed in.
Best parameter values are extracted and then the predictions are made."""
from sklearn.model_selection import GridSearchCV

# 5-fold cross-validated search over `params`, ranking candidates by F1 score.
model = GridSearchCV(
    estimator=model1,
    param_grid=params,
    scoring='f1',
    cv=5,
)
# Fit on the training split only; the test split stays unseen until evaluation.
model.fit(X_train, Y_train)

In [None]:
# Winning parameter combination, then its mean cross-validated score,
# each on its own line.
print(model.best_params_, model.best_score_, sep='\n')

In [None]:
# Predict labels for the held-out test features with the fitted grid-search model
y_pred = model.predict(X_test)
# Bare expression so the notebook shows the predictions as the cell output
y_pred

In [None]:
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    accuracy_score,
)

# Confusion matrix of true test labels against the predicted labels
confusion_matrix(y_true=Y_test, y_pred=y_pred)

In [None]:
# Fraction of test samples whose predicted label matches the true label
accuracy_score(y_true=Y_test, y_pred=y_pred)

In [None]:
# Per-class precision, recall and F1 on the test split, printed as text
print(classification_report(y_true=Y_test, y_pred=y_pred))