# Logistic Regression, RandomSearchCV, GridSearchCV

#### Importing libraries

In [1]:
import pandas as pd
import pickle

#### Loading Dataset

In [2]:
df = pd.read_csv('heart_disease.csv')
df.head(2)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1


#### Data Cleaning

In [3]:
df['target'].value_counts()

target
1    165
0    138
Name: count, dtype: int64

In [4]:
df.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

#### Train-test Split

In [5]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [6]:
x = df[['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach','exang', 'oldpeak', 'slope', 'ca', 'thal']]
y = df[['target']]

In [7]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

#### Scaler

In [8]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [10]:
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler,file)

# Logistic Regression

In [11]:
from sklearn.linear_model import LogisticRegression
log_model = LogisticRegression()
log_model.fit(x_train, y_train)
y_test_pred = log_model.predict(x_test)

  y = column_or_1d(y, warn=True)


In [12]:
with open('log_model.pkl', 'wb') as file:
    pickle.dump(log_model,file)

#### Performance Matrics

In [13]:
from sklearn.metrics import accuracy_score, confusion_matrix
accuracy = accuracy_score(y_test_pred, y_test)
conf_matrix = confusion_matrix(y_test_pred, y_test)
print("Accuracy of Logistic Regression is:", accuracy)
print("Confusion matrix:")
print(conf_matrix)

Accuracy of Logistic Regression is: 0.8524590163934426
Confusion matrix:
[[25  5]
 [ 4 27]]


# GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV
param = {
    'penalty':['l1', 'l2', 'elasticnet'],
    'C':[0.001,0.01,0.1,1,10,100],
    'solver':['newton-cg', 'saga', 'sag']
}
grid_model = GridSearchCV(estimator=log_model, param_grid=param, cv = 5)

In [None]:
grid_model.fit(x_train, y_train)

In [None]:
grid_model.get_params

In [None]:
grid_model.best_params_

In [None]:
grid_model.best_score_

# RandomSearchCV

In [None]:
from sklearn.model_selection import RandomizedSearchCV
param = {
    'penalty':['l1', 'l2', 'elasticnet'],
    'C':[0.001,0.01,0.1,1,10,100],
    'solver':['saga', 'sag', 'newton-cg']
}
rand_model = RandomizedSearchCV(estimator=log_model, param_distributions=param, cv = 5)

In [None]:
rand_model.fit(x_train, x_test)

In [None]:
rand_model.best_params_

In [None]:
rand_model.best_score_

#### Prediction with New Data

In [14]:
age = 0
sex = 63
cp = 1
trestbps = 3
chol = 145
fbs = 233
restecg = 1
thalach = 0
exang = 150
oldpeak = 0
slope = 2.3
ca = 0
thal = 0

In [15]:
new_df = pd.DataFrame({'age':[age], 'sex':[sex], 'cp':[cp], 'trestbps':[trestbps], 'chol':[chol], 'fbs':[fbs], 'restecg':[restecg], 
              'thalach':[thalach],'exang':[exang], 'oldpeak':[oldpeak], 'slope':[slope], 'ca':[ca], 'thal':[thal]})
new_df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,0,63,1,3,145,233,1,0,150,0,2.3,0,0


In [16]:
new_df = scaler.transform(new_df)

In [17]:
predicted_result = log_model.predict(new_df)
predicted_result[0]

np.int64(0)

In [18]:
if predicted_result[0]==0:
    print("There is no Heart Problem")
elif predicted_result[0]==1:
    print("There is Heart Problem")
else:
    print("Error")

There is no Heart Problem
