<a href="https://colab.research.google.com/github/fafa20252025/fariba/blob/main/Copy_of_logesticRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [131]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# **Reading the dataset**

In [132]:
heart=pd.read_csv('heart.csv')
heart.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [133]:
heart.shape #number of rows and columns

(303, 14)

In [134]:
heart.dtypes

Unnamed: 0,0
age,int64
sex,int64
cp,int64
trestbps,int64
chol,int64
fbs,int64
restecg,int64
thalach,int64
exang,int64
oldpeak,float64


In [135]:
heart.isnull().sum() #print the total number of missing values

Unnamed: 0,0
age,0
sex,0
cp,0
trestbps,0
chol,0
fbs,0
restecg,0
thalach,0
exang,0
oldpeak,0


In [136]:
heart.duplicated().sum() #print the total number of duplicated values

np.int64(1)

In [137]:
def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df


In [138]:
data = remove_outliers(heart, 'age')
data = remove_outliers(data, 'trestbps')
data = remove_outliers(data, 'chol')
data = remove_outliers(data, 'thalach')
data = remove_outliers(data, 'oldpeak')


In [139]:
data.shape

(284, 14)

# **Machine learning process**

In [140]:
X=heart.drop('target',axis=1)
y=heart['target']

# **split data to training and testing data**

In [141]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=100)

# **Standardasation/scaling of the data**
This method helps to scale down some column values which are lsrge so that it has less effect on predicted value. we use a formula z=data-mean /SD.



In [142]:
scaler=StandardScaler()
X_train_scaled=scaler.fit_transform(X_train)
X_test_scaled=scaler.transform(X_test)

# **Applying logestic regression on the data**

In [143]:
log_reg= LogisticRegression()
log_reg.fit(X_train_scaled,y_train)

In [144]:
log_reg.coef_ ## below values are m1, m2, m3 ,...

array([[ 0.02651805, -0.76115681,  0.78058821, -0.2162428 , -0.45756905,
        -0.11290994,  0.0609823 ,  0.44218324, -0.33736004, -0.5613874 ,
         0.37441386, -0.68913694, -0.62006129]])

In [145]:
log_reg.intercept_

array([0.30759926])

In [146]:
y_pred=log_reg.predict(X_test_scaled)

In [147]:
y_pred

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1,
       0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0])

In [148]:
log_reg.predict_proba(X_test_scaled)
#first column has probability of 0 and the second column has probability of 1
# value in first rown and first column shows probability of the closness of  the number being 0 And
#value in first row and second column shows the probability of closness of the number being 1

array([[0.03172541, 0.96827459],
       [0.97804488, 0.02195512],
       [0.99171495, 0.00828505],
       [0.04084817, 0.95915183],
       [0.2396951 , 0.7603049 ],
       [0.97976681, 0.02023319],
       [0.89108422, 0.10891578],
       [0.76087831, 0.23912169],
       [0.38411517, 0.61588483],
       [0.36902516, 0.63097484],
       [0.77382973, 0.22617027],
       [0.38952305, 0.61047695],
       [0.81439411, 0.18560589],
       [0.36708872, 0.63291128],
       [0.89841421, 0.10158579],
       [0.27406122, 0.72593878],
       [0.81696856, 0.18303144],
       [0.79720593, 0.20279407],
       [0.94181194, 0.05818806],
       [0.36693681, 0.63306319],
       [0.5678237 , 0.4321763 ],
       [0.02952236, 0.97047764],
       [0.76062591, 0.23937409],
       [0.04531766, 0.95468234],
       [0.01144994, 0.98855006],
       [0.92531029, 0.07468971],
       [0.99467155, 0.00532845],
       [0.06640405, 0.93359595],
       [0.98848755, 0.01151245],
       [0.99056291, 0.00943709],
       [0.

In [149]:
accuracy_score(y_test,y_pred)

0.819672131147541