<a href="https://colab.research.google.com/github/haroonwaheed19/Logistic-Regression-From-Scratch/blob/main/Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Importing Libraries**

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

**Coding Logistic Regression Model**

In [2]:
class Logistic_Regression():
    """Binary logistic-regression classifier trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    no_of_iterations : int
        Number of gradient-descent updates performed by ``fit``.

    Attributes set by ``fit``
    -------------------------
    m, n : number of training examples / number of features
    w    : weight vector of shape (n,)
    b    : scalar bias
    X, Y : the training data as float NumPy arrays, shapes (m, n) and (m,)
    """

    def __init__(self, learning_rate, no_of_iterations):
        self.learning_rate = learning_rate
        self.no_of_iterations = no_of_iterations

    @staticmethod
    def _sigmoid(z):
        """Return 1 / (1 + exp(-z)) element-wise without overflowing.

        ``exp`` is only ever evaluated on a non-positive argument, so large
        negative ``z`` no longer triggers an overflow RuntimeWarning.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def fit(self, X, Y):
        """Train the model on ``X`` (m, n) with binary labels ``Y`` of length m.

        ``X`` and ``Y`` may be NumPy arrays, lists, or pandas objects. ``Y`` is
        flattened to 1-D, so a column vector of shape (m, 1) is accepted.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``Y`` does not have one label per row.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the labels: with a (m, 1) column vector, ``y_hat - Y`` would
        # broadcast to (m, m) and silently corrupt the gradients.
        Y = np.asarray(Y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one training example")
        if Y.shape[0] != X.shape[0]:
            raise ValueError("X and Y must contain the same number of samples")

        # no_of_training_examples, no_of_features
        self.m, self.n = X.shape

        # start from zero weights and zero bias
        self.w = np.zeros(self.n)
        self.b = 0.0

        self.X = X
        self.Y = Y

        # gradient descent
        for _ in range(self.no_of_iterations):
            self.update_weights()

    def update_weights(self):
        """Perform one gradient-descent step on ``self.w`` and ``self.b``."""
        y_hat = self._sigmoid(np.dot(self.X, self.w) + self.b)
        residual = y_hat - self.Y

        # gradients of the mean log-loss with respect to w and b
        dw = (1 / self.m) * np.dot(self.X.T, residual)
        db = (1 / self.m) * np.sum(residual)

        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    def predict(self, X):
        """Return an array of 0/1 labels for the rows of ``X``.

        A row is labelled 1 when its predicted probability is strictly
        greater than 0.5, otherwise 0.
        """
        z = np.asarray(X, dtype=float).dot(self.w) + self.b
        return np.where(self._sigmoid(z) > 0.5, 1, 0)


**Reading Data from CSV File into Data Frame**

In [12]:
# Load the dataset; the CSV is looked up relative to the notebook's working directory.
diabetes = pd.read_csv(filepath_or_buffer='diabetes.csv')

**Exploratory Data Analysis**

In [13]:
# Peek at the first five rows (5 is head()'s default).
diabetes.head(n=5)

Unnamed: 0,Index,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,Outcome.1
0,0.0,6,148,72,35,0,33.6,0.627,50,1,1.0
1,1.0,1,85,66,29,0,26.6,0.351,31,0,-1.0
2,2.0,8,183,64,0,0,23.3,0.672,32,1,1.0
3,3.0,1,89,66,23,94,28.1,0.167,21,0,-1.0
4,4.0,0,137,40,35,168,43.1,2.288,33,1,1.0


In [14]:
# Peek at the last five rows (5 is tail()'s default).
diabetes.tail(n=5)

Unnamed: 0,Index,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,Outcome.1
763,,10,101,76,48,180,32.9,0.171,63,0,
764,,2,122,70,27,0,36.8,0.34,27,0,
765,,5,121,72,23,112,26.2,0.245,30,0,
766,,1,126,60,0,0,30.1,0.349,47,1,
767,,1,93,70,31,0,30.4,0.315,23,0,


In [15]:
# Summary statistics per numeric column; the quartiles are describe()'s defaults, spelled out.
diabetes.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Index,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,Outcome.1
count,10.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,27.0
mean,4.5,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958,0.333333
std,3.02765,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951,0.960769
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0,-1.0
25%,2.25,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0,-1.0
50%,4.5,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0,1.0
75%,6.75,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0,1.0
max,9.0,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0,1.0


In [16]:
# Number of missing values in each column (isna is the canonical name; isnull is its alias).
diabetes.isna().sum()

Unnamed: 0,0
Index,758
Pregnancies,0
Glucose,0
BloodPressure,0
SkinThickness,0
Insulin,0
BMI,0
DiabetesPedigreeFunction,0
Age,0
Outcome,0


**Data Pre-Processing**

In [27]:
# Remove the mostly-empty 'Index' column (758 of its 768 values are missing).
# Reassigning instead of inplace=True, together with errors='ignore', keeps the
# cell idempotent: re-running it once the column is gone no longer raises KeyError.
diabetes = diabetes.drop(columns='Index', errors='ignore')

In [29]:
# Remove the sparsely populated 'Outcome.1' column (describe() reports only 27 values).
# Reassigning instead of inplace=True, together with errors='ignore', keeps the
# cell idempotent: re-running it once the column is gone no longer raises KeyError.
diabetes = diabetes.drop(columns='Outcome.1', errors='ignore')

In [30]:
# Class balance of the label column.
diabetes.loc[:, 'Outcome'].value_counts()

Unnamed: 0_level_0,count
Outcome,Unnamed: 1_level_1
0,500
1,268


In [31]:
# Mean of every remaining column within each outcome class.
diabetes.groupby(by='Outcome').mean()

Unnamed: 0_level_0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
Outcome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,3.298,109.98,68.184,19.664,68.792,30.3042,0.429734,31.19
1,4.865672,141.257463,70.824627,22.164179,100.335821,35.142537,0.5505,37.067164


**Separating Features and Outcome (Label)**

In [32]:
# Label vector first, then the feature matrix (every column except the label).
target = diabetes['Outcome']
features = diabetes.drop(columns=['Outcome'])

**Standardizing Data**

In [33]:
# Standardizer with its default behaviour spelled out: subtract the mean, divide by the std.
scaler = StandardScaler(with_mean=True, with_std=True)

In [34]:
# Fit the scaler and standardize every feature column in one step; `features` and
# `stand` both refer to the transformed data from here on.
# NOTE(review): the scaler is fitted on all rows, before the train/test split further
# down, so test-set statistics leak into the scaling. Consider fitting on the
# training split only.
features = stand = scaler.fit_transform(features)

In [36]:
# Sanity check: the standard deviation over the whole standardized array should be 1.
np.std(features)

np.float64(1.0)

**Training and Testing Data Splitting**

In [37]:
# Hold out 20% of the rows for testing. `stratify` keeps the class ratio the same in
# both splits, and the fixed `random_state` makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    stratify=target,
    random_state=2,
)

In [38]:
# Shapes of the four splits, in the order x_train, x_test, y_train, y_test.
print(*(split.shape for split in (x_train, x_test, y_train, y_test)))

(614, 8) (154, 8) (614,) (154,)


**Training the Model**

In [40]:
# Gradient-descent hyperparameters: number of updates and step size.
classifier = Logistic_Regression(
    no_of_iterations=1000,
    learning_rate=0.01,
)

In [41]:
# Train on the training split only.
classifier.fit(X=x_train, Y=y_train)

**Model Evaluation**

In [42]:
# Accuracy on the same rows the model was trained on.
x_predictions = classifier.predict(X=x_train)
training_accuracy = accuracy_score(y_true=y_train, y_pred=x_predictions)

In [43]:
# Report the training accuracy as a percentage.
print('Training Accuracy : ', 100 * training_accuracy)

Training Accuracy :  77.85016286644951


In [44]:
# Accuracy on the held-out test split.
x_test_pred = classifier.predict(X=x_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=x_test_pred)

In [45]:
# Report the test accuracy as a percentage.
print("Testing Accuracy : ", 100 * test_accuracy)

Testing Accuracy :  75.97402597402598


**Making a Predictive System**

In [46]:
# Eight raw (unscaled) feature values for one person.
# NOTE(review): presumably listed in the same column order as the training features; confirm.
input_data = (1, 79, 60, 42, 48, 43.5, 0.678, 23)

# The scaler and the model both work on 2-D input, so shape the record as a single row.
input_data_npArray = np.array(input_data)
input_data_reshaped = input_data_npArray[np.newaxis, :]

# Apply the already-fitted standardization, then classify the scaled row.
stand_data = scaler.transform(input_data_reshaped)
prediction = classifier.predict(stand_data)

# Label 0 is reported as non-diabetic, anything else as diabetic.
print("The person is Non-Diabetic" if prediction[0] == 0 else "The person is Diabetic")



The person is Non-Diabetic


