In [67]:
import numpy as np

def logreg(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    Despite its name this is the sigmoid activation, not a model; the
    ``LogisticRegression`` class in this cell uses it to turn linear scores
    into probabilities.

    Args:
        x: Scalar or array-like of real values.

    Returns:
        The sigmoid of ``x`` as a NumPy array with the input's shape
        (a NumPy scalar when ``x`` is a scalar).
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) lies in (0, 1] and therefore cannot overflow, whereas the
    # naive exp(-x) overflows (RuntimeWarning) for very negative x.
    z = np.exp(-np.abs(x))
    # Both branches equal 1 / (1 + exp(-x)); each is selected where it is
    # numerically stable. Indexing with () turns a 0-d result into a scalar
    # and leaves n-d arrays untouched.
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))[()]

class LogisticRegression():
    """Binary logistic regression trained with full-batch gradient descent.

    Args:
        learning_rate: Step size applied to every gradient update.
        no_iterations: Number of gradient steps performed by ``fit``.

    Attributes:
        weights: Coefficient vector, one entry per feature (``0`` until
            ``fit`` has been called).
        bias: Intercept term.
    """

    def __init__(self, learning_rate=0.001, no_iterations=1000):
        self.learning_rate = learning_rate
        self.no_iterations = no_iterations
        self.weights = 0
        self.bias = 0

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: 2-D array-like of shape (no_samples, no_features).
            y: Array-like of 0/1 labels, one per row of ``X``.
        """
        # Coerce to plain arrays so DataFrames/Series (and column-vector
        # labels) behave the same as ndarrays in the arithmetic below.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()

        no_samples, no_features = X.shape
        self.weights = np.zeros(no_features)
        self.bias = 0

        # Fixed: the loop previously read the non-existent attribute
        # `self.n_iters`, so fit() always raised AttributeError.
        for _ in range(self.no_iterations):
            linear_pred = np.dot(X, self.weights) + self.bias
            predictions = logreg(linear_pred)

            # Gradient of the mean log-loss w.r.t. weights and bias.
            error = predictions - y
            dw = (1 / no_samples) * np.dot(X.T, error)
            db = (1 / no_samples) * np.sum(error)

            self.weights = self.weights - self.learning_rate * dw
            self.bias = self.bias - self.learning_rate * db

    def predict(self, X):
        """Return a list of 0/1 class labels for the rows of ``X``.

        A row is labelled 1 when its predicted probability exceeds 0.5,
        otherwise 0.
        """
        X = np.asarray(X, dtype=float)
        linear_pred = np.dot(X, self.weights) + self.bias
        y_pred = logreg(linear_pred)
        # `<= 0.5 -> 0` keeps the original tie-breaking (exactly 0.5 -> 0).
        return np.where(y_pred <= 0.5, 0, 1).tolist()

In [33]:
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# NOTE(review): this import rebinds the name `LogisticRegression`. When the
# notebook is run top to bottom it shadows the hand-written class defined in
# the first cell, so later `LogisticRegression(...)` calls build the
# scikit-learn estimator instead -- confirm which one is intended.
from sklearn.linear_model import LogisticRegression 

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [35]:
# Read the diabetes CSV (path is relative to the working directory) into a DataFrame.
diabetes_dataset = pd.read_csv(filepath_or_buffer='Downloads/diabetes.csv')

In [36]:
# Preview the first five rows to check the columns loaded as expected.
diabetes_dataset.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [37]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
diabetes_dataset.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [38]:
# Split the frame into the label column and the remaining predictor columns.
target = diabetes_dataset['Outcome']
features = diabetes_dataset.drop(columns=['Outcome'])

In [39]:
# Standardize the feature columns with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split performed further down, so test-set statistics
# influence the scaling applied to the training data.
scaler = StandardScaler()
standardized_data = scaler.fit_transform(features)

In [43]:
# From here on `features` refers to the standardized array (the unscaled
# frame is no longer reachable under this name); `target` is the Outcome column.
features, target = standardized_data, diabetes_dataset['Outcome']

In [44]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=2,
)

In [46]:
# NOTE(review): `random_state` is not accepted by the hand-written
# LogisticRegression class in the first cell (its __init__ only takes
# learning_rate and no_iterations), so this call relies on the scikit-learn
# class imported afterwards being the one bound to this name.
logreg_clf = LogisticRegression(random_state=16)

In [48]:
# Train on the training split, then predict labels for the held-out split.
logreg_clf.fit(X_train,Y_train)
# NOTE(review): `y_pred` is not read by any later cell shown here; the test
# predictions are recomputed as `X_test_prediction` below.
y_pred =logreg_clf.predict(X_test)

In [49]:
# Accuracy on the same rows the model was trained on.
X_train_prediction = logreg_clf.predict(X_train)
training_data_accuracy = accuracy_score(y_true=Y_train, y_pred=X_train_prediction)

In [50]:
# Report the training accuracy (print() adds a space between its two
# arguments, hence the double space after the colon in the output).
print('Accuracy score of the training data : ', training_data_accuracy)

Accuracy score of the training data :  0.7801302931596091


In [51]:
# Accuracy on the held-out rows the model did not see during training.
X_test_prediction = logreg_clf.predict(X_test)
test_data_accuracy = accuracy_score(y_true=Y_test, y_pred=X_test_prediction)

In [52]:
# Report the held-out accuracy for comparison with the training accuracy.
print('Accuracy score of the test data : ', test_data_accuracy)

Accuracy score of the test data :  0.7662337662337663


In [63]:
# Making a Predictive System
# One sample to classify. The scaler was fitted on the feature columns in the
# order Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI,
# DiabetesPedigreeFunction, Age, so the tuple is interpreted in that order.
# NOTE(review): read that way the values look misplaced -- e.g.
# DiabetesPedigreeFunction=100 and Age=3, while describe() shows ranges of
# 0.078-2.42 and 21-81 for those columns -- confirm the intended order/values.
input_data = (24,32,5,9,28.6, 166, 100, 3)

In [65]:
# Turn the raw tuple into a 2-D array holding a single row (one sample).
input_data_as_numpy_array = np.asarray(input_data)
input_data_reshaped = np.reshape(input_data_as_numpy_array, (1, -1))

# Apply the scaling learned from the dataset, show it, then classify the sample.
std_data = scaler.transform(input_data_reshaped)
print(std_data)
prediction = logreg_clf.predict(std_data)

[[  5.98534359  -2.78214506  -3.31410849  -0.72365966  -0.44455975
   17.00812086 300.5867453   -2.57312899]]




In [66]:
# Translate the predicted class label (0 vs. anything else) into a readable message.
print('The person is not diabetic' if prediction[0] == 0 else 'The person is diabetic')

The person is diabetic
