In [104]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from  sklearn.preprocessing import StandardScaler

In [105]:
df = pd.read_csv('/workspaces/Machine_Learning-/SVM/diabetes (1).csv')

In [106]:
class SVM_classifier():


  # initiating the hyperparameters
  def __init__(self, learning_rate, no_of_iterations, lambda_parameter):

    self.learning_rate = learning_rate
    self.no_of_iterations = no_of_iterations
    self.lambda_parameter = lambda_parameter


  
  # fitting the dataset to SVM Classifier
  def fit(self, X, Y):

    # m  --> number of Data points --> number of rows
    # n  --> number of input features --> number of columns
    self.m, self.n = X.shape

    # initiating the weight value and bias value

    self.w = np.zeros(self.n)

    self.b = 0

    self.X = X

    self.Y = Y

    # implementing Gradient Descent algorithm for Optimization

    for i in range(self.no_of_iterations):
      self.update_weights()



  # function for updating the weight and bias value
  def update_weights(self):

    # label encoding
    y_label = np.where(self.Y <= 0, -1, 1)



    # gradients ( dw, db)
    for index, x_i in enumerate(self.X):

      condition = y_label[index] * (np.dot(x_i, self.w) - self.b) >= 1

      if (condition == True):

        dw = 2 * self.lambda_parameter * self.w
        db = 0

      else:

        dw = 2 * self.lambda_parameter * self.w - np.dot(x_i, y_label[index])
        db = y_label[index]


      self.w = self.w - self.learning_rate * dw

      self.b = self.b - self.learning_rate * db



  # predict the label for a given input value
  def predict(self, X):

    output = np.dot(X, self.w) - self.b
    
    predicted_labels = np.sign(output)

    y_hat = np.where(predicted_labels <= -1, 0, 1)

    return y_hat  


In [107]:
X = df.drop('Outcome', axis = 1)
Y = df['Outcome']


In [108]:
scaler = StandardScaler()
scaler.fit(X)
standardized_data = scaler.transform(X)
X = standardized_data
target = df['Outcome']

In [109]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 2)

In [110]:
model = SVM_classifier(learning_rate = 0.001, no_of_iterations = 2000, lambda_parameter = 0.01)

In [111]:
model.fit(X_train, Y_train)

In [112]:
X_train_prediction = model.predict(X_train)
train_test_accuracy   = accuracy_score(X_train_prediction, Y_train)    

In [113]:
X_test_prediction = model.predict(X_test)

In [114]:
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)

In [115]:
print("Accuracy score of training data and Test_data:", train_test_split , test_data_accuracy)   

Accuracy score of training data and Test_data: <function train_test_split at 0x773db3be6a20> 0.7467532467532467


In [116]:
input_data = (5,166,72,19,175,25.8,0.587,51)
input_data_as_numpy_array = np.asarray(input_data)
intput_data_reshaped = input_data_as_numpy_array.reshape(1, -1)
standardized_input_data = scaler.transform(intput_data_reshaped)
prediction = model.predict(standardized_input_data)
print(prediction)
if (prediction[0] == 0):
  print("The person is not diabetic")
else:   
  print("The person is diabetic")
  

[1]
The person is diabetic


