In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [11]:
# Location of the diabetes CSV. NOTE(review): this is an absolute path on one
# specific machine, so the notebook will not run elsewhere without editing it.
DATA_PATH = '/Users/cenkerarin/ml_test/datasets/diabetes.csv'
data = pd.read_csv(DATA_PATH)

In [12]:
# Columns that are replaced by log(1 + x) of their current values.
# NOTE(review): `data` is overwritten in place, so re-running this cell
# applies the transform a second time on already-transformed values.
columns_to_scale = [
    'Age',
    'Insulin',
    'SkinThickness',
    'DiabetesPedigreeFunction',
]
data[columns_to_scale] = np.log1p(data[columns_to_scale])

In [13]:
# Target is the 'Outcome' column; every other column is used as a feature.
y = data['Outcome']
x = data.drop(columns='Outcome')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
class SVM:
    """Linear soft-margin SVM trained with per-sample sub-gradient descent.

    The decision function is ``np.dot(x, w) - b``. Labels are handled
    internally as -1 / +1.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every weight and bias update.
    lambda_param : float
        L2 regularisation strength (the update uses ``2 * lambda_param * w``).
    n_iters : int
        Number of full passes over the training samples.

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector, one entry per feature; ``None`` until ``fit`` is called.
    b : float or None
        Bias term; ``None`` until ``fit`` is called.
    """

    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples. Anything ``np.asarray`` can convert is accepted
            (ndarray, nested list, DataFrame).
        y : array-like of length n_samples
            Class labels. Values ``<= 0`` are treated as the negative class
            (-1), everything else as the positive class (+1).

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``X`` and ``y`` disagree on the number of
            samples.
        """
        # Coerce up front: iterating a DataFrame yields column names, and a
        # nested list has no ``.shape``.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()

        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        y_ = np.where(y <= 0, -1, 1)

        self.w = np.zeros(n_features)
        self.b = 0.0

        for _ in range(self.n_iters):
            for x_i, y_i in zip(X, y_):
                # Hinge-loss margin test: is the sample on the correct side
                # of the margin?
                margin_ok = y_i * (np.dot(x_i, self.w) - self.b) >= 1

                if margin_ok:
                    # Only the regularisation term contributes.
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    # Regularisation plus the hinge-loss sub-gradient.
                    self.w -= self.lr * (2 * self.lambda_param * self.w - x_i * y_i)
                    self.b -= self.lr * y_i

    def predict(self, X):
        """Return the predicted label (-1.0 or +1.0) for each sample in ``X``.

        Samples lying exactly on the decision boundary are assigned +1, so the
        result never contains 0 (which ``np.sign`` would produce).
        """
        approx = np.dot(np.asarray(X, dtype=float), self.w) - self.b
        return np.where(approx >= 0, 1.0, -1.0)

# Train the from-scratch SVM on the training split.
svm = SVM(learning_rate=0.001, lambda_param=0.01, n_iters=1000)
svm.fit(x_train.values, y_train.values)

# predict() yields signed labels; -1 becomes class 0, anything else class 1.
y_pred = np.where(svm.predict(x_test.values) == -1, 0, 1)
y_true = y_test.values

accuracy = (y_pred == y_true).mean()
print(f"SVM Accuracy: {accuracy:.4f}")

# Confusion-matrix cells, counted from boolean masks.
pred_pos, actual_pos = y_pred == 1, y_true == 1
pred_neg, actual_neg = y_pred == 0, y_true == 0
print(f"True Positives: {np.sum(pred_pos & actual_pos)}")
print(f"True Negatives: {np.sum(pred_neg & actual_neg)}")
print(f"False Positives: {np.sum(pred_pos & actual_neg)}")
print(f"False Negatives: {np.sum(pred_neg & actual_pos)}")

SVM Accuracy: 0.7532
True Positives: 46
True Negatives: 70
False Positives: 29
False Negatives: 9
