# Group: 10
# Rollno: 19EE10050, 19EC10041, 22CS60R18
# Project Code: PSSVM
# Project Title: Pulsar Star Classification using Support Vector Machines

# Importing Libraries

# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Model

# In [None]:
class SVM:
    """Linear soft-margin SVM trained with mini-batch sub-gradient descent.

    The objective minimised is
    ``0.5 * ||W||^2 + C * sum_i max(0, 1 - y_i * (x_i . W + b))``.

    Labels are expected to be -1 / +1: ``predict`` returns ``np.sign`` of the
    decision value and ``score`` compares that output with ``Y`` directly.

    Attributes:
        C: Weight of the hinge-loss term relative to the regulariser.
        W: Weights; scalar 0 until ``fit`` runs, then shape (n_features, 1).
        b: Bias; scalar 0 until ``fit`` runs, then shape (1,).
    """

    def __init__(self, C=1.0):
        self.C = C
        self.W = 0
        self.b = 0

    @staticmethod
    def _as_features(X):
        """Return X as a float array of shape (n_samples, n_features).

        A 2-D input is kept as is, so several feature columns are supported.
        Any other rank is flattened into a single feature column.
        """
        X = np.asarray(X, dtype=float)
        return X if X.ndim == 2 else X.reshape(-1, 1)

    @staticmethod
    def _as_labels(Y):
        """Return Y as a float column vector of shape (n_samples, 1)."""
        return np.asarray(Y, dtype=float).reshape(-1, 1)

    def hinge_loss(self, W, b, X, Y):
        """Return the regularised hinge loss of (W, b) on the data (X, Y).

        Args:
            W: Weights, reshaped internally to (n_features, 1).
            b: Bias, a scalar or an array of shape (1,).
            X: Samples, 1-D (one feature) or 2-D (n_samples, n_features).
            Y: Labels, one per sample.

        Returns:
            The scalar loss ``0.5 * ||W||^2 + C * sum(max(0, 1 - margin))``.

        Raises:
            ValueError: If X and Y do not hold the same number of samples.
        """
        W = np.asarray(W, dtype=float).reshape(-1, 1)
        X = self._as_features(X)
        Y = self._as_labels(Y)
        if X.shape[0] != Y.shape[0]:
            raise ValueError("X and Y must contain the same number of samples")
        margins = Y * (np.dot(X, W) + b)
        # Summing arrays always yields a scalar, including when no sample
        # violates the margin or the data set is empty.
        hinge = np.sum(np.maximum(0.0, 1.0 - margins))
        return 0.5 * np.sum(W * W) + self.C * hinge

    def fit(self, X, Y, batch_size=100, learning_rate=0.001, maxItr=300):
        """Train the model and store the result in ``self.W`` / ``self.b``.

        Args:
            X: Samples, 1-D (one feature) or 2-D (n_samples, n_features).
            Y: Labels, one per sample.
            batch_size: Number of samples per gradient step (at least 1).
            learning_rate: Step size of each update.
            maxItr: Number of passes over the shuffled data.

        Returns:
            Tuple ``(W, bias, losses)``: ``W`` has shape (n_features, 1),
            ``bias`` has shape (1,), and ``losses`` holds the loss measured at
            the start of every pass (``maxItr`` entries).

        Raises:
            ValueError: If X and Y differ in length or ``batch_size < 1``.
        """
        X = self._as_features(X)
        Y = self._as_labels(Y)
        n_samples, n_features = X.shape
        if Y.shape[0] != n_samples:
            raise ValueError("X and Y must contain the same number of samples")
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        W = np.zeros((n_features, 1))
        bias = np.zeros(1)
        losses = []
        for _ in range(maxItr):
            losses.append(self.hinge_loss(W, bias, X, Y))
            ids = np.arange(n_samples)
            np.random.shuffle(ids)
            for start in range(0, n_samples, batch_size):
                batch = ids[start:start + batch_size]
                X_batch = X[batch]
                Y_batch = Y[batch]
                margins = Y_batch * (np.dot(X_batch, W) + bias)
                # Only samples on or inside the margin contribute a gradient.
                active = ~(margins > 1).ravel()
                gradw = self.C * np.sum(
                    X_batch[active] * Y_batch[active], axis=0
                ).reshape(-1, 1)
                gradb = self.C * np.sum(Y_batch[active], axis=0)
                W = W - learning_rate * W + learning_rate * gradw
                bias = bias + learning_rate * gradb

        # Report the loss of the final parameters; this also works when
        # maxItr is 0 and no loss was recorded during training.
        print("Final loss is", self.hinge_loss(W, bias, X, Y))
        self.W = W
        self.b = bias
        return W, bias, losses

    def predict(self, X):
        """Return ``np.sign(X . W + b)`` for every sample, shape (n_samples, 1)."""
        X = self._as_features(X)
        W = np.asarray(self.W, dtype=float)
        if W.ndim == 0:
            # Scalar weight (e.g. the unfitted default 0): apply it to every
            # feature so the output keeps one column per sample.
            W = np.full((X.shape[1], 1), float(W))
        return np.sign(np.dot(X, W) + self.b)

    def score(self, X, Y):
        """Return the fraction of samples whose prediction equals the label.

        Raises:
            ValueError: If X and Y do not hold the same number of samples.
        """
        Y = self._as_labels(Y)
        predictions = self.predict(X)
        if predictions.shape[0] != Y.shape[0]:
            raise ValueError("X and Y must contain the same number of samples")
        return np.sum(predictions == Y) / Y.shape[0]

    def visualize(self, X, Y, losses):
        """Show a scatter plot of the data and a plot of the loss history.

        With one feature the feature is plotted against the label. With more
        features the first two are plotted against each other, coloured by
        label.

        Returns:
            Always 0.
        """
        X = self._as_features(X)
        Y = self._as_labels(Y)
        plt.figure(0)
        if X.shape[1] == 1:
            plt.scatter(X[:, 0], Y[:, 0])
        else:
            plt.scatter(X[:, 0], X[:, 1], c=Y[:, 0])
        plt.show()
        plt.figure(1)
        plt.plot(losses)
        plt.show()
        return 0

if __name__ == "__main__":
    # Read the headerless CSV and pull out its first three columns.
    data = pd.read_csv("data_2d.csv", header=None)
    X, Y, labels = (data.iloc[:, col] for col in range(3))

    # Train with the default hyper-parameters. Only column 0 (X) is given to
    # the model as a feature; Y is loaded but not used below.
    svm = SVM()
    W, b, losses = svm.fit(X, labels)

    # Plot the data and the loss history, then report the training accuracy.
    svm.visualize(X, labels, losses)
    accuracy = svm.score(X, labels)
    print(accuracy)
