In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import pandas as pd
import ntpath
import random
import SVM

In [None]:
# Names assigned to the CSV columns, in file order; the last one is the label.
columns = [
    'ph',
    'Hardness',
    'Solids',
    'Chloramines',
    'Sulfate',
    'Conductivity',
    'Organic_carbon',
    'Trihalomethanes',
    'Turbidity',
    'Potability',
]

# Do not truncate cell contents when a frame is rendered.
pd.set_option('display.max_colwidth', None)

# The first line of the file is skipped (presumably its own header row) and
# `columns` supplies the column names instead.
data = pd.read_csv('water_potability.csv', skiprows=1, names=columns)
data.head()

In [None]:
# NOTE: this cell reassigns `data` (the label column is dropped below), so the
# loading cell must be re-run before this cell can be run a second time.

# Fixed seed so the shuffle is reproducible across kernel restarts.
SHUFFLE_SEED = 42

# Shuffle the rows. The index is reset so that row labels equal row positions
# again: on a shuffled integer index, `pot[i]` is a label-based lookup and
# returns the label of a different row than the positional `data_arr[i]`.
data = data.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

# Extract labels (potability)
pot = data["Potability"]

# Deletes potability from data (data is now only inputs)
data = data.drop("Potability", axis=1)

# Converts pandas objects to numpy arrays; both are in the same (positional) row order
data_arr = data.to_numpy()
pot_arr = pot.to_numpy()

# Keep only the rows (samples) in which no input value is NaN
complete_rows = ~np.isnan(data_arr).any(axis=1)

# x will be input
# y will be label
# Both stay plain lists, one entry per retained row, selected with the same mask
# so that x[k] and y[k] always come from the same sample.
x = list(data_arr[complete_rows])
y = list(pot_arr[complete_rows])

# A missing label cannot be trained on; fail loudly instead of calling exit(),
# which in a notebook acts on the kernel rather than reporting an error.
if np.isnan(y).any():
    raise ValueError("NaN present in labels")


In [None]:
# Split the samples into 4 contiguous blocks; the first 3 (A, B, C) are
# concatenated for training and the last one (D) is held out for testing.
# The labels are split at the same row boundaries so that features and labels
# stay aligned and the held-out block has labels (y_D) to evaluate against.
# NOTE: `x` and `y` are reassigned to their training portions, so re-running
# this cell without re-running the previous one splits the training set again.
x = np.array(x)
y = np.array(y)
assert len(x) == len(y), "inputs and labels must have one entry per sample"

split_array = np.array_split(x, 4)
A, B, C, D = split_array
y_A, y_B, y_C, y_D = np.array_split(y, 4)

x = np.concatenate((A, B, C), axis=0)
y = np.concatenate((y_A, y_B, y_C), axis=0)

assert len(x) == len(y)
assert len(D) == len(y_D)

In [None]:
def svm_train(x, y, learning_rate=0.01, num_epochs=1000, C=1.0):
    """Fit a linear SVM by per-sample sub-gradient descent on the hinge loss.

    The model scores a sample as ``np.dot(x, w) - b``. For every sample the
    weights are shrunk by ``learning_rate * 2 * C * w``; when the sample
    violates the margin (``label * score < 1``) the weights and bias are
    additionally moved towards classifying it correctly.

    Parameters
    ----------
    x : array-like of shape (num_samples, num_features)
        Input samples, one per row.
    y : array-like with at least num_samples entries
        Class labels. The update rule needs labels in {-1, +1}; labels given
        purely as {0, 1} are remapped to {-1, +1} (0 -> -1, 1 -> +1), because
        a label of 0 would otherwise contribute nothing to the updates.
    learning_rate : float
        Step size of each update.
    num_epochs : int
        Number of full passes over the samples.
    C : float
        Weight of the shrinkage term applied to ``w`` at every step
        (larger values shrink the weights more strongly).

    Returns
    -------
    w : numpy.ndarray of shape (num_features,)
        Learned weight vector.
    b : float
        Learned bias, to be subtracted from ``np.dot(x, w)``.
    """
    x = np.asarray(x, dtype=float)
    labels = np.asarray(y, dtype=float)

    # Binary 0/1 labels are converted to the -1/+1 encoding the hinge loss
    # assumes; labels that already use -1/+1 are left untouched.
    if np.isin(labels, (0, 1)).all():
        labels = 2 * labels - 1

    num_samples, num_features = x.shape
    w = np.zeros(num_features)
    b = 0.0

    for _ in range(num_epochs):
        for i in range(num_samples):
            if labels[i] * (np.dot(x[i], w) - b) >= 1:
                # Margin satisfied: only the shrinkage term applies.
                w -= learning_rate * (2 * C * w)
            else:
                # Margin violated: shrink and step towards the sample's side.
                w -= learning_rate * (2 * C * w - x[i] * labels[i])
                b -= learning_rate * labels[i]

    return w, b

In [None]:
def svm_predict(x, w, b):
    """Classify samples with a linear model.

    Each sample is scored as ``np.dot(x, w) - b`` and the sign of that score
    is returned: 1 for a positive score, -1 for a negative score and 0 when
    the score is exactly zero.
    """
    return np.sign(np.dot(x, w) - b)

In [None]:

# Fit the linear SVM on the training rows.
# svm_train keeps all of its state in local variables and returns (w, b), so
# no reset step is needed after training. (The name `svm` is never bound in
# this notebook -- the module imported at the top is `SVM` -- so calling
# `svm.reset()` here would raise NameError.)
w, b = svm_train(x, y, learning_rate=0.01, num_epochs=1000, C=1.0)

In [None]:
# Score the held-out block D with the trained weights and bias, then show the
# predicted sign for each of its rows.
y_pred = svm_predict(x=D, w=w, b=b)
print(y_pred)
