# Import libraries

In [104]:
import json
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Import Data

In [105]:
# Location of the dataset, read below as a header-less CSV.
file_path = 'training_data.txt'
# header=None: the file has no header row, so pandas labels the columns 0, 1, 2.
df = pd.read_csv(file_path, header=None)
# Preview the first rows as this cell's output.
df.head()

Unnamed: 0,0,1,2
0,0.051267,0.69956,1
1,-0.092742,0.68494,1
2,-0.21371,0.69225,1
3,-0.375,0.50219,1
4,-0.51325,0.46564,1


In [106]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,0,1,2
count,118.0,118.0,118.0
mean,0.054779,0.183102,0.491525
std,0.496654,0.519743,0.50206
min,-0.83007,-0.76974,0.0
25%,-0.37212,-0.254385,0.0
50%,-0.006336,0.213455,0.0
75%,0.47897,0.646563,1.0
max,1.0709,1.1089,1.0


# Helper functions

In [107]:
def sigmoid(z):
    """Numerically stable logistic function 1 / (1 + exp(-z)).

    The naive formula evaluates exp(-z), which overflows to inf (and emits a
    RuntimeWarning) for large negative z. Here the exponential is only ever
    taken of a non-positive number, so it stays within (0, 1]:

        z >= 0:  1 / (1 + exp(-z))
        z <  0:  exp(z) / (1 + exp(z))

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    Same shape as ``z`` (a NumPy scalar for scalar input), values in [0, 1].
    """
    # exp(-|z|) never overflows; it equals exp(-z) for z >= 0 and exp(z) for z < 0.
    e = np.exp(-np.abs(z))
    # Numerator is 1 on the non-negative side and exp(z) on the negative side.
    numer = np.where(np.asarray(z) >= 0, 1.0, e)
    return numer / (1.0 + e)

def compute_cost(theta, X, y, lambda_):
    """L2-regularized logistic-regression cost (mean cross-entropy).

    Computed directly from the logits ``z = X @ theta`` using the identities

        -log(sigmoid(z))     = logaddexp(0, -z)
        -log(1 - sigmoid(z)) = logaddexp(0,  z)

    so the result stays finite even when the sigmoid would saturate to exactly
    0 or 1 (where ``np.log`` would produce -inf / nan).

    Parameters
    ----------
    theta : ndarray
        Parameter vector; ``theta[0]`` is treated as the bias and is excluded
        from the regularization penalty.
    X : ndarray
        Design matrix with one row per sample.
    y : ndarray
        Labels (0 or 1), one per sample.
    lambda_ : float
        Regularization strength.

    Returns
    -------
    float
        Mean cross-entropy plus ``lambda_ / (2 * m) * sum(theta[1:] ** 2)``.
    """
    m = y.size
    z = X @ theta

    # Negative log-likelihood terms, evaluated without forming sigmoid(z).
    neg_log_h = np.logaddexp(0, -z)
    neg_log_one_minus_h = np.logaddexp(0, z)

    cost = (1 / m) * np.sum(y.T @ neg_log_h + (1 - y).T @ neg_log_one_minus_h)
    # The bias term theta[0] is not penalized.
    reg_term = (lambda_ / (2 * m)) * np.sum(theta[1:] ** 2)

    return cost + reg_term

def compute_gradient(theta, X, y, lambda_):
    """Gradient of the L2-regularized logistic-regression cost w.r.t. theta.

    Parameters
    ----------
    theta : ndarray
        Current parameter vector; ``theta[0]`` is the unpenalized bias.
    X : ndarray
        Design matrix with one row per sample.
    y : ndarray
        Labels, one per sample.
    lambda_ : float
        Regularization strength.

    Returns
    -------
    ndarray
        Gradient with the same shape as ``theta``.
    """
    n_samples = y.size
    residual = sigmoid(X @ theta) - y

    data_grad = (1 / n_samples) * (X.T @ residual)

    # Scaling theta yields a fresh array, so zeroing the bias entry of the
    # penalty does not touch the caller's theta.
    penalty = (lambda_ / n_samples) * theta
    penalty[0] = 0

    return data_grad + penalty

def gradient_descent(theta, X, y, alpha, num_iters, lambda_):
    """Run batch gradient descent for a fixed number of iterations.

    Parameters
    ----------
    theta : ndarray
        Initial parameter vector (not modified; a new array is produced on
        every update).
    X, y : ndarray
        Design matrix and labels.
    alpha : float
        Learning rate.
    num_iters : int
        Number of update steps to perform.
    lambda_ : float
        Regularization strength passed to the cost and gradient.

    Returns
    -------
    (ndarray, list)
        The final parameters and the cost recorded after each update.
    """
    costs = []
    for _ in range(num_iters):
        step = alpha * compute_gradient(theta, X, y, lambda_)
        theta = theta - step
        # Record the cost at the updated parameters.
        costs.append(compute_cost(theta, X, y, lambda_))

    return theta, costs

def predict(theta, X):
    """Predict 0/1 labels by thresholding the sigmoid output at 0.5.

    Parameters
    ----------
    theta : ndarray
        Parameter vector.
    X : ndarray
        Design matrix with one row per sample.

    Returns
    -------
    ndarray of int
        1 where the predicted probability is >= 0.5, otherwise 0.
    """
    probabilities = sigmoid(X @ theta)
    is_positive = probabilities >= 0.5
    return is_positive.astype(int)

def evaluate(y, y_pred):
    """Score predictions against true labels.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)`` as computed by the
        corresponding scikit-learn metric functions.
    """
    # Order matters: callers unpack the result positionally.
    metric_fns = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(metric(y, y_pred) for metric in metric_fns)


# Map function

In [108]:
def map_feature(x1, x2, degree=6):
    """Expand two features into all polynomial terms up to ``degree``.

    Column 0 is the constant 1 (bias). The remaining columns are ordered by
    total degree ``i = 1..degree`` and, within each degree, by the power of
    ``x2`` (``j = 0..i``), i.e.

        1, x1, x2, x1^2, x1*x2, x2^2, x1^3, x1^2*x2, ...

    Parameters
    ----------
    x1, x2 : array-like
        One-dimensional feature values of equal length (scalars are treated as
        length-1 arrays).
    degree : int, optional
        Highest total polynomial degree to generate. Defaults to 6, which
        yields 28 columns.

    Returns
    -------
    ndarray of shape (len(x1), (degree + 1) * (degree + 2) // 2)

    Raises
    ------
    ValueError
        If ``degree`` is negative.
    """
    if degree < 0:
        raise ValueError(f"degree must be non-negative, got {degree}")

    # Work in float so integer inputs cannot overflow when raised to a power.
    x1 = np.atleast_1d(np.asarray(x1, dtype=float))
    x2 = np.atleast_1d(np.asarray(x2, dtype=float))

    # Number of monomials x1^a * x2^b with a + b <= degree.
    n_terms = (degree + 1) * (degree + 2) // 2
    out = np.ones((len(x1), n_terms))

    idx = 1  # column 0 stays as the bias column of ones
    for i in range(1, degree + 1):
        for j in range(i + 1):
            out[:, idx] = x1 ** (i - j) * x2 ** j
            idx += 1

    return out

# Train, evaluate, and save the model

In [109]:
# Load hyperparameters; the keys "Alpha", "NumIter" and "Lambda" are read below.
with open('config.json',) as f:
    configs = json.load(f)

# Split the frame loaded earlier: every column but the last is a feature,
# the last column is the label.
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()
# Expand the two raw features into polynomial terms (bias column included).
X_mapped = map_feature(X[:, 0], X[:, 1])
# Start from all-zero parameters, one per mapped feature column.
theta = np.zeros(X_mapped.shape[1])

# Perform gradient descent
theta, J_history = gradient_descent(theta, X_mapped, y, alpha=configs["Alpha"], num_iters=configs["NumIter"], lambda_=configs["Lambda"])
# NOTE(review): predictions are made on the same samples used for fitting, so
# the metrics below are training-set scores, not a held-out estimate.
y_pred = predict(theta, X_mapped)

# Evaluate the predictions
acc, precision, recall, f1 = evaluate(y, y_pred)

print(f'Accuracy: {acc}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

# Save model
# NOTE(review): the JSON key is literally 'theta: ' (with trailing colon and
# space); any reader of model.json must use that exact key. Confirm whether
# this is intended before changing it.
with open('model.json', 'w') as f:
    model = { 'theta: ': theta.tolist() }
    json.dump(model, f, indent=4)

# Save evaluations
# NOTE(review): as above, each key carries a trailing ': ' in the written file.
with open('classification_report.json', 'w') as f:
    score = { 
        'Accuracy: ': acc,
        'Precision: ': precision,
        'Recall: ': recall,
        'F1 Score: ': f1
    }
    json.dump(score, f, indent=4)

Accuracy: 0.8305084745762712
Precision: 0.7794117647058824
Recall: 0.9137931034482759
F1 Score: 0.8412698412698414
