### Individual Assignment 2 (10%)


-   **Name:** Ezlan Zulfiqree bin Hashim
-   **Matric Number:** 17192056


#### 1. Logistic Regression model from scratch


In [None]:
import numpy as np


def CalcObj(XTrain, YTrain, wHat):
    """Return the mean negative log-likelihood of a logistic regression fit.

    For each sample the score is ``s = [1, x] . wHat`` and the per-sample
    loss is ``log(1 + exp(s)) - y * s`` (the Bernoulli negative
    log-likelihood for 0/1 labels).

    Parameters
    ----------
    XTrain : 2-D array of features, one row per sample, without a bias
        column (a column of ones is prepended here).
    YTrain : labels, one per sample; may be flat or a column.
    wHat : weights with the bias term first.

    Returns
    -------
    The loss averaged over all samples.
    """
    n = len(YTrain)
    X_with_bias = np.hstack((np.ones((n, 1)), XTrain))
    scores = np.dot(X_with_bias, wHat)
    # Give the labels the same shape as the scores; otherwise flat labels
    # broadcast against column scores into an n-by-n matrix and the mean is
    # taken over the wrong set of terms.
    labels = np.asarray(YTrain).reshape(scores.shape)
    # logaddexp(0, s) == log(1 + exp(s)) but does not overflow for large s.
    obj = -np.mean(labels * scores - np.logaddexp(0, scores))
    return obj


def CalcGrad(XTrain, YTrain, wHat):
    """Return the gradient of the mean logistic loss with respect to wHat.

    Parameters
    ----------
    XTrain : 2-D array of features, one row per sample, without a bias
        column (a column of ones is prepended here).
    YTrain : labels, one per sample; may be flat or a column.
    wHat : weights with the bias term first.

    Returns
    -------
    ``X_b.T @ (sigmoid(X_b @ wHat) - y) / n`` where ``X_b`` is ``XTrain``
    with the bias column; it has the same shape as ``wHat``.
    """
    n = len(YTrain)
    X_with_bias = np.hstack((np.ones((n, 1)), XTrain))
    scores = np.dot(X_with_bias, wHat)
    probs = 1 / (1 + np.exp(-scores))
    # Match the label shape to the predictions so that flat labels do not
    # broadcast against column predictions into an n-by-n residual matrix.
    residual = probs - np.asarray(YTrain).reshape(scores.shape)
    grad = np.dot(X_with_bias.T, residual) / n
    return grad


def UpdateParams(weight, grad, lr):
    """Take one gradient-descent step and return the new weights.

    The inputs are left untouched; the result is ``weight - lr * grad``.
    """
    step = lr * grad
    return weight - step


def CheckConvg(oldObj, newObj, tol):
    """Report whether the objective moved by strictly less than ``tol``."""
    change = abs(newObj - oldObj)
    return change < tol


class LogisticRegressionSc:
    """Logistic regression fitted by batch gradient descent.

    Parameters
    ----------
    iters : maximum number of gradient-descent iterations.
    lr : step size applied to the gradient on every update.
    tol : training stops early once the objective changes by less than
        this amount between consecutive iterations.
    """

    def __init__(self, iters, lr=0.01, tol=0.001):
        self.iters = iters
        self.lr = lr
        self.tol = tol

    def GradientDescent(self, XTrain, YTrain):
        """Fit the weights on the training data.

        Parameters
        ----------
        XTrain : 2-D array of features, one row per sample.
        YTrain : labels, one per sample; may be flat or a column.

        Returns
        -------
        wHat : column vector of ``p + 1`` weights (bias first), starting
            from all zeros.
        objVals : list with the objective value after each update.
        """
        # Work with column labels throughout so they line up with the
        # (n, 1) scores produced from the column weight vector.
        YTrain = np.asarray(YTrain).reshape(-1, 1)
        p = XTrain.shape[1]
        wHat = np.zeros((p + 1, 1))
        objVals = []

        # The objective before an update equals the objective after the
        # previous update, so evaluate it once up front and carry it along
        # instead of recomputing it on every iteration.
        oldObj = CalcObj(XTrain, YTrain, wHat)
        for _ in range(self.iters):
            grad = CalcGrad(XTrain, YTrain, wHat)
            wHat = UpdateParams(wHat, grad, self.lr)
            newObj = CalcObj(XTrain, YTrain, wHat)
            objVals.append(newObj)

            if CheckConvg(oldObj, newObj, self.tol):
                break
            oldObj = newObj

        return wHat, objVals


def PredictLabels(XTest, YTest, wHat):
    """Predict 0/1 labels and count the misclassified samples.

    Parameters
    ----------
    XTest : 2-D array of features, one row per sample, without a bias
        column (a column of ones is prepended here).
    YTest : true labels, one per sample; may be flat or a column.
    wHat : weights with the bias term first.

    Returns
    -------
    yHat : integer array of predicted labels (1 where the predicted
        probability is at least 0.5, else 0).
    numErrors : number of samples whose prediction differs from ``YTest``.
    """
    m = XTest.shape[0]
    X_with_bias = np.hstack((np.ones((m, 1)), XTest))
    scores = np.dot(X_with_bias, wHat)
    probs = 1 / (1 + np.exp(-scores))
    yHat = (probs >= 0.5).astype(int)
    # Compare element by element: without the reshape, flat labels would
    # broadcast against column predictions into an m-by-m comparison and
    # inflate the error count.
    truth = np.asarray(YTest).reshape(yHat.shape)
    numErrors = np.sum(yHat != truth)
    return yHat, numErrors

##### Train logistic regression model with Train_toydata.txt


In [None]:
# Load the whitespace-separated training file in a single pass. Columns 0-1
# are the two features and column 2 is the label; ndmin=2 keeps the result
# two-dimensional even when the file holds a single row.
trainData = np.loadtxt("Train_toydata.txt", usecols=(0, 1, 2), ndmin=2)
XTrain = trainData[:, :2]
YTrain = trainData[:, 2:3]  # slice (not index) so the labels stay a column

# Fit the from-scratch model; objVals records the objective after each step.
logRegTrain = LogisticRegressionSc(iters=1000)
wHat, objVals = logRegTrain.GradientDescent(XTrain, YTrain)

##### Test logistic regression model with Test_toydata.txt


In [None]:
# Load the whitespace-separated test file in a single pass. Columns 0-1 are
# the two features and column 2 is the label; ndmin=2 keeps the result
# two-dimensional even when the file holds a single row.
testData = np.loadtxt("Test_toydata.txt", usecols=(0, 1, 2), ndmin=2)
XTest = testData[:, :2]
YTest = testData[:, 2:3]  # slice (not index) so the labels stay a column

# Evaluate the trained weights on the held-out data.
yHat, numErrors = PredictLabels(XTest, YTest, wHat)
print(yHat)
print("numErrors:", numErrors)

#### 2. Logistic regression model using built-in PyTorch functions


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim


class LogisticRegressionPT(nn.Module):
    """Logistic regression as a single linear layer followed by a sigmoid."""

    def __init__(self, input_dim):
        """Create the linear layer mapping ``input_dim`` features to one output."""
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

##### Train PyTorch logistic regression model with Train_toydata.txt


In [None]:
# Parse the whitespace-separated training file in one pass, skipping blank
# lines. Only the first three fields of each row are used: two features
# followed by the label.
with open("Train_toydata.txt") as trainToyDataFile:
    rows = [
        [float(field) for field in line.split()[:3]]
        for line in trainToyDataFile
        if line.strip()
    ]

trainData = torch.tensor(rows)
XTrain = trainData[:, :2].contiguous()
YTrain = trainData[:, 2:3].contiguous()  # slice keeps the labels as a column

input_dim = XTrain.shape[1]
model = LogisticRegressionPT(input_dim)

# Binary cross-entropy on the model's sigmoid outputs, minimised with
# full-batch gradient descent.
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
num_epochs = 1000

for epoch in range(num_epochs):
    outputs = model(XTrain)
    loss = criterion(outputs, YTrain)

    # Clear gradients from the previous step before back-propagating.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}")

##### Test PyTorch logistic regression model with Test_toydata.txt


In [None]:
# Parse the whitespace-separated test file in one pass, skipping blank
# lines. Only the first three fields of each row are used: two features
# followed by the label.
with open("Test_toydata.txt") as testToyDataFile:
    rows = [
        [float(field) for field in line.split()[:3]]
        for line in testToyDataFile
        if line.strip()
    ]

testData = torch.tensor(rows)
XTest = testData[:, :2].contiguous()
YTest = testData[:, 2:3].contiguous()  # slice keeps the labels as a column

with torch.no_grad():
    yHat = model(XTest)
    # Threshold the probabilities into 0/1 labels. The column shape is kept
    # (no squeeze) so the comparison with YTest below is element-wise rather
    # than a broadcast m-by-m comparison.
    predictions = (yHat >= 0.5).int().numpy()

# Count errors on the thresholded labels; comparing the raw probabilities
# with the 0/1 labels would flag almost every sample as an error.
numErrorsPT = np.sum(predictions != YTest.numpy())
print(yHat)
print("numErrors:", numErrorsPT)

#### 3. Compare testing accuracy between own functions and built-in PyTorch functions


In [None]:
from tabulate import tabulate

# Side-by-side test error counts for the two implementations.
headers = ["model", "numErrors"]
summary_rows = [
    ["Scratch", numErrors],
    ["PyTorch", numErrorsPT],
]
print(tabulate(summary_rows, headers=headers))