# ECE4179 - Assignment\#1 
## Starter Code

In [None]:
# Notebook-only helper: comment out the line below when submitting.
# NOTE(review): presumably loads a code-formatting extension for the notebook -- confirm.
%load_ext lab_black

In [None]:
import numpy as np
import matplotlib.pyplot as plt

### define the sigmoid function below

In [None]:
def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    The value is computed in a numerically stable way: the exponential is
    only ever taken of a non-positive number, so very negative inputs do not
    overflow ``np.exp`` (the naive formula emits a RuntimeWarning there).

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar or ndarray
        Sigmoid of ``x`` with the same shape as ``x``; a scalar input yields
        a NumPy scalar.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in [0, 1], so it can never overflow
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x))  (same function)
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where turns scalars into 0-d arrays; hand scalars back as scalars
    return out[()] if out.ndim == 0 else out

### use the sigmoid function and write a predictor for the logistic model below. 

In [None]:
def predict(X, theta):
    """Predict a 0/1 label for every sample in ``X`` with the logistic model.

    The rule is: if the sigmoid of the model output is >= 0.5 the label is 1,
    otherwise it is 0.

    Parameters
    ----------
    X : ndarray
        Samples, one per row.
    theta : ndarray
        Model parameters as a row vector, so that ``X @ theta.T`` gives one
        score (logit) per sample.

    Returns
    -------
    ndarray of int
        The result of ``X @ theta.T`` thresholded into labels 0 or 1 (one row
        per sample).
    """
    logits = X @ theta.T
    # sigmoid(z) >= 0.5 exactly when z >= 0, so the logit is thresholded at 0
    # (thresholding the raw logit at 0.5 would mislabel samples with 0 <= z < 0.5)
    return (logits >= 0).astype(int)

### write a function to compute the loss and gradient for the logistic model below

In [None]:
# arbitrarily small number; compute_grad_loss below does not need it,
# it is kept so that any other cell that still reads `epsilon` keeps working
epsilon = 1e-100


def compute_grad_loss(X, y, theta):
    """Compute the logistic (cross-entropy) loss and its gradient w.r.t. theta.

    Both the loss and the gradient are SUMMED over the samples (not averaged),
    so their scale grows with the number of samples; the learning rate has to
    be chosen accordingly.

    Parameters
    ----------
    X : ndarray
        Samples, one per row.
    y : ndarray
        0/1 label of every sample; reshaped internally to match the shape of
        ``X @ theta.T``.
    theta : ndarray
        Current model parameters as a row vector (used as ``X @ theta.T``).

    Returns
    -------
    loss : NumPy float
        Summed cross-entropy loss of the samples.
    grad_vec : ndarray
        Flattened gradient of the summed loss with respect to theta.
    """
    logits = X @ theta.T
    # align the labels with the logits so that the subtraction below is
    # element-wise and never broadcasts into a matrix
    targets = np.reshape(y, logits.shape)

    # -log(sigmoid(z)) = log(1 + exp(-z)) and -log(1 - sigmoid(z)) = log(1 + exp(z)).
    # np.logaddexp evaluates these without ever computing log(0), including
    # when the sigmoid saturates at exactly 0 or exactly 1.
    loss = np.sum(
        targets * np.logaddexp(0, -logits) + (1 - targets) * np.logaddexp(0, logits)
    )

    # gradient of the summed loss: sum_i (sigmoid(z_i) - y_i) * x_i
    grad_vec = ((sigmoid(logits) - targets).T @ X).ravel()

    return loss, grad_vec

### use this cell to load your data

In [None]:
# you can load your data using this cell

# toy_data.npz or toy_data_two_circles.npz
# The archive is opened in a `with` block so its file handle is closed again;
# indexing reads each array into memory, so the arrays stay usable afterwards.
with np.load("toy_data.npz") as npzfile:
    X_train = npzfile["arr_0"]
    X_test = npzfile["arr_1"]
    y_train = npzfile["arr_2"]
    y_test = npzfile["arr_3"]


# remember that each row in X_train and X_test is a sample, so X_train[0, :] is the first training sample

### you can plot the data using the cell below

In [None]:
# Scatter the first two feature columns of the training set (left panel)
# and of the test set (right panel); points are coloured by their label.
scatter_style = dict(marker="o", s=25, edgecolor="k")

plt.subplot(121)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train[:, 0], **scatter_style)

plt.subplot(122)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test[:, 0], **scatter_style)

### below you need to implement the gradient descent (GD) algorithm.

In [None]:
# theta holds the parameters of the logistic model
np.random.seed(0)  # fixed seed so the random initialization is reproducible
# To learn them, we randomly initialize them below.
# theta is a row vector with one entry per feature column of X_train,
# because predict and compute_grad_loss both use it as `X @ theta.T`.
theta = np.random.randn(1, X_train.shape[1])

# this is the learning rate of the GD algorithm, you need to tune this and study its effects in your report
lr = 5e-4

# this is the maximum number of iterations of the GD algorithm.
# Since we use the GD, each iteration of the algorithm is equivalent to one epoch, hence the name
max_epoch = 500

# keep track of the loss/accuracy values for plotting
loss = np.zeros(max_epoch)
accuracy = np.zeros(max_epoch)
for epoch in range(max_epoch):
    # measure the training loss and its gradient at the current theta
    loss[epoch], grad_vec = compute_grad_loss(X_train, y_train, theta)
    # gradient-descent step: move theta against the gradient
    theta -= lr * grad_vec

    # storage for plotting: test accuracy of the freshly updated theta
    y_test_hat = predict(X_test, theta)
    # mean of the element-wise matches = fraction of correctly classified samples
    # (avoids converting a one-element array to a float, which NumPy deprecates)
    accuracy[epoch] = np.mean(y_test_hat == y_test)

# Plot our training results

In [None]:
# Left y-axis: test accuracy per epoch (red).
fig, ax1 = plt.subplots()
ax1.set_xlabel("Number of epochs")
ax1.set_ylabel("Accuracy", color="r")
ax1.plot(accuracy, "r-")

# Right y-axis, sharing the same x-axis: training loss per epoch (green).
ax2 = ax1.twinx()
ax2.set_ylabel("Loss", color="g")
ax2.plot(loss, "g-")

plt.title("Test Accuracy and Training Loss")

### Evaluate your trained model using the code below

In [None]:
# make sure that every prediction is exactly 0 or exactly 1
# (a plain range check 0 <= value <= 1 would also accept values such as 0.5)
((y_test_hat == 0) | (y_test_hat == 1)).all()

In [None]:
# the labels and the predictions must have identical shapes,
# otherwise comparing them element-wise would broadcast
y_test.shape == y_test_hat.shape

In [None]:
# now that you have trained your model, let's evaluate it

# first call the predict function on your test data with the parameters obtained by GD
y_test_hat = predict(X_test, theta)

# fraction of test samples that are correctly classified by your model;
# the element-wise comparison requires y_test_hat and y_test to have the same shape.
# np.mean is used instead of float(sum(...)) because converting a one-element
# array to a float is deprecated in recent NumPy versions.
score = float(np.mean(y_test_hat == y_test))
print(score)