In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# FUNCTIONS FOR Q1

def compute_gradient(X, y, w):
    """Gradient of the mean squared error ``mean((y - X w) ** 2)`` w.r.t. ``w``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Design matrix.
    y : array-like with n_samples entries
        Targets, either 1-D or a column vector.
    w : array-like with n_features entries
        Weights, either 1-D or a column vector.

    Returns
    -------
    numpy.ndarray, shape (n_features, 1)
        Column vector holding the partial derivative for each weight.
    """
    X_arr = np.asarray(X)
    # Force targets and weights into columns: subtracting an (n, 1) prediction
    # from a 1-D (n,) target would otherwise broadcast to an (n, n) matrix.
    y_col = np.asarray(y).reshape(-1, 1)
    w_col = np.asarray(w).reshape(-1, 1)

    residual = y_col - X_arr.dot(w_col)  # shape (n_samples, 1)
    # d/dw_k of residual_j ** 2 is -2 * residual_j * X[j, k]; average over samples.
    per_sample = np.multiply(-2 * residual, X_arr)
    return np.mean(per_sample, axis=0).reshape(-1, 1)

def gradient_descent(X, y, w, num_iterations, alpha):
    """Run batch gradient descent on the mean squared error and log each step.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, n_features)
        Design matrix.
    y : array-like with n_samples entries
        Targets, 1-D or a column vector.
    w : array-like with n_features entries
        Initial weights, 1-D or a column vector. The caller's array is not
        modified; every update builds a new array.
    num_iterations : int
        Number of update steps to perform.
    alpha : float
        Learning rate (step size).

    Returns
    -------
    numpy.ndarray, shape (n_features, 1)
        The weights after the last update.
    """
    # Work with column vectors throughout so neither the weight update nor the
    # loss can broadcast a 1-D array against a column into a square matrix.
    y = np.asarray(y).reshape(-1, 1)
    w = np.asarray(w, dtype=float).reshape(-1, 1)

    for i in np.arange(num_iterations):
        gradient = compute_gradient(X, y, w)
        w = w - (alpha * gradient)
        loss = np.mean((X.dot(w) - y) ** 2)
        # Print every weight rather than assuming there are exactly three.
        weights = "\t".join(f"w{j}={w_j}" for j, w_j in enumerate(w))
        print(f"Iteration {i}:\nWeights: {weights}\nLoss: {loss}\n")
    return w

In [5]:
# FUNCTION FOR Q2

def check_predictions(X, y, w):
    """Compare sign-thresholded linear scores against the labels.

    The scores ``X.dot(w)`` are mapped to +1 where strictly positive and to -1
    everywhere else (a score of exactly 0 counts as -1). The result is the
    elementwise comparison of those predicted labels with ``y``.
    """
    scores = np.array(X.dot(w))
    predicted = np.where(scores > 0, 1, -1)
    return predicted == y

In [137]:
# FUNCTIONS FOR Q3

def map_to_binary(y):
    """Recode labels as 0/1 indicators.

    Parameters
    ----------
    y : array-like with n entries
        Class labels.

    Returns
    -------
    numpy.ndarray of int, shape (n,)
        1 where the label equals 1, 0 for every other label.
    """
    # Convert first: comparing a plain Python list with ``== 1`` yields a single
    # ``False`` rather than an elementwise mask, which would leave every entry 0.
    labels = np.asarray(y)
    indicator = np.zeros(len(labels), dtype=int)
    indicator[labels == 1] = 1
    return indicator
        
def get_gradients(X, w, y):
    """Gradient of the mean logistic log-likelihood with respect to the weights.

    For each sample the contribution to weight ``i`` is
    ``x[i] * (k - sigmoid(w . x))``, where ``k`` is 1 when the label equals 1
    and 0 for any other label. This equals ``x[i] * (1 - sigmoid)`` for
    positive samples and ``-x[i] * sigmoid`` for the rest.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix.
    w : array-like, shape (n_features,)
        Current weights.
    y : array-like with n_samples entries
        Class labels; a label of 1 is the positive class.

    Returns
    -------
    numpy.ndarray, shape (n_features,)
        Average gradient over all samples, one entry per weight.
    """
    features = np.asarray(X, dtype=float)
    weights = np.asarray(w, dtype=float)
    is_positive = (np.asarray(y).ravel() == 1).astype(float)

    scores = features.dot(weights)
    # sigmoid(z) written as exp(-log(1 + e^{-z})): this never forms inf / inf,
    # so very negative scores give 0 instead of NaN.
    p_positive = np.exp(-np.logaddexp(0.0, -scores))

    return features.T.dot(is_positive - p_positive) / len(is_positive)
    

def d_dw_negative(x, w, i):
    """Return ``-x[i] * sigmoid(w . x)``.

    This is the partial derivative with respect to ``w[i]`` of
    ``log(1 - sigmoid(w . x))``, the log-probability term of a negative sample.
    """
    score = w.dot(x)
    sigmoid = 1 / (1 + np.exp(-score))
    return -x[i] * sigmoid
                       
def d_dw_positive(x, w, i):
    """Return ``x[i] * (1 - sigmoid(w . x))``.

    This is the partial derivative with respect to ``w[i]`` of
    ``log(sigmoid(w . x))``, the log-probability term of a positive sample.

    Parameters
    ----------
    x : numpy.ndarray, shape (n_features,)
        One sample.
    w : numpy.ndarray, shape (n_features,)
        Current weights.
    i : int
        Index of the weight to differentiate with respect to.
    """
    score = w.dot(x)
    # exp(-z) / (1 + exp(-z)) == 1 / (1 + exp(z)) == exp(-log(1 + exp(z))).
    # The literal quotient turns into inf / inf = NaN once exp(-z) overflows;
    # logaddexp evaluates log(1 + exp(z)) without overflowing.
    return x[i] * np.exp(-np.logaddexp(0.0, score))

def prob_positive(x, w):
    """Logistic-model probability of the positive class: ``sigmoid(w . x)``."""
    score = w.dot(x)
    denominator = 1 + np.exp(-score)
    return 1 / denominator
    
def prob_negative(x, w):
    """Logistic-model probability of the negative class: ``1 - sigmoid(w . x)``.

    Parameters
    ----------
    x : numpy.ndarray, shape (n_features,)
        One sample.
    w : numpy.ndarray, shape (n_features,)
        Current weights.
    """
    score = w.dot(x)
    # 1 - sigmoid(z) == sigmoid(-z) == exp(-log(1 + exp(z))). Subtracting from 1
    # rounds to exactly 0 once sigmoid(z) is within machine epsilon of 1, which
    # would make the log of this probability -inf; this form keeps the tail.
    return np.exp(-np.logaddexp(0.0, score))

def log_likelihood(X, w, y):
    """Mean log-likelihood of labels ``y`` under a logistic model with weights ``w``.

    Samples labelled 1 contribute ``log(sigmoid(w . x))`` and samples labelled
    -1 contribute ``log(1 - sigmoid(w . x))``. Any other label contributes
    nothing to the sum but is still counted in the divisor ``len(y)``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix.
    w : array-like, shape (n_features,)
        Weights.
    y : array-like with n_samples entries
        Labels in {-1, +1}.

    Returns
    -------
    float
        Sum of the per-sample log-probabilities divided by ``len(y)``.
    """
    features = np.asarray(X, dtype=float)
    weights = np.asarray(w, dtype=float)
    labels = np.asarray(y).ravel()

    scores = features.dot(weights)
    # log(sigmoid(z)) = -log(1 + e^{-z}) and log(1 - sigmoid(z)) = -log(1 + e^{z}).
    # logaddexp evaluates both without first forming a probability that could
    # underflow to 0 and turn the log into -inf.
    positive_terms = -np.logaddexp(0.0, -scores[labels == 1])
    negative_terms = -np.logaddexp(0.0, scores[labels == -1])

    return (positive_terms.sum() + negative_terms.sum()) / len(labels)

def gradient_ascent(X, w, y, num_iterations, alpha):
    """Maximise the logistic log-likelihood by gradient ascent, logging each step.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, n_features)
        Feature matrix.
    w : numpy.ndarray, shape (n_features,)
        Initial weights. The caller's array is not modified; every update
        builds a new array.
    y : numpy.ndarray with n_samples entries
        Labels in {-1, +1}.
    num_iterations : int
        Number of update steps to perform.
    alpha : float
        Learning rate (step size).

    Returns
    -------
    numpy.ndarray
        The weights after the last update (the initial ``w`` when
        ``num_iterations`` is 0).
    """
    for i in np.arange(num_iterations):
        grads = get_gradients(X, w, y)
        # Ascent: step along the gradient to increase the log-likelihood.
        w = w + (alpha * grads)
        logL = log_likelihood(X, w, y)
        # Weights are reported 1-based (w1, w2, ...), however many there are.
        weights = "\t".join(f"w{j + 1} = {w_j}" for j, w_j in enumerate(w))
        print(f"Iteration {i}:\nWeights: {weights}\nLog Likelihood: {logL}\n")
    return w


In [138]:
# Q3

# (b)

# Toy data set for part (b): two features and a label in {-1, +1}.
df_logistic_reg = pd.DataFrame(
    {
        "x1": [1, 2, -3, -3],
        "x2": [1, -1, -1, 1],
        "y": [1, -1, 1, 1],
    }
)

# Every column except the last is a feature; the last column is the label.
X = df_logistic_reg.iloc[:, :-1].to_numpy()
y = df_logistic_reg.iloc[:, -1].to_numpy()
w = np.zeros(2, dtype=int)  # start from the zero weight vector
num_iterations = 4
alphas = np.array([0.01, 0.2])

# Each learning rate gets its own run starting from the same initial weights.
for alpha in alphas:
    print(f"\nLearning Rate: {alpha}\n\n")
    gradient_ascent(X, w, y, num_iterations, alpha)



Learning Rate: 0.01


Iteration 0:
Weights: w1 = -0.00875	w2 = 0.0025
Log Likelihood: -0.6849231067802005

Iteration 1:
Weights: w1 = -0.01737266290186508	w2 = 0.004988282193829135
Log Likelihood: -0.6769244585507829

Iteration 2:
Weights: w1 = -0.025869865270989494	w2 = 0.007464960986992616
Log Likelihood: -0.6691447513079959

Iteration 3:
Weights: w1 = -0.03424349233161267	w2 = 0.009930154478800376
Log Likelihood: -0.6615776273762636


Learning Rate: 0.2


Iteration 0:
Weights: w1 = -0.17500000000000002	w2 = 0.05
Log Likelihood: -0.5501553964144653

Iteration 1:
Weights: w1 = -0.30009837513120896	w2 = 0.09545860605224449
Log Likelihood: -0.47233289121465316

Iteration 2:
Weights: w1 = -0.39264223696690004	w2 = 0.13768578397958153
Log Likelihood: -0.42589497044094043

Iteration 3:
Weights: w1 = -0.4638056446551079	w2 = 0.17759552506386816
Log Likelihood: -0.3954420670938938

