# Lab 7: Logistic Regression Spam Classifier — Austin Nguyen

November 8, 2024

In [11]:
import numpy as np

# Read the training and test splits from comma-separated files
train_data = np.genfromtxt('spam-train.csv', delimiter=',')
test_data = np.genfromtxt('spam-test.csv', delimiter=',')

# Split each table column-wise: everything except the final column is a
# feature, and the final column is the label
X_train, y_train = train_data[:, :-1], train_data[:, -1]
X_test, y_test = test_data[:, :-1], test_data[:, -1]

# Normalize the features with Z-score normalization based on training data.
# The test set is scaled with the *training* mean/std so that no statistics
# from the test split are used when fitting.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
# A feature that is constant across the training rows has std == 0; dividing
# by it would fill that column with NaN/inf and poison every later step.
# Scaling such a column by 1 instead leaves it centered at zero in training.
std = np.where(std == 0, 1.0, std)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

# Add a bias term (column of ones) as the first column, so the first weight
# acts as the intercept
X_train = np.hstack([np.ones((X_train.shape[0], 1)), X_train])
X_test = np.hstack([np.ones((X_test.shape[0], 1)), X_test])

# Sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Uses a numerically stable formulation: the naive expression evaluates
    exp(-z), which overflows (emitting a RuntimeWarning) when z is a large
    negative number. Here the exponential is only ever taken of a
    non-positive value, so it stays within (0, 1].

    Parameters
    ----------
    z : scalar or array-like of numbers
        Input value(s).

    Returns
    -------
    NumPy scalar or ndarray
        Values in [0, 1] with the same shape as ``z``; a scalar input
        yields a scalar result.
    """
    z = np.asarray(z)
    # exp(-|z|) is in (0, 1] for every finite z, so it cannot overflow.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z))       (identical to the naive formula)
    # z <  0: exp(z) / (1 + exp(z))   (algebraically the same value)
    out = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # arrays with one or more dimensions unchanged.
    return out[()]

# Initial weights: one per column of X_train, drawn from a standard normal.
# The generator is seeded so repeated runs start from the same point.
r = np.random.default_rng(22)
w = r.normal(size=(X_train.shape[1]))

# Gradient descent parameters
alpha = 0.001               # learning rate (step size)
tolerance = 1e-5            # stop once the weight update is shorter than this
max_iterations = 1_000_000  # safety cap so the loop cannot run forever

converged = False
iterations = 0
n_samples = len(y_train)    # loop-invariant, so computed once

while not converged and iterations < max_iterations:
    # Calculate predictions
    z = np.dot(X_train, w)
    predictions = sigmoid(z)

    # Compute gradient: X^T (predictions - y) / n. This is the gradient of the
    # average logistic (cross-entropy) loss, assuming y_train holds 0/1 labels.
    gradient = np.dot(X_train.T, predictions - y_train) / n_samples

    # A NaN/inf gradient can never satisfy `norm < tolerance`, so without this
    # check the loop would keep going with garbage weights.
    if not np.all(np.isfinite(gradient)):
        raise FloatingPointError(
            f'Non-finite gradient at iteration {iterations}; '
            'check the input data and the learning rate.'
        )

    # Update weights
    step = alpha * gradient
    w -= step

    # Check for convergence: the update itself has become negligibly small
    if np.linalg.norm(step) < tolerance:
        converged = True

    iterations += 1

if converged:
    print(f'Converged in {iterations} iterations.')
else:
    print(f'Stopped after {iterations} iterations without converging.')

# Prediction function
def predict(X, w):
    """Classify each row of X by thresholding its sigmoid score at 0.5.

    Parameters
    ----------
    X : array-like
        Feature matrix; it is multiplied by ``w`` with ``np.dot``.
    w : array-like
        Weight vector.

    Returns
    -------
    Boolean result of ``sigmoid(np.dot(X, w)) >= 0.5`` (True where the
    score reaches the 0.5 threshold).
    """
    scores = np.dot(X, w)
    probabilities = sigmoid(scores)
    return probabilities >= 0.5

# Training accuracy: fraction of training rows whose thresholded prediction
# equals the stored label
train_predictions = predict(X_train, w)
train_matches = train_predictions == y_train
train_accuracy = np.mean(train_matches)
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')

# Test accuracy: the same measurement on the held-out split
test_predictions = predict(X_test, w)
test_matches = test_predictions == y_test
test_accuracy = np.mean(test_matches)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')


Converged in 134827 iterations.
Training Accuracy: 92.56%
Test Accuracy: 93.16%
