In [21]:
import numpy as np
import pandas as pd

In [22]:
# Load the crew-scores CSV into the module-level frame `df`, which
# load_and_prepare_data() reads when building features and labels.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# notebook only runs where the file exists at exactly this location —
# consider a configurable or project-relative data path.
df = pd.read_csv('C:/Users/dilshan/Documents/AI_ML_Course(zuu)/week03/zuu_crew_scores.csv')

In [23]:
def load_and_prepare_data(data=None, pass_threshold=75):
    """Build the standardized design matrix and binary target for training.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame containing the seven feature columns listed in ``features``
        plus 'CapstoneScore'. When omitted, the notebook-level ``df`` is
        used, which matches the original zero-argument behaviour.
    pass_threshold : float, default 75
        A row is labelled 1 when its 'CapstoneScore' is >= this value,
        otherwise 0.

    Returns
    -------
    X : numpy.ndarray, shape (n_rows, 8)
        A leading column of ones (intercept term) followed by the seven
        features, each centered and scaled by its population standard
        deviation.
    y : numpy.ndarray, shape (n_rows,)
        Integer 0/1 labels.
    """
    features = [
        'EducationLevel', 'Attendance', 'TotalHours', 'AssignmentsCompleted',
        'HackathonParticipation', 'GitHubScore', 'PeerReviewScore'
    ]
    frame = df if data is None else data

    # Force a float matrix so the arithmetic below never runs on object dtype.
    X = frame[features].to_numpy(dtype=float)
    y = (frame['CapstoneScore'] >= pass_threshold).astype(int).to_numpy()

    mean = X.mean(axis=0)
    std = X.std(axis=0)
    # A constant column has zero spread; dividing by it would turn the whole
    # column into NaN and poison every later dot product. Scaling by 1 instead
    # leaves such a column as all zeros after centering.
    std[std == 0] = 1.0
    X = (X - mean) / std

    # Prepend the intercept column.
    X = np.hstack([np.ones((X.shape[0], 1)), X])
    return X, y

In [24]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise.

    The input is limited to [-500, 500] before exponentiation, which keeps
    np.exp from overflowing for float64 input.
    """
    bounded = np.clip(z, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

In [25]:
def compute_logistic_cost(X, y, theta):
    """Mean binary cross-entropy of a logistic model.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix; ``np.dot(X, theta)`` must yield one score per sample.
    y : numpy.ndarray
        0/1 targets, one per sample.
    theta : numpy.ndarray
        Model weights.

    Returns
    -------
    float
        Average of -[y*log(h) + (1-y)*log(1-h)] over the samples, where
        h = 1 / (1 + e^(-z)) and z = X @ theta.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty.
    """
    m = len(y)
    z = np.dot(X, theta)
    # With h = sigmoid(z), the per-sample loss simplifies algebraically to
    # log(1 + e^z) - y*z. np.logaddexp(0, z) evaluates log(1 + e^z) without
    # overflow, so no epsilon is needed to dodge log(0). The previous
    # "+ epsilon" form capped the loss at -log(1e-15) (about 34.5) once 1 - h
    # rounded to zero, under-penalising confidently wrong predictions.
    cost = (1/m) * np.sum(np.logaddexp(0, z) - y * z)
    return cost

In [26]:
def compute_logistic_gradients(X, y, theta):
    """Gradient of the mean logistic loss with respect to ``theta``.

    Evaluates (1/m) * X^T (sigmoid(X theta) - y), where m = len(y).
    """
    n_samples = len(y)
    predicted = sigmoid(np.dot(X, theta))
    scale = 1 / n_samples
    # Per-sample prediction error, projected back onto each feature column.
    residual = predicted - y
    return scale * np.dot(X.T, residual)

In [27]:
def logistic_gradient_descent(X, y, learning_rate=0.01, num_iterations=1000):
    """Fit logistic-regression weights with fixed-step batch gradient descent.

    Starts from an all-zero weight vector, applies ``num_iterations`` updates
    scaled by ``learning_rate``, and records the cost after every update.

    Returns
    -------
    tuple
        ``(theta, costs)``: the final weight vector (one entry per column of
        X) and a list with the post-update cost of each iteration.
    """
    _, n_weights = X.shape
    weights = np.zeros(n_weights)
    cost_history = []

    for _step in range(num_iterations):
        # Move against the gradient, then log the cost at the new position.
        weights -= learning_rate * compute_logistic_gradients(X, y, weights)
        cost_history.append(compute_logistic_cost(X, y, weights))

    return weights, cost_history

In [28]:
def predict_proba(X, theta):
    """Return sigmoid(X . theta): the model's probability for class 1."""
    scores = np.dot(X, theta)
    return sigmoid(scores)

# Binary class predictions
def predict_class(X, theta, threshold=0.5):
    """Return 0/1 labels: 1 where the predicted probability is >= threshold."""
    probabilities = predict_proba(X, theta)
    is_positive = probabilities >= threshold
    return is_positive.astype(int)

In [29]:
def main():
    """Train the model on the prepared data and print accuracy and final cost.

    Accuracy is measured on the same rows that were used for fitting; no
    held-out split is made here.

    Returns
    -------
    tuple
        ``(theta, costs)`` as returned by logistic_gradient_descent.
    """
    features, labels = load_and_prepare_data()

    # Fit with learning_rate=0.1 rather than the optimizer's 0.01 default.
    theta, costs = logistic_gradient_descent(
        features, labels, learning_rate=0.1, num_iterations=1000
    )

    # Fraction of rows whose thresholded prediction equals the label.
    accuracy = np.mean(predict_class(features, theta) == labels)
    print(f'Model Accuracy: {accuracy:.4f}')

    # Cost recorded after the last update.
    print(f'Final Cost: {costs[-1]:.4f}')

    return theta, costs

# Entry point: run the full train-and-report pipeline and keep the fitted
# weights and per-iteration cost history in module-level names.
if __name__ == '__main__':
    theta, costs = main()

Model Accuracy: 0.7333
Final Cost: 0.4753
