In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# =========================
# Data Loading and Preprocessing
# =========================

# Load training and test data
train_data = pd.read_csv('data/diabetes_train.csv')
test_data = pd.read_csv('data/diabetes_test.csv')

# Separate the target column from the feature columns
train_data_outcome = train_data['Outcome'].values  # Shape: (668,)
train_data = train_data.drop(columns=['Outcome'])    # Shape: (668, 8)

# Standardize each feature using statistics computed on the training data
# only; the same mean/std are then applied to the test data.
for column in train_data.columns:
    mean = train_data[column].mean()
    std = train_data[column].std()
    # A constant column has std == 0 (and std is NaN when there is a single
    # row). Dividing by it would fill the feature with NaN/inf, so fall back
    # to a unit scale, which leaves the centered column at zero.
    if not std > 0:
        std = 1.0
    train_data[column] = (train_data[column] - mean) / std
    test_data[column] = (test_data[column] - mean) / std

# Add a constant column of ones; the model has no separate bias parameters,
# so this column acts as the intercept term.
train_data.insert(0, 'Bias', 1)  # Shape: (668, 9)
test_data.insert(0, 'Bias', 1)    # Shape: (100, 9)

# Convert DataFrames to numpy arrays
train_data = train_data.values       # Shape: (668, 9)
test_data_numpy = test_data.values   # Shape: (100, 9)

# Split into training and validation sets.
# train_test_split expects samples as rows, so the split happens before the
# matrices are transposed. The fixed random_state keeps the split repeatable.
X_train, X_validation, y_train, y_validation = train_test_split(
    train_data, train_data_outcome, test_size=0.2, random_state=42
)

# The model consumes matrices laid out as (features, samples)
X_train = X_train.T            # Shape: (9, 534)
X_validation = X_validation.T  # Shape: (9, 134)

# y_train (534,) and y_validation (134,) stay as 1D label arrays.

# Verify the shapes
print(f'X_train.shape: {X_train.shape}, y_train.shape: {y_train.shape}')
print(f'X_validation.shape: {X_validation.shape}, y_validation.shape: {y_validation.shape}')
print(f'test_data_numpy.shape: {test_data_numpy.shape}')

# =========================
# Model Definition
# =========================

class Model:
    """Two-layer fully connected binary classifier.

    Architecture: inputs -> linear (w1) -> ReLU -> linear (w2) -> sigmoid.
    There are no separate bias vectors; an intercept can be supplied by
    including a constant row of ones in the inputs.

    Training uses full-batch gradient descent on the mean squared error
    between the sigmoid output and the labels.
    """

    def __init__(self, input_size: int, hidden_size: int = 1000, output_size: int = 1):
        """
        Creates the two weight matrices.
        Args:
            input_size (int): Number of input features (rows of the input matrix)
            hidden_size (int): Number of hidden units
            output_size (int): Number of output units
        """
        # Small random weights (mean 0, std 0.01) drawn from numpy's global RNG
        self.w1 = np.random.randn(hidden_size, input_size) * 0.01   # Shape: (hidden, features)
        self.w2 = np.random.randn(output_size, hidden_size) * 0.01  # Shape: (output, hidden)

    def predict(self, inputs: np.ndarray):
        """
        Forward propagation through the network.
        Args:
            inputs (numpy.ndarray): Input data of shape (features, samples)
        Returns:
            A_1 (numpy.ndarray): Activations from hidden layer, shape (hidden, samples)
            A_2 (numpy.ndarray): Activations from output layer, shape (output, samples)
        """
        Z_1 = self.w1 @ inputs          # Shape: (hidden, samples)
        A_1 = np.maximum(0, Z_1)        # ReLU activation
        Z_2 = self.w2 @ A_1             # Shape: (output, samples)
        # Sigmoid written as exp(-log(1 + exp(-z))). logaddexp evaluates the
        # log term without overflowing, unlike a direct exp(-z) for very
        # negative z.
        A_2 = np.exp(-np.logaddexp(0, -Z_2))
        return A_1, A_2

    def update_weights_for_one_epoch(self, inputs: np.ndarray, outputs, learning_rate: float) -> None:
        """
        Performs one full-batch gradient descent step on the mean squared error.
        Args:
            inputs (numpy.ndarray): Input data of shape (features, samples)
            outputs (array-like): True labels, one per sample; flattened to
                shape (samples,) so column vectors are accepted as well
            learning_rate (float): Learning rate (alpha)
        """
        # Flatten the labels so a (samples, 1) array cannot broadcast against
        # the flat predictions into a (samples, samples) matrix.
        targets = np.asarray(outputs).reshape(-1)

        A_1, A_2 = self.predict(inputs)  # Forward pass
        probabilities = A_2.reshape(-1)  # Shape: (samples,)

        n = inputs.shape[1]  # Number of samples

        # Negative gradient of the mean squared error w.r.t. the output
        # pre-activation: (2/n) * (y - a) * sigmoid'(z), sigmoid'(z) = a(1 - a)
        shared_coefficient = (
            (2 / n) * (targets - probabilities) * probabilities * (1 - probabilities)
        ).reshape(1, -1)  # Shape: (1, samples)

        # Compute BOTH descent directions from the current weights before
        # touching either matrix; the hidden-layer gradient must be
        # back-propagated through the w2 that produced this forward pass.
        delta_w2 = shared_coefficient @ A_1.T          # Shape: (1, hidden)
        delta_A1 = self.w2.T @ shared_coefficient      # Shape: (hidden, samples)
        relu_gradient = A_1 > 0                        # ReLU derivative as a mask
        delta_w1 = (delta_A1 * relu_gradient) @ inputs.T  # Shape: (hidden, features)

        # The deltas are negative gradients, hence "+="
        self.w2 += learning_rate * delta_w2
        self.w1 += learning_rate * delta_w1

    def fit(self, inputs: np.ndarray, outputs, learning_rate: float, epochs: int = 64) -> None:
        """
        Trains the model using the provided data.

        Prints the training accuracy after the first epoch and after every
        tenth epoch.
        Args:
            inputs (numpy.ndarray): Input data of shape (features, samples)
            outputs (array-like): True labels, one per sample
            learning_rate (float): Learning rate (alpha)
            epochs (int): Number of training epochs
        """
        # Flatten once so the accuracy comparison below is element-wise
        targets = np.asarray(outputs).reshape(-1)
        for epoch in range(epochs):
            self.update_weights_for_one_epoch(inputs, targets, learning_rate)
            if (epoch + 1) % 10 == 0 or epoch == 0:
                _, A_2 = self.predict(inputs)
                predictions = (A_2.reshape(-1) > 0.5).astype(int)
                accuracy = np.mean(predictions == targets) * 100
                print(f'Epoch {epoch + 1}/{epochs} - Accuracy: {accuracy:.2f}%')

# =========================
# Model Training and Evaluation
# =========================

def evaluation(model, inputs, outputs):
    """
    Evaluates the model's accuracy.

    A sample counts as class 1 when the model's output activation is
    strictly greater than 0.5, otherwise as class 0.
    Args:
        model (Model): Trained model; only its predict(inputs) method is used
        inputs (numpy.ndarray): Input data of shape (features, samples)
        outputs (array-like): True labels, one per sample; flattened to
            shape (samples,) so column vectors are accepted as well
    Returns:
        float: Accuracy percentage
    Raises:
        ValueError: If the number of labels differs from the number of predictions
    """
    _, A_2 = model.predict(inputs)
    prediction = (np.asarray(A_2).reshape(-1) > 0.5).astype(int)

    # Flatten the labels: comparing a (samples, 1) array with the flat
    # predictions would broadcast to a (samples, samples) matrix and
    # silently report a wrong accuracy.
    labels = np.asarray(outputs).reshape(-1)
    if labels.shape != prediction.shape:
        raise ValueError(
            f"Got {labels.size} labels for {prediction.size} predictions"
        )
    return np.mean(prediction == labels) * 100

# Build the network: its input width is the row count of the
# (features, samples) training matrix, which includes the bias row
input_size = X_train.shape[0]
model = Model(input_size, hidden_size=1000, output_size=1)

# Fit the network on the training split
learning_rate = 0.01
epochs = 100
model.fit(X_train, y_train, learning_rate, epochs)

# Report accuracy on the held-out validation split
validation_accuracy = evaluation(model, X_validation, y_validation)
print(f"Model accuracy on validation set: {validation_accuracy:.2f}%")

# =========================
# Prediction on Test Data
# =========================

# Run the test set through the network; the matrix is transposed to the
# (features, samples) layout first. Outputs above 0.5 are labelled class 1.
_, test_output = model.predict(np.transpose(test_data_numpy))
prediction = (test_output.ravel() > 0.5).astype(int)

# Show the predicted labels for the test samples
print("Test Predictions:", prediction)


X_train.shape: (9, 534), y_train.shape: (534,)
X_validation.shape: (9, 134), y_validation.shape: (134,)
test_data_numpy.shape: (100, 9)
Epoch 1/100 - Accuracy: 40.45%
Epoch 10/100 - Accuracy: 55.43%
Epoch 20/100 - Accuracy: 67.04%
Epoch 30/100 - Accuracy: 72.10%
Epoch 40/100 - Accuracy: 73.03%
Epoch 50/100 - Accuracy: 73.60%
Epoch 60/100 - Accuracy: 74.53%
Epoch 70/100 - Accuracy: 74.91%
Epoch 80/100 - Accuracy: 75.28%
Epoch 90/100 - Accuracy: 75.28%
Epoch 100/100 - Accuracy: 75.47%
Model accuracy on validation set: 70.90%
Test Predictions: [0 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0
 0 0 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0
 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0]
