# Layer Class

In [None]:
import numpy as np
import pickle
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

class layer:
    """Common interface for the network components defined in this file.

    Both methods are placeholders that raise ``NotImplementedError``;
    concrete components are expected to override them.
    """

    def forward(self, input):
        """Compute the component's output for ``input`` (not implemented here)."""
        raise NotImplementedError()

    def backward(self, grad_output):
        """Propagate ``grad_output`` backwards (not implemented here)."""
        raise NotImplementedError()

   
       

# Linear Layer

In [2]:
class linear_layer(layer):
    """Fully connected layer computing ``input . w^T + b``.

    ``w`` has shape ``(output_dim, input_dim)`` and ``b`` has shape
    ``(output_dim,)``.
    """

    def __init__(self, input_dim, output_dim):
        """Create the layer with small random weights and zero biases."""
        self.input_dim, self.output_dim = input_dim, output_dim
        # Gaussian weights scaled down to 0.01; biases start at zero.
        self.w = 0.01 * np.random.randn(output_dim, input_dim)
        self.b = np.zeros(output_dim)
        # Input of the latest forward() call, needed again in backward().
        self.input = None
        # Parameter gradients; populated by backward().
        self.grad_w = None
        self.grad_b = None

    def forward(self, input):
        """Cache ``input`` and return the affine transform of it."""
        self.input = input
        projected = np.dot(input, self.w.T)
        return projected + self.b

    def backward(self, delta):
        """Store the parameter gradients and return the gradient w.r.t. the input.

        ``delta`` is the gradient flowing in from the next component
        (presumably shaped ``(n, output_dim)`` -- confirm against callers).
        """
        upstream_t = delta.T
        self.grad_w = np.dot(upstream_t, self.input)
        self.grad_b = np.sum(delta, axis=0)
        # shape (n, input_dim)
        return np.dot(delta, self.w)
           

# Sigmoid Activation

In [3]:
class sigmoid_layer(layer):
    """Element-wise logistic sigmoid activation, ``1 / (1 + exp(-x))``."""

    def __init__(self):
        # Output of the latest forward() call; backward() derives the slope from it.
        self.out = None

    def forward(self, input):
        """Apply the sigmoid to ``input``, cache the result and return it.

        The value is evaluated as ``exp(-log(1 + exp(-x)))`` through
        ``np.logaddexp``. The direct form ``1 / (1 + np.exp(-x))`` overflows
        inside ``exp`` (emitting a RuntimeWarning) once ``x`` drops below
        roughly -709; this form gives the same result without the overflow.
        """
        self.out = np.exp(-np.logaddexp(0, np.negative(input)))
        return self.out

    def backward(self, delta):
        """Scale the incoming gradient ``delta`` by ``out * (1 - out)``."""
        return delta * self.out * (1 - self.out)

# Tanh Activation

In [4]:
class tanh_layer(layer):
    """Element-wise hyperbolic tangent activation."""

    def __init__(self):
        # Output of the latest forward() call; backward() derives the slope from it.
        self.out = None

    def forward(self, input):
        """Apply ``tanh`` to ``input``, cache the result and return it."""
        activated = np.tanh(input)
        self.out = activated
        return activated

    def backward(self, delta):
        """Scale the incoming gradient ``delta`` by ``1 - out**2``."""
        local_slope = 1 - np.square(self.out)
        return delta * local_slope

# ReLU Activation

In [5]:
class relu_layer(layer):
    """Element-wise rectified linear activation, ``max(0, x)``."""

    def __init__(self):
        # Input of the latest forward() call; backward() uses its sign.
        self.input = None

    def forward(self, input):
        """Cache ``input`` and return it with negative entries replaced by 0."""
        self.input = input
        rectified = np.maximum(0, input)
        return rectified

    def backward(self, grad_output):
        """Return a copy of ``grad_output`` zeroed wherever the cached input was <= 0."""
        inactive = self.input <= 0
        passed_through = grad_output.copy()
        passed_through[inactive] = 0
        return passed_through

# Binary Cross-Entropy Loss

In [6]:
class BinaryCrossEntropyLoss(layer):
    """Binary cross-entropy loss averaged over every element.

    Unlike the other components, ``forward`` takes ``(pred, target)`` and
    ``backward`` takes no argument: it produces the gradient that starts
    the backward pass through the network.
    """

    def __init__(self):
        # Clipped predictions and targets cached by forward() for backward().
        self.pred = None
        self.target = None

    def forward(self, pred, target):
        """Return the mean binary cross-entropy between ``pred`` and ``target``.

        ``pred`` is clipped to ``[1e-7, 1 - 1e-7]`` so neither logarithm is
        evaluated at 0. The clipped predictions and ``target`` are cached.
        """
        # Clip predictions to avoid log(0)
        self.pred = np.clip(pred, 1e-7, 1 - 1e-7)
        self.target = target
        loss = -(target * np.log(self.pred) + (1 - target) * np.log(1 - self.pred))
        return np.mean(loss)

    def backward(self):
        """Return the gradient of the mean loss w.r.t. the (clipped) predictions."""
        grad = -(self.target / self.pred) + (1 - self.target) / (1 - self.pred)
        # forward() averages with np.mean over *every* element, so the
        # gradient must be divided by the element count rather than by the
        # number of rows. For (n, 1) or (n,) targets the two are the same.
        return grad / np.size(grad)

# Mean Squared Error Loss

In [7]:
class MSELoss(layer):
    """Mean squared error loss averaged over every element.

    Unlike the other components, ``forward`` takes ``(pred, target)`` and
    ``backward`` takes no argument: it produces the gradient that starts
    the backward pass through the network.
    """

    def __init__(self):
        # Predictions and targets cached by forward() for backward().
        self.pred = None
        self.target = None

    def forward(self, pred, target):
        """Cache both arguments and return ``mean((pred - target) ** 2)``."""
        self.pred = pred
        self.target = target
        loss = np.mean((pred - target) ** 2)
        return loss

    def backward(self):
        """Return the gradient of the mean squared error w.r.t. the predictions."""
        residual = self.pred - self.target
        # forward() averages with np.mean over *every* element, so the
        # gradient must be divided by the element count rather than by the
        # number of rows. For (n, 1) or (n,) targets the two are the same.
        return 2 * residual / np.size(residual)

# Sequential Container for Layers

In [8]:
class Sequential(layer):
    """Container that chains components and can persist linear-layer weights."""

    def __init__(self, layers):
        """Keep a reference to ``layers``, the ordered list of components."""
        self.layers = layers

    def forward(self, input):
        """Feed ``input`` through every component in order and return the result."""
        signal = input
        for stage in self.layers:
            signal = stage.forward(signal)
        return signal

    def backward(self, grad_output):
        """Feed ``grad_output`` through every component in reverse order."""
        grad = grad_output
        for stage in reversed(self.layers):
            grad = stage.backward(grad)
        return grad

    def save_weights(self, filename):
        """Pickle one entry per component to ``filename``.

        Linear layers contribute ``{'w': ..., 'b': ...}``; every other
        component contributes ``None`` so positions line up on load.
        """
        snapshot = [
            {'w': stage.w, 'b': stage.b} if isinstance(stage, linear_layer) else None
            for stage in self.layers
        ]
        with open(filename, 'wb') as handle:
            pickle.dump(snapshot, handle)
        print("Weights saved to", filename)

    def load_weights(self, filename):
        """Restore linear-layer weights from a file written by ``save_weights``.

        Entries are matched to components by position; pairs where the
        component is not a linear layer or the entry is ``None`` are skipped.
        """
        # NOTE(review): pickle.load can execute arbitrary code -- only load
        # weight files from a trusted source.
        with open(filename, 'rb') as handle:
            stored = pickle.load(handle)
        for stage, params in zip(self.layers, stored):
            if params is None or not isinstance(stage, linear_layer):
                continue
            stage.w = params['w']
            stage.b = params['b']
        print("Weights loaded from", filename)

# Training Loop for Classification Networks

In [9]:
def train(network, loss_layer, X, y, epochs, lr):
    """Run full-batch gradient descent and return the loss recorded each epoch.

    Every epoch does a forward pass on ``X``, evaluates ``loss_layer``
    against ``y``, back-propagates, and then moves the weights and biases of
    each ``linear_layer`` in ``network.layers`` by ``lr`` times its gradient.
    Progress is printed every 1000 epochs.
    """
    history = []
    for step in range(epochs):
        # Forward pass and loss.
        current = loss_layer.forward(network.forward(X), y)
        history.append(current)

        # Backward pass, seeded by the loss gradient.
        network.backward(loss_layer.backward())

        # In-place update of every component that carries parameters.
        for module in network.layers:
            if not isinstance(module, linear_layer):
                continue
            module.w -= lr * module.grad_w
            module.b -= lr * module.grad_b

        if step % 1000 == 0:
            print("Epoch {}: Loss = {:.4f}".format(step, current))
    return history

# Training Loop for Regression with Early Stopping (Taxi Trip Duration)

In [None]:
def train_regression(network, loss_layer, X_train, y_train, X_val, y_val, epochs, lr, patience=30):
    """Full-batch gradient descent with validation-based early stopping.

    Each epoch trains on ``(X_train, y_train)``, updates every
    ``linear_layer`` in ``network.layers``, then evaluates the loss on
    ``(X_val, y_val)``. Training stops once the validation loss has failed
    to improve for ``patience`` consecutive epochs. Progress is printed
    every 100 epochs.

    Returns ``(train_losses, val_losses)``, one entry per completed epoch.
    """
    train_losses, val_losses = [], []
    best_val_loss = float('inf')
    stale_epochs = 0

    for epoch in range(epochs):
        # --- Training step ---
        fit_loss = loss_layer.forward(network.forward(X_train), y_train)
        train_losses.append(fit_loss)
        network.backward(loss_layer.backward())
        for module in network.layers:
            if not isinstance(module, linear_layer):
                continue
            module.w -= lr * module.grad_w
            module.b -= lr * module.grad_b

        # --- Validation step (uses the freshly updated parameters) ---
        held_out_loss = loss_layer.forward(network.forward(X_val), y_val)
        val_losses.append(held_out_loss)

        # --- Early stopping bookkeeping ---
        if held_out_loss < best_val_loss:
            best_val_loss, stale_epochs = held_out_loss, 0
        else:
            stale_epochs += 1
        if stale_epochs >= patience:
            print("Early stopping triggered at epoch", epoch)
            break

        if epoch % 100 == 0:
            print("Epoch {}: Train Loss = {:.4f}, Val Loss = {:.4f}".format(epoch, fit_loss, held_out_loss))

    return train_losses, val_losses

# Demonstration: XOR Problem

In [11]:
def solve_XOR():
    """Train two small 2-2-1 networks on XOR and plot their loss curves.

    The first network uses a sigmoid hidden activation and has its weights
    saved to ``XOR_solved.w``; the second uses a tanh hidden activation.
    Both end in a sigmoid and are trained with binary cross-entropy.
    """
    xor_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # XOR input
    xor_targets = np.array([[0], [1], [1], [0]])  # XOR output

    # --- Sigmoid hidden layer ---
    print("Training XOR network with sigmoid activation...")
    network_sigmoid = Sequential([
        linear_layer(2, 2),
        sigmoid_layer(),
        linear_layer(2, 1),
        sigmoid_layer(),
    ])
    loss_bce = BinaryCrossEntropyLoss()
    losses_sigmoid = train(network_sigmoid, loss_bce, xor_inputs, xor_targets,
                           epochs=10000, lr=0.1)
    print("XOR Predictions (Sigmoid):\n", network_sigmoid.forward(xor_inputs))

    # Persist the trained sigmoid network.
    network_sigmoid.save_weights("XOR_solved.w")

    # --- Tanh hidden layer (same loss object is reused) ---
    print("Training XOR network with tanh activation...")
    network_tanh = Sequential([
        linear_layer(2, 2),
        tanh_layer(),
        linear_layer(2, 1),
        sigmoid_layer(),
    ])
    losses_tanh = train(network_tanh, loss_bce, xor_inputs, xor_targets,
                        epochs=10000, lr=0.1)
    print("XOR Predictions (Tanh hidden activation):\n", network_tanh.forward(xor_inputs))

    # --- Compare the two loss curves ---
    plt.figure(figsize=(8, 4))
    for curve, caption in ((losses_sigmoid, "Sigmoid"), (losses_tanh, "Tanh")):
        plt.plot(curve, label=caption)
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.title("XOR Training Loss")
    plt.legend()
    plt.show()




# Demonstration: Taxi Trip Duration Prediction

In [None]:
def preprocess_data(data_frame, is_training=True):
    """Return ``data_frame`` without rows that contain missing values.

    When ``is_training`` is falsy the input is returned untouched (no
    preprocessing is applied to testing data).
    """
    if not is_training:
        return data_frame
    return data_frame.dropna()


def _complete_rows(table):
    """Return a 1-D boolean array that is True for rows of ``table`` with no missing value."""
    present = np.asarray(table.notna())
    if present.ndim > 1:
        present = present.all(axis=1)
    return present


def _drop_incomplete_pairs(features, target):
    """Drop every row where the features or the target has a missing value."""
    keep = _complete_rows(features) & _complete_rows(target)
    return features[keep], target[keep]


def predict_taxi_trip_duration():
    """Train a small regression network on the NYC taxi data and report test error.

    Loads ``nyc_taxi_data.npy`` from the working directory, cleans and
    standardises the features, holds out the last 20% of the training rows
    for validation, trains with early stopping, plots the loss curves and
    prints the test MSE and RMSLE. Returns ``None``; if the dataset cannot
    be loaded an error is printed and the function returns early.
    """
    # Load the dataset (make sure the file nyc_taxi_data.npy is in the working directory)
    # NOTE(review): allow_pickle=True can execute arbitrary code while
    # loading -- only use a data file from a trusted source.
    try:
        dataset = np.load("nyc_taxi_data.npy", allow_pickle=True)
        dataset = dataset.item()
    except Exception as e:
        print("Error loading taxi dataset:", e)
        return

    # Features and targets must lose the *same* rows. Calling dropna() on
    # each of them separately can remove different rows and silently pair
    # features with the wrong target.
    # (The splits are presumably pandas objects -- they are used via
    # .notna(), .drop(columns=...) and .values below.)
    X_train, y_train = _drop_incomplete_pairs(dataset["X_train"], dataset["y_train"])
    X_test, y_test = _drop_incomplete_pairs(dataset["X_test"], dataset["y_test"])

    columns_to_drop = ["id", "pickup_datetime", "dropoff_datetime", "trip_duration", "store_and_fwd_flag"]
    X_train = X_train.drop(columns=[col for col in columns_to_drop if col in X_train.columns])
    X_test = X_test.drop(columns=[col for col in columns_to_drop if col in X_test.columns])

    y_train = y_train.values.reshape(-1, 1)
    y_test = y_test.values.reshape(-1, 1)

    # Standardise the features; the scaler is fitted on training rows only.
    feature_scaler = StandardScaler()
    X_train_norm = feature_scaler.fit_transform(X_train)
    X_test_norm = feature_scaler.transform(X_test)

    # Split a validation set from training data (80/20 split)
    split = int(0.8 * X_train_norm.shape[0])
    X_train_new, X_val = X_train_norm[:split], X_train_norm[split:]
    y_train_new, y_val = y_train[:split], y_train[split:]

    # One example regression network: 2 hidden layers with 10 nodes each and
    # a linear output. Other configurations worth trying include wider
    # tanh_layer stacks, e.g. 128-64 or 256-128-64 hidden units.
    network_reg = Sequential([
        linear_layer(X_train_new.shape[1], 10),
        relu_layer(),
        linear_layer(10, 10),
        relu_layer(),
        linear_layer(10, 1)  # For regression, no activation (or identity) on the output.
    ])

    mse_loss = MSELoss()
    print("\n--- Training Taxi Trip Duration Network ---")
    train_losses, val_losses = train_regression(network_reg, mse_loss,
                                                X_train_new, y_train_new,
                                                X_val, y_val,
                                                epochs=1000, lr=0.0001, patience=20)

    # Plot training and validation loss curves
    plt.figure(figsize=(8, 4))
    plt.plot(train_losses, label="Train Loss")
    plt.plot(val_losses, label="Validation Loss")
    plt.xlabel("Epochs")
    plt.ylabel("MSE Loss")
    plt.title("Taxi Trip Duration Training")
    plt.legend()
    plt.show()

    # Evaluate on the test set with a single forward pass.
    pred_test = network_reg.forward(X_test_norm)
    test_loss = mse_loss.forward(pred_test, y_test)
    print("Test MSE:", test_loss)

    # RMSLE (Root Mean Squared Logarithmic Error).
    # Clip predictions to avoid log issues
    pred_test = np.clip(pred_test, 1e-7, None)
    rmsle = np.sqrt(np.mean((np.log1p(pred_test) - np.log1p(y_test))**2))
    print("Test RMSLE:", rmsle)

# Main Execution


In [13]:
if __name__ == "__main__":
    # Script entry point: run the taxi regression demo.
    # Uncomment the next line to run the XOR classification demo as well.
    #solve_XOR()
    predict_taxi_trip_duration()



X_train_new.shape[1] 6

--- Training Taxi Trip Duration Network ---
grad_loss [[-0.00106834]
 [-0.00092552]
 [-0.00199767]
 ...
 [-0.00048942]
 [-0.0031022 ]
 [-0.00064558]]
Epoch 0: Train Loss = 15732844.2129, Val Loss = 87989162.4327
grad_loss [[-0.00106798]
 [-0.00092515]
 [-0.00199731]
 ...
 [-0.00048906]
 [-0.00310183]
 [-0.00064521]]
grad_loss [[-0.00106762]
 [-0.00092479]
 [-0.00199694]
 ...
 [-0.00048869]
 [-0.00310147]
 [-0.00064485]]
grad_loss [[-0.00106725]
 [-0.00092443]
 [-0.00199658]
 ...
 [-0.00048833]
 [-0.00310111]
 [-0.00064449]]
grad_loss [[-0.00106689]
 [-0.00092406]
 [-0.00199622]
 ...
 [-0.00048797]
 [-0.00310074]
 [-0.00064412]]
grad_loss [[-0.00106653]
 [-0.0009237 ]
 [-0.00199585]
 ...
 [-0.0004876 ]
 [-0.00310038]
 [-0.00064376]]
grad_loss [[-0.00106616]
 [-0.00092334]
 [-0.00199549]
 ...
 [-0.00048724]
 [-0.00310002]
 [-0.00064339]]
grad_loss [[-0.0010658 ]
 [-0.00092297]
 [-0.00199512]
 ...
 [-0.00048687]
 [-0.00309965]
 [-0.00064303]]
grad_loss [[-0.0010654

KeyboardInterrupt: 