**Feedforward Neural Network (FNN)**

Import Libraries

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import gc
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
import numpy as np
import os
import pandas as pd
import random
import time
#from tqdm import tqdm, trange
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature

Load csv into df

In [2]:
# --- Ensure consistent working directory for data loading ---
# This block dynamically sets the current working directory to the Git repository root.
# This makes data paths reliable for all collaborators, regardless of where they open the notebook.

# Climb from the current directory towards the filesystem root until a
# directory containing a '.git' entry is found; that directory is the repo root.
current_dir = os.getcwd()
repo_root = current_dir
while True:
    git_marker = os.path.join(repo_root, '.git')
    if os.path.exists(git_marker):
        break
    parent_dir = os.path.dirname(repo_root)
    if parent_dir == repo_root:
        # dirname() no longer changes the path: the filesystem root was reached
        raise FileNotFoundError(
            "Could not find the .git directory. "
            "Please ensure you are running this code from within a Git repository."
        )
    repo_root = parent_dir

# Switch to the repo root only when we are not already there
if repo_root != os.getcwd():
    os.chdir(repo_root)
    print(f"Working directory set to: {os.getcwd()}") # Informative print for users


# --- Data Loading ---
# Path to the data file, relative to the repository root.
data_file_name = 'Customer_Purchasing_Behaviors.csv'
data_file_path = os.path.join('data', 'raw', data_file_name)

# Read the raw CSV into `df`.
# Any failure is reported and then re-raised: every later cell needs `df`, so
# continuing would either crash later with a NameError or silently reuse a `df`
# left over from an earlier kernel run.
try:
    df = pd.read_csv(data_file_path)
except FileNotFoundError:
    print(f"Error: The file '{data_file_name}' was not found at '{data_file_path}'.")
    print("Please ensure it exists in the 'data/raw/' folder relative to the repository root.")
    raise
except Exception as e:
    print(f"An error occurred during data loading: {e}")
    raise
else:
    print(f"Successfully loaded '{data_file_name}' into the DataFrame named df.")

Working directory set to: c:\Users\The Winner\DSI\customer_purchasing_behaviour
Successfully loaded 'Customer_Purchasing_Behaviors.csv' into the DataFrame named df.


Prepare data: scale numerical and encode categorical

In [3]:
# Preprocessing: Prepare Inputs
#
# The rows are split into train / validation / test BEFORE the scaler and the
# encoder are fitted, and both transformers are fitted on the training rows only.
# Fitting them on the whole table would leak validation/test statistics
# (means, variances, category set) into training.

# Column roles
numerical_cols = ['age', 'annual_income', 'loyalty_score', 'purchase_frequency']
categorical_col = 'region'
target_col = 'purchase_amount'

# Split row positions: 80% train+val / 20% test, then 75% / 25% of the remainder
# (60% train, 20% validation, 20% test overall). Same seeds and sizes as before.
row_idx = np.arange(len(df))
idx_temp, idx_test = train_test_split(row_idx, test_size=0.2, random_state=42)
idx_train, idx_val = train_test_split(idx_temp, test_size=0.25, random_state=42)

# Scale numerical features using statistics of the training rows only
scaler = StandardScaler()
scaler.fit(df.iloc[idx_train][numerical_cols])
X_num = scaler.transform(df[numerical_cols])

# One-hot encode the categorical feature using the training rows' categories.
# handle_unknown='ignore' encodes a category unseen during fit as all zeros
# instead of raising when the validation/test rows are transformed.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoder.fit(df.iloc[idx_train][[categorical_col]])
X_cat = encoder.transform(df[[categorical_col]])

# Create DataFrames with distinct column names
num_df = pd.DataFrame(X_num, columns=numerical_cols)
cat_columns = encoder.get_feature_names_out([categorical_col])
cat_df = pd.DataFrame(X_cat, columns=cat_columns)

# Combine safely (both frames share the default 0..n-1 index)
combined_df = pd.concat([num_df, cat_df], axis=1)

# Convert to torch tensors: features, and the target as a column vector
X_combined = torch.tensor(combined_df.values, dtype=torch.float32)
y = torch.tensor(df[target_col].values, dtype=torch.float32).view(-1, 1)

# Materialise the splits from the row positions computed above
idx_temp_t = torch.as_tensor(idx_temp, dtype=torch.long)
idx_train_t = torch.as_tensor(idx_train, dtype=torch.long)
idx_val_t = torch.as_tensor(idx_val, dtype=torch.long)
idx_test_t = torch.as_tensor(idx_test, dtype=torch.long)

X_temp, y_temp = X_combined[idx_temp_t], y[idx_temp_t]
X_train, y_train = X_combined[idx_train_t], y[idx_train_t]
X_val, y_val = X_combined[idx_val_t], y[idx_val_t]
X_test, y_test = X_combined[idx_test_t], y[idx_test_t]

Run Model and log it in MLflow

In [None]:
from torch.utils.data import DataLoader, TensorDataset

start_time = time.time()

# Load environment variables.
# load_dotenv() returns False when it did not find a .env file; in that case
# switch to the experiment folder (relative to the current directory) and retry once.
dotenv_loaded = load_dotenv()
if not dotenv_loaded:
    os.chdir('./experiments/neural_networks')
    dotenv_loaded = load_dotenv()
print(f"load_dotenv() returned: {dotenv_loaded}")  # Should be True if file found

# Read the MLflow settings once and fail with a clear message when they are missing
tracking_uri = os.getenv("MLFLOW_TRACKING_URI")  # Note: Not ARTIFACT_ROOT
experiment_name = os.getenv("MLFLOW_EXPERIMENT_NAME")
if not experiment_name:
    raise RuntimeError(
        "MLFLOW_EXPERIMENT_NAME is not set. Define it in the .env file "
        "or in the environment before running this cell."
    )
if not tracking_uri:
    print("Warning: MLFLOW_TRACKING_URI is not set; MLflow will not be pointed at a tracking server.")

# Set tracking URI (where MLflow server is running)
mlflow.set_tracking_uri(tracking_uri)
# Set experiment name
mlflow.set_experiment(experiment_name)

#mlflow.autolog() # Prefer to run manually, less items logged

# Optimiser and batching hyper-parameters
lr = 0.005
batch_size = 32

# Layer widths (the original inline notes listed 64 / 32 / 1 next to these,
# presumably alternative settings for the three layers)
width_1st_layer, width_2nd_layer, width_output_layer = 8, 8, 1

# Everything in this dict is logged to MLflow as run parameters
params = dict(
    width_1st_layer=width_1st_layer,
    width_2nd_layer=width_2nd_layer,
    width_output_layer=width_output_layer,
    activation="relu",
    criterion="Mean Squared Error (MSE) loss",
    optimizer="adam",
    lr=lr,
    batch_size=batch_size,
)

with mlflow.start_run():
    mlflow.log_params(params)

    # Seed every random number generator in use so the run can be repeated
    SEED = 42
    for _seed_fn in (
        random.seed,                 # Python random
        np.random.seed,              # NumPy
        torch.manual_seed,           # CPU
        torch.cuda.manual_seed,      # GPU
        torch.cuda.manual_seed_all,  # Multi-GPU
    ):
        _seed_fn(SEED)
    # cuDNN: request deterministic algorithms and switch off benchmark mode
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Define the Neural Network Model
    class FeedforwardNN(nn.Module):
        """Fully connected regressor: two ReLU hidden layers and a linear output layer.

        The layer widths are read from the notebook-level variables
        ``width_1st_layer``, ``width_2nd_layer`` and ``width_output_layer``.
        """

        def __init__(self, input_size):
            """Build the layer stack for inputs with ``input_size`` features."""
            super().__init__()
            hidden_widths = [input_size, width_1st_layer, width_2nd_layer]
            stack = []
            # One Linear + ReLU pair per hidden layer, created in input-to-output order
            for n_in, n_out in zip(hidden_widths[:-1], hidden_widths[1:]):
                stack.append(nn.Linear(n_in, n_out))
                stack.append(nn.ReLU())
            # Output layer has no activation
            stack.append(nn.Linear(width_2nd_layer, width_output_layer))
            self.layers = nn.Sequential(*stack)

        def forward(self, x):
            """Run ``x`` through the sequential stack and return its output."""
            return self.layers(x)

    # Build the network; its input width is the number of columns in X_train.
    model = FeedforwardNN(input_size=X_train.shape[1])
    def init_weights(m):
        '''Re-initialise a linear layer: Xavier-uniform weights, bias filled with 0.01.

        Modules that are not ``nn.Linear`` are left untouched, so this can be
        passed to ``Module.apply``.
        '''
        if not isinstance(m, nn.Linear):
            return
        nn.init.xavier_uniform_(m.weight)  # random draw, so it follows the seed set earlier
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.01)
    model.apply(init_weights)

    # Use CUDA when torch reports it as available, otherwise the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Put every split on the chosen device
    X_train, y_train = X_train.to(device), y_train.to(device)
    X_val, y_val = X_val.to(device), y_val.to(device)
    X_test, y_test = X_test.to(device), y_test.to(device)

    # Pair features with targets for each split
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    test_dataset = TensorDataset(X_test, y_test)

    # Mini-batch loaders; only the training loader shuffles
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # The model must live on the same device as the data
    model = model.to(device)

    # Training with MSE & Logging RMSE, MAE 
    # Metrics
    def compute_rmse(predictions, targets):
        """Root mean squared error: square root of ``F.mse_loss``."""
        return F.mse_loss(predictions, targets).sqrt()

    def compute_mae(predictions, targets):
        """Mean absolute error over all elements."""
        return (predictions - targets).abs().mean()

    def compute_r2(predictions, targets):
        """Coefficient of determination, 1 - SS_res / SS_tot.

        SS_tot is zero when every target is identical, in which case the
        division yields a non-finite value.
        """
        residual_ss = ((targets - predictions) ** 2).sum()
        total_ss = ((targets - targets.mean()) ** 2).sum()
        return 1 - residual_ss / total_ss

    # Training + Early Stopping
    criterion = nn.MSELoss()   # Mean Squared Error (MSE) loss
    optimizer = optim.Adam(model.parameters(), lr)
    best_val_loss = float('inf')
    patience = 10_000     # consecutive non-improving epochs tolerated before stopping
    trigger_times = 0     # current run of consecutive non-improving epochs
    # Smallest decrease in validation loss that counts as an improvement.
    # This used to be -0.05 combined with `avg_val_loss + min_delta < best_val_loss`,
    # which accepted a validation loss up to 0.05 WORSE than the best one and then
    # overwrote best_val_loss with that worse value, so the "best" could drift upwards.
    min_delta = 0.05
    train_losses = []
    val_losses = []

    progress = tqdm(range(10_001), desc="Training")
    for epoch in progress:
        # Training phase with batches
        model.train()
        epoch_train_loss = 0.0
        num_train_batches = 0

        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            output = model(batch_X)
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()

            epoch_train_loss += loss.item()
            num_train_batches += 1

        # Average of the per-batch training losses for the epoch
        avg_train_loss = epoch_train_loss / num_train_batches

        # Validation phase with batches (no gradient tracking)
        model.eval()
        epoch_val_loss = 0.0
        num_val_batches = 0

        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                val_output = model(batch_X)
                val_loss = criterion(val_output, batch_y)
                epoch_val_loss += val_loss.item()
                num_val_batches += 1

        # Average of the per-batch validation losses for the epoch
        avg_val_loss = epoch_val_loss / num_val_batches

        # For the progress bar only: RMSE on the first `batch_size` training rows
        with torch.no_grad():
            sample_output = model(X_train[:batch_size])
            sample_rmse = compute_rmse(sample_output, y_train[:batch_size])
        progress.set_postfix({"Loss": avg_train_loss, "RMSE": sample_rmse.item()})

        # Save losses each epoch
        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)

        # Early stopping bookkeeping: an epoch only counts as an improvement when
        # it beats the best validation loss so far by at least `min_delta`.
        if avg_val_loss < best_val_loss - min_delta:
            best_val_loss = avg_val_loss
            trigger_times = 0
        else:
            trigger_times += 1
        stop_early = trigger_times >= patience

        if stop_early:
            print(f"\n⏹️ Early stopping at epoch {epoch} — no validation improvement after {patience} epochs.")

        # Full-dataset report: every 10,000 epochs, and once more when stopping early
        if stop_early or epoch % 10_000 == 0:
            model.eval()
            with torch.no_grad():
                train_output = model(X_train)
                val_output = model(X_val)
                train_loss_full = criterion(train_output, y_train)
                val_loss_full = criterion(val_output, y_val)

                rmse = compute_rmse(train_output, y_train)
                mae = compute_mae(train_output, y_train)
                r2 = compute_r2(train_output, y_train)
                val_rmse = compute_rmse(val_output, y_val)
                val_mae = compute_mae(val_output, y_val)
                val_r2 = compute_r2(val_output, y_val)

            if not stop_early:
                print(f"Epoch {epoch}:")
            print(f"Train → MSE = {train_loss_full.item():.4f}, RMSE = {rmse.item():.4f}, MAE = {mae.item():.4f}, R² = {r2.item():.4f}")
            print(f"Val   → MSE = {val_loss_full.item():.4f}, RMSE = {val_rmse.item():.4f}, MAE = {val_mae.item():.4f}, R² = {val_r2.item():.4f}")

        if stop_early:
            break

    # Mean of the training and validation loss for every recorded epoch
    avg_losses = [(train + val) / 2 for train, val in zip(train_losses, val_losses)]
    # Only the first `search_limit` epochs are searched for the minimum
    search_limit = 10_000
    limited_avg_losses = avg_losses[:search_limit]
    # Earliest epoch holding the smallest averaged loss, together with that loss
    epoch_min_avg, min_avg = min(enumerate(limited_avg_losses), key=lambda pair: pair[1])
    print(f"🔍 Minimum average loss before epoch {search_limit}: {min_avg:.4f} at epoch {epoch_min_avg}")

    # Score the test split with the weights reached at the end of the loop above
    model.eval()
    with torch.no_grad():
        test_predictions = model(X_test)
        test_loss = criterion(test_predictions, y_test)
        test_rmse, test_mae, test_r2 = (
            metric(test_predictions, y_test)
            for metric in (compute_rmse, compute_mae, compute_r2)
        )
    print(f"\nFinal Test Evaluation:")
    print(f"MSE = {test_loss.item():.4f}, RMSE = {test_rmse.item():.4f}, MAE = {test_mae.item():.4f}, R² Score = {test_r2.item():.4f}")

    # Loss curves for the exploratory run, leaving out the first `start` epochs
    start = 50
    plt.figure(figsize=(10, 5))
    # Training, validation and their per-epoch mean, drawn in that order
    curves = (
        (train_losses, 'Training Loss', 'blue'),
        (val_losses, 'Validation Loss', 'orange'),
        (avg_losses, 'Avg Loss', 'purple'),
    )
    for series, curve_label, curve_color in curves:
        plt.plot(range(start, len(series)), series[start:], label=curve_label, color=curve_color)
    # Vertical marker at the epoch where the averaged loss is smallest
    plt.axvline(x=epoch_min_avg, color='red', linestyle='--', label='Min Avg Loss')
    plt.title("Loss Curves")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True)
    plt.show()

    # Drop the exploratory run's objects so this cell can be re-run on its own.
    # The loss histories go first, then the model, which may hold GPU tensors.
    del train_losses, val_losses
    del avg_losses, limited_avg_losses, min_avg
    del model
    # Clear GPU cache
    torch.cuda.empty_cache()

    #########################################################################################################
    #########################################################################################################
    # Now that the best number of epochs is known, retrain from scratch and stop at that epoch

    # Re-seed every random number generator so the retraining starts from the same state
    SEED = 42
    for _seed_fn in (
        random.seed,                 # Python random
        np.random.seed,              # NumPy
        torch.manual_seed,           # CPU
        torch.cuda.manual_seed,      # GPU
        torch.cuda.manual_seed_all,  # Multi-GPU
    ):
        _seed_fn(SEED)
    # cuDNN: request deterministic algorithms and switch off benchmark mode
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Define the Neural Network Model
    class FeedforwardNN(nn.Module):
        """Two-hidden-layer ReLU network with a linear output layer.

        Same definition as the class declared earlier in this cell; the layer
        widths come from ``width_1st_layer``, ``width_2nd_layer`` and
        ``width_output_layer``.
        """

        def __init__(self, input_size):
            """Create the three linear layers for ``input_size`` input features."""
            super().__init__()
            # Layers are created in input-to-output order
            hidden_1 = nn.Linear(input_size, width_1st_layer)
            hidden_2 = nn.Linear(width_1st_layer, width_2nd_layer)
            head = nn.Linear(width_2nd_layer, width_output_layer)
            self.layers = nn.Sequential(hidden_1, nn.ReLU(), hidden_2, nn.ReLU(), head)

        def forward(self, x):
            """Return the output of the sequential stack for ``x``."""
            return self.layers(x)

    def init_weights(m):
        '''Give linear layers Xavier-uniform weights and a constant 0.01 bias; skip other modules.'''
        if not isinstance(m, nn.Linear):
            return
        nn.init.xavier_uniform_(m.weight)  # random draw, so it follows the seed set above
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.01)

    # Fresh network for the retraining pass, initialised the same way as the first one
    model = FeedforwardNN(input_size=X_train.shape[1])
    model.apply(init_weights)

    # Use CUDA when torch reports it as available, otherwise the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The splits were moved to the device earlier in this cell; repeated here for clarity
    X_train, y_train = X_train.to(device), y_train.to(device)
    X_val, y_val = X_val.to(device), y_val.to(device)
    X_test, y_test = X_test.to(device), y_test.to(device)

    # Rebuild datasets and loaders for the retraining pass; only training data is shuffled
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    test_dataset = TensorDataset(X_test, y_test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # The model must live on the same device as the data
    model = model.to(device)

    # Training with MSE & Logging RMSE, MAE 
    # Metrics
    def compute_rmse(predictions, targets):
        """Root mean squared error: square root of ``F.mse_loss``."""
        return F.mse_loss(predictions, targets).sqrt()

    def compute_mae(predictions, targets):
        """Mean absolute error over all elements."""
        return (predictions - targets).abs().mean()

    def compute_mape(predictions, targets):
        """Mean absolute percentage error, in percent.

        Each error is divided by its target, so a target of zero produces a
        non-finite result.
        """
        relative_error = (predictions - targets) / targets
        return (relative_error.abs() * 100).mean()

    def compute_r2(predictions, targets):
        """Coefficient of determination, 1 - SS_res / SS_tot."""
        residual_ss = ((targets - predictions) ** 2).sum()
        total_ss = ((targets - targets.mean()) ** 2).sum()
        return 1 - residual_ss / total_ss

    # Retraining setup (fixed number of epochs, no early stopping in this pass)
    criterion = nn.MSELoss()   # Mean Squared Error (MSE) loss
    # Use the notebook-level `lr` rather than a hard-coded 0.005, so the final model
    # is trained with the learning rate that was logged to MLflow and used in the
    # exploratory run.
    optimizer = optim.Adam(model.parameters(), lr=lr)
    train_losses = []
    val_losses = []

    # Retrain for exactly epoch_min_avg + 1 epochs (epochs 0 .. epoch_min_avg)
    progress = tqdm(range(epoch_min_avg+1), desc="Training")
    for epoch in progress:
        # ---- optimisation pass over the shuffled training mini-batches ----
        model.train()
        epoch_train_loss, num_train_batches = 0.0, 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(batch_X), batch_y)
            loss.backward()
            optimizer.step()
            epoch_train_loss += loss.item()
            num_train_batches += 1
        # Average of the per-batch training losses
        avg_train_loss = epoch_train_loss / num_train_batches

        # ---- validation pass, without gradient tracking ----
        model.eval()
        epoch_val_loss, num_val_batches = 0.0, 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                epoch_val_loss += criterion(model(batch_X), batch_y).item()
                num_val_batches += 1
        # Average of the per-batch validation losses
        avg_val_loss = epoch_val_loss / num_val_batches

        # Progress-bar figure only: RMSE on the first `batch_size` training rows
        with torch.no_grad():
            head_X, head_y = X_train[:batch_size], y_train[:batch_size]
            sample_rmse = compute_rmse(model(head_X), head_y)
        progress.set_postfix({"Loss": avg_train_loss, "RMSE": sample_rmse.item()})

        # Keep the per-epoch history for the plot drawn after training
        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)

        # Full-dataset report only on epochs divisible by 10,000 (epoch 0 included)
        if epoch % 10_000 != 0:
            continue

        model.eval()
        with torch.no_grad():
            train_output = model(X_train)
            val_output = model(X_val)
            train_loss_full = criterion(train_output, y_train)
            val_loss_full = criterion(val_output, y_val)

            rmse, mae, r2 = (
                metric(train_output, y_train)
                for metric in (compute_rmse, compute_mae, compute_r2)
            )
            val_rmse, val_mae, val_r2 = (
                metric(val_output, y_val)
                for metric in (compute_rmse, compute_mae, compute_r2)
            )

        print(f"Epoch {epoch}:")
        print(f"Train → MSE = {train_loss_full.item():.4f}, RMSE = {rmse.item():.4f}, MAE = {mae.item():.4f}, R² = {r2.item():.4f}")
        print(f"Val   → MSE = {val_loss_full.item():.4f}, RMSE = {val_rmse.item():.4f}, MAE = {val_mae.item():.4f}, R² = {val_r2.item():.4f}")

    # Mean of the training and validation loss for every epoch of the retraining pass
    avg_losses = [(train + val) / 2 for train, val in zip(train_losses, val_losses)]

    def _score_split(predictions, targets):
        """Return the (MSE, RMSE, MAE, MAPE, R²) tensors for one data split."""
        return (
            criterion(predictions, targets),
            compute_rmse(predictions, targets),
            compute_mae(predictions, targets),
            compute_mape(predictions, targets),
            compute_r2(predictions, targets),
        )

    # Metrics of the retrained model on the full train / validation / test tensors
    model.eval()
    with torch.no_grad():
        train_output = model(X_train)
        val_output = model(X_val)
        test_output = model(X_test)

        (training_mse, training_rmse, training_mae,
         training_mape, training_r2_score) = _score_split(train_output, y_train)
        val_mse, val_rmse, val_mae, val_mape, val_r2_score = _score_split(val_output, y_val)
        test_loss, test_rmse, test_mae, test_mape, test_r2 = _score_split(test_output, y_test)

    print(f"\nFinal Test Evaluation:")
    print(f"MSE = {test_loss.item():.4f}, RMSE = {test_rmse.item():.4f}, MAE = {test_mae.item():.4f}, R² Score = {test_r2.item():.4f}")

    # Log the final metrics to MLflow, each rounded to two decimals.
    # Insertion order of the dict is the order in which they are logged:
    # training, then validation, then test.
    final_metrics = {
        "training_mean_squared_error": training_mse,
        "training_root_mean_squared_error": training_rmse,
        "training_mean_absolute_error": training_mae,
        "training_mean_absolute_percentage_error": training_mape,
        "training_r2_score": training_r2_score,
        "val_mean_squared_error": val_mse,
        "val_rmse": val_rmse,
        "val_mae": val_mae,
        "val_mape": val_mape,
        "val_r2_score": val_r2_score,
        "test_mse": test_loss,
        "test_rmse": test_rmse,
        "test_mae": test_mae,
        "test_mape": test_mape,
        "test_r2_score": test_r2,
    }
    for metric_name, metric_tensor in final_metrics.items():
        mlflow.log_metric(metric_name, round(metric_tensor.item(), 2))

    # Log the model
    # Convert the training inputs and the model's training predictions to NumPy
    # arrays on the CPU so MLflow can infer the input/output signature from them.
    X_train_numpy = X_train.detach().cpu().numpy()
    train_output_numpy = train_output.detach().cpu().numpy()
    signature = infer_signature(X_train_numpy, train_output_numpy)
    
    # Store the PyTorch model in the active run, with the first five training rows
    # as the input example; `registered_model_name` asks MLflow to register it
    # under that name as well.
    mlflow.pytorch.log_model(
        model,
        name="feed_forward_neural_network",
        signature=signature,
        input_example=X_train_numpy[:5],
        registered_model_name="FNN_Batch_Training"
    )

    # Plot after training
    # Skip the first 4,000 epochs only when this run is longer than that.
    # The retraining pass lasts epoch_min_avg + 1 epochs, which can be 4,000 or
    # fewer; a fixed start of 4,000 would then slice every series to nothing and
    # save an empty figure.
    start = 4_000 if len(train_losses) > 4_000 else 0
    plt.figure(figsize=(10, 5))
    plt.yscale('log')
    plt.plot(range(start, len(train_losses)), train_losses[start:], label='Training Loss', color='blue')
    plt.plot(range(start, len(val_losses)), val_losses[start:], label='Validation Loss', color='orange')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Loss Curves")
    # Per-epoch mean of training and validation loss, plus a marker at the epoch
    # chosen from the exploratory run
    plt.plot(range(start, len(avg_losses)), avg_losses[start:], label='Avg Loss', color='purple')
    plt.axvline(x=epoch_min_avg, color='red', linestyle='--', label='Min Avg Loss')
    plt.legend()
    plt.grid(True)
    # Save before show(), then attach the image to the MLflow run
    plt.savefig("loss_curve.png")
    mlflow.log_artifact("loss_curve.png")
    plt.show()

    # Write the model's string representation to a text file and attach it to the run
    summary_path = "model_summary.txt"
    architecture_text = str(model)
    with open(summary_path, "w") as summary_file:
        summary_file.write(architecture_text)
    mlflow.log_artifact(summary_path)

    # Clean memory in case we want to run this cell again without running the whole notebook
    # Remove the notebook's reference to the model (it may hold GPU tensors)
    del model
    # Invoke garbage collector
    gc.collect()
    # Clear GPU cache
    torch.cuda.empty_cache()

    # Wall-clock time since `start_time` was taken at the top of this cell;
    # it covers both training passes, the MLflow logging and the plots.
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"\n⏳ Training completed in {elapsed_time:.2f} seconds.")