In [9]:
import copy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader

In [10]:
# Dataset presets for the Mean Teacher notebook. Each preset carries every key
# the training cells read; exactly one of them is bound to `config` at the
# bottom of this cell.

# Preset: credit-profile dataset, numeric target ("Profile Score").
CREDIT_CONFIG = {
    # General
    "training_session": 2,  # embedded in the checkpoint file name

    # Mean-Teacher Model
    "pre_trained": True,  # forwarded to the image model factory
    "learning_rate": 3e-4,  # Adam learning rate for the student
    "alpha": 0.99,  # EMA decay applied to the teacher weights
    "lambda_u": 1.0,  # weight of the unsupervised (consistency) loss
    "epochs": 10,

    # Dataset
    "input_type": "tabular",
    "dataset_path": "../../datasets/credit_data.csv",
    # NOTE(review): fractional value, presumably the labeled share of the data;
    # confirm against dataloader_factory.
    "num_labels": 0.6,
    "batch_size": 64,

    # Image input
    "image_classes": ["form", "invoice", "memo", "letter"],
    "image_size": 224,

    # Text input

    # Tabular input
    "categorical_columns": [
        "Gender", "Existing Customer", "State",
        "City", "Employment Profile", "Occupation"
    ],
    "numeric_columns": [
        "Age", "Income", "Credit Score", "Credit History Length", "Number of Existing Loans",
        "Loan Amount", "Loan Tenure", "LTV Ratio"
    ],
    "target_column": "Profile Score",
    "is_target_categorical": False,
}

# Preset: loan-approval dataset, categorical target ("LoanApproved").
LOAN_CONFIG = {
    # General
    "training_session": 2,  # embedded in the checkpoint file name

    # Mean-Teacher Model
    "pre_trained": True,  # forwarded to the image model factory
    "learning_rate": 3e-4,  # Adam learning rate for the student
    "alpha": 0.99,  # EMA decay applied to the teacher weights
    "lambda_u": 1.0,  # weight of the unsupervised (consistency) loss
    "epochs": 50,

    # Dataset
    "input_type": "tabular",
    "dataset_path": "../../datasets/loan.csv",
    # NOTE(review): fractional value, presumably the labeled share of the data;
    # confirm against dataloader_factory.
    "num_labels": 0.4,
    "batch_size": 64,

    # Image input
    "image_classes": [],
    "image_size": 224,

    # Text input

    # Tabular input
    "categorical_columns": [
        "ApplicationDate", "EmploymentStatus", "EducationLevel",
        "MaritalStatus", "HomeOwnershipStatus", "LoanPurpose"
    ],
    "numeric_columns": [
        "Age", "AnnualIncome", "CreditScore", "Experience", "LoanAmount",
        "LoanDuration", "NumberOfDependents", "MonthlyDebtPayments",
        "CreditCardUtilizationRate", "NumberOfOpenCreditLines", "NumberOfCreditInquiries",
        "DebtToIncomeRatio", "BankruptcyHistory", "PreviousLoanDefaults", "PaymentHistory",
        "LengthOfCreditHistory", "SavingsAccountBalance", "CheckingAccountBalance",
        "TotalAssets", "TotalLiabilities", "MonthlyIncome", "UtilityBillsPaymentHistory",
        "JobTenure", "NetWorth", "BaseInterestRate", "InterestRate",
        "MonthlyLoanPayment", "TotalDebtToIncomeRatio", "RiskScore"
    ],
    "target_column": "LoanApproved",
    "is_target_categorical": True,
}

# Active configuration read by every cell below. Point this at another preset
# to switch datasets instead of stacking a second `config = {...}` assignment.
config = LOAN_CONFIG

In [11]:
def update_ema(student_model, teacher_model, alpha=None):
    """Blend the student's weights into the teacher as an exponential moving average.

    For every parameter pair the teacher becomes
    ``alpha * teacher + (1 - alpha) * student``.

    Args:
        student_model: Module whose current parameters are mixed in. Not modified.
        teacher_model: Module whose parameters are updated in place. Parameters
            are paired with the student's by iteration order.
        alpha: Fraction of the previous teacher value that is kept. When omitted,
            ``config["alpha"]`` is used.

    Returns:
        None. ``teacher_model`` is mutated.
    """
    if alpha is None:
        alpha = config["alpha"]

    # In-place update under no_grad: keeps the teacher's existing tensors instead
    # of allocating a replacement for every parameter on every training step.
    with torch.no_grad():
        for student_param, teacher_param in zip(student_model.parameters(), teacher_model.parameters()):
            teacher_param.mul_(alpha).add_(student_param.detach(), alpha=1 - alpha)

In [12]:
def train_one_epoch(student_model, teacher_model, lb_loader, ulb_loader, optimizer, device, epoch):
    """Run one Mean Teacher training pass over paired labeled/unlabeled batches.

    Each step combines a supervised loss on the labeled batch with a consistency
    loss that pulls the student's output on one unlabeled view towards the
    teacher's output on the other view, then steps the optimizer and refreshes
    the teacher through ``update_ema``.

    Args:
        student_model: Model optimised by ``optimizer``.
        teacher_model: Model that provides targets for the consistency loss; it
            is only run under ``torch.no_grad()`` here.
        lb_loader: Iterable yielding ``(inputs, labels)`` batches.
        ulb_loader: Iterable yielding pairs of views of the same unlabeled batch
            (the ``_w`` / ``_s`` suffixes presumably mean weak / strong
            augmentation; confirm against dataloader_factory).
        optimizer: Optimizer over the student's parameters.
        device: Device all batches are moved to before the forward passes.
        epoch: Epoch number, used only in the progress message.

    Returns:
        None. Prints the loss summed over all steps of the epoch.
    """
    student_model.train()
    teacher_model.train()

    # Regression is only possible for tabular data with a non-categorical target.
    is_regression = config["input_type"] == "tabular" and not config["is_target_categorical"]
    total_loss = 0.0

    # zip stops at the shorter loader, so that loader defines the epoch length.
    for (x_lb, y_lb), (x_ulb_w, x_ulb_s) in zip(lb_loader, ulb_loader):
        # Move every batch to the model's device. The unlabeled views must be
        # moved as well, otherwise the forward passes fail when training on GPU.
        x_lb = x_lb.to(device)
        x_ulb_w = x_ulb_w.to(device)
        x_ulb_s = x_ulb_s.to(device)
        if is_regression:
            # Add a trailing dimension so the target lines up with the model
            # output in mse_loss (assumes an (N, 1) output; TODO confirm).
            y_lb = y_lb.float().unsqueeze(1).to(device)
        else:
            # cross_entropy needs integer class indices; evaluate() casts the
            # same way.
            y_lb = y_lb.long().to(device)

        # Supervised loss
        logits_lb = student_model(x_lb)
        loss_sup = F.mse_loss(logits_lb, y_lb) if is_regression else F.cross_entropy(logits_lb, y_lb)

        # Unsupervised loss (consistency). The teacher's output is a fixed
        # target: no gradient flows into the teacher.
        if is_regression:
            with torch.no_grad():
                pseudo_labels = teacher_model(x_ulb_w)
            logits_ulb_s = student_model(x_ulb_s)
            loss_unsup = F.mse_loss(logits_ulb_s, pseudo_labels)
        else:
            with torch.no_grad():
                logits_ulb_w = teacher_model(x_ulb_w)
                pseudo_labels = torch.softmax(logits_ulb_w, dim=1)
            logits_ulb_s = student_model(x_ulb_s)
            # Compare class probabilities rather than raw logits.
            loss_unsup = F.mse_loss(torch.softmax(logits_ulb_s, dim=1), pseudo_labels)

        # Total loss
        loss = loss_sup + config["lambda_u"] * loss_unsup
        total_loss += loss.item()

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # EMA update of the teacher from the freshly stepped student
        update_ema(student_model, teacher_model)

    print(f'Epoch {epoch} | Total Loss: {total_loss:.4f}')

In [13]:
def evaluate(model, val_loader, device):
    """Score ``model`` on a validation loader and print the result.

    Args:
        model: Model to evaluate; it is switched to eval mode and left there.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        ``(metric, total_loss)``. ``metric`` is the mean absolute error for
        regression (lower is better) and the accuracy for classification
        (higher is better). ``total_loss`` is the sum of the per-batch losses.
    """
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0.0

    # Regression is only possible for tabular data with a non-categorical target.
    is_regression = config["input_type"] == "tabular" and not config["is_target_categorical"]
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(device), y.to(device)
            logits = model(x)

            if is_regression:
                # Flatten to 1-D instead of squeeze(): squeeze() turns a
                # single-sample batch into a 0-d tensor, which cannot be
                # iterated by extend() below.
                preds = logits.reshape(-1)
                targets = y.float().reshape(-1)
                loss = F.mse_loss(preds, targets)
            else:
                targets = y.long()
                loss = F.cross_entropy(logits, targets)
                preds = torch.argmax(logits, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())
            total_loss += loss.item()

    if is_regression:
        mae = np.mean(np.abs(np.array(all_preds) - np.array(all_labels)))
        print(f"Validation MAE: {mae:.4f} | Loss: {total_loss:.4f}")
        return mae, total_loss
    else:
        acc = np.mean(np.array(all_preds) == np.array(all_labels))
        print(f"Validation Accuracy: {acc:.4f} | Loss: {total_loss:.4f}")
        return acc, total_loss

In [14]:
def train_mean_teacher(student_model, lb_loader, ulb_loader, val_loader, device="cuda", epochs=10):
    """Train a student with a Mean Teacher and checkpoint the best student.

    The teacher starts as a deep copy of the student with gradients disabled.
    After every epoch the student is evaluated and its ``state_dict`` is saved
    whenever the validation metric improves.

    Args:
        student_model: Model to train; optimised with Adam at
            ``config["learning_rate"]``.
        lb_loader: Labeled training loader, passed to ``train_one_epoch``.
        ulb_loader: Unlabeled training loader, passed to ``train_one_epoch``.
        val_loader: Validation loader, passed to ``evaluate``.
        device: Device forwarded to the training and evaluation helpers.
        epochs: Number of epochs to run.

    Returns:
        ``(student_model, teacher_model)`` after the final epoch. The best
        checkpoint is on disk, not necessarily in the returned student.
    """
    teacher_model = copy.deepcopy(student_model)
    # The teacher is never optimised directly.
    for param in teacher_model.parameters():
        param.requires_grad = False

    optimizer = optim.Adam(student_model.parameters(), lr=config["learning_rate"])

    # evaluate() returns MAE for regression (lower is better) and accuracy for
    # classification (higher is better), so "best" depends on the task.
    is_regression = config["input_type"] == "tabular" and not config["is_target_categorical"]
    metric_name = "MAE" if is_regression else "Accuracy"
    best_val_metric = float("inf") if is_regression else 0.0

    # Single quotes inside the replacement field keep this valid before Python 3.12.
    best_model_path = f"../../models/mean_teacher/best_model_{config['training_session']}.pt"
    for epoch in range(1, epochs + 1):
        train_one_epoch(student_model, teacher_model, lb_loader, ulb_loader, optimizer, device, epoch)

        val_metric, _ = evaluate(student_model, val_loader, device)
        if is_regression:
            improved = val_metric < best_val_metric
        else:
            improved = val_metric > best_val_metric

        if improved:
            best_val_metric = val_metric
            torch.save(student_model.state_dict(), best_model_path)
            print(f"✅ Best model saved to {best_model_path} | {metric_name}: {val_metric:.4f}")

    return student_model, teacher_model

In [None]:
import importlib
# Project-local factory modules, imported under short aliases so they can be
# passed to importlib.reload below.
import token_factory as tf
import dataloader_factory as dl
import model_factory as md

# Re-execute each module so the already-running kernel sees its current source.
importlib.reload(tf)
importlib.reload(dl)
importlib.reload(md)

In [None]:
# Driver cell: build the tokenizer, loaders and model for the configured input
# type, then train with Mean Teacher.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from token_factory import token_factory
from model_factory import model_factory
from dataloader_factory import dataloader_factory

if config["input_type"] == "image":
    base_transform = token_factory("image", image_size=(config["image_size"], config["image_size"]))

    # Create model and dataloaders for image input
    model = model_factory(
        "image",
        num_classes=len(config["image_classes"]),
        pretrained=config["pre_trained"]
    ).to(device)
    lb_loader, ulb_loader, val_loader = dataloader_factory(config, base_transform=base_transform)

    # Train Mean Teacher
    trained_student, trained_teacher = train_mean_teacher(
        model, lb_loader, ulb_loader, val_loader, device, config["epochs"]
    )

elif config["input_type"] == "text":
    # Placeholder: text input is not implemented, so nothing is trained and
    # trained_student / trained_teacher are not defined on this path.
    ...

elif config["input_type"] == "tabular":
    tokenizer = token_factory(
        "tabular",
        categorical_columns=config["categorical_columns"],
        numeric_columns=config["numeric_columns"],
        target_column=config["target_column"],
        is_target_categorical=config["is_target_categorical"]
    )
    df = pd.read_csv(config["dataset_path"])
    X, y = tokenizer.fit_transform(df)

    # Generate dataloaders
    lb_loader, ulb_loader, val_loader = dataloader_factory(config, X=X, y=y)
    print(len(lb_loader), len(ulb_loader), len(val_loader))

    # Create model
    # NOTE(review): input_dim is the raw CSV's column count minus the target,
    # not the width of the transformed X. This only matches if the tokenizer
    # emits exactly one feature per CSV column; confirm against token_factory.
    input_dim = df.drop(columns=[config["target_column"]]).shape[1]
    is_regression = not config["is_target_categorical"]
    # Computed for regression targets too; model_factory also receives the
    # regression flag.
    num_classes = df[config["target_column"]].nunique()
    model = model_factory(
        "tabular",
        input_dim=input_dim,
        num_classes=num_classes,
        regression=is_regression
    ).to(device)

    # Train Mean Teacher
    trained_student, trained_teacher = train_mean_teacher(
        model, lb_loader, ulb_loader, val_loader, device, config["epochs"]
    )

else:
    # Single quotes inside the replacement field keep this valid before Python 3.12.
    raise ValueError(f"Unsupported input type: {config['input_type']}")