In [37]:
import os
import copy
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score
import torch.nn.functional as F

In [38]:
def weight_init(layers):
    for layer in layers:
        if isinstance(layer, nn.BatchNorm1d):
            layer.weight.data.fill_(1)
            layer.bias.data.zero_()
        elif isinstance(layer, nn.Linear):
            n = layer.in_features
            y = 1.0 / np.sqrt(n)
            layer.weight.data.uniform_(-y, y)
            if layer.bias is not None:
                layer.bias.data.fill_(0)
            # nn.init.kaiming_normal_(layer.weight.data, nonlinearity='relu')

Data Loading and Processing

In [39]:
def load_data(data_path):
    # Read feature mapping
    with open(os.path.join(data_path, 'feat.bid.txt'), 'r') as f:
        lines = f.readlines()
    feature_nums = int(lines[0].strip())
    # Assuming features are already mapped to integers in the data files

    # Read train data
    train_data = pd.read_csv(os.path.join(data_path, 'train.bid.txt'), header=None)
    # Read test data
    test_data = pd.read_csv(os.path.join(data_path, 'test.bid.txt'), header=None)

    return train_data, test_data, feature_nums

Dataset Class

In [40]:
class CTRDataset(Dataset):
    def __init__(self, data):
        # Data columns: click + winning price + hour + time_fraction + timestamp + features
        self.labels = data.iloc[:, 0].values.astype(np.float32)  # click labels
        self.features = data.iloc[:, 5:].values.astype(np.int64)  # features start from column 5

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        x = self.features[idx]
        y = self.labels[idx]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.float32)

## Function to expand embedding layers

In [41]:
def expand_embedding_layer(old_embedding, new_num_embeddings):
    old_num_embeddings, embedding_dim = old_embedding.weight.size()
    if new_num_embeddings <= old_num_embeddings:
        return old_embedding  # No need to expand
    # Create new embedding layer
    new_embedding = nn.Embedding(new_num_embeddings, embedding_dim)
    # Copy weights from old embedding
    with torch.no_grad():
        new_embedding.weight[:old_num_embeddings] = old_embedding.weight
        # Initialize new embeddings
        nn.init.xavier_uniform_(new_embedding.weight[old_num_embeddings:])
    return new_embedding

## Model Implementations
### DeepFM Model

In [42]:
class DeepFM(nn.Module):
    def __init__(self,
                 feature_nums,
                 field_nums,
                 latent_dims,
                 output_dim=1):
        super(DeepFM, self).__init__()
        self.feature_nums = feature_nums
        self.field_nums = field_nums
        self.latent_dims = latent_dims

        # Linear part
        self.linear = nn.Embedding(self.feature_nums, output_dim)

        # FM embedding
        self.feature_embedding = nn.Embedding(self.feature_nums, self.latent_dims)
        nn.init.xavier_uniform_(self.feature_embedding.weight.data)

        # MLP
        deep_input_dims = self.field_nums * self.latent_dims
        layers = []

        neuron_nums = [300, 300, 300]
        for neuron_num in neuron_nums:
            layers.append(nn.Linear(deep_input_dims, neuron_num))
            # layers.append(nn.BatchNorm1d(neuron_num))  # Uncomment if needed
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(p=0.2))
            deep_input_dims = neuron_num

        layers.append(nn.Linear(deep_input_dims, 1))

        # Initialize weights
        weight_init(layers)

        self.mlp = nn.Sequential(*layers)

    def expand_embeddings(self, new_feature_nums):
        self.linear = expand_embedding_layer(self.linear, new_feature_nums)
        self.feature_embedding = expand_embedding_layer(self.feature_embedding, new_feature_nums)
        self.feature_nums = new_feature_nums

    def forward(self, x):
        # Linear Part
        linear_out = torch.sum(self.linear(x), dim=1)  # Shape: (batch_size, output_dim)

        # FM Part
        embedding_x = self.feature_embedding(x)
        square_of_sum = torch.sum(embedding_x, dim=1) ** 2
        sum_of_square = torch.sum(embedding_x ** 2, dim=1)
        ix = 0.5 * torch.sum(square_of_sum - sum_of_square, dim=1, keepdim=True)  # Shape: (batch_size, 1)

        # Deep Part
        deep_out = self.mlp(embedding_x.view(-1, self.field_nums * self.latent_dims))  # Shape: (batch_size, 1)

        # Output
        out = linear_out + ix + deep_out  # Shape: (batch_size, 1)

        return out  # Return logits

FNN Model

In [43]:
class FNN(nn.Module):
    def __init__(self,
                 feature_nums,
                 field_nums,
                 latent_dims):
        super(FNN, self).__init__()
        self.feature_nums = feature_nums
        self.field_nums = field_nums
        self.latent_dims = latent_dims

        # Embedding layer
        self.feature_embedding = nn.Embedding(self.feature_nums, self.latent_dims)
        nn.init.xavier_uniform_(self.feature_embedding.weight.data)

        # MLP
        deep_input_dims = self.field_nums * self.latent_dims
        layers = []

        neuron_nums = [300, 300, 300]
        for neuron_num in neuron_nums:
            layers.append(nn.Linear(deep_input_dims, neuron_num))
            # layers.append(nn.BatchNorm1d(neuron_num))  # Uncomment if needed
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(p=0.2))
            deep_input_dims = neuron_num

        layers.append(nn.Linear(deep_input_dims, 1))

        # Initialize weights
        weight_init(layers)

        self.mlp = nn.Sequential(*layers)

    def expand_embeddings(self, new_feature_nums):
        self.feature_embedding = expand_embedding_layer(self.feature_embedding, new_feature_nums)
        self.feature_nums = new_feature_nums

    def forward(self, x):
        embedding_x = self.feature_embedding(x)
        out = self.mlp(embedding_x.view(-1, self.field_nums * self.latent_dims))

        return out  # Return logits


DCN Model

In [44]:
class DCN(nn.Module):
    def __init__(self,
                 feature_nums,
                 field_nums,
                 latent_dims,
                 output_dim=1):
        super(DCN, self).__init__()
        self.feature_nums = feature_nums
        self.field_nums = field_nums
        self.latent_dims = latent_dims

        # Embedding layer
        self.feature_embedding = nn.Embedding(self.feature_nums, self.latent_dims)
        nn.init.xavier_uniform_(self.feature_embedding.weight.data)

        # Deep Network
        deep_input_dims = self.field_nums * self.latent_dims
        deep_net_layers = []
        neural_nums = [300, 300, 300]
        self.num_neural_layers = 5  # Number of layers in the cross network

        for neural_num in neural_nums:
            deep_net_layers.append(nn.Linear(deep_input_dims, neural_num))
            # deep_net_layers.append(nn.BatchNorm1d(neural_num))  # Uncomment if needed
            deep_net_layers.append(nn.ReLU())
            deep_net_layers.append(nn.Dropout(p=0.2))
            deep_input_dims = neural_num

        # Initialize weights
        weight_init(deep_net_layers)

        self.DN = nn.Sequential(*deep_net_layers)

        # Cross Network
        cross_input_dims = self.field_nums * self.latent_dims
        self.cross_net_w = nn.ModuleList([
            nn.Linear(cross_input_dims, cross_input_dims) for _ in range(self.num_neural_layers)
        ])

        # Initialize weights for cross network
        weight_init(self.cross_net_w)

        self.cross_net_b = nn.ParameterList([
            nn.Parameter(torch.zeros(cross_input_dims)) for _ in range(self.num_neural_layers)
        ])

        # Final Linear Layer
        self.linear = nn.Linear(deep_input_dims + cross_input_dims, output_dim)
        # nn.init.xavier_normal_(self.linear.weight)

    def expand_embeddings(self, new_feature_nums):
        self.feature_embedding = expand_embedding_layer(self.feature_embedding, new_feature_nums)
        self.feature_nums = new_feature_nums
        # No need to adjust cross network dimensions since field_nums and latent_dims are unchanged

    def forward(self, x):
        embedding_x = self.feature_embedding(x).view(-1, self.field_nums * self.latent_dims)

        # Cross Network
        cn_x0 = embedding_x
        cn_x = embedding_x
        for i in range(self.num_neural_layers):
            cn_x_w = self.cross_net_w[i](cn_x)
            cn_x = cn_x0 * cn_x_w + self.cross_net_b[i] + cn_x

        # Deep Network
        dn_x = self.DN(embedding_x)

        # Concatenate
        x_stack = torch.cat([cn_x, dn_x], dim=1)

        # Final output
        out = self.linear(x_stack)

        return out  # Return logits

AFM Model

In [45]:
class AFM(nn.Module):
    def __init__(self,
                 feature_nums,
                 field_nums,
                 latent_dims,
                 output_dim=1):
        super(AFM, self).__init__()
        self.feature_nums = feature_nums
        self.field_nums = field_nums
        self.latent_dims = latent_dims

        # Embedding layer
        self.feature_embedding = nn.Embedding(self.feature_nums, self.latent_dims)
        nn.init.xavier_uniform_(self.feature_embedding.weight.data)

        # Prepare index pairs for interactions
        self.row, self.col = [], []
        for i in range(self.field_nums - 1):
            for j in range(i + 1, self.field_nums):
                self.row.append(i)
                self.col.append(j)

        attention_factor = self.latent_dims

        # Attention network
        self.attention_net = nn.Linear(self.latent_dims, attention_factor)
        n = self.attention_net.in_features
        y = 1.0 / np.sqrt(n)
        self.attention_net.weight.data.uniform_(-y, y)
        self.attention_net.bias.data.fill_(0)

        self.attention_softmax = nn.Linear(attention_factor, 1)

        # Output layers
        self.fc = nn.Linear(self.latent_dims, output_dim)

        # Linear part
        self.linear = nn.Embedding(self.feature_nums, output_dim)
        self.bias = nn.Parameter(torch.zeros((output_dim,)))

    def expand_embeddings(self, new_feature_nums):
        self.feature_embedding = expand_embedding_layer(self.feature_embedding, new_feature_nums)
        self.linear = expand_embedding_layer(self.linear, new_feature_nums)
        self.feature_nums = new_feature_nums

    def forward(self, x):
        embedding_x = self.feature_embedding(x)

        # Pairwise interactions
        row_emb = embedding_x[:, self.row]  # Shape: (batch_size, num_pairs, latent_dims)
        col_emb = embedding_x[:, self.col]  # Shape: (batch_size, num_pairs, latent_dims)
        inner_product = row_emb * col_emb  # Element-wise multiplication

        # Attention mechanism
        attn_scores = F.relu(self.attention_net(inner_product))  # Shape: (batch_size, num_pairs, attention_factor)
        attn_scores = F.softmax(self.attention_softmax(attn_scores), dim=1)  # Shape: (batch_size, num_pairs, 1)
        attn_scores = F.dropout(attn_scores, p=0.2)

        # Weighted sum of interactions
        attn_output = torch.sum(attn_scores * inner_product, dim=1)  # Shape: (batch_size, latent_dims)
        attn_output = F.dropout(attn_output, p=0.2)

        # Output
        linear_part = self.bias + torch.sum(self.linear(x), dim=1)  # Shape: (batch_size, output_dim)
        out = linear_part + self.fc(attn_output)  # Shape: (batch_size, output_dim)

        return out  # Return logits

## Training and Testing
### Main Function

In [46]:
def main():
    dataset_ids = ['1458', '2259', '2261', '2821', '2997', '3358', '3386', '3427', '3476']
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    batch_size = 1024

    # Model parameters
    model_name = 'AFM'  # Change as needed
    latent_dims = 10
    dropout = 0.2
    num_layers = 5  # For DCN
    attn_size = 32  # For AFM
    epochs_per_dataset = 1  # Train for 1 epoch per dataset

    # Paths
    base_data_path = '/home/vladplyusnin/tftest/Deep-Learning-COPSCI764/Project/data/ipinyou/'  # Adjust this path as needed

    # Initialize the model (we need to know feature_nums and field_nums)
    # Initialize variables
    max_feature_nums = 0
    field_nums = None  # Will be set after loading the first dataset

    # Initialize model as None
    model = None

    # Loss and optimizer (initialized later)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = None

    for dataset_id in dataset_ids:
        data_path = os.path.join(base_data_path, dataset_id)

        # Load data
        train_data, test_data, feature_nums_dataset = load_data(data_path)
        field_nums_dataset = train_data.shape[1] - 5

        # Update max_feature_nums if needed
        if feature_nums_dataset > max_feature_nums:
            max_feature_nums = feature_nums_dataset

        # Initialize field_nums if not set
        if field_nums is None:
            field_nums = field_nums_dataset

        # Check that field_nums are the same
        if field_nums != field_nums_dataset:
            print(f"Dataset {dataset_id} has different field_nums.")
            # Handle this case as needed
            continue  # Skip this dataset for now

        # Expand model's embeddings if feature_nums increased
        if model is None:
            # Initialize the model
            if model_name == 'DeepFM':
                model = DeepFM(max_feature_nums, field_nums, latent_dims).to(device)
            elif model_name == 'FNN':
                model = FNN(max_feature_nums, field_nums, latent_dims).to(device)
            elif model_name == 'DCN':
                model = DCN(max_feature_nums, field_nums, latent_dims).to(device)
            elif model_name == 'AFM':
                model = AFM(max_feature_nums, field_nums, latent_dims).to(device)
            else:
                raise ValueError('Unknown model name')
            # Initialize optimizer
            optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
        else:
            # Expand embeddings if needed
            if feature_nums_dataset > model.feature_nums:
                model.expand_embeddings(feature_nums_dataset)
                # Update optimizer with new parameters
                optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

        # Split train data into training and validation sets
        from sklearn.model_selection import train_test_split
        train_df, val_df = train_test_split(train_data, test_size=0.2, random_state=42)

        # Create datasets and loaders
        train_dataset = CTRDataset(train_df)
        val_dataset = CTRDataset(val_df)
        test_dataset = CTRDataset(test_data)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=4)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=4)

        print(f"\nTraining on dataset {dataset_id}")
        # Train the model for 1 epoch on the current dataset
        train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=1, early_stopping_patience=None)

        # Testing the model on the test set
        model.eval()
        y_true = []
        y_scores = []

        with torch.no_grad():
            for x_batch, y_batch in test_loader:
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)

                outputs = model(x_batch)  # Outputs are logits
                probabilities = torch.sigmoid(outputs).detach().cpu().numpy().flatten()

                y_true.extend(y_batch.cpu().numpy())
                y_scores.extend(probabilities)

        test_auc = roc_auc_score(y_true, y_scores)
        print(f'Dataset {dataset_id}, Test AUC: {test_auc:.4f}')

    # Optionally, save the final model
    torch.save(model.state_dict(), f'{model_name}_final_model.pth')

Training Function

In [47]:
def train_model(model, train_loader, valid_loader, criterion, optimizer, device, epochs, early_stopping_patience=None):
    best_loss = float('inf')
    epochs_no_improve = 0
    early_stop = False

    for epoch in range(epochs):
        if early_stop:
            print("Early stopping")
            break

        model.train()
        total_loss = 0
        for x_batch, y_batch in train_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            logits = model(x_batch).squeeze()
            loss = criterion(logits, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)

        # Validation
        model.eval()
        val_total_loss = 0
        y_true = []
        y_scores = []
        with torch.no_grad():
            for x_val, y_val in valid_loader:
                x_val = x_val.to(device)
                y_val = y_val.to(device)
                logits = model(x_val).squeeze()
                loss = criterion(logits, y_val)
                val_total_loss += loss.item()
                y_pred = torch.sigmoid(logits)
                y_true.extend(y_val.cpu().numpy())
                y_scores.extend(y_pred.cpu().numpy())
        val_avg_loss = val_total_loss / len(valid_loader)
        val_auc = roc_auc_score(y_true, y_scores)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Val Loss: {val_avg_loss:.4f}, Validation AUC: {val_auc:.4f}')

        # Early stopping not used since epochs=1
        if early_stopping_patience is not None:
            if val_avg_loss < best_loss:
                best_loss = val_avg_loss
                epochs_no_improve = 0
                # Save best model
                torch.save(model.state_dict(), 'best_model.pth')
            else:
                epochs_no_improve += 1
                if epochs_no_improve >= early_stopping_patience:
                    print(f"Early stopping after {epoch+1} epochs")
                    early_stop = True

    if early_stopping_patience is not None:
        print(f'Best Validation Loss: {best_loss:.4f}')

In [48]:
if __name__ == '__main__':
    main()


Training on dataset 1458
Epoch 1/1, Loss: 0.1490, Val Loss: 0.0501, Validation AUC: 0.4532
Dataset 1458, Test AUC: 0.4495

Training on dataset 2259
Epoch 1/1, Loss: 0.0259, Val Loss: 0.0184, Validation AUC: 0.4865
Dataset 2259, Test AUC: 0.4558

Training on dataset 2261
Epoch 1/1, Loss: 0.0835, Val Loss: 0.0346, Validation AUC: 0.5741
Dataset 2261, Test AUC: 0.4965

Training on dataset 2821
Epoch 1/1, Loss: 0.0115, Val Loss: 0.0101, Validation AUC: 0.4851
Dataset 2821, Test AUC: 0.5028

Training on dataset 2997
Epoch 1/1, Loss: 0.0535, Val Loss: 0.0518, Validation AUC: 0.4296
Dataset 2997, Test AUC: 0.4941

Training on dataset 3358
Epoch 1/1, Loss: 0.0086, Val Loss: 0.0083, Validation AUC: 0.5906
Dataset 3358, Test AUC: 0.6763

Training on dataset 3386
Epoch 1/1, Loss: 0.0174, Val Loss: 0.0098, Validation AUC: 0.4997
Dataset 3386, Test AUC: 0.4977

Training on dataset 3427
Epoch 1/1, Loss: 0.0106, Val Loss: 0.0083, Validation AUC: 0.5429
Dataset 3427, Test AUC: 0.5639

Training on dat