### Initialization

In [1]:
import itertools

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from torch.utils.tensorboard import SummaryWriter

In [None]:
# Show which PyTorch build this kernel is running.
print(torch.__version__)

# Choose the compute backend in priority order: CUDA first, then Apple's MPS,
# and fall back to the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

2.5.1
Using mps device


In [7]:
import warnings
warnings.filterwarnings("ignore")

### Data Loading and Preprocessing

In [None]:
file_path = "train.csv"
data_df = pd.read_csv(file_path)
print(data_df.isnull().sum())

# Ask for an explicit float copy: a plain `to_numpy()` can hand back a view of
# the DataFrame's own buffer (or a read-only array when pandas Copy-on-Write is
# active), so zero-filling it in place would either silently modify `data_df`
# or raise. The float dtype also makes non-numeric columns fail here, at the
# conversion, instead of inside `np.isnan`.
data_np = data_df.to_numpy(dtype=np.float64, copy=True)

# True where a value was present in the CSV, False where it was missing.
observed_np = ~np.isnan(data_np)
# Missing entries are fed to the model as 0; `mask` records which ones are real.
data_np[~observed_np] = 0

data = torch.tensor(data_np, dtype=torch.float32).to(device)
mask = torch.tensor(observed_np, dtype=torch.bool).to(device)

### Transformer

In [4]:
class TransformerModel(nn.Module):
    """Transformer encoder followed by a linear layer, mapping rows to rows of the same width.

    Args:
        input_dim: Number of features per row. It is used directly as the
            encoder's ``d_model`` and as the input/output size of ``fc_out``.
            PyTorch's multi-head attention requires it to be divisible by
            ``nhead``.
        hidden_dim: Size of the feed-forward sub-layer (``dim_feedforward``).
        nhead: Number of attention heads.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, input_dim, hidden_dim, nhead, num_layers):
        super().__init__()
        self.input_dim = input_dim
        # Kept as an attribute so the parameter names in `state_dict()` stay the
        # same as in checkpoints written by earlier runs of this notebook.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=nhead, dim_feedforward=hidden_dim)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.fc_out = nn.Linear(input_dim, input_dim)

    def forward(self, x, mask=None):
        """Run the encoder and the output projection.

        Args:
            x: Input tensor whose last dimension is ``input_dim``. The notebook
                passes 2-D ``(rows, input_dim)`` tensors.
                NOTE(review): with the default ``batch_first=False`` a 2-D
                input is treated as one unbatched sequence, so the rows of a
                batch attend to each other - confirm this is intended.
            mask: Accepted for call-site compatibility and currently ignored.
                It is optional so the model can be called as ``model(x)`` as
                well as ``model(x, mask)``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        encoded = self.transformer_encoder(x)
        return self.fc_out(encoded)

### Hyperparameters

In [None]:
# Candidate values for the exhaustive grid search in the training cell.
# The Cartesian product of these lists is 5 * 5 * 5 * 7 * 5 * 8 = 35,000
# configurations, and each one is trained once per cross-validation fold.
param_grid = dict(
    hidden_dim=[16, 32, 64, 128, 1024],   # feed-forward width of each encoder layer
    nhead=[2, 4, 8, 16, 32],              # attention heads
    num_layers=[2, 4, 8, 16, 32],         # stacked encoder layers
    lr=[0.1, 0.05, 0.01, 0.0005, 0.0002, 0.0001, 1e-5],  # Adam learning rate
    batch_size=[16, 32, 64, 128, 256],    # rows per optimisation step
    epochs=[10, 20, 50, 100, 200, 500, 1000, 10000],     # passes over the training fold
)

### Training

In [None]:
# Exhaustive grid search with 5-fold cross-validation.
#
# For every hyperparameter combination a fresh model is trained on each
# training fold and scored on the held-out fold; the combination with the
# lowest mean validation loss is remembered in `best_params` / `best_score`.
# The loss is computed only on observed entries (where `mask` is True).
kf = KFold(n_splits=5, shuffle=True, random_state=0)

# Seed PyTorch's RNG so weight initialisation and dropout repeat across reruns.
torch.manual_seed(0)

best_params = None
best_score = float('inf')
input_dim = data.size(1)
criterion = nn.MSELoss()

# KFold only needs the number of rows, so split plain indices instead of
# handing the (possibly accelerator-resident) tensor to scikit-learn.
row_indices = np.arange(data.size(0))

# Multi-head attention requires d_model (= input_dim here) to be divisible by
# the number of heads. Drop incompatible values up front so one bad value does
# not abort the whole search part-way through.
valid_nheads = [h for h in param_grid['nhead'] if input_dim % h == 0]
skipped_nheads = [h for h in param_grid['nhead'] if input_dim % h != 0]
if skipped_nheads:
    print(f"Skipping nhead values {skipped_nheads}: input_dim={input_dim} is not divisible by them")
if not valid_nheads:
    raise ValueError(
        f"No nhead value in {param_grid['nhead']} divides input_dim={input_dim}"
    )

# Same iteration order as six nested loops: the last factor varies fastest.
search_space = itertools.product(
    param_grid['hidden_dim'],
    valid_nheads,
    param_grid['num_layers'],
    param_grid['lr'],
    param_grid['batch_size'],
    param_grid['epochs'],
)

writer = SummaryWriter("runs/Transformer_MatrixCompletion")

try:
    for hidden_dim, nhead, num_layers, lr, batch_size, epochs in search_space:
        # Give every configuration its own TensorBoard namespace; otherwise all
        # configurations write to the same 'Fold_k/...' tags and overwrite each other.
        run_tag = f"hd{hidden_dim}_nh{nhead}_nl{num_layers}_lr{lr}_bs{batch_size}_ep{epochs}"
        fold_scores = []

        for fold, (train_index, val_index) in enumerate(kf.split(row_indices)):
            train_data, val_data = data[train_index], data[val_index]
            train_mask, val_mask = mask[train_index], mask[val_index]

            model = TransformerModel(input_dim, hidden_dim, nhead, num_layers).to(device)
            optimizer = optim.Adam(model.parameters(), lr=lr)

            for epoch in range(epochs):
                model.train()
                loss_sum = torch.zeros((), device=device)
                batches_seen = 0

                for start in range(0, train_data.size(0), batch_size):
                    batch_data = train_data[start:start + batch_size]
                    batch_mask = train_mask[start:start + batch_size]

                    # A batch with no observed entries would give a NaN loss
                    # (mean over an empty selection) and poison the weights.
                    if not batch_mask.any():
                        continue

                    input_data = batch_data.clone()
                    input_data[~batch_mask] = 0

                    optimizer.zero_grad()
                    output = model(input_data, batch_mask)
                    loss = criterion(output[batch_mask], batch_data[batch_mask])
                    loss.backward()
                    optimizer.step()

                    loss_sum += loss.detach()
                    batches_seen += 1

                # Log the mean loss over the epoch rather than whatever the
                # last mini-batch happened to produce.
                if batches_seen:
                    writer.add_scalar(f'{run_tag}/Fold_{fold}/Train_Loss',
                                      (loss_sum / batches_seen).item(), epoch)

            model.eval()
            with torch.no_grad():
                val_input = val_data.clone()
                val_input[~val_mask] = 0
                val_output = model(val_input, val_mask)
                # NaN if the fold has no observed entries; a NaN mean never
                # compares lower than `best_score`, so it cannot be selected.
                val_loss = criterion(val_output[val_mask], val_data[val_mask]).item()
            fold_scores.append(val_loss)

            # Logged at the index of the final training epoch, without relying
            # on the loop variable leaking out of the epoch loop.
            writer.add_scalar(f'{run_tag}/Fold_{fold}/Val_Loss', val_loss, max(epochs - 1, 0))

        avg_score = float(np.mean(fold_scores))
        print(f"Params: hidden_dim={hidden_dim}, nhead={nhead}, num_layers={num_layers}, "
              f"lr={lr}, batch_size={batch_size}, epochs={epochs} | Avg Val Loss: {avg_score:.4f}")

        if avg_score < best_score:
            best_score = avg_score
            best_params = {
                'hidden_dim': hidden_dim,
                'nhead': nhead,
                'num_layers': num_layers,
                'lr': lr,
                'batch_size': batch_size,
                'epochs': epochs
            }

            # NOTE: `model` is the network trained on the last fold of this
            # configuration, not one refit on the full training set.
            torch.save(model.state_dict(), "transformer_best.pth")
            print(f"Saved the best transformer model with loss {best_score:.4f}")
finally:
    # Flush and release the event file even if the search is interrupted.
    writer.close()

print("Best Hyperparameters:", best_params)
# This is the mean cross-validation loss; no test data is involved here.
print("Best Validation Loss:", best_score)

### Test

In [None]:
test_file_path = "test.csv"
test_df = pd.read_csv(test_file_path)

# Ask for an explicit float copy: a plain `to_numpy()` can hand back a view of
# the DataFrame's own buffer (or a read-only array when pandas Copy-on-Write is
# active), so zero-filling it in place would either silently modify `test_df`
# or raise.
test_np = test_df.to_numpy(dtype=np.float64, copy=True)

# True where a value was present in the CSV, False where it has to be imputed.
test_observed_np = ~np.isnan(test_np)
test_np[~test_observed_np] = 0

test_data = torch.tensor(test_np, dtype=torch.float32).to(device)
test_mask = torch.tensor(test_observed_np, dtype=torch.bool).to(device)

class TransformerModel(nn.Module):
    """Transformer encoder followed by a linear layer, mapping rows to rows of the same width.

    This cell redefines the class used for training, so the definition must
    stay call-compatible with it: the training loop calls ``model(x, mask)``
    while inference calls ``model(x)``. Both forms are accepted here so that
    re-running the training cell after this one still works.

    Args:
        input_dim: Number of features per row; used as the encoder's
            ``d_model`` and as the input/output size of ``fc_out``.
        hidden_dim: Size of the feed-forward sub-layer (``dim_feedforward``).
        nhead: Number of attention heads.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, input_dim, hidden_dim, nhead, num_layers):
        super().__init__()
        self.input_dim = input_dim
        # Kept as an attribute so the parameter names in `state_dict()` match
        # the checkpoint this cell loads.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=nhead, dim_feedforward=hidden_dim)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.fc_out = nn.Linear(input_dim, input_dim)

    def forward(self, x, mask=None):
        """Run the encoder and the output projection.

        Args:
            x: Input tensor whose last dimension is ``input_dim``.
            mask: Optional and ignored; accepted only so that callers passing
                an observation mask keep working.

        Returns:
            Tensor with the same shape as ``x``.
        """
        encoded = self.transformer_encoder(x)
        return self.fc_out(encoded)

# Use the hyperparameters selected by the grid search. `best_params` is set by
# the training cell; it must not be rebuilt from `hidden_dim`, `nhead`, ... here,
# because after the search those loop variables hold the *last* combination
# tried, which generally does not match the saved checkpoint's architecture.
if best_params is None:
    raise RuntimeError(
        "best_params is None: the grid search did not select any configuration, "
        "so there is no tuned model to load."
    )

input_dim = test_data.size(1)
hidden_dim = best_params['hidden_dim']
nhead = best_params['nhead']
num_layers = best_params['num_layers']

# The inputs live on `device`, so the model has to be moved there as well.
model = TransformerModel(input_dim, hidden_dim, nhead, num_layers).to(device)

model_path = "transformer_best.pth"
# map_location lets a checkpoint written on one backend load on another;
# weights_only restricts unpickling to tensors and plain containers.
state_dict = torch.load(model_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.eval()

with torch.no_grad():
    test_input = test_data.clone()
    test_input[~test_mask] = 0
    test_generated = model(test_input)

    # Keep every observed value and fill only the missing positions with
    # the model's predictions.
    test_completed = test_data.clone()
    test_completed[~test_mask] = test_generated[~test_mask]

# `.numpy()` only works on CPU tensors, so bring the result back first.
completed_df = pd.DataFrame(test_completed.cpu().numpy(), columns=test_df.columns)
output_file_path = "completed_test_data.csv"
completed_df.to_csv(output_file_path, index=False)

print(f"Completed test data saved to {output_file_path}")