In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Check that MPS is available
if not torch.backends.mps.is_available():
    if not torch.backends.mps.is_built():
        print(
            "MPS not available because the current PyTorch install was not "
            "built with MPS enabled."
        )
    else:
        print(
            "MPS not available because the current MacOS version is not 12.3+ "
            "and/or you do not have an MPS-enabled device on this machine."
        )
    mps_device = None
else:
    mps_device = torch.device("mps")

if mps_device is not None:
    device = mps_device
    print("Using MPS")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
else:
    device = torch.device("cpu")
    print("Using CPU")

device

Using MPS


device(type='mps')

## Load and Prepare Data

In [2]:
train_data = pd.read_csv("cs608_ip_train_v3.csv")
probe_data = pd.read_csv("cs608_ip_probe_v3.csv")

# Encoding categorical features
le_user = LabelEncoder()
le_item = LabelEncoder()

train_data["user_id"] = le_user.fit_transform(train_data["user_id"])
train_data["item_id"] = le_item.fit_transform(train_data["item_id"])

probe_data["user_id"] = le_user.transform(probe_data["user_id"])
probe_data["item_id"] = le_item.transform(probe_data["item_id"])


# Preparing torch datasets and dataloaders
class RatingsDataset(Dataset):
    def __init__(self, users, items, ratings):
        self.users = torch.tensor(users, dtype=torch.long)
        self.items = torch.tensor(items, dtype=torch.long)
        self.ratings = torch.tensor(ratings, dtype=torch.float)

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]


train_dataset = RatingsDataset(
    train_data["user_id"], train_data["item_id"], train_data["rating"]
)
test_dataset = RatingsDataset(
    probe_data["user_id"], probe_data["item_id"], probe_data["rating"]
)

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

## Define DeepFM Model

In [3]:
class DeepFM(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=10, hidden_layers=[32, 16]):
        super(DeepFM, self).__init__()
        self.user_embeddings = nn.Embedding(num_users, embedding_dim)
        self.item_embeddings = nn.Embedding(num_items, embedding_dim)
        self.fm = nn.Bilinear(embedding_dim, embedding_dim, 1)
        self.dnn = nn.Sequential(
            nn.Linear(2 * embedding_dim, hidden_layers[0]),
            nn.ReLU(),
            nn.Linear(hidden_layers[0], hidden_layers[1]),
            nn.ReLU(),
            nn.Linear(hidden_layers[1], 1),
        )

    def forward(self, users, items):
        user_embedded = self.user_embeddings(users)
        item_embedded = self.item_embeddings(items)
        fm_part = self.fm(user_embedded, item_embedded)
        dnn_input = torch.cat((user_embedded, item_embedded), dim=1)
        dnn_output = self.dnn(dnn_input)
        # output = torch.sigmoid(fm_part + dnn_output)
        output = fm_part + dnn_output
        return output.flatten()

# Initialize model, loss, and optimizer
num_users = train_data["user_id"].nunique()
num_items = train_data["item_id"].nunique()

model = DeepFM(num_users, num_items).to(device)

## Train Model

In [4]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

def evaluate_model(model, test_loader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for users, items, ratings in test_loader:
            users = users.to(device)  # Move data to GPU
            items = items.to(device)  # Move data to GPU
            ratings = ratings.to(device)  # Move data to GPU
            outputs = model(users, items)
            loss = criterion(outputs, ratings).cpu().numpy()
            total_loss += loss.item()
    return total_loss / len(test_loader)

def train_model(model, train_loader, epochs=5):
    min_test_loss = float("inf")
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for batch_idx, (users, items, ratings) in enumerate(train_loader):
            users = users.to(device)  # Move data to GPU
            items = items.to(device)  # Move data to GPU
            ratings = ratings.to(device)  # Move data to GPU
            optimizer.zero_grad()
            outputs = model(users, items)
            loss = criterion(outputs, ratings)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if batch_idx % 100 == 99:
                print(
                    f"Epoch {epoch+1}, Batch {batch_idx+1}, Loss: {running_loss / 100:.4f}"
                )
                running_loss = 0.0

        test_loss = evaluate_model(model, test_loader)
        print(f"Epoch {epoch+1}, Test Loss: {test_loss:.4f}")
        if test_loss < min_test_loss:
            min_test_loss = test_loss
            torch.save(model.state_dict(), "deepfm_model.pth")
            print("Model saved")

train_model(model, train_loader, epochs=20)

Epoch 1, Batch 100, Loss: 14.4116
Epoch 1, Batch 200, Loss: 3.0580
Epoch 1, Batch 300, Loss: 2.1041
Epoch 1, Batch 400, Loss: 1.7954
Epoch 1, Batch 500, Loss: 1.7047
Epoch 1, Batch 600, Loss: 1.6283
Epoch 1, Batch 700, Loss: 1.5996
Epoch 1, Test Loss: 1.5549
Model saved
Epoch 2, Batch 100, Loss: 1.5407
Epoch 2, Batch 200, Loss: 1.5004
Epoch 2, Batch 300, Loss: 1.4869
Epoch 2, Batch 400, Loss: 1.4847
Epoch 2, Batch 500, Loss: 1.4225
Epoch 2, Batch 600, Loss: 1.4687
Epoch 2, Batch 700, Loss: 1.4161
Epoch 2, Test Loss: 1.4229
Model saved
Epoch 3, Batch 100, Loss: 1.3808
Epoch 3, Batch 200, Loss: 1.3793
Epoch 3, Batch 300, Loss: 1.3793
Epoch 3, Batch 400, Loss: 1.3836
Epoch 3, Batch 500, Loss: 1.3792
Epoch 3, Batch 600, Loss: 1.3880
Epoch 3, Batch 700, Loss: 1.3885
Epoch 3, Test Loss: 1.3765
Model saved
Epoch 4, Batch 100, Loss: 1.3489
Epoch 4, Batch 200, Loss: 1.3312
Epoch 4, Batch 300, Loss: 1.3521
Epoch 4, Batch 400, Loss: 1.3458
Epoch 4, Batch 500, Loss: 1.3665
Epoch 4, Batch 600, Loss

In [5]:
# Load the best model
model.load_state_dict(torch.load("deepfm_model.pth"))
test_loss = evaluate_model(model, test_loader)
test_loss

1.3286203998887078

In [6]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, accuracy_score

model.eval()  # Set the model to evaluation mode

# To store predictions and actual values
predictions, actuals = [], []

with torch.no_grad():
    for users, items, ratings in test_loader:
        users = users.to(device)  # Move data to GPU
        items = items.to(device)  # Move data to GPU
        ratings = ratings.to(device)  # Move data to GPU
        outputs = model(users, items)
        predictions.extend(outputs.cpu().numpy())
        actuals.extend(ratings.cpu().numpy())

# Calculate evaluation metrics
predictions = np.array(predictions)
actuals = np.array(actuals)
rmse = np.sqrt(mean_squared_error(actuals, predictions))
accuracy = accuracy_score(actuals, predictions.round())

print(f"RMSE: {rmse}")
print(f"Accuracy: {accuracy}")

RMSE: 1.152674913406372
Accuracy: 0.3543301138509685


In [7]:
from tqdm.notebook import tqdm

def generate_recommendations(model, num_users, num_items, top_k=50):
    model.eval()  # Set the model to evaluation mode
    recommendations = []

    # Iterate over all users
    for user_id in tqdm(range(num_users)):
        user_tensor = torch.tensor(
            [user_id] * num_items, dtype=torch.int64
        ).to(device)  # Repeat user ID for each item
        item_tensor = torch.tensor(range(num_items), dtype=torch.int64).to(device)  # All item IDs

        # Predict scores for all items for this user
        with torch.no_grad():
            scores = (
                model(user_tensor, item_tensor).cpu().numpy()
            )  # Get scores and move to CPU

        # Get the indices of the top k scores
        top_item_indices = scores.argsort()[-top_k:][
            ::-1
        ]  # Indices of top scoring items

        # Append to the list of recommendations
        recommendations.append(top_item_indices.tolist())

    return recommendations

In [8]:
%%time

import zipfile

# Number of users and items
num_users = train_data["user_id"].nunique()
num_items = train_data["item_id"].nunique()

# Generate recommendations for all users
top_k_recommendations = generate_recommendations(model, num_users, num_items)

with open("submission.txt", "w") as file:
    for user_recommendations in top_k_recommendations:
        file.write(" ".join(map(str, user_recommendations)) + "\n")

# zip the submission file
with zipfile.ZipFile('submission.zip', 'w') as file:
    file.write('submission.txt')

  0%|          | 0/21124 [00:00<?, ?it/s]

CPU times: user 1min 24s, sys: 6.32 s, total: 1min 30s
Wall time: 1min 57s
