In [None]:
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

# Define the dataset class
class RatingDataset(Dataset):
    """In-memory dataset of (user index, item index, rating) triples.

    Each of the three inputs is copied into a 1-D tensor at construction
    time: user and item indices as ``torch.int64`` and ratings as
    ``torch.float32``.

    Args:
        user_ids: Integer user indices (list, NumPy array, tensor, or pandas
            Series/Index).
        item_ids: Integer item indices, same length as ``user_ids``.
        ratings: Numeric ratings, same length as ``user_ids``.

    Raises:
        ValueError: If the three inputs do not have the same length.
    """

    def __init__(self, user_ids, item_ids, ratings):
        self.user_ids = self._as_tensor(user_ids, torch.int64)
        self.item_ids = self._as_tensor(item_ids, torch.int64)
        self.ratings = self._as_tensor(ratings, torch.float32)

        # Compare only the leading dimension; a mismatch would otherwise
        # surface later as an IndexError in the middle of an epoch.
        lengths = [tuple(t.shape[:1]) for t in (self.user_ids, self.item_ids, self.ratings)]
        if len(set(lengths)) != 1:
            raise ValueError(
                "user_ids, item_ids and ratings must have the same length, got shapes "
                f"{tuple(self.user_ids.shape)}, {tuple(self.item_ids.shape)}, "
                f"{tuple(self.ratings.shape)}"
            )

    @staticmethod
    def _as_tensor(values, dtype):
        """Copy ``values`` into a new tensor of the requested ``dtype``."""
        if isinstance(values, torch.Tensor):
            # torch.tensor(tensor) emits a UserWarning; clone explicitly.
            return values.detach().clone().to(dtype)
        if isinstance(values, (pd.Series, pd.Index)):
            # Hand torch the underlying array. Passing the pandas object
            # itself makes torch treat it as a generic Python sequence, which
            # is slow and may fail when the index is not the default 0..n-1.
            values = values.to_numpy()
        return torch.tensor(values, dtype=dtype)

    def __len__(self):
        """Return the number of rating triples."""
        return len(self.user_ids)

    def __getitem__(self, idx):
        """Return the ``(user_id, item_id, rating)`` tensors at ``idx``."""
        return self.user_ids[idx], self.item_ids[idx], self.ratings[idx]

# Define the NCF model
class NCF(nn.Module):
    """Neural collaborative filtering scorer: two embeddings feeding an MLP.

    A user embedding and an item embedding are concatenated and passed
    through a stack of ``Linear -> ReLU -> Dropout`` layers, followed by a
    final linear layer that produces one score per (user, item) pair.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        factors: Width of each embedding vector.
        hidden_layers: Output width of each hidden linear layer, in order.
        dropout: Dropout probability applied after every hidden layer.
    """

    def __init__(self, num_users, num_items, factors=20, hidden_layers=(64, 32, 16), dropout=0.2):
        # The default for hidden_layers is a tuple so that the shared default
        # object can never be mutated between instantiations.
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, factors)
        self.item_embedding = nn.Embedding(num_items, factors)
        self.fc_layers = nn.ModuleList()
        input_size = factors * 2  # Concatenate user and item embeddings
        for hidden_layer in hidden_layers:
            self.fc_layers.append(nn.Linear(input_size, hidden_layer))
            input_size = hidden_layer
        self.output = nn.Linear(input_size, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, user_indices, item_indices):
        """Score each (user, item) pair.

        Args:
            user_indices: Integer tensor of user indices.
            item_indices: Integer tensor of item indices, same shape as
                ``user_indices``.

        Returns:
            Tensor of scores with the same shape as ``user_indices``.
        """
        user_embedding = self.user_embedding(user_indices)
        item_embedding = self.item_embedding(item_indices)
        x = torch.cat([user_embedding, item_embedding], dim=-1)
        for layer in self.fc_layers:
            x = self.relu(layer(x))
            x = self.dropout(x)
        x = self.output(x)
        # Drop only the trailing size-1 feature axis. A bare squeeze() would
        # also remove the batch axis when the batch holds a single sample,
        # yielding a 0-d tensor whose shape no longer matches the targets.
        return x.squeeze(-1)

# Training function
def train_model(model, data_loader, criterion, optimizer, epochs=5):
    """Fit ``model`` on the batches yielded by ``data_loader``.

    The model is switched to training mode once, then every epoch performs
    one optimisation step per batch. After each complete window of 100
    batches the mean loss over that window is printed; a trailing window
    shorter than 100 batches is not reported.

    Args:
        model: Module invoked as ``model(users, items)``.
        data_loader: Iterable yielding ``(users, items, ratings)`` batches.
        criterion: Loss invoked as ``criterion(outputs, ratings)``.
        optimizer: Optimiser providing ``zero_grad()`` and ``step()``.
        epochs: Number of passes over ``data_loader``.

    Returns:
        None. Parameters are updated in place through ``optimizer``.
    """
    model.train()
    for epoch_number in range(1, epochs + 1):
        window_loss = 0.0
        for step, (users, items, ratings) in enumerate(data_loader, start=1):
            optimizer.zero_grad()
            batch_loss = criterion(model(users, items), ratings)
            batch_loss.backward()
            optimizer.step()
            window_loss += batch_loss.item()

            # Only report (and reset the accumulator) on every 100th batch.
            if step % 100:
                continue
            print(f'Epoch {epoch_number}, Batch {step}, Loss: {window_loss / 100:.4f}')
            window_loss = 0.0

# Prepare data
# Seed PyTorch's global RNG so weight initialisation, batch shuffling and
# dropout are repeatable when this cell is re-run on a fresh kernel.
SEED = 42
torch.manual_seed(SEED)

train_data = pd.read_csv("cs608_ip_train_v3.csv")

# Replace the raw IDs with integer category codes so they can index the
# embedding tables directly.
# NOTE(review): this overwrites the raw IDs in place, so everything downstream
# (including any written recommendations) is expressed in codes — confirm that
# is what the consumer of the output expects.
train_data['user_id'] = train_data['user_id'].astype('category').cat.codes
train_data['item_id'] = train_data['item_id'].astype('category').cat.codes

# cat.codes encodes missing values as -1, which is not a valid embedding index.
assert (train_data[['user_id', 'item_id']] >= 0).all().all(), \
    "training data contains missing user_id/item_id values"

# Pass plain NumPy arrays rather than pandas Series to the tensor constructor.
dataset = RatingDataset(
    train_data['user_id'].to_numpy(),
    train_data['item_id'].to_numpy(),
    train_data['rating'].to_numpy(),
)

# Create DataLoader
data_loader = DataLoader(dataset, batch_size=64, shuffle=True)

# Initialize model, loss, and optimizer
num_users = train_data['user_id'].nunique()
num_items = train_data['item_id'].nunique()
model = NCF(num_users, num_items)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
train_model(model, data_loader, criterion, optimizer)

# Recommendations are generated and written to disk in the cells below.

In [None]:
def generate_recommendations(model, num_users, num_items, top_k=50):
    """Return the highest-scoring item indices for every user.

    For each user index in ``range(num_users)`` the model scores every item
    index in ``range(num_items)`` and the ``top_k`` best are kept, ordered
    from highest to lowest score. The model is put into evaluation mode and
    is left in that mode on return.

    Args:
        model: ``nn.Module`` invoked as ``model(user_tensor, item_tensor)``
            with two equal-length int64 tensors, returning one score per pair.
        num_users: Number of users to generate recommendations for.
        num_items: Number of candidate items to score per user.
        top_k: Maximum number of items to keep per user. Values larger than
            ``num_items`` are clamped; ``0`` yields empty lists.

    Returns:
        A list of ``num_users`` lists of Python ints (item indices).

    Raises:
        ValueError: If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    model.eval()  # Set the model to evaluation mode

    # Slicing with [-top_k:] would return *every* item for top_k == 0, so the
    # number of items to keep is computed explicitly instead.
    k = min(top_k, num_items)

    # Build the inputs on the same device as the model's parameters so the
    # forward pass also works when the model lives on an accelerator.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # The candidate item indices are identical for every user: build them once.
    item_tensor = torch.arange(num_items, dtype=torch.int64, device=device)

    recommendations = []
    with torch.no_grad():
        for user_id in range(num_users):
            # Repeat the user index once per candidate item.
            user_tensor = torch.full_like(item_tensor, user_id)

            # reshape(-1) keeps the scores 1-D even if the model collapses a
            # single-item batch to a 0-d tensor.
            scores = model(user_tensor, item_tensor).reshape(-1)

            # topk returns indices sorted by descending score.
            top_item_indices = torch.topk(scores, k).indices
            recommendations.append(top_item_indices.cpu().tolist())

    return recommendations

In [None]:
import zipfile

# Number of users and items
num_users = train_data["user_id"].nunique()
num_items = train_data["item_id"].nunique()

# Generate recommendations for all users
top_k_recommendations = generate_recommendations(model, num_users, num_items)
assert len(top_k_recommendations) == num_users, "expected one recommendation list per user"

# Write one line per user, in user-index order: space-separated item indices,
# best first.
with open("submission.txt", "w") as submission_file:
    submission_file.writelines(
        " ".join(map(str, user_recommendations)) + "\n"
        for user_recommendations in top_k_recommendations
    )

# zip the submission file
# ZipFile defaults to ZIP_STORED (no compression); request DEFLATE explicitly
# so the archive is actually compressed.
with zipfile.ZipFile('submission.zip', 'w', compression=zipfile.ZIP_DEFLATED) as archive:
    archive.write('submission.txt')