# Factorization Machine (FM)

## Imports

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import math

## Data pre processing

In [2]:
# Load and preprocess data
def load_data(file_path):
    df = pd.read_csv(file_path)

    # Map author_id (user) and hotel_id (item) to continuous indices
    user_mapping = {id: idx for idx, id in enumerate(df['author_id'].unique())}
    item_mapping = {id: idx for idx, id in enumerate(df['hotel_id'].unique())}

    df['author_id'] = df['author_id'].map(user_mapping)
    df['hotel_id'] = df['hotel_id'].map(item_mapping)

    num_users = len(user_mapping)
    num_items = len(item_mapping)

    return df, num_users, num_items

data_file = "../data/combined_filtered_reviews.csv"
df, num_users, num_items = load_data(data_file)
print(f"Number of users: {num_users}, Number of items: {num_items}")

# Create Dataset and DataLoader
class FMDataset(Dataset):
    def __init__(self, df):
        self.users = torch.tensor(df['author_id'].values, dtype=torch.long)
        self.items = torch.tensor(df['hotel_id'].values, dtype=torch.long)
        self.ratings = torch.tensor(df['rating'].values, dtype=torch.float32)

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]

# Split data into train and test sets
dataset = FMDataset(df)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)


Number of users: 189992, Number of items: 329340


## Define FM

In [3]:
class FactorizationMachine(nn.Module):
    def __init__(self, num_users, num_items, latent_dim=10):
        super(FactorizationMachine, self).__init__()
        self.user_embedding = nn.Embedding(num_users, latent_dim)
        self.item_embedding = nn.Embedding(num_items, latent_dim)

        # Bias terms for user and item
        self.user_bias = nn.Embedding(num_users, 1)
        self.item_bias = nn.Embedding(num_items, 1)

        # Global bias
        self.global_bias = nn.Parameter(torch.tensor(0.0))

    def forward(self, user, item):
        # Embedding lookup
        user_emb = self.user_embedding(user)  # Shape: [batch_size, latent_dim]
        item_emb = self.item_embedding(item)  # Shape: [batch_size, latent_dim]

        # Compute dot product
        interaction = torch.sum(user_emb * item_emb, dim=1)

        # Add bias terms and global bias
        prediction = interaction + self.user_bias(user).squeeze() + self.item_bias(item).squeeze() + self.global_bias

        return prediction


## Model

In [4]:
# Initialize model
latent_dim = 20  # old 10, now using higher values

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
fm_model = FactorizationMachine(num_users, num_items, latent_dim).to(device)

# Define loss and optimizer
criterion = nn.MSELoss()
# optimizer = optim.Adam(fm_model.parameters(), lr=0.01) #old
optimizer = optim.Adam(fm_model.parameters(), lr=0.01, weight_decay=1e-4)



## Traim model

In [5]:
# Train the FM model
epochs = 10

for epoch in range(epochs):
    fm_model.train()
    train_loss = 0
    for user, item, rating in train_loader:
        user, item, rating = user.to(device), item.to(device), rating.to(device)

        optimizer.zero_grad()
        predictions = fm_model(user, item)
        loss = criterion(predictions, rating)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)

    # Validation
    fm_model.eval()
    val_loss = 0
    with torch.no_grad():
        for user, item, rating in test_loader:
            user, item, rating = user.to(device), item.to(device), rating.to(device)
            predictions = fm_model(user, item)
            val_loss += criterion(predictions, rating).item()
    val_loss /= len(test_loader)

    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")


Epoch 1, Train Loss: 1.0978, Validation Loss: 0.9494
Epoch 2, Train Loss: 0.9496, Validation Loss: 0.9489
Epoch 3, Train Loss: 0.9495, Validation Loss: 0.9480
Epoch 4, Train Loss: 0.9495, Validation Loss: 0.9491
Epoch 5, Train Loss: 0.9496, Validation Loss: 0.9487
Epoch 6, Train Loss: 0.9496, Validation Loss: 0.9492
Epoch 7, Train Loss: 0.9497, Validation Loss: 0.9484
Epoch 8, Train Loss: 0.9495, Validation Loss: 0.9482
Epoch 9, Train Loss: 0.9494, Validation Loss: 0.9505
Epoch 10, Train Loss: 0.9495, Validation Loss: 0.9491


## Evaluation and RMSE

In [6]:
def compute_rmse(model, data_loader):
    model.eval()
    mse_loss = 0
    total_samples = 0

    with torch.no_grad():
        for user, item, rating in data_loader:
            user, item, rating = user.to(device), item.to(device), rating.to(device)
            predictions = model(user, item)
            mse_loss += torch.sum((predictions - rating) ** 2).item()
            total_samples += len(rating)

    rmse = math.sqrt(mse_loss / total_samples)
    return rmse

test_rmse = compute_rmse(fm_model, test_loader)
print(f"Test RMSE: {test_rmse:.4f}")


Test RMSE: 0.9742
