In [1]:
import pandas as pd
users = pd.read_csv(
    "ml-1m/users.dat",
    sep="::",
    names=["user_id", "sex", "age_group", "occupation", "zip_code"],
    engine='python'
)

movies = pd.read_csv(
    "ml-1m/movies.dat", sep="::", names=["movie_id", "title", "genres"],
    engine='python',encoding='ISO-8859-1'
)

ratings = pd.read_csv(
    "ml-1m/ratings.dat",
    sep="::",
    names=["user_id", "movie_id", "rating", "unix_timestamp"],
    engine='python'
)


In [2]:
print("Users data:")
print(users.head())
print("\nMovies data:")
print(movies.head())
print("\nRatings data:")
print(ratings.head())

Users data:
   user_id sex  age_group  occupation zip_code
0        1   F          1          10    48067
1        2   M         56          16    70072
2        3   M         25          15    55117
3        4   M         45           7    02460
4        5   M         25          20    55455

Movies data:
   movie_id                               title                        genres
0         1                    Toy Story (1995)   Animation|Children's|Comedy
1         2                      Jumanji (1995)  Adventure|Children's|Fantasy
2         3             Grumpier Old Men (1995)                Comedy|Romance
3         4            Waiting to Exhale (1995)                  Comedy|Drama
4         5  Father of the Bride Part II (1995)                        Comedy

Ratings data:
   user_id  movie_id  rating  unix_timestamp
0        1      1193       5       978300760
1        1       661       3       978302109
2        1       914       3       978301968
3        1      3408       4 

In [3]:
# 将原始ID转换为连续的索引，便于embedding层使用
def generate_remap_id_dict(df,col):
    ids = df[df[col].notnull()][col].unique().tolist()
    ids = sorted(ids)
    id_map_dict = {x: i+1 for i, x in enumerate(ids)}
    id_map_dict["UNK"]=0
    df[f"{col}_index"] = df[col].fillna("UNK").map(id_map_dict)
    return id_map_dict


In [4]:
user_id_map_dict=generate_remap_id_dict(users,col='user_id')
user_sex_map_dict=generate_remap_id_dict(users,col='sex')
user_age_group_map_dict=generate_remap_id_dict(users,col='age_group')
user_occupation_map_dict=generate_remap_id_dict(users,col='occupation')
movie_id_map_dict = generate_remap_id_dict(movies,col='movie_id')


评分缩放 1-5 => 0-1

In [5]:
from sklearn.preprocessing import MinMaxScaler
min_max_scaler = MinMaxScaler()
ratings["norm_rating"] = min_max_scaler.fit_transform(
    ratings["rating"].values.reshape(-1, 1))[:, 0]

In [6]:
# save scaler
import joblib
joblib.dump(min_max_scaler, 'min_max_scaler.save')

['min_max_scaler.save']

In [7]:
users.head()

Unnamed: 0,user_id,sex,age_group,occupation,zip_code,user_id_index,sex_index,age_group_index,occupation_index
0,1,F,1,10,48067,1,1,1,11
1,2,M,56,16,70072,2,2,7,17
2,3,M,25,15,55117,3,2,3,16
3,4,M,45,7,2460,4,2,5,8
4,5,M,25,20,55455,5,2,3,21


In [8]:
df_user_full_matrix = users.merge(ratings[['user_id', 'movie_id', 'norm_rating','unix_timestamp']], on='user_id', how='left')
df_user_full_matrix['movie_id_index'] = ratings['movie_id'].map(movie_id_map_dict)
df_user_full_matrix['user_id_index'] = ratings['user_id'].map(user_id_map_dict)
df_user_full_matrix['sex_index'] = df_user_full_matrix['sex'].map(user_sex_map_dict)
df_user_full_matrix['age_group_index'] = df_user_full_matrix['age_group'].map(user_age_group_map_dict)
df_user_full_matrix['occupation_index'] = df_user_full_matrix['occupation'].map(user_occupation_map_dict)
df_user_full_matrix = df_user_full_matrix[['user_id_index', 'sex_index', 'age_group_index', 'occupation_index', 'movie_id_index', 'norm_rating','unix_timestamp']]
df_user_full_matrix.head()

Unnamed: 0,user_id_index,sex_index,age_group_index,occupation_index,movie_id_index,norm_rating,unix_timestamp
0,1,1,1,11,1177,1.0,978300760
1,1,1,1,11,656,0.5,978302109
2,1,1,1,11,903,0.5,978301968
3,1,1,1,11,3340,0.75,978300275
4,1,1,1,11,2287,1.0,978824291


In [9]:
# sort with user_id and unix_timestamp
df_user_full_matrix = df_user_full_matrix.sort_values(['user_id_index', 'unix_timestamp'])
df_user_full_matrix.head()

Unnamed: 0,user_id_index,sex_index,age_group_index,occupation_index,movie_id_index,norm_rating,unix_timestamp
31,1,1,1,11,3118,0.75,978300019
22,1,1,1,11,1251,1.0,978300055
27,1,1,1,11,1673,0.75,978300055
37,1,1,1,11,1010,1.0,978300055
24,1,1,1,11,2272,0.5,978300103



### 输入特征
```
{
    'user_id_index': 用户ID索引,
    'movie_sequence': [电影1, 电影2, 电影3, 目标电影],  # 长度为4的序列
    'rating_sequence': [评分1, 评分2, 评分3, 1.0],      # 前3个真实评分+掩码值
    'target_movie': 目标电影ID,                          # 序列最后一个电影
    'target_rating': 真实评分,                           # 预测目标
    'sex_index': 性别特征,
    'age_group_index': 年龄组索引,
    'occupation_index': 职业索引
}
```

In [10]:
def gen_sequence_data(df, window_size, step):
    sequences = []
    
    for user_id, user_data in df.groupby('user_id_index'):
        user_data = user_data.reset_index(drop=True)
    
        # 需要至少window_size个数据点才能创建一个序列
        for i in range(0, len(user_data) - window_size, step):
            sequence = user_data.iloc[i:i + window_size]
            movie_sequence = sequence['movie_id_index'].tolist()  
            rating_sequence = sequence['norm_rating'].tolist()    
            target_movie = sequence['movie_id_index'].iloc[-1]         # 最后一个作为目标
            target_rating = sequence['norm_rating'].iloc[-1]           # 最后一个作为目标
            
            # 对序列中的最后一个评分进行掩码处理（防止信息泄露）
            masked_rating_sequence = rating_sequence[:-1] + [1.0]  
            
            sequences.append({
                'user_id_index': user_id,
                'movie_sequence': movie_sequence,
                'rating_sequence': masked_rating_sequence,
                'target_movie': target_movie,
                'target_rating': target_rating,
                'sex_index': sequence['sex_index'].iloc[0],
                'age_group_index': sequence['age_group_index'].iloc[0],
                'occupation_index': sequence['occupation_index'].iloc[0]
            })
    return pd.DataFrame(sequences)

df_user_view = gen_sequence_data(df_user_full_matrix,window_size=4,step=2)
df_user_view.head()

Unnamed: 0,user_id_index,movie_sequence,rating_sequence,target_movie,target_rating,sex_index,age_group_index,occupation_index
0,1,"[3118, 1251, 1673, 1010]","[0.75, 1.0, 0.75, 1.0]",1010,1.0,1,1,11
1,1,"[1673, 1010, 2272, 1769]","[0.75, 1.0, 0.5, 1.0]",1769,1.0,1,1,11
2,1,"[2272, 1769, 3340, 2736]","[0.5, 1.0, 0.75, 1.0]",2736,1.0,1,1,11
3,1,"[3340, 2736, 1190, 1177]","[0.75, 1.0, 0.75, 1.0]",1177,1.0,1,1,11
4,1,"[1190, 1177, 712, 258]","[0.75, 1.0, 0.5, 1.0]",258,0.75,1,1,11


In [11]:
import numpy as np
random_selection = np.random.rand(len(df_user_view)) <= 0.85
train_data = df_user_view[random_selection]
test_data = df_user_view[~random_selection]
print(len(train_data),len(test_data))

415550 73956


In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Dict, List, Optional, Union
import math

class SequenceEmbedding(nn.Module):
    def __init__(self, num_item, embed_dim,seq_length):
        super().__init__()
        self.item_embedding = nn.Embedding(num_item, embed_dim, padding_idx=0)
        self.position_embedding = nn.Embedding(seq_length, embed_dim)
        
        self.layer_norm = nn.LayerNorm(embed_dim)  # add
        
    def forward(self, movie_sequence, rating_sequence=None):
        batch_size, seq_len = movie_sequence.shape
        
        item_embeds = self.item_embedding(movie_sequence)  # (B, L, D)
        
        # Position embeddings
        positions = torch.arange(seq_len, device=movie_sequence.device)
        pos_embeds = self.position_embedding(positions).unsqueeze(0).expand(batch_size, -1, -1)

        sequence_embeds = item_embeds + pos_embeds  # (B, L, D)

        # Rating weighting (BST uses ratings as attention weights)
        if rating_sequence is not None:
            rating_weights = rating_sequence.unsqueeze(-1)  # (B, L, 1)
            sequence_embeds = sequence_embeds * rating_weights
            
        return self.layer_norm(sequence_embeds)

class EmbeddingLayer(nn.Module):
    """
    BST模型的统一Embedding层
    """
    def __init__(self, 
                 embed_configs: Dict[str, Dict],
                 dropout: float = 0.2,
                 initialization: str = "xavier"):
        super().__init__()
        
        self.embed_configs = embed_configs
        self.dropout = dropout
        self.embed_dim = embed_configs['position']['embed_dim']
        self.seq_len = embed_configs['position']['num_embed']
        
        
        # 创建embedding层
        self.embeddings = nn.ModuleDict() # 各特征的Embedding
        self.feature_types = {}
        
        for feature_name, config in embed_configs.items():
            embed_dim_feat = config.get('embed_dim',self.embed_dim)
            num_embeddings = config['num_embed']
            feature_type = config.get('type', 'categorical')
            
            # 根据特征类型创建不同的embedding
            if feature_type == 'categorical':
                self.embeddings[feature_name] = nn.Embedding(
                    num_embeddings, embed_dim_feat, padding_idx=0
                )
            elif feature_type == 'sequence':
                # 序列特征
                seq_length = config.get('seq_length', self.seq_len)
                self.embeddings[feature_name] = SequenceEmbedding(
                    num_embeddings, embed_dim_feat, seq_length
                )
            
            self.feature_types[feature_name] = feature_type
            
            # 初始化embedding权重
            self._init_embedding(self.embeddings[feature_name], initialization)
            # inner
        # outer
        # Dropout层
        self.embedding_dropout = nn.Dropout(dropout)

        # Transformer输入维度
        self.transformer_dim = embed_configs['item']['embed_dim']
        
        # MLP输入维度：原始特征 + 交叉特征 + 目标电影 + 序列特征
        user_dim = embed_configs['user']['embed_dim']  # 32
        occupation_dim = embed_configs['occupation']['embed_dim']  # 32
        age_group_dim = embed_configs['age_group']['embed_dim']  # 32
        sex_dim = embed_configs['sex']['embed_dim']  # 32
        target_movie_dim = embed_configs['item']['embed_dim']  # 32
        sequence_dim = self.transformer_dim * self.seq_len  # 64 * 4 = 256
        
        self.total_dim = (user_dim + occupation_dim + age_group_dim + sex_dim +  # 原始特征: 128
                         user_dim + occupation_dim + age_group_dim + sex_dim +   # 交叉特征: 128  
                         target_movie_dim +  # 目标电影: 32
                         sequence_dim)  # 序列特征: 256
        # 总计: 128 + 128 + 32 + 256 = 544
        
        
    def _init_embedding(self, embedding_layer, init_type):
        """初始化embedding权重"""
        if hasattr(embedding_layer, 'weight'):
            if init_type == "xavier":
                nn.init.xavier_uniform_(embedding_layer.weight)
            elif init_type == "normal":
                nn.init.normal_(embedding_layer.weight, std=0.1)
            elif init_type == "kaiming":
                nn.init.kaiming_uniform_(embedding_layer.weight)
    
    def forward(self, features: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        embeddings = {}
        
        for feature_name, feature_tensor in features.items():
            if feature_name in self.embeddings:
                # 获取embedding
                embed = self.embeddings[feature_name](feature_tensor)
                # 应用dropout
                embed = self.embedding_dropout(embed)
                embeddings[feature_name] = embed
        return embeddings


In [23]:
# embedding config

num_user = len(user_id_map_dict)
num_movie = len(movie_id_map_dict)
num_occupation = len(user_occupation_map_dict)
num_age_group = len(user_age_group_map_dict)
num_sex = len(user_sex_map_dict)

embed_configs = {}
EMED_DIM=32
SEQUENCE_SIZE = 4

embed_configs['item']={"embed_dim":EMED_DIM,"num_embed":num_movie}
embed_configs['position'] = {"embed_dim":EMED_DIM,"num_embed":SEQUENCE_SIZE}
# sequence类型
embed_configs['sequence'] = {
    "embed_dim": EMED_DIM, 
    "num_embed": num_movie, 
    "type": "sequence",  
    "seq_length": SEQUENCE_SIZE
}

embed_configs['user']={"embed_dim":EMED_DIM,"num_embed":num_user}
embed_configs['sex'] = {"embed_dim": EMED_DIM, "num_embed":num_sex }
embed_configs['occupation']={"embed_dim":EMED_DIM,"num_embed":num_occupation}
embed_configs['age_group']={"embed_dim":EMED_DIM,"num_embed":num_age_group}

In [24]:
embedding_layer = EmbeddingLayer(embed_configs)
print(embedding_layer.embeddings)

ModuleDict(
  (item): Embedding(3884, 32, padding_idx=0)
  (position): Embedding(4, 32, padding_idx=0)
  (sequence): SequenceEmbedding(
    (item_embedding): Embedding(3884, 32, padding_idx=0)
    (position_embedding): Embedding(4, 32)
    (layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
  )
  (user): Embedding(6041, 32, padding_idx=0)
  (sex): Embedding(3, 32, padding_idx=0)
  (occupation): Embedding(22, 32, padding_idx=0)
  (age_group): Embedding(8, 32, padding_idx=0)
)


In [25]:
class TransformerBlock(nn.Module):
    def __init__(self, input_size, output_size, num_heads, dropout_rate):
        super(TransformerBlock, self).__init__()

        self.multihead_attention = nn.MultiheadAttention(input_size, num_heads)
        self.layer_norm1 = nn.LayerNorm(input_size)

        self.feed_forward = nn.Sequential(
            nn.Linear(input_size, 4*input_size),
            nn.ReLU(),
            nn.Linear(4*input_size, output_size),
            nn.Dropout(dropout_rate)
        )
        self.layer_norm2 = nn.LayerNorm(output_size)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Multi-head Attention
        attn_output, _ = self.multihead_attention(x, x, x)
        x = self.layer_norm1(x + attn_output)

        # Feed-Forward Network
        ff_output = self.feed_forward(x)
        x = self.layer_norm2(x + ff_output)
        x = self.dropout(x)
        return x

class TransformerLayer(nn.Module):
    def __init__(self, d_model, num_heads=8, dropout_rate=0.2, num_layers=3):
        super(TransformerLayer, self).__init__()

        self.transformer_blocks = nn.ModuleList([
            TransformerBlock(d_model, d_model, num_heads, dropout_rate)
            for _ in range(num_layers)
        ])

    def forward(self, x):
        for transformer_block in self.transformer_blocks:
            x = transformer_block(x)
        return x

In [26]:
class MLP(nn.Module):
    def __init__(self, dropout=0.2, hidden_units=[512, 256,128]):
        super(MLP, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.layers = nn.ModuleList()
        for i in range(len(hidden_units) - 1):
            self.layers.append(nn.Linear(hidden_units[i], hidden_units[i + 1]))
            self.layers.append(nn.LeakyReLU())
            self.layers.append(nn.Dropout(p=dropout))
        self.fc = nn.Linear(hidden_units[-1],1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        logits = self.fc(x)
        output = self.sigmoid(logits)
        return output

In [27]:
class BSTRecommender(nn.Module):    
    def __init__(self,embedding_layer,num_heads=8,transformer_num_layer=3,drop_out=0.2):
        super().__init__()
        # Embedding params
        self.seq_len = embedding_layer.seq_len
        self.totoal_dim = embedding_layer.total_dim
        self.transformer_dim = embedding_layer.transformer_dim
        
        self.drouput = drop_out
        self.num_heads = num_heads
        self.transformer_num_layer = transformer_num_layer
        
        # Embedding
        self.embedding_layer = embedding_layer
        
        # Transformer
        self.transformer_layer = TransformerLayer(d_model=self.transformer_dim,
                                            num_heads=self.num_heads,
                                            dropout_rate=self.drouput,
                                            num_layers=self.transformer_num_layer)
        
        # MLP
        self.mlp = MLP(dropout=self.drouput, hidden_units=[self.totoal_dim, 256, 64])
            
    def forward(self, batch):
        batch_size = batch['movie_sequence'].shape[0]
        
        # 用户特征
        user_embed = self.embedding_layer.embeddings['user'](batch['user_id_index'])
        occupation_embed = self.embedding_layer.embeddings['occupation'](batch['occupation_index'])
        age_group_embed = self.embedding_layer.embeddings['age_group'](batch['age_group_index'])
        sex_embed = self.embedding_layer.embeddings['sex'](batch['sex'])
        target_movie_embed = self.embedding_layer.embeddings['item'](batch['target_movie'])
        # 序列特征
        sequence_embeds = self.embedding_layer.embeddings['sequence'](
            batch['movie_sequence'], 
            batch.get('rating_sequence', None)
        )
        
        # Transformer编码
        transformer_output = self.transformer_layer(sequence_embeds)
        sequence_pooled = transformer_output.view(batch_size, -1) # Flatten
        
        # 特征交叉
        user_cross = torch.mul(user_embed, target_movie_embed)
        occupation_cross = torch.mul(occupation_embed, target_movie_embed)
        age_group_cross = torch.mul(age_group_embed, target_movie_embed)
        sex_cross = torch.mul(sex_embed, target_movie_embed)
        # 特征融合
        features = torch.cat([
        user_embed, occupation_embed, age_group_embed, sex_embed, # 原始特征
        user_cross, occupation_cross, age_group_cross, sex_cross, # 交叉特征
        target_movie_embed, # 目标电影特征
        sequence_pooled # 序列特征
        ], dim=-1)
        
        # MLP预测
        output = self.mlp(features)  # Use self.mlp, not self.mlp_predictor
        
        return output.squeeze(-1)


In [28]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)
model = BSTRecommender(embedding_layer=embedding_layer)
model.to(DEVICE)
model.train()

cuda


BSTRecommender(
  (embedding_layer): EmbeddingLayer(
    (embeddings): ModuleDict(
      (item): Embedding(3884, 32, padding_idx=0)
      (position): Embedding(4, 32, padding_idx=0)
      (sequence): SequenceEmbedding(
        (item_embedding): Embedding(3884, 32, padding_idx=0)
        (position_embedding): Embedding(4, 32)
        (layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
      )
      (user): Embedding(6041, 32, padding_idx=0)
      (sex): Embedding(3, 32, padding_idx=0)
      (occupation): Embedding(22, 32, padding_idx=0)
      (age_group): Embedding(8, 32, padding_idx=0)
    )
    (embedding_dropout): Dropout(p=0.2, inplace=False)
  )
  (transformer_layer): TransformerLayer(
    (transformer_blocks): ModuleList(
      (0-2): 3 x TransformerBlock(
        (multihead_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)
        )
        (layer_norm1): LayerNorm((32,), eps=1e-05, ele

In [37]:
from torch.utils.data import Dataset, DataLoader

class BSTDataset(Dataset):
    def __init__(self, data, device):
        self.data = data
        self.device = device

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        
        return {
            'user_id_index': torch.tensor(row['user_id_index'], dtype=torch.long).to(self.device),
            'occupation_index': torch.tensor(row['occupation_index'], dtype=torch.long).to(self.device),
            'age_group_index': torch.tensor(row['age_group_index'], dtype=torch.long).to(self.device),
            'sex': torch.tensor(row['sex_index'], dtype=torch.long).to(self.device),
            'movie_sequence': torch.tensor(row['movie_sequence'], dtype=torch.long).to(self.device),
            'rating_sequence': torch.tensor(row['rating_sequence'], dtype=torch.float).to(self.device),
            'target_movie': torch.tensor(row['target_movie'], dtype=torch.long).to(self.device),  
            'target_rating': torch.tensor(row['target_rating'], dtype=torch.float).to(self.device)  
        }
# Create datasets
train_dataset = BSTDataset(train_data.reset_index(drop=True), DEVICE)
test_dataset = BSTDataset(test_data.reset_index(drop=True), DEVICE)

# Create dataloaders
batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [38]:
# Training config
epochs = 20
learning_rate = 0.001
criterion = nn.L1Loss()  
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [39]:
from tqdm import tqdm

test_scores = []
# Training loop
model.train()
for epoch in range(epochs):
    total_loss = 0
    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}', postfix={'loss': 0} )
    
    for batch in progress_bar:
        optimizer.zero_grad()
        
        # Forward pass
        predictions = model(batch)
        
        # Calculate loss
        loss = criterion(predictions, batch['target_rating'])
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        progress_bar.set_postfix({'loss': loss.item()})
    
    avg_loss = total_loss / len(train_loader)
    avg_test_loss, predictions_list, targets_list = test()
    
    print(f'Epoch {epoch+1}, Average Loss: {avg_loss:.4f} Test loss: {avg_test_loss:.4f}')
    # Save checkpoint after each epoch
    test_scores.append(avg_test_loss)
    torch.save(model.state_dict(), f'./checkpoints/bst_model_epoch_{epoch+1}.pth')

Epoch 1/20:   0%|          | 0/1624 [00:00<?, ?it/s, loss=0]

Epoch 1/20: 100%|██████████| 1624/1624 [04:42<00:00,  5.74it/s, loss=0.166]
Testing: 100%|██████████| 289/289 [00:39<00:00,  7.25it/s]


Epoch 1, Average Loss: 0.1590 Test loss: 0.1698


Epoch 2/20: 100%|██████████| 1624/1624 [04:00<00:00,  6.75it/s, loss=0.138]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.09it/s]


Epoch 2, Average Loss: 0.1535 Test loss: 0.1698


Epoch 3/20: 100%|██████████| 1624/1624 [04:00<00:00,  6.76it/s, loss=0.134]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.12it/s]


Epoch 3, Average Loss: 0.1511 Test loss: 0.1693


Epoch 4/20: 100%|██████████| 1624/1624 [03:59<00:00,  6.77it/s, loss=0.174]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.13it/s]


Epoch 4, Average Loss: 0.1488 Test loss: 0.1702


Epoch 5/20: 100%|██████████| 1624/1624 [03:58<00:00,  6.80it/s, loss=0.131]
Testing: 100%|██████████| 289/289 [00:39<00:00,  7.25it/s]


Epoch 5, Average Loss: 0.1467 Test loss: 0.1700


Epoch 6/20: 100%|██████████| 1624/1624 [03:58<00:00,  6.81it/s, loss=0.171]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.14it/s]


Epoch 6, Average Loss: 0.1447 Test loss: 0.1701


Epoch 7/20: 100%|██████████| 1624/1624 [03:57<00:00,  6.84it/s, loss=0.151]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.17it/s]


Epoch 7, Average Loss: 0.1429 Test loss: 0.1706


Epoch 8/20: 100%|██████████| 1624/1624 [03:58<00:00,  6.81it/s, loss=0.167]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.18it/s]


Epoch 8, Average Loss: 0.1415 Test loss: 0.1704


Epoch 9/20: 100%|██████████| 1624/1624 [03:58<00:00,  6.80it/s, loss=0.153]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.20it/s]


Epoch 9, Average Loss: 0.1398 Test loss: 0.1709


Epoch 10/20: 100%|██████████| 1624/1624 [03:58<00:00,  6.82it/s, loss=0.129]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.17it/s]


Epoch 10, Average Loss: 0.1385 Test loss: 0.1710


Epoch 11/20: 100%|██████████| 1624/1624 [03:57<00:00,  6.84it/s, loss=0.173]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.21it/s]


Epoch 11, Average Loss: 0.1370 Test loss: 0.1709


Epoch 12/20: 100%|██████████| 1624/1624 [03:58<00:00,  6.82it/s, loss=0.13] 
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.16it/s]


Epoch 12, Average Loss: 0.1360 Test loss: 0.1708


Epoch 13/20: 100%|██████████| 1624/1624 [03:58<00:00,  6.80it/s, loss=0.196]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.16it/s]


Epoch 13, Average Loss: 0.1348 Test loss: 0.1704


Epoch 14/20: 100%|██████████| 1624/1624 [03:57<00:00,  6.84it/s, loss=0.136]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.15it/s]


Epoch 14, Average Loss: 0.1336 Test loss: 0.1703


Epoch 15/20: 100%|██████████| 1624/1624 [03:58<00:00,  6.80it/s, loss=0.121] 
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.15it/s]


Epoch 15, Average Loss: 0.1325 Test loss: 0.1707


Epoch 16/20: 100%|██████████| 1624/1624 [03:58<00:00,  6.81it/s, loss=0.101]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.18it/s]


Epoch 16, Average Loss: 0.1315 Test loss: 0.1715


Epoch 17/20: 100%|██████████| 1624/1624 [03:59<00:00,  6.79it/s, loss=0.141]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.12it/s]


Epoch 17, Average Loss: 0.1304 Test loss: 0.1711


Epoch 18/20: 100%|██████████| 1624/1624 [03:56<00:00,  6.86it/s, loss=0.11]  
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.13it/s]


Epoch 18, Average Loss: 0.1295 Test loss: 0.1716


Epoch 19/20: 100%|██████████| 1624/1624 [03:57<00:00,  6.84it/s, loss=0.136] 
Testing: 100%|██████████| 289/289 [00:39<00:00,  7.31it/s]


Epoch 19, Average Loss: 0.1286 Test loss: 0.1713


Epoch 20/20: 100%|██████████| 1624/1624 [03:58<00:00,  6.80it/s, loss=0.115]
Testing: 100%|██████████| 289/289 [00:40<00:00,  7.13it/s]

Epoch 20, Average Loss: 0.1279 Test loss: 0.1712





In [40]:
# Save modelw
torch.save(model.state_dict(), 'bst_model.pth')
print('Model saved as bst_model.pth')

Model saved as bst_model.pth


In [46]:
print(test_scores)

[0.16980269030509937, 0.16975156328669883, 0.1693197827541292, 0.17021437571947962, 0.17001508980680088, 0.17006768935898184, 0.1705918589471533, 0.17039927731954516, 0.17088893641134448, 0.17095972705877363, 0.1708614778250559, 0.1707887686144522, 0.17040371637030868, 0.17032126247057866, 0.1706961907936215, 0.17151299144776222, 0.17114394310230202, 0.1716276722280212, 0.17132704248684089, 0.1711991835630476]


In [45]:
def load_bst_model(model_path='bst_model.pth', device=DEVICE):
    embed_configs = {
        'item': {"embed_dim": EMED_DIM, "num_embed": len(movie_id_map_dict)},
        'position': {"embed_dim": EMED_DIM, "num_embed": SEQUENCE_SIZE},
        'user': {"embed_dim": EMED_DIM, "num_embed": len(user_id_map_dict)},
        'sex': {"embed_dim": EMED_DIM, "num_embed": len(user_sex_map_dict)},
        'occupation': {"embed_dim": EMED_DIM, "num_embed": len(user_occupation_map_dict)},
        'age_group': {"embed_dim": EMED_DIM, "num_embed": len(user_age_group_map_dict)},
        'sequence': {
            "embed_dim": EMED_DIM,
            "num_embed": num_movie,
            "type": "sequence",
            "seq_length": SEQUENCE_SIZE
        }
    }
    
    embedding_layer = EmbeddingLayer(embed_configs)
    model = BSTRecommender(embedding_layer=embedding_layer)
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()
    return model


# load BST model
best_path = f'./checkpoints/bst_model_epoch_{np.argmin(test_scores)+1}.pth'
print(best_path)
bst_model = load_bst_model(model_path=best_path, device=DEVICE)
print('BST Model loaded from bst_model.pth')
min_max_scaler = joblib.load('min_max_scaler.save')

./checkpoints/bst_model_epoch_3.pth
BST Model loaded from bst_model.pth


In [44]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np



# test
def test():
    model.eval()
    with torch.no_grad():
        test_loss = 0
        predictions_list = []
        targets_list = []
        
        for batch in tqdm(test_loader, desc='Testing'):
            predictions = model(batch)
            loss = criterion(predictions, batch['target_rating'])
            test_loss += loss.item()
            
            predictions_list.extend(predictions.cpu().numpy())
            targets_list.extend(batch['target_rating'].cpu().numpy())

    avg_test_loss = test_loss / len(test_loader)
    return avg_test_loss, predictions_list, targets_list
avg_test_loss, predictions_list, targets_list = test()
print(f'Test Loss: {avg_test_loss:.4f}')

# Calculate additional metrics

mae = mean_absolute_error(targets_list, predictions_list)
rmse = np.sqrt(mean_squared_error(targets_list, predictions_list))

original_targets = min_max_scaler.inverse_transform(np.array(targets_list).reshape(-1,1))[:,0]
original_predictions = min_max_scaler.inverse_transform(np.array(predictions_list).reshape(-1,1))[:,0]

original_mae = mean_absolute_error(original_targets, original_predictions)
original_rmse = np.sqrt(mean_squared_error(original_targets, original_predictions))

print(f'MAE: {mae:.4f} ({original_mae:.4f})')
print(f'RMSE: {rmse:.4f} ({original_rmse:.4f})')


Testing: 100%|██████████| 289/289 [00:42<00:00,  6.73it/s]

Test Loss: 0.1712
MAE: 0.1712 (0.6848)
RMSE: 0.2467 (0.9867)



