# LHydra Recommender System

This notebook demonstrates the revised implementation of the ReNeLLM-based recommender system, incorporating the following improvements:

1. **Data Preprocessing Pipeline**
2. **Consistent Handling of Encoders and Vectorizers**
3. **Enhanced Model Training**
4. **Improved Inference Function**
5. **Optimized Recommendation Generation**

---


## 1. Import Necessary Modules

In [1]:
import pandas as pd
import torch
from preprocessing import DataPreprocessor
from model import HybridRecommender
from inference import get_recommendations, make_inference
import pickle

## 2. Load Pretrained Model and Preprocessors

In [3]:
preprocessor = DataPreprocessor()
filepath = '../data/cleaned_modv2.csv'
data = preprocessor.load_data(filepath)

# Encoding features
data_encoded = preprocessor.encode_features(data)
features = preprocessor.feature_engineering(data_encoded)

# Splitting data
train_features, test_features, train_target, test_target = preprocessor.split_data(features)

# Save preprocessors
preprocessor.save_preprocessors(directory='models/')

# Load preprocessors
preprocessor.load_preprocessors(directory='models/')

# Verify loaded preprocessors
print("Preprocessors loaded successfully.")


KeyError: 'track_name'

In [3]:
# Load the trained model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_users = len(preprocessor.user_id_encoder.classes_)
num_artists = len(preprocessor.artist_encoder.classes_)  # Ensure you have artist_encoder
num_tracks = len(preprocessor.track_encoder.classes_)    # Ensure you have track_encoder
num_genres = len(preprocessor.genre_encoder.classes_)
embedding_dim = 128
num_audio_features = train_features.shape[1] - (preprocessor.user_id_encoder.classes_.shape[0] + 
                                             preprocessor.artist_encoder.classes_.shape[0] +
                                             preprocessor.track_encoder.classes_.shape[0] +
                                             preprocessor.genre_encoder.classes_.shape[0] + 1) # Adjust as per actual features

model = HybridRecommender(
    num_users=num_users,
    num_artists=num_artists,
    num_tracks=num_tracks,
    num_genres=num_genres,
    embedding_dim=embedding_dim,
    num_audio_features=num_audio_features,
    num_layers=3,
    hidden_dims=[256, 128, 64],
    dropout_prob=0.2
)
model.load_state_dict(torch.load('models/model.pth', map_location=device))
model.to(device)
model.eval()

print("Model and preprocessors loaded successfully.")

Model and preprocessors loaded successfully.


  model.load_state_dict(torch.load('models/model.pth', map_location=device))


In [4]:
num_features
# train_features.shape[1]

12113

In [5]:
features.shape

(19974, 12114)

In [6]:
train_features.shape[1]

12113

In [7]:
user_features = train_features[train_features.columns[:num_features]]
item_features = train_features[train_features.columns[num_features:]]
user_features.shape, item_features.shape
print(f"Train User features shape: {user_features.shape}\nTrain item features shape: {item_features.shape}")

Train User features shape: (15979, 6099)
Train item features shape: (15979, 2)


In [8]:
test_user_features = test_features[test_features.columns[:num_features]]
test_item_features = test_features[test_features.columns[num_features:]]
print(f"Test user features shape: {test_user_features.shape}\nTest item features shape: {test_item_features.shape}")

Test user features shape: (3995, 6099)
Test item features shape: (3995, 2)


## 3. Generate Recommendations for a User

In [9]:
random_user_id = data['user_id'].sample(1).values[0]
print(f"Random user id: {random_user_id}")

Random user id: 32261


In [10]:
data[data['user_id'] == random_user_id]


Unnamed: 0,user_id,age,gender,music,artist_name,featured_artists,genre,plays,duration,music_id,...,speechiness,instrumentalness,liveness,valence,tempo,time_signature,explicit,user_id_encoded,music_id_encoded,gender_encoded
634,32261,47,F,I Fall Apart,Post Malone,"MKTO, Charli XCX",Rap,12,3.72,75ZvA4QfFiZvzhj2xkaWAh,...,0.0327,7.566667e-07,0.281133,0.261667,138.843,4.0,0.0,3198,3677,0
635,32261,47,F,Feelin' It,Home Free,none,Classical,1,3.05,14cikDRwGYOB1UfdA44V4P,...,0.1277,0.1673333,0.199333,0.711667,88.286,4.0,0.0,3198,2515,0


In [33]:
# Define user ID for whom recommendations are to be generated
user_id = '2456'  # Replace with an actual user ID from your dataset
top_k = 10

try:
    recommended_items = get_recommendations(
        model=model,
        user_id=user_id,
        data_encoded=data_encoded,
        user_id_encoder=preprocessor.user_id_encoder,
        artist_encoder=preprocessor.artist_encoder,
        track_encoder=preprocessor.track_encoder,
        genre_encoder=preprocessor.genre_encoder,
        music_id_to_info=music_id_to_info,  # Ensure this is defined
        device=device,
        top_k=top_k
    )
    print(f"Top {top_k} recommendations for user {user_id}:")
    for idx, item in enumerate(recommended_items, start=1):
        print(f"{idx}. {item[0]} by {item[1]}")
except ValueError as ve:
    print(ve)

User Features Shape: torch.Size([1, 6099])
Item IDs Shape: torch.Size([9470])
User IDs Shape: torch.Size([9470])
Repeated Features Shape: torch.Size([9470, 6099])
Scores Shape: torch.Size([9470])
Top 10 recommendations for user 2456:
1. (I Can't Get No) Satisfaction
2. (Have You Heard The News) Dewey Cox Died
3. (Everything I Do) I Do It For You
4. (Ghost) Riders in the Sky
5. 'Round Midnight
6. $TING
7. 'Til I Die
8. (Don't Fear) The Reaper
9. (I've Had) The Time of My Life
10. (Let's Have A) Party


In [33]:
data['user_id']

0        83811
1        83811
2        13397
3        70645
4        70645
         ...  
19969    74433
19970    74433
19971    94134
19972    78124
19973    78124
Name: user_id, Length: 19974, dtype: int64

In [None]:
data.head()

In [27]:
# Define user ID for whom recommendations are to be generated
user_id = "35"  # Replace with an actual user ID from your dataset
top_k = 10

try:
    # Debug: Print the user ID before encoding
    print(f"Original user ID: {user_id}")

    # Debug: Print the encoded user ID
    encoded_user_id = preprocessor.user_id_encoder.transform([user_id])
    print(f"Encoded user ID: {encoded_user_id}")

    # Check if the encoded user ID exists in the encoder's classes
    if encoded_user_id[0] not in preprocessor.user_id_encoder.classes_:
        raise ValueError(f"Encoded user ID {encoded_user_id} not found in encoder.")

    recommended_items = get_recommendations(
        model=model,
        user_id=encoded_user_id,
        data_encoded=data_encoded,
        user_id_encoder=preprocessor.user_id_encoder,
        item_encoder=preprocessor.music_id_encoder,
        device=device,
        top_k=top_k
    )
    print(f"Top {top_k} recommendations for user {user_id}:")
    for idx, item in enumerate(recommended_items, start=1):
        print(f"{idx}. {item}")
except ValueError as ve:
    print(ve)

Original user ID: 35
Encoded user ID: [2]
Encoded user ID [2] not found in encoder.


In [24]:
# print(preprocessor.user_id_encoder.classes_)


[    4    16    35 ... 99996 99997 99998]


In [20]:
data[['user_id','user_id_encoded']]


Unnamed: 0,user_id,user_id_encoded
0,83811,8175
1,83811,8175
2,13397,1294
3,70645,6890
4,70645,6890
...,...,...
19969,74433,7268
19970,74433,7268
19971,94134,9154
19972,78124,7639


In [34]:

try:
    # Debug: Print the user ID before encoding
    print(f"Original user ID: {user_id}")
    
    # Attempt transformation
    encoded_user_id = preprocessor.user_id_encoder.transform([user_id])
    print(f"Encoded user ID: {encoded_user_id}")
    
    # Check if the encoded user ID exists in the encoder's classes
    if encoded_user_id[0] not in preprocessor.user_id_encoder.classes_:
        print(f"Encoded user ID {encoded_user_id} not found in encoder. Handling as OOV...")
        # Handle OOV user (e.g., assign default ID)
        encoded_user_id = [preprocessor.user_id_encoder.classes_.shape[0] - 1]
        print(f"Assigned Default ID for OOV User: {encoded_user_id}")
    
    recommended_items = get_recommendations(
        model=model,
        user_id=user_id,  # Pass original user ID for logging clarity
        encoded_user_id=encoded_user_id,  # Updated to reflect potential OOV handling
        data_encoded=data_encoded,
        user_id_encoder=preprocessor.user_id_encoder,
        item_encoder=preprocessor.music_id_encoder,
        device=device,
        top_k=top_k
    )
    print(f"Top {top_k} recommendations for user {user_id}:")
    for idx, item in enumerate(recommended_items, start=1):
        print(f"{idx}. {item}")
except Exception as e:
    print(f"An error occurred: {e}")

Original user ID: 2456
Encoded user ID: [228]
Encoded user ID [228] not found in encoder. Handling as OOV...
Assigned Default ID for OOV User: [9740]
An error occurred: get_recommendations() got an unexpected keyword argument 'encoded_user_id'


In [35]:
try:
    # Debug: Print the user ID before encoding
    print(f"Original user ID: {user_id}")
    
    # Attempt transformation
    encoded_user_id = preprocessor.user_id_encoder.transform([user_id])
    print(f"Encoded user ID: {encoded_user_id}")
    
    # Check if the encoded user ID exists in the encoder's classes
    if encoded_user_id[0] not in preprocessor.user_id_encoder.classes_:
        print(f"Encoded user ID {encoded_user_id} not found in encoder. Handling as OOV...")
        # Handle OOV user (e.g., assign default ID)
        encoded_user_id = [preprocessor.user_id_encoder.classes_.shape[0] - 1]
        print(f"Assigned Default ID for OOV User: {encoded_user_id}")
    
    recommended_items = get_recommendations(
        model=model,
        user_id=user_id,  # Pass original user ID for logging clarity
        # encoded_user_id=encoded_user_id,  // Removed this line
        data_encoded=data_encoded,
        user_id_encoder=preprocessor.user_id_encoder,
        artist_encoder=preprocessor.artist_encoder,
        track_encoder=preprocessor.track_encoder,
        genre_encoder=preprocessor.genre_encoder,
        music_id_to_info=music_id_to_info,  # Ensure this is defined
        device=device,
        top_k=top_k
    )
    print(f"Top {top_k} recommendations for user {user_id}:")
    for idx, item in enumerate(recommended_items, start=1):
        print(f"{idx}. {item[0]} by {item[1]}")
except Exception as e:
    print(f"An error occurred: {e}")

Original user ID: 2456
Encoded user ID: [228]
Encoded user ID [228] not found in encoder. Handling as OOV...
Assigned Default ID for OOV User: [9740]
User Features Shape: torch.Size([1, 6099])
Item IDs Shape: torch.Size([9470])
User IDs Shape: torch.Size([9470])
Repeated Features Shape: torch.Size([9470, 6099])
Scores Shape: torch.Size([9470])
Top 10 recommendations for user 2456:
1. (I Can't Get No) Satisfaction
2. (Have You Heard The News) Dewey Cox Died
3. (Everything I Do) I Do It For You
4. (Ghost) Riders in the Sky
5. 'Round Midnight
6. $TING
7. 'Til I Die
8. (Don't Fear) The Reaper
9. (I've Had) The Time of My Life
10. (Let's Have A) Party


## 4. Sample Predictions for Users
