# LHydra Recommender System

This notebook demonstrates the revised implementation of the ReNeLLM-based recommender system, incorporating the following improvements:

1. **Data Preprocessing Pipeline**
2. **Consistent Handling of Encoders and Vectorizers**
3. **Enhanced Model Training**
4. **Improved Inference Function**
5. **Optimized Recommendation Generation**

---


## 1. Import Necessary Modules

In [5]:
import pandas as pd
import torch
from preprocessing import DataPreprocessor
from tensorflow_docs.model import HybridRecommender
from inference import get_recommendations, make_inference
import pickle

## 2. Load Pretrained Model and Preprocessors

In [6]:
preprocessor = DataPreprocessor()
filepath = '../data/cleaned_modv2.csv'
data = preprocessor.load_data(filepath)
# data
data_encoded = preprocessor.encode_features(data)
features = preprocessor.feature_engineering(data_encoded)
train_features, test_features, train_target, test_target = preprocessor.split_data(features)
preprocessor.save_preprocessors(directory='models/')
loaded_preprocessor = preprocessor.load_preprocessors(directory='models/')



Artist TF-IDF Shape: (19974, 3995)
Total features after encoding: 4013
Unique user IDs: 9741
Unique artist features: 3995
Unique track IDs: 11528
Unique genre IDs: 21


In [9]:
# Load preprocessors
# preprocessor = DataPreprocessor()
# filepath = '../data/cleaned_modv2.csv'
# preprocessor.load_data(filepath=filepath)
# preprocessor.encode_features(data=data)
# preprocessor.load_preprocessors(directory='models/')


# Load encoded data (if saved separately) or load fresh
# data_encoded = pd.read_csv(filepath=filepath)  # Update with actual path if different

# Load the trained model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_users = len(preprocessor.user_id_encoder.classes_)
# num_items = len(preprocessor..classes_)
num_items = len(preprocessor.track_encoder.classes_)
embedding_dim = 128
num_layers = 3
hidden_dims = [256, 128, 64]
num_features = train_features.shape[1] -2  # Adjust based on feature columns

model = HybridRecommender(num_users, num_items, embedding_dim, num_features, num_layers, hidden_dims, dropout_prob=0.2)
model.load_state_dict(torch.load('models/model.pth', map_location=device))
model.to(device)
model.eval()

print("Model and preprocessors loaded successfully.")

TypeError: empty(): argument 'size' failed to unpack the object at pos 2 with error "type must be tuple of ints,but got list"

In [4]:
num_features
# train_features.shape[1]

6099

In [5]:
features.shape

(19974, 6102)

In [6]:
train_features.shape[1]

6101

In [10]:
user_features = train_features[train_features.columns[:num_features]]
item_features = train_features[train_features.columns[num_features:]]
user_features.shape, item_features.shape
print(f"Train User features shape: {user_features.shape}\nTrain item features shape: {item_features.shape}")

Train User features shape: (15979, 4012)
Train item features shape: (15979, 2)


In [11]:
test_user_features = test_features[test_features.columns[:num_features]]
test_item_features = test_features[test_features.columns[num_features:]]
print(f"Test user features shape: {test_user_features.shape}\nTest item features shape: {test_item_features.shape}")

Test user features shape: (3995, 4012)
Test item features shape: (3995, 2)


## 3. Generate Recommendations for a User

In [12]:
random_user_id = data['user_id'].sample(1).values[0]
print(f"Random user id: {random_user_id}")

Random user id: 41166


In [13]:
data[data['user_id'] == random_user_id]


Unnamed: 0,user_id,age,gender,music,artist_name,featured_artists,genre,plays,duration,music_id,...,instrumentalness,liveness,valence,tempo,time_signature,explicit,user_id_encoded,gender_encoded,genre_encoded,track_encoded
10861,41166,64,M,Lay Me Down,Dirty Heads,none,Jazz,909,3.63,4duRCqphyoANSfH8oUl6c2,...,0.042212,0.159155,0.360706,115.661939,4.0,0.0,4069,1,10,6876
10862,41166,64,M,Astronaut,Sir Sly,none,Classical,291,3.74,3q5Zm5NnYcpXKLNmTO1tyo,...,0.1371,0.137337,0.361295,124.700947,4.0,0.0,4069,1,2,5665
10863,41166,64,M,Pretty Brown Eyes,Cody Simpson,none,Rock,541,2.8,5BHWQ78pw9yIjIWa6p1z6G,...,0.0,0.1114,0.732,87.3885,4.0,0.0,4069,1,17,7626
10864,41166,64,M,Boo,2 Chainz,"JP Cooper, Alec Benjamin",Hip-Hop,530,4.38,7bf70LmWt3EJ8ZOGGQvkUV,...,0.437899,0.310714,0.392514,148.443286,4.0,0.0,4069,1,9,10939


In [14]:
# Define user ID for whom recommendations are to be generated
user_id = '2456'  # Replace with an actual user ID from your dataset
top_k = 10

try:
    recommended_items = get_recommendations(
        model=model,
        user_id=user_id,
        data_encoded=data_encoded,
        user_id_encoder=preprocessor.user_id_encoder,
        item_encoder=preprocessor.music_id_encoder,
        device=device,
        top_k=top_k
    )
    print(f"Top {top_k} recommendations for user {user_id}:")
    for idx, item in enumerate(recommended_items, start=1):
        print(f"{idx}. {item}")
except ValueError as ve:
    print(ve)

NameError: name 'model' is not defined

In [15]:
data['user_id']

0        83811
1        83811
2        13397
3        70645
4        70645
         ...  
19969    74433
19970    74433
19971    94134
19972    78124
19973    78124
Name: user_id, Length: 19974, dtype: int64

In [16]:
data.head()

Unnamed: 0,user_id,age,gender,music,artist_name,featured_artists,genre,plays,duration,music_id,...,instrumentalness,liveness,valence,tempo,time_signature,explicit,user_id_encoded,gender_encoded,genre_encoded,track_encoded
0,83811,16,F,Bank Account,21 Savage,"Birdy, Zoé",Dark Trap,11,3.67,2fQrGHiQOvpL9UgPvtYy6G,...,7e-06,0.0871,0.376,75.016,4.0,1.0,8175,0,5,3936
1,83811,16,F,Little Talks,Of Monsters and Men,"Ninho, Snoop Dogg, Russ, Paramore",Unknown,686,4.44,2ihCaVdNZmnHZWt0fvAM7B,...,0.0,0.2845,0.413,101.8905,4.0,0.0,8175,0,20,4020
2,13397,17,M,Wherever I Go,OneRepublic,"Keith Urban, DJ Khaled, NIKI, MF DOOM",Unknown,136,2.83,46jLy47W8rkf8rEX04gMKB,...,0.043432,0.273133,0.404183,120.8505,4.0,0.0,1294,1,20,6090
3,70645,44,M,No New Friends,DJ Khaled,"The xx, LIT killah",Pop,230,5.14,5oVlbbiKGdGeZkWCFy0mqk,...,0.0,0.0966,0.4,109.283,4.0,0.0,6890,1,13,8577
4,70645,44,M,Dreams,Campsite Dream,none,Country,391,3.2,1SNoSoQ3JZldOhzBY9gw0n,...,0.235527,0.180354,0.380815,120.488479,4.0,0.0,6890,1,3,2130


In [17]:
# Define user ID for whom recommendations are to be generated
user_id = "35"  # Replace with an actual user ID from your dataset
top_k = 10

try:
    # Debug: Print the user ID before encoding
    print(f"Original user ID: {user_id}")

    # Debug: Print the encoded user ID
    encoded_user_id = preprocessor.user_id_encoder.transform([user_id])
    print(f"Encoded user ID: {encoded_user_id}")

    # Check if the encoded user ID exists in the encoder's classes
    if encoded_user_id[0] not in preprocessor.user_id_encoder.classes_:
        raise ValueError(f"Encoded user ID {encoded_user_id} not found in encoder.")

    recommended_items = get_recommendations(
        model=model,
        user_id=encoded_user_id,
        data_encoded=data_encoded,
        user_id_encoder=preprocessor.user_id_encoder,
        item_encoder=preprocessor.music_id_encoder,
        device=device,
        top_k=top_k
    )
    print(f"Top {top_k} recommendations for user {user_id}:")
    for idx, item in enumerate(recommended_items, start=1):
        print(f"{idx}. {item}")
except ValueError as ve:
    print(ve)

Original user ID: 35
Encoded user ID: [2]
Encoded user ID [2] not found in encoder.


In [18]:
# print(preprocessor.user_id_encoder.classes_)


In [19]:
data[['user_id','user_id_encoded']]


Unnamed: 0,user_id,user_id_encoded
0,83811,8175
1,83811,8175
2,13397,1294
3,70645,6890
4,70645,6890
...,...,...
19969,74433,7268
19970,74433,7268
19971,94134,9154
19972,78124,7639


In [20]:

try:
    # Debug: Print the user ID before encoding
    print(f"Original user ID: {user_id}")
    
    # Attempt transformation
    encoded_user_id = preprocessor.user_id_encoder.transform([user_id])
    print(f"Encoded user ID: {encoded_user_id}")
    
    # Check if the encoded user ID exists in the encoder's classes
    if encoded_user_id[0] not in preprocessor.user_id_encoder.classes_:
        print(f"Encoded user ID {encoded_user_id} not found in encoder. Handling as OOV...")
        # Handle OOV user (e.g., assign default ID)
        encoded_user_id = [preprocessor.user_id_encoder.classes_.shape[0] - 1]
        print(f"Assigned Default ID for OOV User: {encoded_user_id}")
    
    recommended_items = get_recommendations(
        model=model,
        user_id=user_id,  # Pass original user ID for logging clarity
        encoded_user_id=encoded_user_id,  # Updated to reflect potential OOV handling
        data_encoded=data_encoded,
        user_id_encoder=preprocessor.user_id_encoder,
        item_encoder=preprocessor.music_id_encoder,
        device=device,
        top_k=top_k
    )
    print(f"Top {top_k} recommendations for user {user_id}:")
    for idx, item in enumerate(recommended_items, start=1):
        print(f"{idx}. {item}")
except Exception as e:
    print(f"An error occurred: {e}")

Original user ID: 35
Encoded user ID: [2]
Encoded user ID [2] not found in encoder. Handling as OOV...
Assigned Default ID for OOV User: [9740]
An error occurred: name 'model' is not defined


In [4]:
try:
    # Debug: Print the user ID before encoding
    print(f"Original user ID: {user_id}")
    
    # Attempt transformation
    encoded_user_id = preprocessor.user_id_encoder.transform([user_id])
    print(f"Encoded user ID: {encoded_user_id}")
    
    # Check if the encoded user ID exists in the encoder's classes
    if encoded_user_id[0] not in preprocessor.user_id_encoder.classes_:
        print(f"Encoded user ID {encoded_user_id} not found in encoder. Handling as OOV...")
        # Handle OOV user (e.g., assign default ID)
        encoded_user_id = [preprocessor.user_id_encoder.classes_.shape[0] - 1]
        print(f"Assigned Default ID for OOV User: {encoded_user_id}")
    
    recommended_items = get_recommendations(
        model=model,
        user_id=user_id,  # Pass original user ID for logging clarity
        # encoded_user_id=encoded_user_id,  // Removed this line
        data_encoded=data_encoded,
        user_id_encoder=preprocessor.user_id_encoder,
        item_encoder=preprocessor.music_id_encoder,
        device=device,
        top_k=top_k
    )
    print(f"Top {top_k} recommendations for user {user_id}:")
    for idx, item in enumerate(recommended_items, start=1):
        print(f"{idx}. {item}")
except Exception as e:
    print(f"An error occurred: {e}")

An error occurred: name 'user_id' is not defined


## 4. Sample Predictions for Users
