In [None]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
# NFM is one of RecBole's context-aware models; it is not exported by
# `recbole.model.general_recommender`, so importing it from there fails.
from recbole.model.context_aware_recommender import NFM

In [None]:
# Define the data path and other parameters
# SECURITY: this URL previously ended in a `?token=...` query parameter, i.e. a
# GitHub access token committed in plain text. The token has been removed (and
# should be treated as leaked). If the file is not publicly readable, supply
# credentials at runtime (e.g. from an environment variable) instead of
# hardcoding them in the notebook.
data_path = "https://raw.githubusercontent.com/jabhij/Tag-based-Recommendation-System/main/Dataset/combined_movie_genres_tags.csv"
embedding_size = 64
num_clusters = 10  # number of tag clusters

# Create a dataset object
# NOTE(review): no visible cell imports or defines `RecBole`, so this line
# raises NameError on a fresh kernel. RecBole's documented entry point appears
# to be `recbole.data.create_dataset(config)` taking a `recbole.config.Config`
# and reading atomic files from a local directory rather than a CSV URL --
# confirm, and convert the data first, before wiring this up.
dataset = RecBole.create_dataset(data_path)

# Preprocess the data (tag clustering or tag count)
def preprocess_tags(tag_data, num_clusters=10, use_tfidf=True):
  """
  Preprocesses tag data for tag-based recommendation.

  Args:
      tag_data (list): List of lists of tag strings, one inner list per item.
      num_clusters (int, optional): Number of K-means clusters used to group
          items by their tag vectors (default: 10). A value <= 0 disables
          clustering.
      use_tfidf (bool, optional): Whether to use TF-IDF weighting (default: True).
          When False, raw per-item tag counts are used instead.

  Returns:
      tuple: (tag_embeddings, context_features)
          - tag_embeddings: One row per item. With ``use_tfidf=True`` this is
            the sparse matrix returned by ``TfidfVectorizer.fit_transform`` on
            the comma-joined tags; otherwise a dense integer ``np.ndarray`` of
            shape (n_items, n_distinct_tags) whose columns follow sorted tag
            order.
          - context_features: One-hot cluster membership of shape
            (n_items, num_clusters) when clustering is enabled, otherwise
            ``tag_embeddings`` itself.
  """

  # 1. Vectorize Tags (TF-IDF or simple counts)
  if use_tfidf:
    # NOTE: the default TfidfVectorizer tokenizer works on words, so a
    # multi-word tag contributes one column per word rather than one column.
    vectorizer = TfidfVectorizer()
    tag_embeddings = vectorizer.fit_transform([", ".join(tags) for tags in tag_data])  # Join tags with comma
  else:
    # Item-by-tag occurrence counts. Building the matrix explicitly keeps it
    # rectangular even when items carry different numbers of tags.
    vocabulary = sorted({tag for item_tags in tag_data for tag in item_tags})
    column_of = {tag: col for col, tag in enumerate(vocabulary)}
    tag_embeddings = np.zeros((len(tag_data), len(vocabulary)), dtype=int)
    for row, item_tags in enumerate(tag_data):
      for tag in item_tags:
        tag_embeddings[row, column_of[tag]] += 1

  # 2. Cluster Tags (if desired)
  if num_clusters > 0:
    # K-means cannot fit more clusters than there are samples, so cap the
    # fitted cluster count; the one-hot width below stays at num_clusters.
    n_items = tag_embeddings.shape[0]
    kmeans = KMeans(n_clusters=min(num_clusters, n_items), random_state=42)  # Set random state for reproducibility
    kmeans.fit(tag_embeddings)

    # 3. Create Context Features (one-hot encoding by default)
    # Index with the flat label vector so the result is 2-D
    # (n_items, num_clusters) rather than (n_items, 1, num_clusters).
    context_features = np.eye(num_clusters)[kmeans.labels_]
  else:
    context_features = tag_embeddings  # Use tag embeddings directly if not clustering

  return tag_embeddings, context_features

# Define the model
# RecBole models are built from a Config plus a dataset (not from
# user_num/item_num keyword arguments) and are trained through a Trainer;
# the model object itself has no `fit` method.
from recbole.data import data_preparation
from recbole.model.context_aware_recommender import NFM
from recbole.trainer import Trainer

# Reuse the configuration the dataset was created with and apply the embedding
# size chosen in the parameter cell.
config = dataset.config
config["embedding_size"] = embedding_size

# Split into train/validation/test loaders according to the config.
train_data, valid_data, test_data = data_preparation(config, dataset)

model = NFM(config, train_data.dataset).to(config["device"])

# Train the model
trainer = Trainer(config, model)
best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)

In [None]:
# Evaluate the model
# NOTE(review): the `Evaluator(model=..., dataset=..., metrics=...)` constructor
# and `evaluator.run()` used below do not match RecBole's documented API as far
# as I can tell; the documented route is `Trainer(config, model).evaluate(test_data)`,
# with metrics taken from the Config -- confirm against the installed version.
from recbole.evaluator import Evaluator

# Define the evaluation metrics (e.g., NDCG, Recall)
# NOTE(review): RecBole configs appear to list metric names and cut-offs
# separately (`metrics`, `topk`) rather than as "NDCG@10"-style strings -- confirm.
metrics = ["NDCG@10", "Recall@20"]

# Create an evaluator object
evaluator = Evaluator(model=model, dataset=dataset, metrics=metrics)

# Evaluate the model
result = evaluator.run()

# Print the evaluation results
# The loop below expects `result` to be a mapping of metric name -> numeric
# score, since each value is formatted with `:.4f`.
print(f"Evaluation Results:")
for metric, score in result.items():
    print(f"\t- {metric}: {score:.4f}")

# ... (use the evaluation results for further analysis)

In [None]:
# Make recommendations
# NOTE(review): `model.recommend(user_id)` does not match RecBole's documented
# model API as far as I can tell; top-k lists are normally produced with
# `recbole.utils.case_study.full_sort_topk`, which works on internal ids
# obtained via the dataset's token-to-id mapping -- confirm before relying on this.
user_id = 100
item_list = model.recommend(user_id)

# Print one line per recommended item for the chosen user.
for item in item_list:
    print(f"Recommended item for user {user_id}: {item}")