In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

### MOVIE recommendation using MAML

In [5]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Load data and prepare genre mapping
def load_data(file_path):
    df = pd.read_csv(file_path)
    
    # Create a unique mapping for genres
    unique_genres = set()
    for genre_list in df['genres'].apply(lambda x: x.split('|')):
        unique_genres.update(genre_list)
    
    genre_to_idx = {genre: idx for idx, genre in enumerate(unique_genres)}
    
    # Map genres to integers and prepare tensors
    user_ids = torch.tensor(df['userId'].values, dtype=torch.long)
    movie_ids = torch.tensor(df['movieId'].values, dtype=torch.long)
    ratings = torch.tensor(df['rating'].values, dtype=torch.float)
    genres = torch.tensor(df['genres'].apply(lambda x: genre_to_idx[x.split('|')[0]]).values, dtype=torch.long)
    
    return user_ids, movie_ids, ratings, genres, genre_to_idx, list(unique_genres)

# Define a custom dataset for the movie recommendation task
class MovieDataset(Dataset):
    def __init__(self, user_ids, movie_ids, ratings, genres):
        self.user_ids = user_ids
        self.movie_ids = movie_ids
        self.ratings = ratings
        self.genres = genres

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, idx):
        return self.user_ids[idx], self.movie_ids[idx], self.ratings[idx], self.genres[idx]

# Define the MetaRecModel class
class MetaRecModel(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim, num_genres=None):
        super(MetaRecModel, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        self.has_genres = num_genres is not None
        
        if self.has_genres:
            self.genre_embedding = nn.Embedding(num_genres, embedding_dim)
            self.fc = nn.Linear(embedding_dim * 3, 1)  # With genre
        else:
            self.fc = nn.Linear(embedding_dim * 2, 1)  # Without genre

    def forward(self, user_id, item_id, genre_ids=None):
        user_embed = self.user_embedding(user_id)
        item_embed = self.item_embedding(item_id)
    
        if self.has_genres and genre_ids is not None:
            genre_embed = self.genre_embedding(genre_ids.unsqueeze(1)).mean(dim=1)
            x = torch.cat([user_embed, item_embed, genre_embed], dim=-1)
        else:
            x = torch.cat([user_embed, item_embed], dim=-1)
        return self.fc(x).squeeze()

# MAML training loop
def maml_train(model, datasets, inner_lr, outer_lr, inner_steps, outer_steps, batch_size, num_genres=None):
    outer_optimizer = optim.Adam(model.parameters(), lr=outer_lr)
    loss_fn = nn.MSELoss()

    for step in range(outer_steps):
        meta_loss = 0
        for domain_data in datasets:
            fast_model = MetaRecModel(model.user_embedding.num_embeddings, model.item_embedding.num_embeddings, model.user_embedding.embedding_dim, num_genres)
            fast_model.load_state_dict(model.state_dict())
            inner_optimizer = optim.SGD(fast_model.parameters(), lr=inner_lr)
            
            dataloader = DataLoader(domain_data, batch_size=batch_size, shuffle=True)
            for user_ids, movie_ids, ratings, genres in dataloader:
                predictions = fast_model(user_ids, movie_ids, genre_ids=genres)
                loss = loss_fn(predictions, ratings)
                
                inner_optimizer.zero_grad()
                loss.backward()
                inner_optimizer.step()

            meta_loss += loss_fn(fast_model(user_ids, movie_ids, genre_ids=genres), ratings)
        
        outer_optimizer.zero_grad()
        meta_loss.backward()
        outer_optimizer.step()

        print(f'Step {step+1}/{outer_steps}, Meta Loss: {meta_loss.item()}')

# Function to generate recommendations based on user-selected genre
def recommend(model, user_id, preferred_genre=None, genre_to_idx=None, num_recommendations=5):
    item_scores = []
    preferred_genre_id = genre_to_idx.get(preferred_genre) if preferred_genre else None

    for item_id in range(model.item_embedding.num_embeddings):
        genre_id = preferred_genre_id if preferred_genre_id is not None else 0
        score = model(torch.tensor([user_id]), torch.tensor([item_id]), genre_ids=torch.tensor([genre_id]))
        item_scores.append((item_id, score.item()))
    
    top_items = sorted(item_scores, key=lambda x: x[1], reverse=True)[:num_recommendations]
    return [item[0] for item in top_items]

# Load the dataset
file_path = "movie.csv"  # Specify the correct path to your dataset
user_ids, movie_ids, ratings, genres, genre_to_idx, unique_genres = load_data(file_path)

# Prompt user for genre preference
print("Available genres:", unique_genres)
preferred_genre = input("Enter your preferred genre from the above list: ")

# Create the dataset and model
dataset = MovieDataset(user_ids, movie_ids, ratings, genres)
num_users = max(user_ids) + 1
num_items = max(movie_ids) + 1
num_genres = max(genres) + 1
embedding_dim = 32
meta_model = MetaRecModel(num_users, num_items, embedding_dim, num_genres=num_genres)

# MAML training
datasets = [dataset]
maml_train(meta_model, datasets, inner_lr=0.01, outer_lr=0.001, inner_steps=1, outer_steps=5, batch_size=16, num_genres=num_genres)

# Generate recommendations for the specified user and genre
user_id = 2
recommendations = recommend(meta_model, user_id=user_id, preferred_genre=preferred_genre, genre_to_idx=genre_to_idx, num_recommendations=5)
print(f"Top recommendations for user in genre '{preferred_genre}':", recommendations)


Available genres: ['Thriller', 'Western', 'Children', 'Musical', 'Adventure', 'Sci-Fi', 'Comedy', 'Horror', 'Romance', 'Film-Noir', 'Animation', 'Drama', 'Crime', '(no genres listed)', 'Mystery', 'Fantasy', 'Action', 'War', 'IMAX', 'Documentary']


Enter your preferred genre from the above list:  Action


Step 1/5, Meta Loss: 1.6721354722976685
Step 2/5, Meta Loss: 0.9503170251846313
Step 3/5, Meta Loss: 1.4726827144622803
Step 4/5, Meta Loss: 0.6329823732376099
Step 5/5, Meta Loss: 0.43404367566108704
Top recommendations for user in genre 'Action': [57426, 59125, 116022, 32981, 46320]


### MUSIC recommendation using MAML

In [12]:


# Load the dataset and display unique artists
def load_data(file_path):
    df = pd.read_csv(file_path)
    
    # Extract unique artist names
    unique_artists = df['Artist'].unique()
    
    # Display unique artists
    print("Available artists:")
    for artist in unique_artists:
        print(artist)
    
    # Prompt the user to enter an artist name
    selected_artist = input("Enter an artist's name from the list above: ").strip()
    
    if selected_artist not in unique_artists:
        print("Artist not found. Please enter a valid artist name.")
        return None, None, None, None, None, None

    # Filter data based on the selected artist
    artist_data = df[df['Artist'] == selected_artist]
    user_to_idx = {username: idx for idx, username in enumerate(df['Username'].unique())}
    track_to_idx = {track: idx for idx, track in enumerate(df['Track'].unique())}

    user_ids = torch.tensor(artist_data['Username'].map(user_to_idx).values, dtype=torch.long)
    track_ids = torch.tensor(artist_data['Track'].map(track_to_idx).values, dtype=torch.long)
    ratings = torch.tensor(artist_data['rating'].values, dtype=torch.float)

    return user_ids, track_ids, ratings, user_to_idx, track_to_idx, selected_artist

# Define a custom dataset for the music recommendation task
class MusicDataset(Dataset):
    def __init__(self, user_ids, track_ids, ratings):
        self.user_ids = user_ids
        self.track_ids = track_ids
        self.ratings = ratings

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, idx):
        return self.user_ids[idx], self.track_ids[idx], self.ratings[idx]

# Model definition
class RecModel(nn.Module):
    def __init__(self, num_users, num_tracks, embedding_dim):
        super(RecModel, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.track_embedding = nn.Embedding(num_tracks, embedding_dim)
        self.fc = nn.Linear(embedding_dim * 2, 1)

    def forward(self, user_id, track_id):
        user_embed = self.user_embedding(user_id)
        track_embed = self.track_embedding(track_id)
        
        x = torch.cat([user_embed, track_embed], dim=-1)
        return self.fc(x).squeeze()

# Training function
def train_model(model, dataset, epochs, lr, batch_size):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(epochs):
        total_loss = 0
        for user_ids, track_ids, ratings in dataloader:
            predictions = model(user_ids, track_ids)
            loss = loss_fn(predictions, ratings)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
        print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(dataloader)}')

# Function to generate recommendations for a specific artist
def recommend(model, track_to_idx, selected_artist, num_recommendations=5):
    track_scores = []
    for track, track_idx in track_to_idx.items():
        track_id_tensor = torch.tensor([track_idx], dtype=torch.long)
        
        # We can assume a default user_id (e.g., 0) for recommendation purposes
        user_id_tensor = torch.tensor([0], dtype=torch.long)
        
        score = model(user_id_tensor, track_id_tensor)
        track_scores.append((track, score.item()))
    
    # Get the top recommended tracks
    top_tracks = sorted(track_scores, key=lambda x: x[1], reverse=True)[:num_recommendations]
    return [track[0] for track in top_tracks]

# Example usage
file_path = "C://Users//91829//Downloads//dataset_zip//dataset//Last.fm_data.csv"
user_ids, track_ids, ratings, user_to_idx, track_to_idx, selected_artist = load_data(file_path)

if user_ids is not None:
    # Create the dataset
    dataset = MusicDataset(user_ids, track_ids, ratings)

    # Initialize the model
    embedding_dim = 32
    num_users = len(user_to_idx)
    num_tracks = len(track_to_idx)
    model = RecModel(num_users, num_tracks, embedding_dim)

    # Train the model
    train_model(model, dataset, epochs=1, lr=0.001, batch_size=16)

    # Generate recommendations
    recommendations = recommend(model, track_to_idx, selected_artist, num_recommendations=5)
    print(f"Top recommendations for tracks by {selected_artist}:", recommendations)


Available artists:
Isobel Campbell
The Coral
Gidge
LNZNDRF
Goat Girl
Everything Everything
serpentwithfeet
Giggs
The Koreatown Oddity
Billy Ocean
Pale Waves
Khruangbin
Bill Callahan
Clean Bandit
Rod Thomas
Fela Kuti
Jellyfish
The Box Tops
Rudimental
Kacey Musgraves
Florrie
ROY3LS
Black M
Machel Montano
LÉON
Nelson Freitas
Lonely The Brave
Charlie Sloth
Karen Harding
Future Utopia
Stephen Oaks
Sonny Fodera
Röyksopp
R3hab
Aluna
Rat City
Pink Sweat$
Elliphant
Rebecca Black
Crafty 893
Anitta
Maluma
Imelda May
Lil Peep
araabMUZIK
Gary Bartz
Gentleman's Dub Club
The Smashing Pumpkins
Wu-Tang Clan
Salt-N-Pepa
Felt
Roméo Elvis
Nick Cave & the Bad Seeds
Fat Joe
Dr. Dre
Cage & Camu
Postman
Starflam
LL Cool J
Rodríguez
Mr. Vegas
Ghostface Killah
Groundation
Akhenaton
Stephen Marley
Fashawn
Fantan Mojah
Chimurenga Renaissance
Emika
Sizzla
The Uncluded
The Aggrolites
Alpha 5.20
FKA twigs
Jah Cure
Andrés Cotter
Stikstof
Romain Virgo
Guru
SOJA
Daks
Loyle Carner
Vicious
Buraka Som Sistema
Damian Marle

Enter an artist's name from the list above:  Anna Domino


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/1, Loss: 34.02828598022461
Top recommendations for tracks by Anna Domino: ['Juliet', 'In This Moment', 'In the Moment - Monrroe Remix', 'attitude', "Gary's Theme - Remastered"]


### BOOK recommendation using MAML

In [13]:

# Load the dataset
def load_data(file_path):
    df = pd.read_csv(file_path)
    
    # Ensure that 'average_rating' is a float, handling non-numeric values
    df['average_rating'] = pd.to_numeric(df['average_rating'], errors='coerce')
    df['average_rating'] = df['average_rating'].fillna(df['average_rating'].mean())  # Fill NaNs with mean rating
    
    # Create unique mappings for users and books
    user_to_idx = {user_id: idx for idx, user_id in enumerate(df['userId'].unique())}
    book_to_idx = {book_id: idx for idx, book_id in enumerate(df['bookID'].unique())}
    
    # Map users and books to indices
    user_ids = torch.tensor(df['userId'].map(user_to_idx).values, dtype=torch.long)
    book_ids = torch.tensor(df['bookID'].map(book_to_idx).values, dtype=torch.long)
    ratings = torch.tensor(df['average_rating'].values, dtype=torch.float)
    
    return user_ids, book_ids, ratings, user_to_idx, book_to_idx, df

# Define custom Dataset class
class BookDataset(Dataset):
    def __init__(self, user_ids, book_ids, ratings):
        self.user_ids = user_ids
        self.book_ids = book_ids
        self.ratings = ratings

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, idx):
        return self.user_ids[idx], self.book_ids[idx], self.ratings[idx]

# Model definition
class RecModel(nn.Module):
    def __init__(self, num_users, num_books, embedding_dim):
        super(RecModel, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.book_embedding = nn.Embedding(num_books, embedding_dim)
        self.fc = nn.Linear(embedding_dim * 2, 1)  # Concatenate user and book embeddings

    def forward(self, user_id, book_id):
        user_embed = self.user_embedding(user_id)
        book_embed = self.book_embedding(book_id)
        
        x = torch.cat([user_embed, book_embed], dim=-1)
        return self.fc(x).squeeze()

# Training function
def train_model(model, dataset, epochs, lr, batch_size):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(epochs):
        total_loss = 0
        for user_ids, book_ids, ratings in dataloader:
            predictions = model(user_ids, book_ids)
            loss = loss_fn(predictions, ratings)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
        print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(dataloader)}')

# Function to generate recommendations based on user-defined rating threshold
def recommend(model, user_id, book_to_idx, df, rating_threshold=4.0, num_recommendations=5):
    book_scores = []
    for book, book_idx in book_to_idx.items():
        book_id_tensor = torch.tensor([book_idx], dtype=torch.long)
        user_id_tensor = torch.tensor([user_id], dtype=torch.long)
        
        score = model(user_id_tensor, book_id_tensor)
        book_scores.append((book, score.item()))
    
    # Filter books that meet or exceed the rating threshold
    recommended_books = [(book, score) for book, score in book_scores if score >= rating_threshold]
    
    # Sort and get top recommended books
    top_books = sorted(recommended_books, key=lambda x: x[1], reverse=True)[:num_recommendations]
    
    # Retrieve the book titles from the original dataframe
    top_book_titles = df[df['bookID'].isin([book[0] for book in top_books])]['title'].tolist()
    return top_book_titles

# Example usage
file_path = "C:\\Users\\91829\\Downloads\\dataset_zip\\dataset\\Book.csv"  # Path to your dataset file
user_ids, book_ids, ratings, user_to_idx, book_to_idx, df = load_data(file_path)

# Create dataset
dataset = BookDataset(user_ids, book_ids, ratings)

# Initialize the model
embedding_dim = 32
num_users = len(user_to_idx)
num_books = len(book_to_idx)
model = RecModel(num_users, num_books, embedding_dim)

# Train the model
train_model(model, dataset, epochs=5, lr=0.001, batch_size=16)

# Prompt user for rating threshold and recommend books
rating_threshold = float(input("Enter your preferred minimum average rating (e.g., 4.0): "))
user_id = user_to_idx[1]  # Assuming user with ID 1
recommendations = recommend(model, user_id, book_to_idx, df, rating_threshold=rating_threshold, num_recommendations=5)

print(f"Top recommendations for user with a minimum rating of {rating_threshold}:", recommendations)


Epoch 1/5, Loss: 6.310076052751178
Epoch 2/5, Loss: 0.7211456100895316
Epoch 3/5, Loss: 0.21767909295492988
Epoch 4/5, Loss: 0.15106595597690206
Epoch 5/5, Loss: 0.13186917024471895


Enter your preferred minimum average rating (e.g., 4.0):  4.0


Top recommendations for user with a minimum rating of 4.0: ['A Book of Common Prayer', 'The Assignation: Stories', 'Goddess of the Sea (Goddess Summoning  #1)', "Sunny Chandler's Return", 'Practical DV Filmmaking']


### BOOK recommendation using REPTILE

In [20]:
# Load your dataset from a CSV file
df = pd.read_csv('book.csv')  # Replace 'your_dataset.csv' with your actual file name

# Create user and book index mappings
user_to_idx = {user: idx for idx, user in enumerate(df['userId'].unique())}
book_to_idx = {book: idx for idx, book in enumerate(df['bookID'].unique())}

# Recommender Model Definition
class RecommenderModel(nn.Module):
    def __init__(self, n_users, n_books, embedding_dim=8):
        super(RecommenderModel, self).__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.book_embedding = nn.Embedding(n_books, embedding_dim)
        self.fc = nn.Linear(embedding_dim * 2, 1)  # Output layer for rating prediction

    def forward(self, user_ids, book_ids):
        user_embeds = self.user_embedding(user_ids)
        book_embeds = self.book_embedding(book_ids)
        concatenated = torch.cat([user_embeds, book_embeds], dim=-1)
        return self.fc(concatenated)

# Reptile Class Definition
class Reptile:
    def __init__(self, model, inner_lr, inner_steps):
        self.model = model
        self.inner_lr = inner_lr
        self.inner_steps = inner_steps

    def inner_update(self, task_data, task_labels):
        user_ids, book_ids = task_data  # Unpack the task data
        # Create a new instance of the model for inner update
        task_model = RecommenderModel(n_users, n_books)
        task_model.load_state_dict(self.model.state_dict())
        optimizer = optim.SGD(task_model.parameters(), lr=self.inner_lr)

        for _ in range(self.inner_steps):
            # Forward pass and loss calculation
            predictions = task_model(user_ids, book_ids)
            loss = nn.MSELoss()(predictions.squeeze(), task_labels.float())  # Adjust to squeeze for loss

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        return task_model

    def train(self, tasks):
        batch_task_models = []
        for task_data, task_labels in tasks:
            task_model = self.inner_update(task_data, task_labels)
            batch_task_models.append(task_model)

        # Here you can implement the meta-update logic for model aggregation across tasks

# Sample task sampling function
def sample_task(df, user_to_idx, book_to_idx, task_size=5):
    """Sample a task of user and book pairs."""
    sampled_users = df['userId'].sample(task_size).map(user_to_idx).astype(int)  # Ensure integer type
    
    # Sample books
    sampled_books = df['bookID'].sample(task_size).map(book_to_idx)
    sampled_books = pd.to_numeric(sampled_books, errors='coerce')  # Convert to numeric, setting non-convertible values to NaN
    sampled_books = sampled_books.dropna().astype(int)  # Drop NaN values and convert to integer

   

    # Ensure that the index of sampled_books corresponds to df
    if len(sampled_users) != len(sampled_books):
        print("Warning: Mismatch in number of users and books!")
        return None, None  # Or handle the size mismatch according to your logic

    # Get labels and convert to numeric
    labels = df.loc[sampled_books.index, 'average_rating'].values
    labels = pd.to_numeric(labels, errors='coerce')  # Convert to numeric

    # Use NumPy to filter out NaN values
    labels = labels[~np.isnan(labels)]  # Keep only non-NaN labels

    # Convert to PyTorch tensors
    try:
        user_tensor = torch.tensor(sampled_users.values, dtype=torch.long)
        book_tensor = torch.tensor(sampled_books.values, dtype=torch.long)
        labels_tensor = torch.tensor(labels, dtype=torch.float32)
    except Exception as e:
        
        raise e  # Re-raise the exception to stop execution

    return (user_tensor, book_tensor), labels_tensor



# Hyperparameters
num_epochs = 10
tasks_per_epoch = 5
task_size = 5

# Initialize model
n_users = len(user_to_idx)
n_books = len(book_to_idx)
model = RecommenderModel(n_users, n_books)

# Set hyperparameters for Reptile
inner_lr = 0.01  # Inner learning rate
inner_steps = 5  # Inner update steps

# Initialize the Reptile instance
reptile = Reptile(model, inner_lr, inner_steps)

# Training loop
for epoch in range(num_epochs):
    tasks = []
    for _ in range(tasks_per_epoch):
        task_data, task_labels = sample_task(df, user_to_idx, book_to_idx, task_size=task_size)
        tasks.append((task_data, task_labels))
    
    reptile.train(tasks)
    print(f"Epoch {epoch + 1}/{num_epochs} completed.")

# Function to recommend books for a specific user
# Function to recommend books for a specific user based on a rating threshold
# Ensure 'average_rating' is converted to a float and replace non-numeric values with NaN
df['average_rating'] = pd.to_numeric(df['average_rating'], errors='coerce')
df['average_rating'] = df['average_rating'].fillna(df['average_rating'].mean())  # Fill NaNs with the mean rating

# Function to recommend books for a specific user based on a rating threshold
def recommend_books(user_id, min_avg_rating=3.5, n_recommendations=3):
    # Convert user_id to internal index
    user_idx = user_to_idx.get(user_id)
    if user_idx is None:
        print("User ID not found in dataset.")
        return None

    # Filter books based on the average rating threshold
    high_rating_books = df[df['average_rating'] >= min_avg_rating]
    if high_rating_books.empty:
        print(f"No books found with an average rating of {min_avg_rating} or higher.")
        return None

    # Map book IDs to indices and keep only those with valid book indices
    valid_books = high_rating_books['bookID'].apply(lambda book: book_to_idx.get(book)).dropna().astype(int)
    book_indices = torch.tensor(valid_books.values, dtype=torch.long)
    
    # Prepare user input tensor with the same size as book_indices
    user_input = torch.full((book_indices.size(0),), user_idx, dtype=torch.long)
    
    # Predict ratings for each book
    with torch.no_grad():
        predictions = model(user_input, book_indices).squeeze()
    
    # Sort and select top N recommendations
    top_indices = torch.argsort(predictions, descending=True)[:n_recommendations]
    recommended_books = high_rating_books.iloc[top_indices.numpy()]
    
    return recommended_books[['bookID', 'title', 'average_rating']]

# Example usage:
min_avg_rating = float(input("Enter the minimum average rating for recommendations: "))
recommended_books = recommend_books(user_id=1, min_avg_rating=min_avg_rating, n_recommendations=3)
print("\nRecommended Books based on rating:")
print(recommended_books)


Epoch 1/10 completed.
Epoch 2/10 completed.
Epoch 3/10 completed.
Epoch 4/10 completed.
Epoch 5/10 completed.
Epoch 6/10 completed.
Epoch 7/10 completed.
Epoch 8/10 completed.
Epoch 9/10 completed.
Epoch 10/10 completed.


Enter the minimum average rating for recommendations:  4



Recommended Books based on rating:
      bookID                                              title  \
8856   34247                 200 Quilt Blocks: To Mix and Match   
3106   11431       The Red Gloves Collection (Red Gloves  #1-4)   
481     1533  The Suppliant Maidens/The Persians/Seven again...   

      average_rating  
8856            4.09  
3106            4.49  
481             4.10  


### MUSIC recommendation using REPTILE

In [21]:
# Load your dataset from a CSV file
df = pd.read_csv("C://Users//91829//Downloads//dataset_zip//dataset//Last.fm_data.csv")  # Replace 'music.csv' with your actual file name

# Create user and track index mappings
user_to_idx = {user: idx for idx, user in enumerate(df['Username'].unique())}
track_to_idx = {track: idx for idx, track in enumerate(df['Track'].unique())}

# Recommender Model Definition
class RecommenderModel(nn.Module):
    def __init__(self, n_users, n_tracks, embedding_dim=8):
        super(RecommenderModel, self).__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.track_embedding = nn.Embedding(n_tracks, embedding_dim)
        self.fc = nn.Linear(embedding_dim * 2, 1)  # Output layer for rating prediction

    def forward(self, user_ids, track_ids):
        user_embeds = self.user_embedding(user_ids)
        track_embeds = self.track_embedding(track_ids)
        concatenated = torch.cat([user_embeds, track_embeds], dim=-1)
        return self.fc(concatenated)

# Reptile Class Definition
class Reptile:
    def __init__(self, model, inner_lr, inner_steps):
        self.model = model
        self.inner_lr = inner_lr
        self.inner_steps = inner_steps

    def inner_update(self, task_data, task_labels):
        user_ids, track_ids = task_data  # Unpack the task data
        # Create a new instance of the model for inner update
        task_model = RecommenderModel(n_users, n_tracks)
        task_model.load_state_dict(self.model.state_dict())
        optimizer = optim.SGD(task_model.parameters(), lr=self.inner_lr)

        for _ in range(self.inner_steps):
            # Forward pass and loss calculation
            predictions = task_model(user_ids, track_ids)
            loss = nn.MSELoss()(predictions.squeeze(), task_labels.float())  # Adjust to squeeze for loss

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        return task_model

    def train(self, tasks):
        for task_data, task_labels in tasks:
            task_model = self.inner_update(task_data, task_labels)

# Sample task sampling function
def sample_task(df, user_to_idx, track_to_idx, task_size=5):
    """Sample a task of user and track pairs."""
    sampled_users = df['Username'].sample(task_size).map(user_to_idx).astype(int)
    sampled_tracks = df['Track'].sample(task_size).map(track_to_idx).astype(int)

    # Ensure that the sampled indices are valid
    sampled_users = sampled_users[~sampled_users.isna()]
    sampled_tracks = sampled_tracks[~sampled_tracks.isna()]

    # Get labels
    labels = df.loc[sampled_users.index, 'rating'].values
    labels = pd.to_numeric(labels, errors='coerce')
    labels = labels[~np.isnan(labels)]

    if len(sampled_users) != len(sampled_tracks) or len(sampled_users) != len(labels):
        print("Warning: Mismatch in number of users, tracks, and labels!")
        return None, None  # Or handle the size mismatch according to your logic

    # Convert to PyTorch tensors
    user_tensor = torch.tensor(sampled_users.values, dtype=torch.long)
    track_tensor = torch.tensor(sampled_tracks.values, dtype=torch.long)
    labels_tensor = torch.tensor(labels, dtype=torch.float32)

    return (user_tensor, track_tensor), labels_tensor

# Hyperparameters
num_epochs = 10
tasks_per_epoch = 5
task_size = 5

# Initialize model
n_users = len(user_to_idx)
n_tracks = len(track_to_idx)
model = RecommenderModel(n_users, n_tracks)

# Set hyperparameters for Reptile
inner_lr = 0.01  # Inner learning rate
inner_steps = 5  # Inner update steps

# Initialize the Reptile instance
reptile = Reptile(model, inner_lr, inner_steps)

# Training loop
for epoch in range(num_epochs):
    tasks = []
    for _ in range(tasks_per_epoch):
        task_data, task_labels = sample_task(df, user_to_idx, track_to_idx, task_size=task_size)
        if task_data is not None and task_labels is not None:
            tasks.append((task_data, task_labels))
    
    if tasks:
        reptile.train(tasks)
        print(f"Epoch {epoch + 1}/{num_epochs} completed.")

# Function to recommend tracks for a specific user
def recommend_tracks(username, n_recommendations=3):
    user_idx = user_to_idx[username]
    # Get all track indices
    track_indices = torch.tensor(list(track_to_idx.values()))
    # Prepare the user input
    user_input = torch.full((track_indices.size(0),), user_idx)
    # Get predictions
    with torch.no_grad():
        predictions = model(user_input, track_indices).squeeze()
    # Get top n recommendations
    top_indices = torch.argsort(predictions, descending=True)[:n_recommendations]
    recommended_tracks = df.iloc[top_indices.numpy()]
    return recommended_tracks[['Track', 'Artist', 'Album', 'rating']]

# Example of recommending tracks for a user
recommended_tracks = recommend_tracks('Babs_05')
print("\nRecommended Tracks for Babs_05:")
print(recommended_tracks)


Unique artists available:
1. Isobel Campbell
2. The Coral
3. Gidge
4. LNZNDRF
5. Goat Girl
6. Everything Everything
7. serpentwithfeet
8. Giggs
9. The Koreatown Oddity
10. Billy Ocean
11. Pale Waves
12. Khruangbin
13. Bill Callahan
14. Clean Bandit
15. Rod Thomas
16. Fela Kuti
17. Jellyfish
18. The Box Tops
19. Rudimental
20. Kacey Musgraves
21. Florrie
22. ROY3LS
23. Black M
24. Machel Montano
25. LÉON
26. Nelson Freitas
27. Lonely The Brave
28. Charlie Sloth
29. Karen Harding
30. Future Utopia
31. Stephen Oaks
32. Sonny Fodera
33. Röyksopp
34. R3hab
35. Aluna
36. Rat City
37. Pink Sweat$
38. Elliphant
39. Rebecca Black
40. Crafty 893
41. Anitta
42. Maluma
43. Imelda May
44. Lil Peep
45. araabMUZIK
46. Gary Bartz
47. Gentleman's Dub Club
48. The Smashing Pumpkins
49. Wu-Tang Clan
50. Salt-N-Pepa
51. Felt
52. Roméo Elvis
53. Nick Cave & the Bad Seeds
54. Fat Joe
55. Dr. Dre
56. Cage & Camu
57. Postman
58. Starflam
59. LL Cool J
60. Rodríguez
61. Mr. Vegas
62. Ghostface Killah
63. Groun

Enter the name of the artist from the list above:  Timo Lassy


IndexError: index out of range in self

### MOVIE recommendation using REPTILE

In [60]:
# Load your dataset from a CSV file
df = pd.read_csv("C://Users//91829//Downloads//dataset_zip//dataset//movie.csv")  # Update with your actual file name

# Check the columns in the DataFrame
print("Columns in the DataFrame:", df.columns.tolist())

# Create user and movie index mappings
user_to_idx = {user: idx for idx, user in enumerate(df['userId'].unique())}
movie_to_idx = {movie_id: idx for idx, movie_id in enumerate(df['movieId'].unique())}

# Recommender Model Definition
class RecommenderModel(nn.Module):
    def __init__(self, n_users, n_movies, embedding_dim=8):
        super(RecommenderModel, self).__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.movie_embedding = nn.Embedding(n_movies, embedding_dim)
        self.fc = nn.Linear(embedding_dim * 2, 1)  # Output layer for rating prediction

    def forward(self, user_ids, movie_ids):
        user_embeds = self.user_embedding(user_ids)
        movie_embeds = self.movie_embedding(movie_ids)
        concatenated = torch.cat([user_embeds, movie_embeds], dim=-1)
        return self.fc(concatenated)

# Reptile Class Definition
class Reptile:
    def __init__(self, model, inner_lr, inner_steps):
        self.model = model
        self.inner_lr = inner_lr
        self.inner_steps = inner_steps

    def inner_update(self, task_data, task_labels):
        user_ids, movie_ids = task_data  # Unpack the task data
        task_model = RecommenderModel(n_users, n_movies)
        task_model.load_state_dict(self.model.state_dict())
        optimizer = optim.SGD(task_model.parameters(), lr=self.inner_lr)

        for _ in range(self.inner_steps):
            predictions = task_model(user_ids, movie_ids)
            loss = nn.MSELoss()(predictions.squeeze(), task_labels.float())  # Adjust to squeeze for loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        return task_model

    def train(self, tasks):
        for task_data, task_labels in tasks:
            task_model = self.inner_update(task_data, task_labels)

# Sample task sampling function
def sample_task(df, user_to_idx, movie_to_idx, task_size=5):
    """Sample a task of user and movie pairs."""
    sampled_users = df['userId'].sample(task_size).map(user_to_idx).astype(int)
    sampled_movies = df['movieId'].sample(task_size).map(movie_to_idx).astype(int)

    # Ensure that the sampled indices are valid
    sampled_users = sampled_users[~sampled_users.isna()]
    sampled_movies = sampled_movies[~sampled_movies.isna()]

    # Get labels (ratings)
    labels = df.loc[sampled_users.index, 'rating'].values
    labels = pd.to_numeric(labels, errors='coerce')
    labels = labels[~np.isnan(labels)]

    if len(sampled_users) != len(sampled_movies) or len(sampled_users) != len(labels):
        print("Warning: Mismatch in number of users, movies, and labels!")
        return None, None

    # Convert to PyTorch tensors
    user_tensor = torch.tensor(sampled_users.values, dtype=torch.long)
    movie_tensor = torch.tensor(sampled_movies.values, dtype=torch.long)
    labels_tensor = torch.tensor(labels, dtype=torch.float32)

    return (user_tensor, movie_tensor), labels_tensor

# Hyperparameters
num_epochs = 10
tasks_per_epoch = 5
task_size = 5

# Initialize model
n_users = len(user_to_idx)
n_movies = len(movie_to_idx)
model = RecommenderModel(n_users, n_movies)

# Set hyperparameters for Reptile
inner_lr = 0.01  # Inner learning rate
inner_steps = 5  # Inner update steps

# Initialize the Reptile instance
reptile = Reptile(model, inner_lr, inner_steps)

# Training loop
for epoch in range(num_epochs):
    tasks = []
    for _ in range(tasks_per_epoch):
        task_data, task_labels = sample_task(df, user_to_idx, movie_to_idx, task_size=task_size)
        if task_data is not None and task_labels is not None:
            tasks.append((task_data, task_labels))
    
    if tasks:
        reptile.train(tasks)
        print(f"Epoch {epoch + 1}/{num_epochs} completed.")

# Function to recommend movies for a specific user
# Function to recommend movies for a specific user
def recommend_movies(user_id, n_recommendations=3):
    if user_id not in user_to_idx:
        print(f"User ID {user_id} not found.")
        return pd.DataFrame()  # Return empty DataFrame if user not found

    user_idx = user_to_idx[user_id]
    # Get all movie indices
    movie_indices = torch.tensor(list(movie_to_idx.values()))
    # Prepare the user input
    user_input = torch.full((movie_indices.size(0),), user_idx)
    
    # Get predictions
    with torch.no_grad():
        predictions = model(user_input, movie_indices).squeeze()
    
    # Get top n recommendations
    top_indices = torch.argsort(predictions, descending=True)[:n_recommendations]
    
    # Get recommended movieIds
    recommended_movie_ids = movie_indices[top_indices.numpy()]
    
    # Match movieIds back to the original DataFrame
    recommended_movies = df[df['movieId'].isin(recommended_movie_ids.numpy())]
    
    if recommended_movies.empty:
        print("No recommended movies found for User", user_id)
    return recommended_movies[['movieId', 'rating']]  # Change to desired columns

# Example of recommending movies for a user
recommended_movies = recommend_movies(3)  # Replace with the userId you want to recommend movies for
print("\nRecommended Movies for User 1:")
print(recommended_movies)



Columns in the DataFrame: ['userId', 'movieId', 'rating', 'genres']
Epoch 1/10 completed.
Epoch 2/10 completed.
Epoch 3/10 completed.
Epoch 4/10 completed.
Epoch 5/10 completed.
Epoch 6/10 completed.
Epoch 7/10 completed.
Epoch 8/10 completed.
Epoch 9/10 completed.
Epoch 10/10 completed.

Recommended Movies for User 1:
       movieId  rating
805       4700     3.0
1939      4700     3.0
2972      4033     4.0
3569      4700     4.0
7187      4700     3.0
10080     4033     3.0
10563     4700     3.0
11390     4700     4.0
17970     4033     3.0
18092     4033     3.0
18132     4033     3.0
21438     4033     4.0
21706     4700     3.0
25707     4700     3.0


### MOVIE recommendation using NCF

In [1]:
# Load your dataset from a CSV file
df = pd.read_csv("C://Users//91829//Downloads//dataset_zip//dataset//movie.csv")  # Update with your actual file name

# Check the columns in the DataFrame
print("Columns in the DataFrame:", df.columns.tolist())

# Create user and movie index mappings
user_to_idx = {user: idx for idx, user in enumerate(df['userId'].unique())}
movie_to_idx = {movie_id: idx for idx, movie_id in enumerate(df['movieId'].unique())}

# Neural Collaborative Filtering Model Definition
class NCFModel(nn.Module):
    def __init__(self, n_users, n_movies, embedding_dim=8, hidden_layers=[64, 32, 16, 8]):
        super(NCFModel, self).__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.movie_embedding = nn.Embedding(n_movies, embedding_dim)
        
        # Define a feed-forward neural network
        layers = []
        input_dim = embedding_dim * 2
        for output_dim in hidden_layers:
            layers.append(nn.Linear(input_dim, output_dim))
            layers.append(nn.ReLU())
            input_dim = output_dim
        layers.append(nn.Linear(input_dim, 1))  # Final output layer for rating prediction
        self.fc_layers = nn.Sequential(*layers)

    def forward(self, user_ids, movie_ids):
        user_embeds = self.user_embedding(user_ids)
        movie_embeds = self.movie_embedding(movie_ids)
        concatenated = torch.cat([user_embeds, movie_embeds], dim=-1)
        return self.fc_layers(concatenated)

# Training the NCF Model
def train_ncf_model(model, df, user_to_idx, movie_to_idx, num_epochs=10, lr=0.001, batch_size=32):
    # Define optimizer and loss function
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # Convert data into torch tensors
    user_ids = torch.tensor(df['userId'].map(user_to_idx).values, dtype=torch.long)
    movie_ids = torch.tensor(df['movieId'].map(movie_to_idx).values, dtype=torch.long)
    ratings = torch.tensor(df['rating'].values, dtype=torch.float32)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        for i in range(0, len(user_ids), batch_size):
            user_batch = user_ids[i:i+batch_size]
            movie_batch = movie_ids[i:i+batch_size]
            ratings_batch = ratings[i:i+batch_size]

            # Forward pass
            predictions = model(user_batch, movie_batch).squeeze()
            loss = criterion(predictions, ratings_batch)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
        
        avg_loss = epoch_loss / (len(user_ids) // batch_size)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")

# Initialize model
n_users = len(user_to_idx)
n_movies = len(movie_to_idx)
embedding_dim = 8
hidden_layers = [64, 32, 16, 8]  # Define the layers for the NCF model
model = NCFModel(n_users, n_movies, embedding_dim=embedding_dim, hidden_layers=hidden_layers)

# Train the NCF model
train_ncf_model(model, df, user_to_idx, movie_to_idx, num_epochs=10, lr=0.001, batch_size=32)

# Function to recommend movies for a specific user using the trained NCF model
def recommend_movies_ncf(user_id, model, n_recommendations=3):
    if user_id not in user_to_idx:
        print(f"User ID {user_id} not found.")
        return pd.DataFrame()  # Return empty DataFrame if user not found

    user_idx = user_to_idx[user_id]
    # Get all movie indices
    movie_indices = torch.tensor(list(movie_to_idx.values()))
    # Prepare the user input
    user_input = torch.full((movie_indices.size(0),), user_idx)
    
    # Get predictions
    model.eval()
    with torch.no_grad():
        predictions = model(user_input, movie_indices).squeeze()
    
    # Get top n recommendations
    top_indices = torch.argsort(predictions, descending=True)[:n_recommendations]
    
    # Get recommended movieIds
    recommended_movie_ids = movie_indices[top_indices.numpy()]
    
    # Match movieIds back to the original DataFrame
    recommended_movies = df[df['movieId'].isin(recommended_movie_ids.numpy())]
    
    if recommended_movies.empty:
        print("No recommended movies found for User", user_id)
    return recommended_movies[['movieId', 'rating']]  # Change to desired columns

# Example of recommending movies for a user
recommended_movies_ncf = recommend_movies_ncf(3, model)  # Replace with the userId you want to recommend movies for
print("\nRecommended Movies for User 1:")
print(recommended_movies_ncf)


Columns in the DataFrame: ['userId', 'movieId', 'rating', 'genres']
Epoch 1/10, Loss: 2.3078
Epoch 2/10, Loss: 1.2557
Epoch 3/10, Loss: 1.1494
Epoch 4/10, Loss: 1.0875
Epoch 5/10, Loss: 1.0339
Epoch 6/10, Loss: 0.9833
Epoch 7/10, Loss: 0.9354
Epoch 8/10, Loss: 0.8934
Epoch 9/10, Loss: 0.8554
Epoch 10/10, Loss: 0.8226

Recommended Movies for User 1:
       movieId  rating
181        266     5.0
840        266     5.0
1522       266     5.0
2575       266     5.0
3184       266     3.0
3232       266     4.0
3302       266     1.0
3394       266     4.0
4647      2186     3.5
5180       266     4.0
6159       266     4.0
6382       266     4.0
6679      2186     5.0
7693       266     4.0
9191       266     1.0
9665       266     3.5
10376      266     3.5
12065     2186     3.0
12490     2186     3.0
14421      266     3.0
15503      266     4.0
18479      266     4.0
18709     2186     4.0
19960      266     5.0
22031      266     5.0
22406      266     2.0
22611     2186     3.5
22910

### MUSIC recommendation using NCF

In [10]:
# Load your dataset
df = pd.read_csv("C://Users//91829//Downloads//dataset_zip//dataset//Last.fm_data.csv")

# Create user and track index mappings
user_to_idx = {user: idx for idx, user in enumerate(df['Username'].unique())}
track_to_idx = {track: idx for idx, track in enumerate(df['Track'].unique())}

# Define Neural Collaborative Filtering Model
class NCFModel(nn.Module):
    def __init__(self, n_users, n_tracks, embedding_dim=16, hidden_dims=[64, 32, 16]):
        super(NCFModel, self).__init__()
        # Embeddings for users and tracks
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.track_embedding = nn.Embedding(n_tracks, embedding_dim)

        # Define fully connected layers
        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_dim * 2, hidden_dims[0]),
            nn.ReLU(),
            nn.Linear(hidden_dims[0], hidden_dims[1]),
            nn.ReLU(),
            nn.Linear(hidden_dims[1], hidden_dims[2]),
            nn.ReLU()
        )
        
        # Final layer to output a single rating prediction
        self.output_layer = nn.Linear(hidden_dims[2], 1)

    def forward(self, user_ids, track_ids):
        user_embeds = self.user_embedding(user_ids)
        track_embeds = self.track_embedding(track_ids)
        # Concatenate user and track embeddings
        x = torch.cat([user_embeds, track_embeds], dim=-1)
        # Pass through fully connected layers
        x = self.fc_layers(x)
        return self.output_layer(x).squeeze()  # Output single rating prediction

# Sample task sampling function
def sample_task(df, user_to_idx, track_to_idx, task_size=5):
    """Sample a task of user and track pairs."""
    sampled_users = df['Username'].sample(task_size).map(user_to_idx).astype(int)
    sampled_tracks = df['Track'].sample(task_size).map(track_to_idx).astype(int)

    # Ensure that the sampled indices are valid
    sampled_users = sampled_users[~sampled_users.isna()]
    sampled_tracks = sampled_tracks[~sampled_tracks.isna()]

    # Get labels
    labels = df.loc[sampled_users.index, 'rating'].values
    labels = pd.to_numeric(labels, errors='coerce')
    labels = labels[~np.isnan(labels)]

    if len(sampled_users) != len(sampled_tracks) or len(sampled_users) != len(labels):
        print("Warning: Mismatch in number of users, tracks, and labels!")
        return None, None  # Or handle the size mismatch according to your logic

    # Convert to PyTorch tensors
    user_tensor = torch.tensor(sampled_users.values, dtype=torch.long)
    track_tensor = torch.tensor(sampled_tracks.values, dtype=torch.long)
    labels_tensor = torch.tensor(labels, dtype=torch.float32)

    return (user_tensor, track_tensor), labels_tensor

# Hyperparameters
embedding_dim = 16
num_epochs = 10
tasks_per_epoch = 5
task_size = 5

# Initialize model
n_users = len(user_to_idx)
n_tracks = len(track_to_idx)
model = NCFModel(n_users, n_tracks, embedding_dim=embedding_dim)

# Optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_function = nn.MSELoss()

# Training loop
for epoch in range(num_epochs):
    total_loss = 0
    for _ in range(tasks_per_epoch):
        task_data, task_labels = sample_task(df, user_to_idx, track_to_idx, task_size=task_size)
        if task_data is None or task_labels is None:
            continue
        
        user_ids, track_ids = task_data
        optimizer.zero_grad()
        
        # Forward pass
        predictions = model(user_ids, track_ids)
        loss = loss_function(predictions, task_labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    avg_loss = total_loss / tasks_per_epoch
    print(f"Epoch {epoch + 1}/{num_epochs}, Average Loss: {avg_loss:.4f}")

# Function to recommend tracks for a specific user
def recommend_tracks(username, n_recommendations=3):
    user_idx = user_to_idx[username]
    # Get all track indices
    track_indices = torch.tensor(list(track_to_idx.values()))
    # Prepare the user input
    user_input = torch.full((track_indices.size(0),), user_idx, dtype=torch.long)
    
    # Get predictions
    with torch.no_grad():
        predictions = model(user_input, track_indices).squeeze()
    
    # Get top n recommendations
    top_indices = torch.argsort(predictions, descending=True)[:n_recommendations]
    recommended_track_ids = [list(track_to_idx.keys())[i] for i in top_indices]
    
    # Filter recommended tracks from DataFrame
    recommended_tracks = df[df['Track'].isin(recommended_track_ids)]
    return recommended_tracks[['Track', 'Artist', 'Album', 'rating']]

# Example of recommending tracks for a user
recommended_tracks = recommend_tracks('Babs_05')
print("\nRecommended Tracks for Babs_05:")
print(recommended_tracks)


Epoch 1/10, Average Loss: 10.8480
Epoch 2/10, Average Loss: 14.0929
Epoch 3/10, Average Loss: 12.3185
Epoch 4/10, Average Loss: 13.1272
Epoch 5/10, Average Loss: 12.0197
Epoch 6/10, Average Loss: 9.2109
Epoch 7/10, Average Loss: 11.7861
Epoch 8/10, Average Loss: 11.9006
Epoch 9/10, Average Loss: 12.4079
Epoch 10/10, Average Loss: 10.9385

Recommended Tracks for Babs_05:
                                                Track             Artist  \
16564   Please, Please, Please Let Me Get What I Want         The Smiths   
131273  Please, Please, Please Let Me Get What I Want         The Smiths   
142691                          Citizen of the Planet  Alanis Morissette   
143058                                The Perfect One             16volt   

                                   Album  rating  
16564                  Louder Than Bombs     4.0  
131273                 Louder Than Bombs     3.0  
142691  Flavors Of Entanglement (Deluxe)     3.0  
143058                The Negative Space  

### BOOK recommendation using NCF

In [11]:
# Load your dataset from a CSV file
df = pd.read_csv('book.csv')  # Replace with your actual file name

# Create user and book index mappings
user_to_idx = {user: idx for idx, user in enumerate(df['userId'].unique())}
book_to_idx = {book: idx for idx, book in enumerate(df['bookID'].unique())}

# NCF Model Definition
class NCFModel(nn.Module):
    def __init__(self, n_users, n_books, embedding_dim=8, hidden_dim=16):
        super(NCFModel, self).__init__()
        # Embeddings for GMF and MLP
        self.user_embedding_gmf = nn.Embedding(n_users, embedding_dim)
        self.book_embedding_gmf = nn.Embedding(n_books, embedding_dim)
        
        self.user_embedding_mlp = nn.Embedding(n_users, embedding_dim)
        self.book_embedding_mlp = nn.Embedding(n_books, embedding_dim)

        # MLP layers
        self.mlp = nn.Sequential(
            nn.Linear(embedding_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU()
        )

        # Final layer
        self.fc = nn.Linear(embedding_dim + hidden_dim // 2, 1)

    def forward(self, user_ids, book_ids):
        # GMF pathway
        user_embeds_gmf = self.user_embedding_gmf(user_ids)
        book_embeds_gmf = self.book_embedding_gmf(book_ids)
        gmf_output = user_embeds_gmf * book_embeds_gmf

        # MLP pathway
        user_embeds_mlp = self.user_embedding_mlp(user_ids)
        book_embeds_mlp = self.book_embedding_mlp(book_ids)
        concatenated = torch.cat([user_embeds_mlp, book_embeds_mlp], dim=-1)
        mlp_output = self.mlp(concatenated)

        # Concatenate GMF and MLP outputs
        final_output = torch.cat([gmf_output, mlp_output], dim=-1)
        return self.fc(final_output)

# Reptile Class Definition
class NCF:
    def __init__(self, model, inner_lr, inner_steps):
        self.model = model
        self.inner_lr = inner_lr
        self.inner_steps = inner_steps

    def inner_update(self, task_data, task_labels):
        user_ids, book_ids = task_data
        task_model = NCFModel(n_users, n_books)  # Initialize a fresh task model
        task_model.load_state_dict(self.model.state_dict())  # Copy weights
        optimizer = optim.SGD(task_model.parameters(), lr=self.inner_lr)

        for _ in range(self.inner_steps):
            predictions = task_model(user_ids, book_ids)
            loss = nn.MSELoss()(predictions.squeeze(), task_labels.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        return task_model

    def train(self, tasks):
        for task_data, task_labels in tasks:
            task_model = self.inner_update(task_data, task_labels)
            # Meta-update can be implemented here if needed

# Task sampling function
def sample_task(df, user_to_idx, book_to_idx, task_size=5):
    sampled_users = df['userId'].sample(task_size).map(user_to_idx).astype(int)
    sampled_books = df['bookID'].sample(task_size).map(book_to_idx).dropna().astype(int)
    
    if len(sampled_users) != len(sampled_books):
        print("Warning: Mismatch in number of users and books!")
        return None, None

    labels = df.loc[sampled_books.index, 'average_rating'].values
    labels = pd.to_numeric(labels, errors='coerce')
    labels = labels[~np.isnan(labels)]

    user_tensor = torch.tensor(sampled_users.values, dtype=torch.long)
    book_tensor = torch.tensor(sampled_books.values, dtype=torch.long)
    labels_tensor = torch.tensor(labels, dtype=torch.float32)

    return (user_tensor, book_tensor), labels_tensor

# Hyperparameters
num_epochs = 10
tasks_per_epoch = 5
task_size = 5

# Initialize model
n_users = len(user_to_idx)
n_books = len(book_to_idx)
model = NCFModel(n_users, n_books)

# Set hyperparameters for Reptile
inner_lr = 0.01
inner_steps = 5

# Initialize the Reptile instance
ncf_mod = NCF(model, inner_lr, inner_steps)

# Training loop
for epoch in range(num_epochs):
    tasks = []
    for _ in range(tasks_per_epoch):
        task_data, task_labels = sample_task(df, user_to_idx, book_to_idx, task_size=task_size)
        if task_data is not None:
            tasks.append((task_data, task_labels))
    
    ncf_mod.train(tasks)
    print(f"Epoch {epoch + 1}/{num_epochs} completed.")

# Function to recommend books for a specific user
def recommend_books(user_id, n_recommendations=3):
    user_idx = user_to_idx[user_id]
    book_indices = torch.tensor(list(book_to_idx.values()))
    user_input = torch.full((book_indices.size(0),), user_idx, dtype=torch.long)
    
    with torch.no_grad():
        predictions = model(user_input, book_indices).squeeze()
    top_indices = torch.argsort(predictions, descending=True)[:n_recommendations]
    recommended_books = df.iloc[top_indices.numpy()]
    return recommended_books[['bookID', 'title', 'average_rating']]

# Example of recommending books for userId 1
recommended_books = recommend_books(1)
print("\nRecommended Books for User 1:")
print(recommended_books)


Epoch 1/10 completed.
Epoch 2/10 completed.
Epoch 3/10 completed.
Epoch 4/10 completed.
Epoch 5/10 completed.
Epoch 6/10 completed.
Epoch 7/10 completed.
Epoch 8/10 completed.
Epoch 9/10 completed.
Epoch 10/10 completed.

Recommended Books for User 1:
       bookID                              title average_rating
10325   41814                          I. Asimov            4.2
7552    28977  The Littles and the Lost Children           3.76
9593    38238                          Australia           4.07
