In [3]:
#|exporti
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import joblib
import pandas as pd
import streamlit as st

from streamlit_jupyter import StreamlitPatcher, tqdm

# Register Streamlit with Jupyter-compatible wrappers so `st.*` calls work in the notebook.
StreamlitPatcher().jupyter()

# Table used throughout the notebook; later cells read its 'User_id', 'Title'
# and 'categories' columns.
cleaned_df = pd.read_csv('cleaned_df.csv')

# Use the MPS backend when PyTorch reports it as available, otherwise the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

In [4]:
#|exporti
import scipy.sparse as sp
import torch

def load_sparse_matrix_to_tensor(path):
    """Read a SciPy ``.npz`` sparse matrix and return it as a dense float tensor.

    Args:
        path: Location of a file written by ``scipy.sparse.save_npz``.

    Returns:
        A ``torch.Tensor`` of dtype ``torch.float`` with the same shape as the
        stored matrix.

    Note:
        The matrix is fully densified in memory (rows x cols values), so this
        is only suitable for matrices whose dense form fits in RAM.
    """
    densified = sp.load_npz(path).toarray()
    return torch.tensor(densified, dtype=torch.float)


In [5]:
#|exporti
class CollabFiltModel(nn.Module):
    """Dot-product collaborative-filtering model.

    Holds one embedding table for users and one for items; the score for a
    (user, item) pair is the dot product of the two looked-up embeddings.
    """

    def __init__(self, num_users, num_items, emb_size=100):
        """Create the two embedding tables.

        Args:
            num_users: Number of rows in the user embedding table.
            num_items: Number of rows in the item embedding table.
            emb_size: Width of every embedding vector.
        """
        super().__init__()
        # Attribute names are part of the saved state dict, so they must not change.
        self.user_emb = nn.Embedding(num_embeddings=num_users, embedding_dim=emb_size)
        self.item_emb = nn.Embedding(num_embeddings=num_items, embedding_dim=emb_size)

    def forward(self, user, item):
        """Score each (user, item) pair.

        Args:
            user: Integer tensor of user indices.
            item: Integer tensor of item indices, paired element-wise with ``user``.

        Returns:
            The element-wise product of the two embeddings summed over dim 1.
            Assumes 1-D index batches, giving one score per pair (TODO confirm
            with the training code).
        """
        pairwise_product = self.user_emb(user) * self.item_emb(item)
        return torch.sum(pairwise_product, dim=1)


In [6]:
#|exporti
# Rebuild the network with the same architecture it was saved with: the
# embedding tables are sized from the distinct users / titles in cleaned_df.
loaded_model = CollabFiltModel(num_users=cleaned_df['User_id'].nunique(),
                               num_items=cleaned_df['Title'].nunique()).to(device)

# Load the model state dictionary. map_location remaps the stored tensors onto
# `device`, so a checkpoint written on a different accelerator (e.g. CUDA or
# MPS) still loads on a machine that does not have it.
# NOTE(review): torch.load is pickle-based; only load checkpoints you trust
# (consider weights_only=True on PyTorch versions that support it).
loaded_model.load_state_dict(
    torch.load('collab_filt_model_state_dict.pth', map_location=device)
)

# Ensure to switch the model to evaluation mode
loaded_model.eval()

# Load the fitted encoders (later cells use item_encoder to translate between
# titles and item indices).
# NOTE(review): joblib.load unpickles arbitrary objects; only load files from a
# trusted source.
user_encoder = joblib.load('user_encoder.joblib')
item_encoder = joblib.load('item_encoder.joblib')

In [7]:
#|exporti
# Inference only from here on: keep eval mode and move the model to the CPU.
loaded_model.eval()
loaded_model.to('cpu') ## Faster

# Extract item embeddings as a NumPy array (detach() drops autograd tracking).
item_embeddings = loaded_model.item_emb.weight.detach().cpu().numpy()

# Keep only the genres that have more than 20000 rows in cleaned_df.
# value_counts() is computed once and reused for both the filter and the index.
genre_counts = cleaned_df['categories'].value_counts()
popular_genres = genre_counts[genre_counts > 20000].index
filtered_df = cleaned_df[cleaned_df['categories'].isin(popular_genres)]

unique_genres = filtered_df['categories'].unique()

# Display the genres to the user
print("Please choose a genre from the following list:")
for i, genre in enumerate(unique_genres, 1):
    print(f"{i}. {genre}")

# Re-prompt until the answer is a whole number inside the menu range. Without
# this check, entering 0 would silently pick the last genre (index -1) and
# anything non-numeric or too large would raise.
while True:
    raw_choice = input("Enter the number corresponding to your choice: ")
    try:
        choice = int(raw_choice) - 1  # Subtract 1 to match the list index
    except ValueError:
        print("Please enter a whole number.")
        continue
    if 0 <= choice < len(unique_genres):
        break
    print(f"Please enter a number between 1 and {len(unique_genres)}.")

genre_choice = unique_genres[choice]

Please choose a genre from the following list:
1. ['Biography & Autobiography']
2. ['Religion']
3. ['Fiction']
4. ['Social Science']
5. ['Juvenile Nonfiction']
6. ['History']
7. ['Political Science']
8. ['Health & Fitness']
9. ['Cooking']
10. ['Philosophy']
11. ['Sports & Recreation']
12. ['Body, Mind & Spirit']
13. ['Juvenile Fiction']
14. ['Family & Relationships']
15. ['Science']
16. ['Business & Economics']
17. ['Computers']
18. ['Self-Help']
19. ['Young Adult Fiction']
Enter the number corresponding to your choice: 2


In [8]:
#|exporti
# Encoded titles for the chosen genre. cleaned_df repeats a title on every row
# that mentions it, so sampling rows directly could ask about the same book twice.
genre_titles = cleaned_df[cleaned_df['categories']==genre_choice]['Title']

# Draw up to 5 *distinct* titles. Weighting by row count keeps frequently
# occurring titles more likely to be shown, as plain row sampling did.
title_counts = genre_titles.value_counts()
sample_titles = title_counts.sample(
    n=min(5, len(title_counts)), weights=title_counts
).index.to_numpy()

# Human-readable titles for the prompts; sample_titles already holds the
# encoded values, so no transform() round trip is needed.
decoded_titles = item_encoder.inverse_transform(sample_titles)

user_ratings = {}
print('Rate these books 1-5')
for encoded_value, title in zip(sample_titles, decoded_titles):
    # Re-prompt until the score is a number in the advertised 1-5 range.
    while True:
        score = input(f'{title}: ')
        try:
            rating = float(score)
        except ValueError:
            print('Please enter a number from 1 to 5.')
            continue
        if 1 <= rating <= 5:
            break
        print('Please enter a number from 1 to 5.')

    # Keyed by encoded item index so it can index item_embeddings directly.
    user_ratings[encoded_value] = rating


Rate these books 1-5
God's Dawn for Every Darkness: Morning-Fresh Glimpses into His Gracious Heart: 3
Blessings: 4
Emotional Transformation: 5
Five Festal Garments: Christian Reflections on the Song of Songs, Ruth, Lamentations, Ecclesiastes and Esther (New Studies in Biblical Theology): 2
Mere Christianity: 3


In [9]:
#|exporti
from sklearn.linear_model import Ridge
import numpy as np

# Build the regression problem: one row per rated item (its embedding) and the
# score the user gave it as the target.
rated_item_indices = [*user_ratings]
X = item_embeddings[rated_item_indices]
y = np.fromiter(user_ratings.values(), dtype=float, count=len(user_ratings))

# Fit the ridge regression model (fit() returns the estimator itself).
ridge_model = Ridge(alpha=1.0).fit(X, y)

# The fitted coefficients stand in for this user's embedding. The intercept is
# not carried over; it would only shift every predicted score by a constant.
user_preferences = ridge_model.coef_


In [10]:
#|exporti
# Predict a score for every item: dot product of each item embedding with the
# user's preference vector.
predicted_ratings = np.dot(item_embeddings, user_preferences)

# Rank items from highest to lowest predicted score.
recommended_indices = np.argsort(-predicted_ratings)

# Drop the items the user already rated with one vectorised mask, then keep the
# first five. The ranking order is preserved, and this avoids a list-membership
# test for every item in the catalogue.
is_unrated = ~np.isin(recommended_indices, rated_item_indices)
top_recommendations = list(recommended_indices[is_unrated][:5])

# Decode the top recommended item indices to original IDs
top_recommended_item_ids = item_encoder.inverse_transform(top_recommendations)

print(top_recommended_item_ids)

['David Sylvian: The Last Romantic' 'Earthling'
 'MCSE NT Server 4 in the Enterprise Exam Cram Personal Trainer (Exam: 70-068)'
 'Spelling (Longman English guides)' 'The autobiography of a super-tramp']


In [11]:
# Export this notebook's code cells to a Python module with nbdev.
# This cell carries no `#|exporti` directive, so it is presumably left out of
# the generated module itself — TODO confirm against nbdev's export rules.
from nbdev.export import nb_export

# NOTE(review): with lib_path="./" and name="example" the output is presumably
# ./example.py — confirm. The notebook path is relative to the working directory.
nb_export("Streamlit App.ipynb", lib_path="./", name="example")