In [1]:
import numpy as np
import pandas as pd

In [2]:
rating_df = pd.read_csv("data/Ratings.csv", sep=";")
user_df = pd.read_csv("data/Users.csv", sep=";")
book_df = pd.read_csv("data/Books.csv", sep=";")

  user_df = pd.read_csv("data/Users.csv", sep=";")


In [3]:
# unique ids
user_ids = rating_df['User-ID'].unique()
book_ids = rating_df['ISBN'].unique()

# mapping to indices
user_map = {id:i for i,id in enumerate(user_ids)}
book_map = {id:i for i,id in enumerate(book_ids)}

num_users = len(user_ids)
num_books = len(book_ids)



In [9]:
k = 100  # latent dimensions

P = np.random.normal(scale=1./k, size=(num_users, k))
Q = np.random.normal(scale=1./k, size=(num_books, k))


In [5]:
global_mean = rating_df['Rating'].mean()

ratings_data = []

for _, row in rating_df.iterrows():
    u = user_map[row['User-ID']]
    b = book_map[row['ISBN']]
    r = row['Rating']-global_mean
    ratings_data.append((u, b, r))

In [6]:
len(ratings_data)

1149780

In [32]:
learning_rate = 0.025
reg = 0.02
epochs = 20

for epoch in range(epochs):
    total_error = 0
    
    for u, b, r in ratings_data:
        
        prediction = np.dot(P[u], Q[b])
        error = r - prediction
        
        total_error += error**2
        
        # update rules
        P[u] += learning_rate * (error * Q[b] - reg * P[u])
        Q[b] += learning_rate * (error * P[u] - reg * Q[b])
    
    print(f"Epoch {epoch+1}, Error: {total_error}")


Epoch 1, Error: 19280.569235427145
Epoch 2, Error: 19211.98727963728
Epoch 3, Error: 19147.612000600173
Epoch 4, Error: 19086.948970841866
Epoch 5, Error: 19029.566886945915
Epoch 6, Error: 18975.09040477732
Epoch 7, Error: 18923.193615234955
Epoch 8, Error: 18873.594110888218
Epoch 9, Error: 18826.047610397414
Epoch 10, Error: 18780.34311400251
Epoch 11, Error: 18736.29856366429
Epoch 12, Error: 18693.756979301445
Epoch 13, Error: 18652.583039538076
Epoch 14, Error: 18612.6600728732
Epoch 15, Error: 18573.8874236284
Epoch 16, Error: 18536.17815653889
Epoch 17, Error: 18499.45706451678
Epoch 18, Error: 18463.65894549851
Epoch 19, Error: 18428.727116455793
Epoch 20, Error: 18394.612135136056


In [15]:
import pickle


In [33]:
with open('P_list.pkl','wb') as f:
    pickle.dump(P,f)
with open('Q_list.pkl','wb') as f:
    pickle.dump(Q,f)

In [31]:
with open('P_list.pkl','rb') as f:
    P=pickle.load(f)
with open('Q_list.pkl','rb') as f:
    Q=pickle.load(f)

In [34]:
def predict(user_id, isbn):
    if user_id not in user_map:
        return None  
    
    if isbn not in book_map:
        return None 
    
    u = user_map[user_id]
    b = book_map[isbn]
    
    return np.dot(P[u], Q[b])

In [53]:
def recommend_books(user_id, top_n=5):
    
    if user_id not in user_map:
        return popularity_df.head(top_n).index.tolist()
        
    rated_books = rating_df[rating_df['User-ID'] == user_id]['ISBN'].values
    
    scores = []
    
    for isbn in book_ids:
        if isbn in rated_books:
            continue
        score = predict(user_id, isbn)
        scores.append((isbn, score))
    
    scores.sort(key=lambda x: x[1], reverse=True)
    
    top_books = scores[:top_n]
    
    recommendations = []
        
    for isbn, _ in top_books:
        if isbn in book_lookup.index:
            title = book_lookup.loc[isbn]['Title']
            author = book_lookup.loc[isbn]['Author']
            recommendations.append((title, author))
        
    return recommendations

In [39]:
popularity_df = pd.read_csv('data/data.csv')

In [54]:
print(recommend_books(999))

[('To Kill a Mockingbird', 'Harper Lee'), ('Mercy', 'Julie Garwood'), ('Children of Dune (Dune Chronicles, Book 3)', 'Frank Herbert'), ('Mama Makes Up Her Mind: And Other Dangers of Southern Living', 'Bailey White'), ('The Forest House', 'Marion Zimmer Bradley')]


In [55]:
model_data = {
    'P': P,
    'Q': Q,
    'user_map': user_map,
    'book_map': book_map,
    'book_df': book_df,
    'popular_books':popularity_df
}

with open('model_data.pkl', 'wb') as f:
    pickle.dump(model_data, f)