In [None]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3156223 sha256=d26e713fdd77fcf18e1f803ba9e7f652b783aafbad48960fa9f956d72d28a08c
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [None]:
from surprise import Dataset
from surprise import Reader
from surprise import SVD  # Singular value decomposition (matrix factorization) is the algorithm used for rating prediction
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise.model_selection import GridSearchCV

In [None]:
from google.colab import drive

# Mount Google Drive
# The CSV files read later in this notebook live under /content/drive/MyDrive,
# so this cell has to run before any of the loaders.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
# Loading the dataset
def loaddata(filename, base_dir='/content/drive/MyDrive/data-brm'):
    """Read ``<base_dir>/<filename>.csv`` into a DataFrame.

    Parameters
    ----------
    filename : str
        File name without the ``.csv`` extension (e.g. ``"ratings"``).
    base_dir : str or path-like, optional
        Directory containing the CSV. Defaults to the Google Drive folder this
        notebook was written against, so existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The parsed file, read as comma-separated and decoded as latin-1.
    """
    return pd.read_csv(f'{base_dir}/{filename}.csv', sep=',', encoding='latin-1')

def loaddata2(path='/content/drive/MyDrive/cleaned_book_data.csv'):
    """Read the cleaned book CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV. Defaults to the Google Drive path this notebook
        was written against, so ``loaddata2()`` behaves as before.

    Returns
    -------
    pandas.DataFrame
        The parsed file, read as comma-separated and decoded as latin-1.
    """
    return pd.read_csv(path, sep=',', encoding='latin-1')

# books comes from cleaned_book_data.csv; ratings comes from data-brm/ratings.csv.
books   = loaddata2()
ratings = loaddata("ratings")

In [None]:
# A notebook cell renders only its last expression, so a bare `books.head(5)`
# followed by another line is silently discarded; display it explicitly.
display(books.head(5))
books.shape

(10000, 11)

In [None]:
# Quick look at the first two rating rows and the column names.
ratings.head(2)

Unnamed: 0,user_id,book_id,rating
0,1,258,5
1,2,4081,4


In [None]:
# Cast the rating column to float (the preview above shows it loaded as integers).
ratings['rating'] = ratings['rating'].astype(float)

In [None]:
# Show the frame after the cast; pandas abbreviates long frames in the rendered output.
ratings

Unnamed: 0,user_id,book_id,rating
0,1,258,5.0
1,2,4081,4.0
2,2,260,5.0
3,2,9296,5.0
4,2,2318,3.0
...,...,...,...
5976474,49925,510,5.0
5976475,49925,528,4.0
5976476,49925,722,4.0
5976477,49925,949,5.0


In [None]:
# Number of ratings per user, as columns ['user_id', 'rating'] (the 'rating'
# column holds the count; the name is kept because later cells filter on it).
# rename_axis + reset_index(name=...) gives these column names on every pandas
# version. Renaming 'index' -> 'user_id' only works before pandas 2.0; from 2.0
# on, value_counts().reset_index() already returns ['user_id', 'count'] and the
# old mapping would turn the id column into 'rating'.
rating_users = (
    ratings['user_id']
    .value_counts()
    .rename_axis('user_id')
    .reset_index(name='rating')
)

In [None]:
# First two rows of the per-user rating counts.
rating_users.head(2)

Unnamed: 0,user_id,rating
0,12874,200
1,30944,200


In [None]:
# Number of ratings per book, as columns ['book_id', 'rating'] (the 'rating'
# column holds the count; the name is kept because later cells filter on it).
# rename_axis + reset_index(name=...) gives these column names on every pandas
# version. Renaming 'index' -> 'book_id' only works before pandas 2.0; from 2.0
# on, value_counts().reset_index() already returns ['book_id', 'count'] and the
# old mapping would turn the id column into 'rating'.
rating_books = (
    ratings['book_id']
    .value_counts()
    .rename_axis('book_id')
    .reset_index(name='rating')
)

In [None]:
# First two rows of the per-book rating counts.
rating_books.head(2)

Unnamed: 0,book_id,rating
0,1,22806
1,2,21850


In [None]:
# Restrict the ratings to users with more than MIN_RATINGS_PER_USER ratings and
# books with more than MIN_RATINGS_PER_BOOK ratings (both comparisons are strict).
MIN_RATINGS_PER_USER = 100
MIN_RATINGS_PER_BOOK = 5000

active_users = rating_users.loc[rating_users['rating'] > MIN_RATINGS_PER_USER, 'user_id']
popular_books = rating_books.loc[rating_books['rating'] > MIN_RATINGS_PER_BOOK, 'book_id']

# The counts come from rating_users / rating_books, which were computed earlier,
# so applying both masks at once selects the same rows as filtering by user and
# then by book. .copy() makes the result an independent frame, so later in-place
# edits do not operate on a slice of the original (SettingWithCopyWarning).
ratings = ratings[
    ratings['user_id'].isin(active_users) & ratings['book_id'].isin(popular_books)
].copy()

ratings

Unnamed: 0,user_id,book_id,rating
12,4,70,4.0
15,4,18,5.0
16,4,27,5.0
17,4,21,5.0
18,4,2,5.0
...,...,...,...
5976299,48801,1,5.0
5976302,48801,17,5.0
5976304,48801,20,5.0
5976309,48801,39,5.0


In [None]:
# Reassign instead of drop_duplicates(inplace=True): `ratings` was produced by
# boolean filtering, and mutating such a slice in place triggers pandas'
# SettingWithCopyWarning. The resulting rows are the same.
# Rows are dropped only when every column matches, so two different ratings by
# the same user for the same book are both kept.
ratings = ratings.drop_duplicates()
ratings

Unnamed: 0,user_id,book_id,rating
12,4,70,4.0
15,4,18,5.0
16,4,27,5.0
17,4,21,5.0
18,4,2,5.0
...,...,...,...
5976299,48801,1,5.0
5976302,48801,17,5.0
5976304,48801,20,5.0
5976309,48801,39,5.0


In [None]:
# creating a surprise object
# The frame is passed with its columns ordered user_id, book_id, rating.
# NOTE(review): every rating visible in this notebook's outputs lies between 2.0
# and 5.0 — confirm whether the real scale starts at 1 rather than 0 and adjust
# rating_scale if so.

reader = Reader(rating_scale=(0, 5))
data   = Dataset.load_from_df(ratings[['user_id','book_id','rating']], reader)

In [None]:
# Sanity check: how many ratings made it into the Surprise dataset after filtering.
num_ratings = len(data.raw_ratings)
print(f"Number of ratings in the dataset: {num_ratings}")


Number of ratings in the dataset: 1035940


In [None]:
# Hold out 20% of the ratings for evaluation; random_state=42 is passed so the split can be repeated.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [None]:
# Initialize and train SVD model
# The model is fitted on the training split only; the test split is kept for evaluation.
model = SVD(n_factors=50, random_state=42)  # edit necessary hyper-params here
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7a05ab202e60>

In [None]:
# Get predictions on the test set
# Each entry carries the true rating (r_ui) and the estimate (est), as the listing below shows.
test_predictions = model.test(testset)

In [None]:
# Show a sample only: the full list holds one Prediction per test rating and
# would flood the cell output.
test_predictions[:10]

[Prediction(uid=5100, iid=111, r_ui=3.0, est=3.5859370640316546, details={'was_impossible': False}),
 Prediction(uid=45764, iid=6, r_ui=5.0, est=4.094598112790237, details={'was_impossible': False}),
 Prediction(uid=47642, iid=5, r_ui=3.0, est=4.23863831824897, details={'was_impossible': False}),
 Prediction(uid=31871, iid=106, r_ui=4.0, est=3.8943851192289194, details={'was_impossible': False}),
 Prediction(uid=32094, iid=1, r_ui=3.0, est=3.7958940328621544, details={'was_impossible': False}),
 Prediction(uid=37244, iid=39, r_ui=2.0, est=4.7734846152533885, details={'was_impossible': False}),
 Prediction(uid=7495, iid=142, r_ui=2.0, est=3.575858387589593, details={'was_impossible': False}),
 Prediction(uid=13068, iid=30, r_ui=3.0, est=3.823657397758891, details={'was_impossible': False}),
 Prediction(uid=4256, iid=157, r_ui=4.0, est=4.07618305832029, details={'was_impossible': False}),
 Prediction(uid=31115, iid=32, r_ui=4.0, est=4.628599474560844, details={'was_impossible': False}),


In [None]:
# Calculate RMSE
# accuracy.rmse / accuracy.mae print the metric (see the cell output); the result is also kept in a variable.
rmse = accuracy.rmse(test_predictions)

# Calculate MAE (Mean Absolute Error)
mae = accuracy.mae(test_predictions)

RMSE: 0.8627
MAE:  0.6686


In [None]:
def get_user_rated_books(user_id, ratings_df=None):
    """Return the ids of the books that ``user_id`` has rated.

    Parameters
    ----------
    user_id
        Value matched against the ``user_id`` column.
    ratings_df : pandas.DataFrame, optional
        Frame with ``user_id`` and ``book_id`` columns. Defaults to the
        notebook-level ``ratings`` frame, so existing calls behave as before.

    Returns
    -------
    pandas.Series
        ``book_id`` values of the matching rows; empty when the user has none.
    """
    if ratings_df is None:
        ratings_df = ratings
    return ratings_df.loc[ratings_df['user_id'] == user_id, 'book_id']

In [None]:
def get_top_n_recommendations(user_id, n=10, exclude_rated=True):
    """Return the ``n`` books with the highest predicted rating for a user.

    Candidates are every ``book_id`` listed in ``rating_books``; each one is
    scored with ``model.predict(user_id, book_id).est``.

    Parameters
    ----------
    user_id
        User to recommend for.
    n : int, optional
        Maximum number of recommendations. Values <= 0 give an empty list.
    exclude_rated : bool, optional
        When True (default), books returned by ``get_user_rated_books(user_id)``
        are skipped so the user is not recommended something already rated.
        Pass False to score every candidate.

    Returns
    -------
    list of (book_id, predicted_rating) tuples, best first.
    """
    if n <= 0:
        return []

    # The lookup uses the notebook-level `ratings` frame, so only ratings still
    # present in that frame can be excluded.
    already_rated = set(get_user_rated_books(user_id)) if exclude_rated else set()

    scored = [
        (book_id, model.predict(user_id, book_id).est)
        for book_id in rating_books['book_id']
        if book_id not in already_rated
    ]

    # Highest predicted rating first; the sort is stable, so ties keep candidate order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:n]

In [None]:
user_id = 87  # Replace with the desired user ID
recomm = get_top_n_recommendations(user_id)

# Build the id -> title lookup once instead of scanning `books` for every
# recommendation. drop_duplicates keeps the first row per id, which is the row
# the previous `.values[0]` lookup selected.
# NOTE(review): the titles printed by the earlier run do not look like the
# most-rated books — confirm that books['book_id'] uses the same id space as
# ratings['book_id'].
title_by_id = books.drop_duplicates('book_id').set_index('book_id')['title']

for book_id, predicted_rating in recomm:
  # An id missing from `books` used to raise IndexError; print a placeholder instead.
  book_title = title_by_id.get(book_id, '<title not found>')

  print(book_id," ",book_title," ",predicted_rating)



189   Wacousta; or, A Tale of the Canadas   4.747963989318
135   A Buddhist Bible   4.650017508153979
39   I Want a Pony   4.6083321164386755
161   Baby Beluga   4.594276383390634
110   The Little Dog Laughed   4.586109932071154
155   How We Choose to Be Happy: The 9 Choices of Extremely Happy People--Their Secrets, Their Stories   4.5594801010087735
19   Parnassus on Wheels   4.532403906264138
89   Paul: His Letters and His Theology: An Introduction to Paul's Epistles   4.433949243437573
25   Toward Rational Exuberance: The Evolution of the Modern Stock Market   4.4198530071982685
7   Hawaii: An Uncommon History   4.396645397543814
