In [None]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import SVD
from collections import defaultdict
import random

In [None]:
# Load the raw ratings and put the columns in (user, item, rating) order;
# reindex() keeps exactly these three columns.
df = (
    pd.read_csv('csv/bid_userid_rating.csv')
    .reindex(columns=['userid', 'bid', 'rating'])
)
df

In [None]:
def get_top_n(uid, predictions, trainset, n=10):
    """Return the top-N recommendations per user, skipping books ``uid`` already rated.

    Args:
        uid: Raw id of the user the recommendations are meant for. Books this
            user has rated in ``trainset`` are excluded from their list.
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm. Each entry unpacks
            to ``(uid, iid, true_r, est, details)``.
        trainset: Trainset used to look up the books ``uid`` has already
            rated (via ``to_inner_uid``, ``ur`` and ``to_raw_iid``).
        n(int): The number of recommendations to output for each user.
            Default is 10.

    Returns:
        A defaultdict where keys are user (raw) ids and values are lists of
        tuples ``[(raw item id, rating estimation), ...]`` with at most ``n``
        entries, sorted by estimated rating, highest first. ``uid`` is always
        present as a key, even if ``predictions`` holds nothing for it.

    Raises:
        Whatever ``trainset.to_inner_uid`` raises when ``uid`` is not part of
        the trainset (a ValueError in Surprise).
    """
    # Raw ids of every book the target user has already rated. A set gives
    # O(1) membership tests while scanning the predictions.
    inner_uid = trainset.to_inner_uid(uid)
    rated_raw_iids = {
        trainset.to_raw_iid(entry[0]) for entry in trainset.ur[inner_uid]
    }

    # Group the predictions by user. Loop variables deliberately do NOT reuse
    # the name `uid`, so the parameter keeps pointing at the requested user.
    top_n = defaultdict(list)
    for pred_uid, iid, _true_r, est, _details in predictions:
        # Drop already-rated books *before* ranking so the target user still
        # gets a full list of n whenever enough unrated candidates exist.
        if pred_uid == uid and iid in rated_raw_iids:
            continue
        top_n[pred_uid].append((iid, est))

    # Rank each user's candidates and keep the n best, in place.
    for user_ratings in top_n.values():
        user_ratings.sort(key=lambda pair: pair[1], reverse=True)
        del user_ratings[n:]

    # Guarantee the requested user has an entry, even if it is empty.
    top_n.setdefault(uid, [])
    return top_n

# Look up book metadata (id and title) in the Goodreads dataset for one user's recommendations
def get_books_details(uid, top_n, trainset):
    """Build a table describing the books recommended to ``uid``.

    Args:
        uid: Key into ``top_n`` selecting whose recommendations to describe.
        top_n: Mapping of user id -> list of ``(raw book id, estimated rating)``.
        trainset: Object whose ``to_inner_iid`` converts a raw book id into
            the trainset's inner id.

    Returns:
        A DataFrame with one row per recommended book and the columns
        ``good_bid``, ``title``, ``inner_bid`` and ``estimated_rating``.
        ``title`` is the string form of the list of every ``good_title`` in
        'csv/Final_Dataset.csv' whose ``good_bid`` equals the book id, so it
        reads ``"[]"`` when nothing matches.
    """
    catalogue = pd.read_csv('csv/Final_Dataset.csv')

    def _titles_of(raw_bid):
        # All titles recorded for this book id, rendered as a list literal.
        return str(list(catalogue['good_title'].loc[catalogue['good_bid'] == raw_bid]))

    # One pass over the recommendations: (raw id, inner id, title(s), estimate).
    rows = [
        (bid, trainset.to_inner_iid(bid), _titles_of(bid), rating)
        for bid, rating in top_n[uid]
    ]

    return pd.DataFrame({
        'good_bid': [row[0] for row in rows],
        'title': [row[2] for row in rows],
        'inner_bid': [row[1] for row in rows],
        'estimated_rating': [row[3] for row in rows],
    })

# Score every book in the trainset for one user and describe the best ones
def predict(uid, trainset, algo, n=10):
    """Return a table of recommended books for ``uid``.

    Args:
        uid: Raw user id, passed unchanged to ``algo.predict``.
        trainset: Trainset whose items are scored; ``all_items()`` yields
            inner ids, which are converted to raw ids because
            ``algo.predict`` is called with raw ids.
        algo: Fitted algorithm exposing ``predict(uid, iid=...)``.
        n(int): Number of recommendations requested from ``get_top_n``.
            Default is 10.

    Returns:
        The DataFrame produced by ``get_books_details`` for the ranking
        computed by ``get_top_n``.
    """
    raw_item_ids = (
        trainset.to_raw_iid(inner_iid) for inner_iid in trainset.all_items()
    )
    predictions = [algo.predict(uid, iid=raw_iid) for raw_iid in raw_item_ids]
    ranking = get_top_n(uid, predictions, trainset, n)
    return get_books_details(uid, ranking, trainset)

# Overwrite a random sample of one user's ratings (the model must be refit afterwards to see the effect)
def manipulate(uid, df, trainset, max_rating=3, num_books=3, new_rating=0, random_state=None):
    """Overwrite the ratings of randomly chosen books rated by ``uid``.

    Args:
        uid: Value matched against the ``userid`` column.
        df: Ratings frame with ``userid``, ``bid`` and ``rating`` columns.
            It is modified IN PLACE and also returned.
        trainset: Object whose ``to_inner_iid`` maps a raw book id to its
            inner id; used only for the printed summary.
        max_rating: Despite the name, this is the MINIMUM rating a book must
            currently have (``rating >= max_rating``) to be eligible.
            Default is 3.
        num_books(int): How many eligible books to pick. Default is 3.
        new_rating: Value written into ``rating`` for the picked books.
            Default is 0, the previously hard-coded value.
        random_state: Forwarded to pandas' ``sample`` so the selection can be
            reproduced. Default None keeps the selection unseeded.

    Returns:
        The same ``df`` object, after modification.

    Raises:
        ValueError: raised by pandas' ``sample`` when fewer than
            ``num_books`` ratings of the user are eligible.

    Side effects:
        Prints a small table with the raw (``good_bid``) and inner
        (``inner_bid``) ids of the selected books.
    """
    user_rows = df['userid'] == uid

    # Randomly pick books the user has rated at or above the threshold.
    eligible_bids = df.loc[user_rows & (df['rating'] >= max_rating), 'bid']
    selected_books = list(eligible_bids.sample(n=num_books, random_state=random_state))

    # Resolve inner ids BEFORE touching df, so a failed lookup cannot leave
    # the frame half-modified.
    inner_bid_list = [trainset.to_inner_iid(bid) for bid in selected_books]

    # Replace the rating value of the selected books for this user only.
    df.loc[user_rows & df['bid'].isin(selected_books), 'rating'] = new_rating

    # Show which books were changed (raw Goodreads id and trainset inner id).
    selected_df = pd.DataFrame({'good_bid': selected_books, 'inner_bid': inner_bid_list})
    print(selected_df)
    return df

# Load the ratings into Surprise and fit an SVD model on all of them
def initialize_algo(df):
    """Build a full trainset from ``df`` and fit an SVD model on it.

    Args:
        df: Ratings frame containing ``userid``, ``bid`` and ``rating``
            columns; they are passed to Surprise in that order.

    Returns:
        A tuple ``(trainset, algo)``: the trainset produced by
        ``build_full_trainset()`` and the ``SVD(biased=False)`` instance
        after ``fit`` has been called on that trainset.
    """
    scale = Reader(rating_scale=(1, 5))
    ratings = Dataset.load_from_df(df[['userid', 'bid', 'rating']], scale)
    full_trainset = ratings.build_full_trainset()

    model = SVD(biased=False)
    model.fit(full_trainset)
    return full_trainset, model

In [None]:
# Fit the model on the ratings currently in `df` (the unmodified CSV data
# when the notebook is run top to bottom).
trainset, algo = initialize_algo(df)

In [None]:
# Recommendations for raw user id 0 with the default n=10, using the model
# fitted in the previous cell.
predict(0, trainset, algo)

In [None]:
# Overwrite three randomly chosen ratings (currently >= 3) of user 0 with 0.
# NOTE(review): manipulate() edits `df` in place and picks books at random, so
# re-running this cell overwrites further ratings; reload the CSV to reset.
df = manipulate(0, df, trainset)

In [None]:
# Refit on the modified ratings; this rebinds `trainset` and `algo`, so the
# earlier fitted model is no longer reachable under those names.
trainset, algo = initialize_algo(df)

In [None]:
# Recommendations for user 0 from the refitted model, for comparison with the
# output of the earlier predict(0, ...) cell.
predict(0, trainset, algo)

In [None]:
# NOTE(review): SVD and Dataset are already imported in the first cell; only
# GridSearchCV is new here. Consider moving it to the top import cell.
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import GridSearchCV

# Grid-search SVD hyper-parameters on the ratings currently held in `df`.
# The built-in MovieLens 100K loader below is kept only for reference:
# data = Dataset.load_builtin('ml-100k')
# NOTE(review): if the cells above were run in order, `df` already contains
# the ratings overwritten by manipulate() (0 by default), which lie outside
# the (1, 5) scale declared here - confirm this is intended.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['userid','bid', 'rating']], reader)
    
# Candidate values: 2 epoch counts x 2 learning rates x 2 regularisation
# strengths = 8 combinations, each scored on RMSE and MAE with cv=3.
param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005],
              'reg_all': [0.4, 0.6]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

In [None]:
# NOTE(review): this rebinds `algo` to a new SVD instance that has not been
# fitted (no fit() call in this cell), replacing the fitted model from the
# earlier cells; re-run initialize_algo() before calling predict() again.
algo = SVD(biased=False)
algo