##### Step - 1 Opening The File And Parsing Contents Into Pandas DataFrame ####

In [1]:
#Defining Function For Rating Data Correction
def rating_num_correction(ratings):
    ratings = ratings.split()
    return ratings[3]

In [2]:
#Defining Function To Parse Fields
def parse_fields(novel_data):
    return {
        "url": novel_data["url"],
        "title": novel_data["title"],
        "ratings": rating_num_correction(novel_data["rating"]),
        "cover": novel_data["image"]
    }

In [3]:
#Creating A List Of Dictionaries To Make A Pandas DataFrame
import json 

Books = []
with open('noveldata.json','r',encoding='utf8') as f:
    data = f.read()
    noveldata = json.loads(data)
    i = 0
    while i!= len(noveldata)-1:
        fields = parse_fields(noveldata[i])
        try:
            ratings = int(fields["ratings"])
        except ValueError:
            continue
        if ratings>10: #Filtering Out Books With <10 Ratings
            Books.append(fields)
        i = i + 1

import pandas as pd
titles = pd.DataFrame.from_dict(Books)

In [4]:
#Cleaning Up User Interaction Json Data 
import json
with open('userinteraction.json','r',encoding='utf8') as f:
    noveldata = f.read()
json_novel_data = json.loads(noveldata)
noveldata = json.loads(noveldata)[0]
userdata = []
j = 0
while j!= len(noveldata["user_interaction"])-1:
    userdata.append({'bookurl': noveldata["url"], 'userurl':noveldata["user_interaction"][j][2], 'username': noveldata["user_interaction"][j][1],'rating':noveldata["user_interaction"][j][0]})
    j = j+1

def userdatacorrection(file):
    userdata = []
    with open(file,'r',encoding='utf8') as f:
        data = f.read()
        noveldata = json.loads(data)
    i = 0
    while i <len(noveldata):
        j = 0
        while j<len(noveldata[i]["user_interaction"]):
            userdata.append({'bookurl': noveldata[i]["url"], 'userurl':noveldata[i]["user_interaction"][j][2], 'username': noveldata[i]["user_interaction"][j][1],'rating':noveldata[i]["user_interaction"][j][0]})
            j = j+1
        i = i+1
    return userdata

import pandas as pd
user_interaction = pd.DataFrame.from_dict(userdatacorrection('userinteraction.json'))
user_interaction.to_csv("userinteractiondata.csv")

In [5]:
#Cleaning Up The Search Range 
titles["ratings"] = pd.to_numeric(titles["ratings"])
titles["reduced_titles"] = titles["title"].str.replace("[^a-zA-z0-9]","", regex=True)
titles["reduced_titles"] = titles["reduced_titles"].str.lower()
titles["reduced_titles"] = titles["reduced_titles"].str.replace("\s+"," ", regex = True)
titles = titles[titles["reduced_titles"].str.len()>0]
titles["ratings"] = pd.to_numeric(titles["ratings"])
titles.to_json("books_titles.json")
titles

Unnamed: 0,url,title,ratings,cover,reduced_titles
0,https://www.novelupdates.com/series/taming-the...,Taming The Villainesses,183,https://cdn.novelupdates.com/images/2022/06/Ta...,tamingthevillainesses
1,https://www.novelupdates.com/series/the-main-h...,The Main Heroines are Trying to Kill Me,428,https://cdn.novelupdates.com/images/2022/04/Th...,themainheroinesaretryingtokillme
2,https://www.novelupdates.com/series/the-regres...,The Regressed Demon Lord is Kind,529,https://cdn.novelupdates.com/images/2021/06/Th...,theregresseddemonlordiskind
3,https://www.novelupdates.com/series/kidnapped-...,Kidnapped Dragons,683,https://cdn.novelupdates.com/images/2020/10/Ki...,kidnappeddragons
4,https://www.novelupdates.com/series/im-really-...,I’m Really Not The Demon God’s Lackey,509,https://cdn.novelupdates.com/images/2021/06/Im...,imreallynotthedemongodslackey
...,...,...,...,...,...
11318,https://www.novelupdates.com/series/little-ger...,"Little Ger, Slow Life in Another World",69,https://cdn.novelupdates.com/images/2021/10/Li...,littlegerslowlifeinanotherworld
11319,https://www.novelupdates.com/series/sword-and-...,Sword and Love,24,https://cdn.novelupdates.com/images/2018/09/Sw...,swordandlove
11320,https://www.novelupdates.com/series/misunderst...,Misunderstood Lousy Cop: Getting Promoted for ...,17,https://www.novelupdates.com/img/noimagefound.jpg,misunderstoodlousycopgettingpromotedforrunning...
11321,https://www.novelupdates.com/series/the-queens...,The Queen’s Husband,37,https://cdn.novelupdates.com/images/2019/03/tq...,thequeenshusband


In [6]:
#Uploading The Data Frame to CSV
titles.to_csv("bookdata.csv")

#### Step 2 - Making A Search Engine For Searching Through The Parsed Content 

In [7]:
#Search Engine For Title Search 
from sklearn.feature_extraction.text import TfidfVectorizer 
vectorizer = TfidfVectorizer()

tfidf = vectorizer.fit_transform(titles["reduced_titles"])
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re

In [8]:
#HTML Elements For  Easier View
def make_clickable(val): 
    return '<a target="_blank" href="{}">NovelUpdates</a>'.format(val)

def show_image(val):
    return '<img src="{}" width=50></img>'.format(val)

def search(query,vectorizer):
    processed = re.sub("[^a-zA-Z0-9]","", query.lower())
    query_vec = vectorizer.transform([processed])
    similarity = cosine_similarity(query_vec, tfidf).flatten()
    indices = np.argpartition(similarity,-5)[-5:]
    results = titles.iloc[indices]
    results = results.sort_values("ratings", ascending=False)
    return results.head(5).style.format({'url': make_clickable, 'cover':show_image })

search("Soul Of Negary", vectorizer)

Unnamed: 0,url,title,ratings,cover,reduced_titles
8986,NovelUpdates,Soul of Negary,140,,soulofnegary
3774,NovelUpdates,HP1 kara Hajimeru Isekai Musou,129,,hp1karahajimeruisekaimusou
3773,NovelUpdates,I Became the Youngest Daughter of the Mafia Family,43,,ibecametheyoungestdaughterofthemafiafamily
3772,NovelUpdates,"Of All the Transmigrations, Why Am I a Prisoner?",28,,ofallthetransmigrationswhyamiaprisoner
3771,NovelUpdates,"I’m an S-class Knight, and I was Appointed as a Captain of an Elite Unit, but all my Subordinates were Older S-class Female Knights",21,,imansclassknightandiwasappointedasacaptainofaneliteunitbutallmysubordinateswereoldersclassfemaleknights


In [9]:
#Liked Books
liked_books =['https://www.novelupdates.com/series/soul-of-negary/']
titles[titles["url"].isin(liked_books)].to_csv("liked_books.csv")

#### Step - 3 Collaborative Filtering Model For Recommendations

In [10]:
#Making A Book Set Of Liked Books
import pandas as pd
my_books = pd.read_csv("liked_books.csv", index_col=0)
my_books["url"] = my_books["url"].astype(str)
book_set = set(my_books["url"])

In [11]:
#Data Frame For User Input Data
my_books
list1 = []
import csv
with open("liked_books.csv") as f:
    csv = csv.reader(f)
    for row in csv:
        if row[1] == "url":
            continue
        list1.append({"user_id": -1, "book_id": row[1],"rating": 5})
list1 = pd.DataFrame.from_dict(list1)

In [12]:
#Function To Clean Up The Dataset 
def urlcorrector(string):
    if "#comments" in string:
        string = string.rsplit("/",1)[0]
        if "comment-page" in string:
            string = string.rsplit("/",1)[0]
        string = string + "/"
    return(string)

In [13]:
#Finding Overlaping Users
overlap_users = {}
import csv
with open("userinteractiondata.csv") as f:
    csv = csv.reader(f)   
    for row in csv:
        url = urlcorrector(row[1])
        if url in book_set:
            if row[2] not in overlap_users:
                overlap_users[row[2]] = 1
            else:
                overlap_users[row[2]] = overlap_users[row[2]] + 1

In [14]:
#Filtering Users
filtered_overlap_users = set([k for k in overlap_users if overlap_users[k]>my_books.shape[0]/10])
filtered_overlap_users

{'https://www.novelupdates.com/user/124334/Rextraos/',
 'https://www.novelupdates.com/user/130085/Zeusomega/',
 'https://www.novelupdates.com/user/138528/bhankit/',
 'https://www.novelupdates.com/user/142575/EFermi/',
 'https://www.novelupdates.com/user/147765/The Hamster Overlord/',
 'https://www.novelupdates.com/user/154209/HelluvaReader/',
 'https://www.novelupdates.com/user/15786/darthpsykoz/',
 'https://www.novelupdates.com/user/159825/Uhoh_/',
 'https://www.novelupdates.com/user/169770/bheyer/',
 'https://www.novelupdates.com/user/174241/Syntheticspirit/',
 'https://www.novelupdates.com/user/183711/Poireau/',
 'https://www.novelupdates.com/user/214211/Subline Act/',
 'https://www.novelupdates.com/user/264271/qonquirette/',
 'https://www.novelupdates.com/user/268492/TKing1517/',
 'https://www.novelupdates.com/user/271361/Daresan/',
 'https://www.novelupdates.com/user/284417/Liero Dirlewanger/',
 'https://www.novelupdates.com/user/322878/Alex Woods/',
 'https://www.novelupdates.com

In [15]:
#Getting Properties Of Filtered Users From User Interaction Data 
import csv
interactions_list = []
with open("userinteractiondata.csv") as f:
    userinteraction = csv.reader(f)
    for row in userinteraction:
        if row[2] in filtered_overlap_users:
             interactions_list.append([row[2],row[1],row[4]])

In [16]:
#Making A Pandas DataFrame of the data
interactions = pd.DataFrame(interactions_list, columns=["user_id", "book_id", "rating"])
interactions = pd.concat([list1[["user_id", "book_id", "rating"]], interactions])

In [17]:
#Cleaning Up The Data
interactions["book_id"] = interactions["book_id"].astype(str)
interactions["user_id"] = interactions["user_id"].astype(str)
interactions["rating"] = pd.to_numeric(interactions["rating"])
interactions["user_index"] = interactions["user_id"].astype("category").cat.codes
interactions["book_index"] = interactions["book_id"].astype("category").cat.codes

interactions

Unnamed: 0,user_id,book_id,rating,user_index,book_index
0,-1,https://www.novelupdates.com/series/soul-of-ne...,5,0,493
0,https://www.novelupdates.com/user/95898/Queen ...,https://www.novelupdates.com/series/world-of-c...,4,28,718
1,https://www.novelupdates.com/user/322878/Alex ...,https://www.novelupdates.com/series/return-of-...,3,17,445
2,https://www.novelupdates.com/user/76750/Gsicht...,https://www.novelupdates.com/series/overgeared/,2,27,391
3,https://www.novelupdates.com/user/413166/bakae...,https://www.novelupdates.com/series/the-demon-...,5,24,563
...,...,...,...,...,...
1064,https://www.novelupdates.com/user/381558/Valix...,https://www.novelupdates.com/series/my-house-o...,3,23,356
1065,https://www.novelupdates.com/user/338694/cpzom...,https://www.novelupdates.com/series/my-house-o...,3,18,358
1066,https://www.novelupdates.com/user/322878/Alex ...,https://www.novelupdates.com/series/i-have-a-s...,5,17,229
1067,https://www.novelupdates.com/user/381558/Valix...,https://www.novelupdates.com/series/i-have-a-s...,4,23,229


In [18]:
#Making A Sparse Matrix
from scipy.sparse import coo_matrix
ratings_mat_coo = coo_matrix((interactions["rating"], (interactions["user_index"],interactions["book_index"])))
ratings_mat = ratings_mat_coo.tocsr()
interactions[interactions["user_id"]=="-1"]
my_index = 0

In [19]:
#Cosine Similairty Match For Similar Users
from sklearn.metrics.pairwise import cosine_similarity
similarity = cosine_similarity(ratings_mat[my_index,:], ratings_mat).flatten()
import numpy as np
indices = np.argpartition(similarity, -20)[-20:]

In [20]:
#Filtering and Cleaning Up Similar Users
similar_users = interactions[interactions["user_index"].isin(indices)].copy()
similar_users = similar_users[similar_users["user_id"]!="-1"]
similar_users

Unnamed: 0,user_id,book_id,rating,user_index,book_index
0,https://www.novelupdates.com/user/95898/Queen ...,https://www.novelupdates.com/series/world-of-c...,4,28,718
1,https://www.novelupdates.com/user/322878/Alex ...,https://www.novelupdates.com/series/return-of-...,3,17,445
3,https://www.novelupdates.com/user/413166/bakae...,https://www.novelupdates.com/series/the-demon-...,5,24,563
4,https://www.novelupdates.com/user/413166/bakae...,https://www.novelupdates.com/series/im-really-...,5,24,246
7,https://www.novelupdates.com/user/381558/Valix...,https://www.novelupdates.com/series/kidnapped-...,5,23,274
...,...,...,...,...,...
1064,https://www.novelupdates.com/user/381558/Valix...,https://www.novelupdates.com/series/my-house-o...,3,23,356
1065,https://www.novelupdates.com/user/338694/cpzom...,https://www.novelupdates.com/series/my-house-o...,3,18,358
1066,https://www.novelupdates.com/user/322878/Alex ...,https://www.novelupdates.com/series/i-have-a-s...,5,17,229
1067,https://www.novelupdates.com/user/381558/Valix...,https://www.novelupdates.com/series/i-have-a-s...,4,23,229


#### Filtering and Displaying Top Recommendations

In [21]:
#Book Recommendations
book_recs = similar_users.groupby("book_id").rating.agg(['count', 'mean'])
books_titles = pd.read_json("books_titles.json")
books_titles["url"] = books_titles["url"].astype(str)
books_recs = book_recs.merge(books_titles, how="inner",left_on ="book_id",right_on ="url")
books_recs

Unnamed: 0,count,mean,url,title,ratings,cover,reduced_titles
0,4,4.0,https://www.novelupdates.com/series/48-hours-a...,48 Hours a Day,132,https://cdn.novelupdates.com/images/2021/06/Ho...,48hoursaday
1,1,0.0,https://www.novelupdates.com/series/6-year-old...,6-year-old Wise Man Wants to Walk in the Shade,51,https://cdn.novelupdates.com/images/2020/07/91...,6yearoldwisemanwantstowalkintheshade
2,1,1.0,https://www.novelupdates.com/series/80-years-o...,"80 Years Of Signing-In At The Cold Palace, I A...",36,https://cdn.novelupdates.com/images/2021/09/Ye...,80yearsofsigninginatthecoldpalaceiamunrivalled
3,2,1.0,https://www.novelupdates.com/series/9-heavenly...,9 Heavenly Thunder Manual,338,https://cdn.novelupdates.com/images/2015/07/9m...,9heavenlythundermanual
4,1,1.0,https://www.novelupdates.com/series/a-demons-p...,A Demon’s Path,73,https://cdn.novelupdates.com/images/2017/11/11...,ademonspath
...,...,...,...,...,...,...,...
233,2,4.0,https://www.novelupdates.com/series/why-fall-i...,Why Fall in Love if You Can Attend Tsinghua Un...,356,https://cdn.novelupdates.com/images/2020/05/Ts...,whyfallinloveifyoucanattendtsinghuauniversity
234,2,1.0,https://www.novelupdates.com/series/world-of-w...,World of Warcraft: Foreign Realm Domination,423,https://cdn.novelupdates.com/images/2017/07/wo...,worldofwarcraftforeignrealmdomination
235,1,3.0,https://www.novelupdates.com/series/yama-rising/,Yama Rising,89,https://cdn.novelupdates.com/images/2020/08/Ya...,yamarising
236,2,5.0,https://www.novelupdates.com/series/you-cannot...,You Cannot Afford To Offend My Woman,270,https://cdn.novelupdates.com/images/2019/03/yc...,youcannotaffordtooffendmywoman


In [22]:
#Filtering and Cleaning Up Book Recommendations
books_recs["ratings"] = books_recs["ratings"].astype(int)
books_recs["adjusted_count"] = books_recs["count"]**(books_recs["count"] / books_recs["ratings"])
books_recs["score"] = books_recs["mean"]*books_recs["adjusted_count"]
books_recs = books_recs[~books_recs["url"].isin(my_books["url"])]
my_books["reduced_titles"] = my_books["title"].str.replace("[^a-zA-Z0-9 ]", "", regex=True).str.lower()
my_books["reduced_titles"] = my_books["reduced_titles"].str.replace("\s+"," ", regex=True)
books_recs = books_recs[~books_recs["reduced_titles"].isin(my_books["reduced_titles"])]
books_recs = books_recs[books_recs["count"]>1]
books_recs = books_recs[books_recs["mean"]>1]
top_recs = books_recs.sort_values("score", ascending=False)

In [23]:
#Displaying Top Recommendations
def make_clickable(val):
    return '<a target="_blank" href="{}">NovelUpdates</a>'.format(val, val)

def show_image(val):
    return '<a href="{}"><img src="{}" width=50></img></a>'.format(val, val)

top_recs.style.format({'url': make_clickable, 'cover': show_image})

Unnamed: 0,count,mean,url,title,ratings,cover,reduced_titles,adjusted_count,score
172,4,5.0,NovelUpdates,Surviving a Shounen Manga,130,,survivingashounenmanga,1.043578,5.21789
203,4,5.0,NovelUpdates,The Protagonists Are Murdered by Me,311,,theprotagonistsaremurderedbyme,1.01799,5.08995
19,2,5.0,NovelUpdates,Ascending the Heavens as an Evil God,90,,ascendingtheheavensasanevilgod,1.015523,5.077613
137,2,5.0,NovelUpdates,Power and Wealth,103,,powerandwealth,1.01355,5.067751
158,2,5.0,NovelUpdates,Shepherding Humanity,109,,shepherdinghumanity,1.0128,5.063998
106,2,5.0,NovelUpdates,Low Dimensional Game,110,,lowdimensionalgame,1.012682,5.063412
111,2,5.0,NovelUpdates,Master of Untold Daos,112,,masterofuntolddaos,1.012455,5.062273
149,2,5.0,NovelUpdates,Rise,119,,rise,1.011718,5.058588
187,2,5.0,NovelUpdates,The Empress’s Gigolo,124,,theempresssgigolo,1.011243,5.056213
139,2,5.0,NovelUpdates,Pursuing Immortality,138,,pursuingimmortality,1.010096,5.050481
