In [88]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [89]:
# Dataset source https://zenodo.org/record/4265096#.Ytx7lHZBy3A
df = pd.read_csv('../data/best_books.csv')
df = df[['title', 'genres']]
df

Unnamed: 0,title,genres
0,The Hunger Games,"['Young Adult', 'Fiction', 'Dystopia', 'Fantas..."
1,Harry Potter and the Order of the Phoenix,"['Fantasy', 'Young Adult', 'Fiction', 'Magic',..."
2,To Kill a Mockingbird,"['Classics', 'Fiction', 'Historical Fiction', ..."
3,Pride and Prejudice,"['Classics', 'Fiction', 'Romance', 'Historical..."
4,Twilight,"['Young Adult', 'Fantasy', 'Romance', 'Vampire..."
...,...,...
52473,Fractured,"['Vampires', 'Paranormal', 'Young Adult', 'Rom..."
52474,Anasazi,"['Mystery', 'Young Adult']"
52475,Marked,"['Fantasy', 'Young Adult', 'Paranormal', 'Ange..."
52476,Wayward Son,"['Fiction', 'Mystery', 'Historical Fiction', '..."


In [90]:
def remove_extra_chars(s):
    s = s.replace("'","")
    s = s.replace("[","")
    s = s.replace("]","")
    return s.replace(",","")

In [91]:
df.genres = df.genres.apply(remove_extra_chars)
df

Unnamed: 0,title,genres
0,The Hunger Games,Young Adult Fiction Dystopia Fantasy Science F...
1,Harry Potter and the Order of the Phoenix,Fantasy Young Adult Fiction Magic Childrens Ad...
2,To Kill a Mockingbird,Classics Fiction Historical Fiction School Lit...
3,Pride and Prejudice,Classics Fiction Romance Historical Fiction Li...
4,Twilight,Young Adult Fantasy Romance Vampires Fiction P...
...,...,...
52473,Fractured,Vampires Paranormal Young Adult Romance Fantas...
52474,Anasazi,Mystery Young Adult
52475,Marked,Fantasy Young Adult Paranormal Angels Romance ...
52476,Wayward Son,Fiction Mystery Historical Fiction Adventure C...


In [92]:
df = df[df.genres.str.contains('Fantasy')]
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,title,genres
0,The Hunger Games,Young Adult Fiction Dystopia Fantasy Science F...
1,Harry Potter and the Order of the Phoenix,Fantasy Young Adult Fiction Magic Childrens Ad...
2,Twilight,Young Adult Fantasy Romance Vampires Fiction P...
3,Animal Farm,Classics Fiction Dystopia Fantasy Literature P...
4,The Chronicles of Narnia,Fantasy Classics Fiction Young Adult Childrens...
...,...,...
15316,Nameless Fate,Aliens Science Fiction Romance Erotica BDSM Fa...
15317,Elemental,Fantasy Young Adult Angels Romance Paranormal ...
15318,Fractured,Vampires Paranormal Young Adult Romance Fantas...
15319,Marked,Fantasy Young Adult Paranormal Angels Romance ...


In [93]:
cm = CountVectorizer().fit_transform(df.genres)
cs = cosine_similarity(cm)
cs

array([[1.        , 0.60858062, 0.53311399, ..., 0.38533732, 0.4738791 ,
        0.64605828],
       [0.60858062, 1.        , 0.43259046, ..., 0.40201513, 0.43259046,
        0.54062051],
       [0.53311399, 0.43259046, 1.        , ..., 0.83149718, 0.89473684,
        0.47836487],
       ...,
       [0.38533732, 0.40201513, 0.83149718, ..., 1.        , 0.78258558,
        0.31118796],
       [0.4738791 , 0.43259046, 0.89473684, ..., 0.78258558, 1.        ,
        0.43052839],
       [0.64605828, 0.54062051, 0.47836487, ..., 0.31118796, 0.43052839,
        1.        ]])

In [94]:
#my_title = 'The Fellowship of the Ring'
my_title = 'The Lord of the Rings'

In [95]:
df['score'] = cs[df[df.title == my_title].index.to_numpy()[0]]
df.sort_values(by='score', ascending=False).head(20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['score'] = cs[df[df.title == my_title].index.to_numpy()[0]]


Unnamed: 0,title,genres,score
1693,"The Hobbit, Part One",Fantasy Fiction Classics Science Fiction Fanta...,1.0
160,The Lord of the Rings,Fantasy Classics Fiction Adventure Science Fic...,1.0
72,"The Hobbit, or There and Back Again",Fantasy Classics Fiction Adventure Young Adult...,0.965517
5,J.R.R. Tolkien 4-Book Boxed Set: The Hobbit an...,Fantasy Fiction Classics Adventure Science Fic...,0.965517
5708,Heir of Sea and Fire,Fantasy Fiction Science Fiction Fantasy High F...,0.965517
566,The Children of Húrin,Fantasy Fiction Classics High Fantasy Epic Fan...,0.949289
255,The Return of the King,Fantasy Fiction Classics Adventure Science Fic...,0.947514
280,The Two Towers,Fantasy Fiction Classics Adventure Science Fic...,0.947514
105,The Silmarillion,Fantasy Fiction Classics High Fantasy Science ...,0.947514
14329,Owlsight,Fantasy Fiction Science Fiction Fantasy Magic ...,0.933852
