In [552]:
import pandas as pd
import numpy as np
from ast import literal_eval
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
%matplotlib inline
import ipywidgets
import csv
import json

---
### Books Dataset

In [553]:
data =[]
with open('booksummaries.txt','r', encoding='utf-8') as f:
    reader = csv.reader(f, dialect='excel-tab')
    for row in reader:
        data.append(row)

books = pd.DataFrame.from_records(data, columns=['book_id', 'freebase_id', 'book_title', 'author', 'publication_date', 'genre', 'summary'])

In [554]:
books.head(1)

Unnamed: 0,book_id,freebase_id,book_title,author,publication_date,genre,summary
0,620,/m/0hhy,Animal Farm,George Orwell,1945-08-17,"{""/m/016lj8"": ""Roman \u00e0 clef"", ""/m/06nbt"":...","Old Major, the old boar on the Manor Farm, ca..."


In [555]:
books.genre.at[0]

'{"/m/016lj8": "Roman \\u00e0 clef", "/m/06nbt": "Satire", "/m/0dwly": "Children\'s literature", "/m/014dfn": "Speculative fiction", "/m/02xlf": "Fiction"}'

In [556]:
def parse_genre_entry(genre_info):
    if genre_info == '':
        return []
    genre_dict = json.loads(genre_info)
    genres = list(genre_dict.values())
    return genres

books['genre'] = books['genre'].apply(parse_genre_entry)

In [557]:
books = books.drop(columns=['freebase_id'])

In [558]:
books

Unnamed: 0,book_id,book_title,author,publication_date,genre,summary
0,620,Animal Farm,George Orwell,1945-08-17,"[Roman à clef, Satire, Children's literature, ...","Old Major, the old boar on the Manor Farm, ca..."
1,843,A Clockwork Orange,Anthony Burgess,1962,"[Science Fiction, Novella, Speculative fiction...","Alex, a teenager living in near-future Englan..."
2,986,The Plague,Albert Camus,1947,"[Existentialism, Fiction, Absurdist fiction, N...",The text of The Plague is divided into five p...
3,1756,An Enquiry Concerning Human Understanding,David Hume,,[],The argument of the Enquiry proceeds by a ser...
4,2080,A Fire Upon the Deep,Vernor Vinge,,"[Hard science fiction, Science Fiction, Specul...",The novel posits that space around the Milky ...
...,...,...,...,...,...,...
16554,36934824,Under Wildwood,Colin Meloy,2012-09-25,[],"Prue McKeel, having rescued her brother from ..."
16555,37054020,Transfer of Power,Vince Flynn,2000-06-01,"[Thriller, Fiction]",The reader first meets Rapp while he is doing...
16556,37122323,Decoded,Jay-Z,2010-11-16,[Autobiography],The book follows very rough chronological ord...
16557,37132319,America Again: Re-becoming The Greatness We Ne...,Stephen Colbert,2012-10-02,[],Colbert addresses topics including Wall Stree...


In [559]:
def clean_ids(x):
    try:
        return int(x)
    except:
        return np.nan

books[''] = books['book_id'].apply(clean_ids)
books = books[books['book_id'].notnull()]

In [560]:
def book_soup(x):
    genres = ""
    for genre in x[5]:
        genres += genre + " "
    return f"{x['book_title']} {genres} {x['summary']}"

In [561]:
books["soup"] = books.apply(book_soup, axis=1)
books.sample()

Unnamed: 0,book_id,book_title,author,publication_date,genre,summary,Unnamed: 7,soup
13684,21719995,The Covent Garden Tragedy,Henry Fielding,,[],The play deals with a love triangle in a brot...,21719995,The Covent Garden Tragedy T h e p l a y ...


In [562]:
books.shape[0]

16559

---
### Taylor Swift Songs Dataset

In [563]:
ts_1 = pd.read_csv("01-taylor_swift.csv")
ts_2 = pd.read_csv("02-fearless_taylors_version.csv")
ts_3 = pd.read_csv("03-speak_now_deluxe_package.csv")
ts_4 = pd.read_csv("04-red_deluxe_edition.csv")
ts_5 = pd.read_csv("05-1989_deluxe.csv")
ts_6 = pd.read_csv("06-reputation.csv")
ts_7 = pd.read_csv("07-lover.csv")
ts_8 = pd.read_csv("08-folklore_deluxe_version.csv")
ts_9 = pd.read_csv("09-evermore_deluxe_version.csv")

lyrics = pd.concat([ts_1, ts_2, ts_3, ts_4, ts_5, ts_6, ts_7, ts_8, ts_9])
lyrics.reset_index(drop=True, inplace=True)

In [564]:
lyrics.sample(5)

Unnamed: 0,album_name,track_title,track_n,lyric,line
654,Fearless (Taylor’s Version),Fifteen (Taylor’s Version),2,Hopin' one of those senior boys will wink at y...,7
4852,reputation,...Ready for It?,1,So I take my time,51
5721,reputation,Call It What You Want,14,"Holding my breath, slowly, I said",50
4969,reputation,I Did Something Bad,3,I can feel the flames on my skin,28
6631,Lover,ME!,16,"Strike the band up, 1, 2, 3",60


In [565]:
lyrics = lyrics.drop(columns=['line'])

lyrics = lyrics.groupby('track_title').agg({
    'album_name': 'first',  # You can use 'first' to retain the first occurrence of the author
    'track_n': 'first',
     'lyric': ' '.join
}).reset_index()

lyrics.drop_duplicates(subset=['lyric'])

lyrics['lyric']

0      Knew he was a killer first time that I saw him...
1      It feels like a perfect night To dress up like...
2      Why would you wanna break a perfectly good hea...
3      I don't know what I want, so don't ask me 'Cau...
4      I blew things out of proportion, now you're bl...
                             ...                        
158    Friends break up, friends get married Stranger...
159    Is it romantic how all my elegies eulogize me?...
160    Rebekah rode up on the afternoon train, it was...
161    I sit and watch you reading with your head low...
162    If I wanted to know who you were hanging with ...
Name: lyric, Length: 163, dtype: object

In [566]:
def lyrics_soup(x):
    return x['album_name'] + " " + x['track_title']  + " " + x['lyric']

In [567]:
lyrics["soup"] = lyrics.apply(lyrics_soup, axis=1)
lyrics.sample()

Unnamed: 0,track_title,album_name,track_n,lyric,soup
74,Our Song,Taylor Swift,11,I was ridin' shotgun with my hair undone In th...,Taylor Swift Our Song I was ridin' shotgun wit...


---
### Creating Similarity Matrices

In [568]:
books_lyrics_soups = pd.concat([books['soup'], lyrics['soup']],ignore_index=True)

In [569]:
books_lyrics_count = CountVectorizer(stop_words = "english")
books_lyrics_count.fit(books_lyrics_soups)

books_to_lyrics_matrix = books_lyrics_count.transform(books['soup'])
lyrics_to_books_matrix = books_lyrics_count.transform(lyrics['soup'])

print(books_to_lyrics_matrix.shape, lyrics_to_books_matrix.shape)

(16559, 122097) (163, 122097)


In [570]:
cosine_sim_books_lyrics = cosine_similarity(books_to_lyrics_matrix, lyrics_to_books_matrix)
cosine_sim_books_lyrics

array([[0.00648568, 0.01462985, 0.01007148, ..., 0.01811943, 0.01946311,
        0.01733723],
       [0.01403105, 0.00852527, 0.01447489, ..., 0.02969649, 0.01801338,
        0.02746737],
       [0.03839536, 0.00523713, 0.02340006, ..., 0.09851085, 0.01521539,
        0.02443731],
       ...,
       [0.00646369, 0.01080017, 0.00514734, ..., 0.        , 0.03423018,
        0.01279884],
       [0.        , 0.        , 0.00838679, ..., 0.        , 0.        ,
        0.        ],
       [0.0226413 , 0.01592899, 0.01850484, ..., 0.07768074, 0.03786412,
        0.04601215]])

In [571]:
cosine_sim_lyrics_books = cosine_similarity(lyrics_to_books_matrix, books_to_lyrics_matrix)
cosine_sim_lyrics_books

array([[0.00648568, 0.01403105, 0.03839536, ..., 0.00646369, 0.        ,
        0.0226413 ],
       [0.01462985, 0.00852527, 0.00523713, ..., 0.01080017, 0.        ,
        0.01592899],
       [0.01007148, 0.01447489, 0.02340006, ..., 0.00514734, 0.00838679,
        0.01850484],
       ...,
       [0.01811943, 0.02969649, 0.09851085, ..., 0.        , 0.        ,
        0.07768074],
       [0.01946311, 0.01801338, 0.01521539, ..., 0.03423018, 0.        ,
        0.03786412],
       [0.01733723, 0.02746737, 0.02443731, ..., 0.01279884, 0.        ,
        0.04601215]])

In [572]:
books = books.reset_index()
books_indices = pd.Series(books.index, index=books['book_title'].apply(lambda x: x.lower() if x is not np.nan else "")).drop_duplicates()

books.head()

Unnamed: 0,index,book_id,book_title,author,publication_date,genre,summary,Unnamed: 8,soup
0,0,620,Animal Farm,George Orwell,1945-08-17,"[Roman à clef, Satire, Children's literature, ...","Old Major, the old boar on the Manor Farm, ca...",620,"Animal Farm O l d M a j o r , t h e o ..."
1,1,843,A Clockwork Orange,Anthony Burgess,1962,"[Science Fiction, Novella, Speculative fiction...","Alex, a teenager living in near-future Englan...",843,"A Clockwork Orange A l e x , a t e e n a..."
2,2,986,The Plague,Albert Camus,1947,"[Existentialism, Fiction, Absurdist fiction, N...",The text of The Plague is divided into five p...,986,The Plague T h e t e x t o f T h e P...
3,3,1756,An Enquiry Concerning Human Understanding,David Hume,,[],The argument of the Enquiry proceeds by a ser...,1756,An Enquiry Concerning Human Understanding T ...
4,4,2080,A Fire Upon the Deep,Vernor Vinge,,"[Hard science fiction, Science Fiction, Specul...",The novel posits that space around the Milky ...,2080,A Fire Upon the Deep T h e n o v e l p o...


In [573]:
lyrics = lyrics.reset_index()
lyrics_indices = pd.Series(lyrics.index, index=lyrics['track_title'].apply(lambda x: x.lower() if x is not np.nan else "")).drop_duplicates()

lyrics.head()

Unnamed: 0,index,track_title,album_name,track_n,lyric,soup
0,0,...Ready for It?,reputation,1,Knew he was a killer first time that I saw him...,reputation ...Ready for It? Knew he was a kill...
1,1,22,Red (Deluxe Edition),6,It feels like a perfect night To dress up like...,Red (Deluxe Edition) 22 It feels like a perfec...
2,2,A Perfectly Good Heart,Taylor Swift,14,Why would you wanna break a perfectly good hea...,Taylor Swift A Perfectly Good Heart Why would ...
3,3,A Place In This World,Taylor Swift,4,"I don't know what I want, so don't ask me 'Cau...",Taylor Swift A Place In This World I don't kno...
4,4,Afterglow,Lover,15,"I blew things out of proportion, now you're bl...",Lover Afterglow I blew things out of proportio...


In [574]:
class RecommendationEngine:
    def get_books_from_lyrics(title):
        try:
            idx = lyrics_indices[title.lower()]
            sim_scores = list(enumerate(cosine_sim_lyrics_books[idx]))
            sim_scores = sorted(sim_scores, key=lambda x:x[1], reverse=True)

            sim_scores = sim_scores[:5]

            book_indices = [i[0] for i in sim_scores]

            return books.iloc[book_indices]
        except :
            print("No song found in our database. Please try again.")

    def get_lyrics_from_books(title):
        try:
            idx = books_indices[title.lower()]
            sim_scores = list(enumerate(cosine_sim_books_lyrics[idx]))
            sim_scores = sorted(sim_scores, key=lambda x:x[1], reverse=True)

            sim_scores = sim_scores[:5]

            lyrics_indices = [i[0] for i in sim_scores]

            return lyrics.iloc[lyrics_indices]
        except :
            print("No book found in our database. Please try again.")

In [575]:
rand = books["book_title"].sample(1).values[0]
rand

'Tomorrow'

In [576]:
RecommendationEngine.get_lyrics_from_books(rand)

Unnamed: 0,index,track_title,album_name,track_n,lyric,soup
54,54,Invisible,Taylor Swift,13,She can't see the way your eyes Light up when ...,Taylor Swift Invisible She can't see the way y...
3,3,A Place In This World,Taylor Swift,4,"I don't know what I want, so don't ask me 'Cau...",Taylor Swift A Place In This World I don't kno...
62,62,Look What You Made Me Do,reputation,6,I don't like your little games Don't like your...,reputation Look What You Made Me Do I don't li...
60,60,London Boy,Lover,11,"We can go driving in, on my scooter Uh, you kn...","Lover London Boy We can go driving in, on my s..."
71,71,Never Grow Up,Speak Now (Deluxe),8,Your little hand's wrapped around my finger An...,Speak Now (Deluxe) Never Grow Up Your little h...


In [577]:
RecommendationEngine.get_lyrics_from_books('The Ramsay Scallop')

Unnamed: 0,index,track_title,album_name,track_n,lyric,soup
39,39,Girl at Home,Red (Deluxe Edition),19,"Don't look at me, you got a girl at home And e...",Red (Deluxe Edition) Girl at Home Don't look a...
50,50,I Think He Knows,Lover,6,I think he knows His footprints on the sidewal...,Lover I Think He Knows I think he knows His fo...
75,75,Ours,Speak Now (Deluxe),15,Elevator buttons and morning air Strangers' si...,Speak Now (Deluxe) Ours Elevator buttons and m...
133,133,hoax,folklore (deluxe version),16,My only one My smoking gun My eclipsed sun Thi...,folklore (deluxe version) hoax My only one My ...
130,130,cardigan,folklore (deluxe version),2,"Vintage tee, brand new phone High heels on cob...",folklore (deluxe version) cardigan Vintage tee...


In [578]:
rand = lyrics["track_title"].sample(1).values[0]
rand

"Should've Said No"

In [579]:
RecommendationEngine.get_books_from_lyrics(rand)

Unnamed: 0,index,book_id,book_title,author,publication_date,genre,summary,Unnamed: 8,soup
7472,7472,6727906,M/F,Anthony Burgess,1971,"[Comic novel, Speculative fiction]",From the blurb of Cape's first edition 'The s...,6727906,M/F F r o m t h e b l u r b o f C a ...
13712,13712,21802138,For Lust of Knowing,Robert Graham Irwin,2006,[Non-fiction],While For Lust of Knowing is a riposte to Sai...,21802138,For Lust of Knowing W h i l e F o r L u ...
9451,9451,10254205,Five Go Off In A Caravan,Enid Blyton,1946,"[Mystery, Adventure novel]",A caravan holiday for the Famous Five! It is ...,10254205,Five Go Off In A Caravan A c a r a v a n ...
1762,1762,872478,Kate Vaiden,Reynolds Price,1986-06,"[Fiction, Novel]",Kate Vaiden is the first person narrator of t...,872478,Kate Vaiden K a t e V a i d e n i s t ...
3374,3374,2192851,The Thief and the Dogs,Naguib Mahfouz,1961,[Novel],"Said's world revolves around Nabawiyya, his f...",2192851,The Thief and the Dogs S a i d ' s w o r l...


In [580]:
RecommendationEngine.get_books_from_lyrics('Back to December')

Unnamed: 0,index,book_id,book_title,author,publication_date,genre,summary,Unnamed: 8,soup
963,963,380013,The Time Ships,Stephen Baxter,1995,"[Science Fiction, Novel, Alternate history, Sp...","After the events related in The Time Machine,...",380013,The Time Ships A f t e r t h e e v e n t...
10643,10643,13139490,Let Time Pass,Svend Aage Madsen,,[Novel],Johanna who is a lector at a Danish universit...,13139490,Let Time Pass J o h a n n a w h o i s ...
3827,3827,2611550,A Tale of Time City,Diana Wynne Jones,1987,"[Science Fiction, Children's literature, Fanta...","It is September, 1939, the start of World War...",2611550,A Tale of Time City I t i s S e p t e m ...
12112,12112,17139588,The Fermata,Nicholson Baker,,"[Speculative fiction, Fiction, Novel]",Arno Strine discovers he can stop time when h...,17139588,The Fermata A r n o S t r i n e d i s c ...
4947,4947,3744486,The Longest Night,J. N. Williamson,2002-12-02,"[Fantasy, Speculative fiction, Horror]","It's December 21, and hour by hour Angel and ...",3744486,The Longest Night I t ' s D e c e m b e r ...


In [581]:
books.sample(5)['book_title']

5607                               Pamela
10124               The House at Riverton
16398          Debt: The First 5000 Years
3245     The Mystery of the Aztec Warrior
6370               Murder Being Once Done
Name: book_title, dtype: object

In [582]:
lyrics.sample(5)['track_title']

124    You All Over Me (Taylor’s Version) [From the V...
34                           Fearless (Taylor’s Version)
99       That’s When (Taylor’s Version) [From the Vault]
56                         I’m Only Me When I’m with You
22                           Dancing with Our Hands Tied
Name: track_title, dtype: object