In [2]:
# pandas to read the final dataframe of the books 
import pandas as pd 

# pickle package to import the various files (e.g. vocabulary, inverted_index, ...)
import pickle 


# nltk packages for cleaning the plots 
import nltk as nl
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer 
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk import WordNetLemmatizer

We import the vocabulary and the inverted index:

In [3]:
# Load the vocabulary; find_query looks words up in it to obtain their term_id.
# NOTE(security): pickle.load can execute arbitrary code, so only load pickle
# files that were produced by this project.
with open('vocabulary.pkl', 'rb') as f:
    # The with-statement closes the file on exit; no explicit close() is needed.
    vocabulary = pickle.load(f)


In [4]:
# Load the inverted index; find_query looks term_ids up in it to obtain the
# list of documents associated with each term.
# NOTE(security): pickle.load can execute arbitrary code, so only load pickle
# files that were produced by this project.
with open('Inverted_Index.pkl', 'rb') as f:
    # The with-statement closes the file on exit; no explicit close() is needed.
    ID = pickle.load(f)

We import the cleaned dataframe of the books:

In [5]:
# Read the tab-separated books table, discard the rows that have no plot,
# and renumber the remaining rows from 0.
df = (
    pd.read_csv('dataset/Dataset_300Pages.tsv', sep='\t')
    .dropna(subset=['Plot'])
    .reset_index(drop=True)
)

In [6]:
# Preview the first three rows of the table loaded above.
df.head(3)

Unnamed: 0,document_ID,bookTitle,bookSeries,bookAuthors,ratingValue,ratingCount,reviewCount,Plot_Values,Plot,NumberofPages,Publishing_Date,Characters,Setting,Url
0,0,MARS,,Jasmine Rose,4.38,69,13,"{5: '47', 4: '7', 3: '11', 2: '2', 1: '2'}",❝�� my heart has become a planetand you are th...,,2014,,,https://www.goodreads.com/book/show/23279048-mars
1,1,Black Box,,Cassia Leo,4.02,6244,903,"{5: '2297', 4: '2320', 3: '1181', 2: '345', 1:...",♥️ Three fateful encounters....♥️ Two heart-br...,400.0,February 28th 2014,"['Mikki Gladstone', 'William ""Crush"" Slayer']","['Boston, Massachusetts']",https://www.goodreads.com/book/show/29539518-b...
2,2,Ruin and Rising,The Shadow and Bone Trilogy #3,Leigh Bardugo,4.09,158624,19396,"{5: '62107', 4: '59607', 3: '27962', 2: '6810'...",▶ \nAlternative Cover Edition #1\nThe capital ...,422.0,June 17th 2014,"['Alina Starkov', 'Malyen Oretsev', 'Darkling'...",['Ravka '],https://www.goodreads.com/book/show/14061957-r...


We will also need these functions to run the query:  

In [7]:
def clean_info(string):
    """Normalise a piece of text into a space-separated string of lemmatised tokens.

    The steps are applied in this order: tokenise (dropping punctuation),
    lower-case, remove English stopwords, lemmatise every token as a verb
    and then as a noun.

    Parameters
    ----------
    string : str
        Raw text to clean (find_query passes the user's query here).

    Returns
    -------
    str
        The surviving tokens joined by single spaces; '' when no token survives.
    """
    # Keep runs of word characters, apostrophes and hyphens; anything else
    # (punctuation, whitespace) acts as a separator and is discarded.
    tokens = nl.RegexpTokenizer(r"['\w-]+").tokenize(string)

    # Lower-case first, otherwise words such as AND, IS, MY would not be
    # recognised as stopwords.
    tokens = [token.lower() for token in tokens]

    # Build the stopword set once: calling stopwords.words() inside the
    # comprehension would rebuild the list for every token and test
    # membership against a list instead of a set.
    english_stopwords = set(stopwords.words("english"))
    tokens = [token for token in tokens if token not in english_stopwords]

    # Lemmatise twice so that both verb forms and noun forms are reduced.
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token, pos="v") for token in tokens]    # v for verbs
    tokens = [lemmatizer.lemmatize(token, pos="n") for token in tokens]    # n for nouns

    return ' '.join(tokens)

In [8]:
# we first need these two helper functions to implement the search function


# this function takes as input a list of lists and gives back the index of the list that has the minimum first element
def find_min_list (L):
    """Return the position of the inner list whose first element is smallest.

    Parameters
    ----------
    L : list of lists
        Must be non-empty, and every inner list must have a first element.

    Returns
    -------
    int
        Index into ``L``; when several lists share the smallest first
        element, the earliest one wins.
    """
    best_position = 0
    smallest = L[0][0]
    for position, candidate in enumerate(L[1:], start=1):
        # Strict comparison keeps the earliest list on ties.
        if candidate[0] < smallest:
            best_position, smallest = position, candidate[0]
    return best_position
        

# this function takes as input a list of lists and gives back the list created from the intersection of the lists 
def intersect_list (L):
    """Return the elements that appear in every list of ``L``.

    The lists are walked in parallel with one cursor per list: when all
    current elements are equal the value is a match and every cursor moves
    on; otherwise only the list with the smallest current element advances.
    This is only a correct intersection if each inner list is sorted in
    increasing order.

    Parameters
    ----------
    L : list of lists
        The sorted lists to intersect. Neither ``L`` nor its inner lists
        are modified.

    Returns
    -------
    list
        The common elements, in the order they are met. An empty ``L`` or
        any empty inner list yields ``[]``.
    """
    # With no lists there is nothing to intersect; without this guard the
    # loop below would try to read L[0].
    if not L:
        return []

    # One read position per list, instead of re-slicing the lists on every
    # step (which copies them and would overwrite the caller's entries).
    cursors = [0] * len(L)
    results = []

    while all(cursor < len(lst) for cursor, lst in zip(cursors, L)):
        heads = [lst[cursor] for cursor, lst in zip(cursors, L)]
        first = heads[0]

        if all(head == first for head in heads[1:]):
            results.append(first)
            cursors = [cursor + 1 for cursor in cursors]
        else:
            # Advance the (earliest) list whose current element is smallest:
            # that element cannot be present in all the other lists.
            lagging = min(range(len(heads)), key=heads.__getitem__)
            cursors[lagging] += 1

    return results

This is the function that finds the documents containing the query and prints their Title, Plot and URL:  

In [41]:
from IPython.display import display, HTML


In [53]:
def find_query(inverted_index, vocabulary, data_frame, query=None):
    """Run a conjunctive (AND) search and display the matching books.

    The query is cleaned with ``clean_info``, each remaining word is
    translated to a term_id through ``vocabulary``, the document lists of
    those terms are fetched from ``inverted_index`` and intersected, and the
    title, plot and URL of the resulting rows are rendered as an HTML table.

    Parameters
    ----------
    inverted_index : dict
        Maps a term_id to its list of documents. The lists must be sorted in
        increasing order for the intersection to be correct.
    vocabulary : dict
        Maps a cleaned word to its term_id.
    data_frame : pandas.DataFrame
        Books table; must contain the columns 'bookTitle', 'Plot' and 'Url'.
    query : str, optional
        The words to look up. When omitted, the user is prompted via
        ``input()``.

    Returns
    -------
    None
        The result is displayed, not returned.
    """
    if query is None:
        query = input()  # asks the user a string of words to look up

    # split() without arguments yields no words at all for an empty cleaned
    # query (e.g. a query made only of stopwords).
    list_words = clean_info(query).split()

    # Translate every word to its term_id and collect the document lists.
    list_documents = []
    for word in list_words:
        term_id = vocabulary.get(word)
        if term_id is None or term_id not in inverted_index:
            # A word that occurs in no document makes the AND query
            # unsatisfiable; silently skipping it would return documents
            # that do not contain every query word.
            list_documents = []
            break
        list_documents.append(inverted_index[term_id])

    # Intersect the lists (it is fundamental that the documents are stored
    # as increasing sequences). With nothing to intersect there is no match.
    results = intersect_list(list_documents) if list_documents else []

    # Tell pandas to print the full plot instead of truncating it.
    # NOTE: this is a global, persistent pandas setting.
    pd.set_option('display.max_colwidth', None)

    # NOTE(review): results are used as row positions; this assumes the index
    # stores positions in data_frame rather than document_ID values - confirm
    # against how the inverted index was built.
    matches = data_frame.iloc[results][['bookTitle', 'Plot', 'Url']]

    # index=False already hides the row index, so no Styler call is needed.
    display(HTML(matches.to_html(index=False)))

    return
    
    


Let's see an example of the function in use:

In [54]:
# Prompts for a query on standard input, then renders the matching books as an HTML table.
find_query(ID, vocabulary, df)

heart darkness


bookTitle,Plot,Url
Shadow and Bone,"▶ \nAlternative Cover Edition #1\nSurrounded by enemies, the once-great nation of Ravka has been torn in two by the Shadow Fold, a swath of near impenetrable darkness crawling with monsters who feast on human flesh. Now its fate may rest on the shoulders of one lonely refugee.Alina Starkov has never been good at anything. But when her regiment is attacked on the Fold and her best friend is brutally injured, Alina reveals a dormant power that saves his life—a power that could be the key to setting her war-ravaged country free. Wrenched from everything she knows, Alina is whisked away to the royal court to be trained as a member of the Grisha, the magical elite led by the mysterious Darkling.Yet nothing in this lavish world is what it seems. With darkness looming and an entire kingdom depending on her untamed power, Alina will have to confront the secrets of the Grisha . . . and the secrets of her heart.",https://www.goodreads.com/book/show/10194157-shadow-and-bone
Tears of Tess,"“My life was complete. Happy, content, everything neat and perfect.Then it all changed.I was sold.”Tess Snow has everything she ever wanted: one more semester before a career in property development, a loving boyfriend, and a future dazzling bright with possibility. For their two year anniversary, Brax surprises Tess with a romantic trip to Mexico. Sandy beaches, delicious cocktails, and soul-connecting sex set the mood for a wonderful holiday. With a full heart, and looking forward to a passion filled week, Tess is on top of the world.But lusty paradise is shattered.Kidnapped. Drugged. Stolen. Tess is forced into a world full of darkness and terror. Captive and alone with no savior, no lover, no faith, no future, Tess evolves from terrified girl to fierce fighter. But no matter her strength, it can’t save her from the horror of being sold.Can Brax find Tess before she’s broken and ruined, or will Tess’s new owner change her life forever?A New Adult Dark Contemporary Romance, not suitable for people sensitive to grief, slavery, and nonconsensual sex. A story about finding love in the strangest of places, a will of iron that grows from necessity, and forgiveness that may not be enough.",https://www.goodreads.com/book/show/18134894-tears-of-tess
Written In Blood (Book One Of The Unnatural Brethren),"“His silhouette immersed deep in the shadows of the room, sitting in the chair in the farthest corner from where I lay. A stray ray of light filtered into the pond of darkness where he skulked, and then I caught a glimpse of his vacant eyes, their surface white and opaque and devoid of any speck of life.”—Silvana G. Sánchez, WRITTEN IN BLOOD.Ivan Lockhart cheated death for the last time.When Ivan escapes a fatal tragedy and another dies in his stead, victory leads to damnation. Haunted by the ghost of his guilt, desperate to break away from his past, Lockhart flees seventeenth-century Winterbourne and embarks to the city of Venice, where ancient secrets sleep and legends rise from their graves.Poveglia, an island cast deep in the shadows of the Venetian horizon, conceals a rare lineage of immortals, the Sartie Mangiatori, obscure creatures that feed on human blood—Vampire’s predecessors.Lockhart's unforgettable journey will offer more than he ever expected as he travels through the darkest corners of Europe's elite destinations in a pilgrimage to the remotest depths of the human heart.",https://www.goodreads.com/book/show/33383034-written-in-blood
Sire,"“Because love is the easiest way to control someone and I needed a way to control you.” Still mending a broken heart, Matthew has spent a year avoiding incubi and their army of hunters. Now he has a new goal: lift the shroud on his past. Why had he been turned and abandoned by his sire? Why didn’t he feel the initial bond between sire and child that vampires experience? Who were his parents, and why had they given him up at birth? Frustrated by dead ends, his luck changes when a teenage girl tracks him down. She’s strange, socially awkward, and she can see the future. Matthew’s future. She’ll help him if he agrees to save her from the darkness…from Tarrick. With the incubi armies closing in, a mysterious protector appears, swearing his fealty to Matthew. He’s ancient, seductive, and cursed—unable to explain why he keeps calling Matthew ‘my prince’.",https://www.goodreads.com/book/show/32721063-sire
Twisted Together,"“After battling through hell, I brought my esclave back from the brink of ruin. I sacrificed everything—my heart, my mind, my very desires to bring her back to life. And for a while, I thought it broke me, that I’d never be the same. But slowly the beast is growing bolder, and it’s finally time to show Tess how beautiful the dark can be.” Q gave everything to bring Tess back. In return, he expects nothing less. Tess may have leashed and tamed him, but he’s still a monster inside. After surviving the darkness, a new dawn has begun. Twisted Together wades through black to grey, chasing the light of true love to banish the shadows forever. Pain is a requirement, connection a necessity. But ultimately Q and Tess must face their demons, before they can embrace their future.",https://www.goodreads.com/book/show/19015729-twisted-together
The Holy War,"A Masterful Spiritual Classic Once upon a time, the residents of the town of Mansoul were tricked into defying their ruler, Shaddai. Their new ruler, Diabolus, brought them great harm. When Shaddai sends Prince Emmanuel, his son, to rescue them, a great battle is fought. Who will emerge victorious—Diabolus or Emmanuel? And what can the inhabitants of Mansoul do to resist the attacks of the evil one? From the author of The Pilgrim’s Progress comes a powerful allegory about the battle being fought for man’s heart, mind, and spirit. Your soul is under attack from the forces of evil. Through this compelling read, you will learn how to build up your defenses, flood your moat, and prepare for victory in the war against Satan and the forces of darkness!",https://www.goodreads.com/book/show/230877.The_Holy_War
Dance with the Devil,"Zarek’s Point of View:Dark-Hunter: A soulless guardian who stands between mankind and those who would see mankind destroyed. Yeah, right. The only part of that Code of Honor I got was eternity and solitude.Insanity: A condition many say I suffer from after being alone for so long. But I don’t suffer from my insanity-I enjoy every minute of it.Trust: I can’t trust anyone…not even myself. The only thing I trust in is my ability to do the wrong thing in any situation and to put a hurt on anyone who gets in my way.Truth: I endured a lifetime as a Roman slave, and 900 years as an exiled Dark-Hunter. Now I’m tired of enduring. I want the truth about what happened the night I was exiled-I have nothing to lose and everything to gain.Astrid (Greek, meaning star): An exceptional woman who can see straight to the truth. Brave and strong, she is a point of light in the darkness. She touches me and I tremble. She smiles and my cold heart shatters.Zarek: They say even the most damned man can be forgiven. I never believed that until the night Astrid opened her door to me and made this feral beast want to be human again. Made me want to love and be loved. But how can an ex-slave whose soul is owned by a Greek goddess ever dream of touching, let alone holding, a fiery star?",https://www.goodreads.com/book/show/84145.Dance_with_the_Devil
Fire Bringer,"Young buck Rannoch was born on the night his father was murdered and into a herd of deer where hunger for power has gradually whittled away at all that is true and good. He knows he must escape to survive. Chased by stags, with their fearsome antlers sharpened for the kill, he begins a treacherous journey into the unknown, and ahead of him lies a shocking and formidable search for truth and goodwill in the shadow of the Great Mountain. One day he will have to return to his home and face his destiny among the deer to fulfill the prophecy that has persistently given them hope: that one day a fawn will be born with the mark of an oak leaf on his forehead and that fawn's courage will lead all the deer to freedom. Filled with passion and a darkness that gradually, through Rannoch's courage in the face of adversity, lifts to reveal an overwhelming feeling of light, Fire Bringer is a tremendous, spirited story that takes the reader deep into the hearts and minds of its characters as they fight for their right to live in peace.",https://www.goodreads.com/book/show/58087.Fire_Bringer
Volition,"You know that feeling in the pit of your stomach that stays with you, then tears you apart slowly at first, and all at once shreds every fiber of your being? It’s because you’re contradicting the universe. Everything lines up so perfectly that you couldn’t have imagined it to work out better, but then you have to go and do everything humanly possible to ruin it because you can’t stand to have it go right? That’s what I did. I did it because there’s a darkness that surrounds me, and I think I want it there. My name is Tate McKenna, and my soul is blacker than my heart.",https://www.goodreads.com/book/show/24307444-volition
Simple Perfection,"Woods had his perfect life mapped out for him. Rise up the ranks of the family business. Marry the rich girl of his parents’ dreams. Pretend that wealth and privilege was all he’d ever wanted. Then a girl named Della breezed into town, a beautifully imperfect stranger who captured his heart and opened his eyes to a new kind of future. Woods is ready and willing to sacrifice everything for her when the sudden death of his father leaves him with his mother to care for and a business to manage.Della is determined to be strong for Woods, even as she’s quietly falling to pieces. No matter how far from home she’s run, the ghosts of her past have never stopped haunting her. Struggling to hide her true feelings from Woods, Della fears she can’t be his rock without dragging him down into the darkness with her. But is she strong enough to let go of the last thing holding her together?",https://www.goodreads.com/book/show/17449417-simple-perfection
