In [1]:
import pandas as pd

In [2]:
# Read the raw book catalogue from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="books.csv")
print(df.head(n=5))

          isbn13      isbn10           title subtitle  \
0  9780002005883  0002005883          Gilead      NaN   
1  9780002261982  0002261987    Spider's Web  A Novel   
2  9780006163831  0006163831    The One Tree      NaN   
3  9780006178736  0006178731  Rage of angels      NaN   
4  9780006280897  0006280897  The Four Loves      NaN   

                           authors                     categories  \
0               Marilynne Robinson                        Fiction   
1  Charles Osborne;Agatha Christie  Detective and mystery stories   
2             Stephen R. Donaldson               American fiction   
3                   Sidney Sheldon                        Fiction   
4              Clive Staples Lewis                 Christian life   

                                           thumbnail  \
0  http://books.google.com/books/content?id=KQZCP...   
1  http://books.google.com/books/content?id=gA5GP...   
2  http://books.google.com/books/content?id=OmQaw...   
3  http://books.go

In [3]:
# Turn one book row into the labelled text block used for embedding.
def textual_representation(row):
    """Build the multi-line text description of a single book.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by column name (for example a DataFrame row)
        that provides ``title``, ``authors``, ``description``,
        ``categories``, ``published_year``, ``average_rating`` and
        ``num_pages``.

    Returns
    -------
    str
        One ``Label: value`` line per field, in a fixed order, terminated
        by a newline.
    """
    # (label shown in the text, column read from the row), in output order.
    labelled_columns = (
        ("Title", "title"),
        ("Authors", "authors"),
        ("Description", "description"),
        ("Categories", "categories"),
        ("Publishing Year", "published_year"),
        ("Average Rating", "average_rating"),
        ("Number of Pages", "num_pages"),
    )
    lines = [f"{label}: {row[column]}" for label, column in labelled_columns]
    return "\n".join(lines) + "\n"

In [4]:
# Sanity check: build the text for the first five books and show the first one.
print(df.head(5).apply(textual_representation, axis=1).iloc[0])

Title: Gilead
Authors: Marilynne Robinson
Description: A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gilead is a song of celebration

In [42]:
# Build the text description for every row and store it as a new column.
# (The former bare `df["textual_representation"]` line was removed: it was not the
# last expression of the cell, so it displayed nothing and had no effect.)
df["textual_representation"] = df.apply(textual_representation, axis=1)

# Save the enriched table (without the index column) to a new CSV.
df.to_csv("books_with_textual.csv", index=False)

In [22]:
#make embedding and put in vector store
import faiss
import requests
import numpy as np 

# Length of the embedding vector returned for llama2.
dim = 4096
# Flat L2-distance FAISS index sized for vectors of that length.
index = faiss.IndexFlatL2(dim)

# Zero-filled float32 buffer: one row per book, one column per embedding dimension.
X = np.zeros(shape=(len(df["textual_representation"]), dim), dtype=np.float32)

In [7]:
# Embed every book description with the local embeddings endpoint and index the vectors.
# A single HTTP session is reused so each request does not open a new connection.
with requests.Session() as session:
    for i, representation in enumerate(df["textual_representation"]):
        if i % 100 == 0:
            print(i)  # coarse progress indicator

        res = session.post("http://localhost:11434/api/embeddings",
                           json={
                               "model": "llama2",
                               "prompt": representation
                           })
        # Fail loudly on an HTTP error instead of hitting a confusing KeyError below.
        res.raise_for_status()
        embedding = res.json()["embedding"]

        X[i] = np.array(embedding)

# Start from an empty index so re-running this cell does not add every vector twice.
index.reset()
index.add(X)

# Save the end result so this code does not need to be run over and over again.
faiss.write_index(index, "vector_index.faiss")

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800


In [28]:
# Load the FAISS index that the embedding cell wrote to "vector_index.faiss",
# so the embeddings do not have to be recomputed.
index = faiss.read_index("vector_index.faiss")

In [29]:
# Look up candidate books by title in order to pick one as the query for the
# similarity search.
# na=False treats a missing title as "no match", so the boolean mask never contains
# NaN (which would make the row filter raise); regex=False matches "Magic" literally.
df[df["title"].str.contains("Magic", na=False, regex=False)]

Unnamed: 0,isbn13,isbn10,title,subtitle,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,textual_representation
244,9780060747473,0060747471,This Rough Magic,,Mary Stewart,Fiction,http://books.google.com/books/content?id=BjFn9...,"A fledgling actress vacationing in Corfu, Lucy...",2004.0,4.05,373.0,4966.0,Title: This Rough Magic\nAuthors: Mary Stewart...
1128,9780156119078,0156119072,Big Anthony and the Magic Ring,,Tomie DePaola,Juvenile Fiction,http://books.google.com/books/content?id=U5sYv...,When Big Anthony borrows Strega Nona's magic r...,1979.0,4.1,32.0,443.0,Title: Big Anthony and the Magic Ring\nAuthors...
2039,9780374525866,0374525862,The Magic Barrel,Stories,Bernard Malamud,Fiction,http://books.google.com/books/content?id=Wmcxu...,Winner of the National Book Award for Fiction ...,2003.0,4.0,240.0,1898.0,Title: The Magic Barrel\nAuthors: Bernard Mala...
2591,9780425187715,0425187713,The Magical Worlds of the Lord of the Rings,"The Amazing Myths, Legends, and Facts Behind t...",David Colbert,Fiction,http://books.google.com/books/content?id=YLeKU...,Offers information about Tolkien's realm of Mi...,2002.0,4.08,208.0,2053.0,Title: The Magical Worlds of the Lord of the R...
2594,9780425190371,0425190374,Practical Magic,,Alice Hoffman,Fiction,http://books.google.com/books/content?id=7hHCk...,Sorcery is the legacy of Gillian and Sally Owe...,2003.0,3.79,286.0,49141.0,Title: Practical Magic\nAuthors: Alice Hoffman...
2685,9780439411875,0439411874,"The Girl, the Dragon, and the Wild Magic",,Dave Luckett,Juvenile Fiction,http://books.google.com/books/content?id=1wpcG...,"Failing out of magic school, Rhianna meets a w...",2003.0,3.86,119.0,450.0,"Title: The Girl, the Dragon, and the Wild Magi..."
2841,9780441014897,0441014895,Magic Bites,,Ilona Andrews,Fiction,http://books.google.com/books/content?id=46yJD...,"Earning a living cleaning up magical messes, m...",2007.0,4.07,260.0,82231.0,Title: Magic Bites\nAuthors: Ilona Andrews\nDe...
2957,9780449702024,0449702022,Winter of Magic's Return,,Pamela F. Service,Fantasy.,,In a time five hundred years after a nuclear h...,1986.0,4.25,194.0,227.0,Title: Winter of Magic's Return\nAuthors: Pame...
3427,9780553267600,0553267604,Magician,Apprentice,Raymond E. Feist,Apprentices,http://books.google.com/books/content?id=KyqdO...,For use in schools and libraries only. Once he...,1985.0,4.18,336.0,611.0,Title: Magician\nAuthors: Raymond E. Feist\nDe...
3503,9780553564938,0553564935,Magician,Master,Raymond E. Feist,Fiction,http://books.google.com/books/content?id=uuuVP...,"As a captive slave of the Tsurani, warlike inv...",1993.0,4.26,499.0,57779.0,Title: Magician\nAuthors: Raymond E. Feist\nDe...


In [30]:
# Pick the query book by position; 2685 is the row listed as
# "The Girl, the Dragon, and the Wild Magic" in the title search output above.
# NOTE(review): iloc is positional, so this relies on the index labels matching
# the row positions — confirm if the frame is ever filtered or re-indexed.
favorite_book = df.iloc[2685]
# Show the text that will be embedded for this book.
print(favorite_book["textual_representation"])

Title: The Girl, the Dragon, and the Wild Magic
Authors: Dave Luckett
Description: Failing out of magic school, Rhianna meets a wizard who realizes she is a Wild Talent, possessing a difficult-to-manage power, and when a dragon comes to town, it's up to Rhianna to master her craft or face certain doom.
Categories: Juvenile Fiction
Publishing Year: 2003.0
Average Rating: 3.86
Number of Pages: 119.0



In [34]:
# Request the embedding of the chosen book's text from the local embeddings endpoint.
# The prompt can also be any hand-written string.
res = requests.post(
    "http://localhost:11434/api/embeddings",
    json={"model": "llama2", "prompt": favorite_book["textual_representation"]},
)

In [35]:
# Dump the raw JSON reply for inspection.
print(res.json())
# Wrap the vector in an outer list so the float32 array is 2-D with a single row,
# which is the form passed to index.search in the next cell.
embedding = np.array([res.json()["embedding"]], dtype="float32")

{'embedding': [0.5002245306968689, -1.0056668519973755, 1.3424986600875854, -0.4115658104419708, -0.004227227531373501, 0.5636347532272339, -0.6240122318267822, 0.685651957988739, 3.4620261192321777, -0.17785681784152985, 0.08906105905771255, -0.6517775654792786, 0.9217854142189026, 1.3584141731262207, -1.2841718196868896, -1.196996808052063, -1.678426742553711, -0.550056517124176, 0.25480780005455017, -0.3421773612499237, -0.42415672540664673, 0.01689789816737175, -1.5347542762756348, -1.330795407295227, -0.3269815444946289, -2.156529426574707, -0.4813421070575714, -2.0597269535064697, -0.5541914701461792, -0.47457826137542725, -0.5748680233955383, 1.5514315366744995, 1.8155570030212402, -0.04716775193810463, 0.7818975448608398, -1.6163265705108643, 0.09018485993146896, -0.32100096344947815, 0.45726248621940613, 1.2853084802627563, -0.67437344789505, -1.2739698886871338, -0.09354409575462341, -0.46244215965270996, -3.0572659969329834, -1.2328863143920898, 0.3112083673477173, -1.064044

In [36]:
# Search the index for the 5 stored vectors most similar to the query embedding.
# I holds the matching row positions (used below to look up the books); D is the
# companion array returned by the search — presumably the L2 distances, confirm
# against the FAISS documentation.
D, I = index.search(embedding, 5)

In [37]:
# Map the positions returned by the search back to the books' text descriptions.
best_matches = df["textual_representation"].to_numpy()[I.ravel()]

In [38]:
# Print each recommended book followed by one blank line.
for match in best_matches:
    print(match, end="\n\n")

Title: The Girl, the Dragon, and the Wild Magic
Authors: Dave Luckett
Description: Failing out of magic school, Rhianna meets a wizard who realizes she is a Wild Talent, possessing a difficult-to-manage power, and when a dragon comes to town, it's up to Rhianna to master her craft or face certain doom.
Categories: Juvenile Fiction
Publishing Year: 2003.0
Average Rating: 3.86
Number of Pages: 119.0


Title: The Drift House
Authors: Dale Peck
Description: Sent to stay with their uncle in a ship-like home called Drift House, twelve-year-old Susan and her stepbrothers embark on an adventure involving duplicitous mermaids, pirates, and an attempt to stop time forever.
Categories: Juvenile Fiction
Publishing Year: 2006.0
Average Rating: 3.64
Number of Pages: 437.0


Title: The Orange Trees of Versailles
Authors: Annie Pietri
Description: In the 1670s, fourteen-year-old Marion, who has a talent for making perfumes, gets the chance to serve Louis XIV's mistress at the palace of Versailles, whe

In [39]:
#make it into a function to take the user input details of the book they want to find a similar book to
def recommender(textual_representation, k=5):
    """Return the text descriptions of the books most similar to a query text.

    The query is embedded with the same local llama2 embeddings endpoint that
    was used to build the index, then looked up in the module-level FAISS
    ``index``; the hits are mapped back to ``df["textual_representation"]``.

    Parameters
    ----------
    textual_representation : str
        Text describing the book to find neighbours for (for example a value
        from the ``textual_representation`` column, or a hand-written string).
    k : int, default 5
        Maximum number of neighbours to return.

    Returns
    -------
    numpy.ndarray
        Array of matching text descriptions, in the order returned by the
        index search. It may hold fewer than ``k`` entries when the index
        contains fewer than ``k`` vectors.

    Raises
    ------
    requests.HTTPError
        If the embeddings endpoint answers with an error status.
    """
    res = requests.post("http://localhost:11434/api/embeddings",
                        json={
                            "model": "llama2",
                            "prompt": textual_representation
                        })
    # Surface server-side failures directly instead of a KeyError on "embedding".
    res.raise_for_status()
    # The outer list makes the query a 2-D array with a single row.
    embedding = np.array([res.json()["embedding"]], dtype="float32")

    _, neighbours = index.search(embedding, k)
    positions = neighbours.flatten()
    # FAISS reports missing neighbours as -1; used as an array position that
    # would silently select the last book, so drop those entries.
    positions = positions[positions >= 0]

    return np.array(df["textual_representation"])[positions]
    