# Implementing an LLM-powered recommendation system

## Data Preprocessing

In [9]:
 
 
import lancedb
import pandas as pd
import tiktoken

from openai import OpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

from langchain.callbacks import get_openai_callback
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.vectorstores import LanceDB
 



PydanticUserError: If you use `@root_validator` with pre=False (the default) you MUST specify `skip_on_failure=True`. Note that `@root_validator` is deprecated and should be replaced with `@model_validator`.

For further information visit https://errors.pydantic.dev/2.6/u/root-validator-pre-skip

In [2]:
# Discard every row that has at least one missing value.
# NOTE(review): `anime` is not created in any cell visible here -- confirm a
# loading cell runs before this one on a fresh kernel.
anime = anime.dropna(axis=0, how="any")

In [4]:

# Build one text field per title that concatenates name, synopsis and genres;
# this is the string that gets tokenized and embedded in the later cells.
# ('sypnopsis' is the column's actual spelling in the frame.)
anime['combined_info'] = anime.apply(
    lambda row: f"Title: {row['Name']}. Overview: {row['sypnopsis']} Genres: {row['Genres']}",
    axis=1,
)
# Preview the first remaining row by position: a label lookup with [0] raises
# KeyError when the row labelled 0 was removed by the earlier dropna().
anime['combined_info'].iloc[0]



"Title: Monster. Overview: Dr. Kenzou Tenma, an elite neurosurgeon recently engaged to his hospital director's daughter, is well on his way to ascending the hospital hierarchy. That is until one night, a seemingly small event changes Dr. Tenma's life forever. While preparing to perform surgery on someone, he gets a call from the hospital director telling him to switch patients and instead perform life-saving brain surgery on a famous performer. His fellow doctors, fiancée, and the hospital director applaud his accomplishment; but because of the switch, a poor immigrant worker is dead, causing Dr. Tenma to have a crisis of conscience. So when a similar situation arises, Dr. Tenma stands his ground and chooses to perform surgery on the young boy Johan Liebert instead of the town's mayor. Unfortunately, this choice leads to serious ramifications for Dr. Tenma—losing his social standing being one of them. However, with the mysterious death of the director and two other doctors, Dr. Tenma's

## Embeddings

In [5]:
embedding_model = "text-embedding-ada-002"
embedding_encoding = "cl100k_base"  # this is the encoding for text-embedding-ada-002
max_tokens = 8000  # the maximum for text-embedding-ada-002 is 8191

encoding = tiktoken.get_encoding(embedding_encoding)

# Count the tokens of every combined description with the model's encoding.
anime["n_tokens"] = anime.combined_info.apply(lambda x: len(encoding.encode(x)))

# Omit descriptions that are too long to embed. The explicit .copy() makes the
# filtered frame independent of the original, so the column assignments and
# in-place renames done on it later do not raise SettingWithCopyWarning.
anime = anime[anime.n_tokens <= max_tokens].copy()
len(anime)

16206

In [6]:
# Show the first five rows, including the new combined_info / n_tokens columns.
anime.head(n=5)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis,combined_info,n_tokens
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever...",Title: Cowboy Bebop. Overview: In the year 207...,245
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ...",Title: Cowboy Bebop: Tengoku no Tobira. Overvi...,199
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0...",Title: Trigun. Overview: Vash the Stampede is ...,252
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...,Title: Witch Hunter Robin. Overview: ches are ...,125
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...,Title: Bouken Ou Beet. Overview: It is the dar...,188


In [8]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding of ``text`` produced by ``model``.

    Newlines in ``text`` are replaced by spaces before the request is made.
    The request goes through a module-level ``client`` object
    (``client.embeddings.create``), and the ``embedding`` attribute of the
    first item in the response's ``data`` is returned.

    NOTE(review): ``client`` is not defined in the cells visible here --
    confirm it is created before this function is called.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding


# Embed every combined description with the configured embedding model
# (get_embedding is called once per row), then preview the result.
anime["embedding"] = anime["combined_info"].apply(get_embedding, model=embedding_model)
anime.head(n=5)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis,combined_info,n_tokens,embedding
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever...",Title: Cowboy Bebop. Overview: In the year 207...,245,"[0.00921056978404522, -0.012633174657821655, 0..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ...",Title: Cowboy Bebop: Tengoku no Tobira. Overvi...,199,"[-0.008109764195978642, -0.028518257662653923,..."
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0...",Title: Trigun. Overview: Vash the Stampede is ...,252,"[0.0019446373917162418, -0.001545737381093204,..."
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...,Title: Witch Hunter Robin. Overview: ches are ...,125,"[-0.014938411302864552, 0.007340028416365385, ..."
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...,Title: Bouken Ou Beet. Overview: It is the dar...,188,"[0.010889030061662197, 0.0069219209253787994, ..."


In [9]:
# Rename the embedding and text columns to 'vector' and 'text', in place,
# then persist the frame as a pickle.
column_renames = {'embedding': 'vector', 'combined_info': 'text'}
anime.rename(columns=column_renames, inplace=True)
anime.to_pickle('data/anime.pkl')

## Start working with LLMs

In [2]:
# Local LanceDB database location.
uri = "dataset/sample-anime-lancedb"

db = lancedb.connect(uri)
# mode="overwrite" replaces an existing "anime" table, so re-running this cell
# (or the whole notebook) does not fail because the table already exists.
table = db.create_table("anime", anime, mode="overwrite")

# NOTE(review): `openai_api_key` is not defined in the cells visible here --
# confirm it is set before this cell runs.
# embeddings = OpenAIEmbeddings(engine="text-embedding-ada-002")
embeddings = OpenAIEmbeddings(
    deployment="SL-document_embedder",
    model="text-embedding-ada-002",
    show_progress_bar=True,
    openai_api_key=openai_api_key)

# Wrap the table as a LangChain vector store, passing the embeddings object above.
docsearch = LanceDB(connection=table, embedding=embeddings)

# simple similarity computation
# query = "I'm looking for an animated action movie. What could you suggest to me?"
# docs = docsearch.similarity_search(query, k=1)

# Chat model used to write the recommendations; temperature is set to 0.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo-1106",
    temperature=0,
    api_key=openai_api_key
)

## Prompt engineering

In [32]:
# without prompt
# qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever(),
#                                        return_source_documents=True)

# let’s say we are only interested in anime that, among their genres, are tagged as “Action”.
# df_filtered = anime[anime['Genres'].apply(lambda x: 'Action' in x)]
# qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
#                                        retriever=docsearch.as_retriever(search_kwargs={'data': df_filtered}),
#                                        return_source_documents=True)

# define custom prompt: {context} and {question} are the two input variables
# declared on the PromptTemplate below.
template = """You are a movie recommender system that help users to find anime that match their preferences. 
Use the following pieces of context to answer the question at the end. 
For each question, suggest three anime, with a short description of the plot and the reason why the user migth like it.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Your response:"""

PROMPT = PromptTemplate(
    template=template, input_variables=["context", "question"])

chain_type_kwargs = {"prompt": PROMPT}

# Retrieval chain: built from the retriever, the chat model and the custom prompt.
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                       chain_type="stuff",
                                       retriever=docsearch.as_retriever(),
                                       return_source_documents=True,
                                       chain_type_kwargs=chain_type_kwargs)

query = "I'm looking for an action anime. What could you suggest to me?"

# Query and Response
# .invoke() replaces the deprecated direct call `qa_chain({...})`; the answer
# is read from the 'result' key below.
with get_openai_callback() as cb:
    result = qa_chain.invoke({"query": query})

print(result['result'])


1. Urikupen Kyuujo-tai: This adventure comedy follows a team of brave young animals that rescues others in peril. With a dog, a boar, a deer, a koala, a mouse, a seagull, and a lion, this show is sure to please those looking for an action anime featuring animals.

2. Nekketsu Jinmen Inu: Life Is Movie: This parody follows a passionate human-faced dog NEET/would be detective in his adventures. Fans of action anime with animals are sure to be engaged by this mystery story.

3. Daisetsusan no Yuusha Kibaou: The main character of this drama is Fang, who was born to a hunting dog and a circus-runaway European wolf. Fang returns from the circus to face his foe, a giant brown bear which killed his family, making this story a great pick for those looking for an action anime with animals.
