In [1]:
!pip -q install chromadb==0.5.3 langchain openai tiktoken

In [2]:
import chromadb

# Print the version the kernel actually imports, to compare against the
# chromadb==0.5.3 pin in the install cell.
print(chromadb.__version__)

0.5.3


**Loading Libraries**

In [3]:
import pandas as pd
import tiktoken
import os
import openai

# from openai.embeddings_utils import get_embedding

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders.csv_loader import CSVLoader

In [4]:
# Import the SDK client under an alias: an earlier cell already binds the name
# `OpenAI` to LangChain's LLM wrapper (`from langchain.llms import OpenAI`), and
# a plain `from openai import OpenAI` would silently shadow it for later cells.
from openai import OpenAI as OpenAIClient

# Read the key from the environment instead of hard-coding it in the notebook.
# Raises KeyError if OPENAI_API_KEY is not set.
client = OpenAIClient(api_key=os.environ["OPENAI_API_KEY"])

def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding of ``text`` produced by the module-level ``client``.

    Args:
        text: String to embed. Every newline is replaced by a single space
            before the request is sent.
        model: Name of the embedding model passed to the embeddings endpoint.

    Returns:
        The ``embedding`` attribute of the first item in the response's
        ``data`` list.
    """
    # Collapse the text onto one line before sending it.
    single_line = " ".join(text.split("\n"))
    response = client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding

# df['ada_embedding'] = df.combined.apply(lambda x: get_embedding(x, model='text-embedding-ada-002'))

**Data Preprocessing**

In [5]:
# Load the raw anime table and preview its first five rows.
anime = pd.read_csv(filepath_or_buffer='./data/anime_with_synopsis.csv')
anime.head(n=5)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...


In [6]:
# Discard every row that has a missing value in any column.
anime = anime.dropna(axis=0, how='any')

In [7]:
# Combine title, synopsis, and genres into one text field per row.
# Note: the synopsis column is spelled 'sypnopsis' in the dataset header.
anime['combined_info'] = anime.apply(lambda row: f"Title: {row['Name']}. Overview: {row['sypnopsis']} Genres: {row['Genres']}", axis=1)
# Preview the first remaining row by position. A label lookup (`[0]`) would
# raise KeyError if the row labelled 0 had been removed by dropna().
anime['combined_info'].iloc[0]

'Title: Cowboy Bebop. Overview: In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to keep peace in the galaxy, aided in part by outlaw bounty hunters, referred to as "Cowboys." The ragtag team aboard the spaceship Bebop are two such individuals. Mellow and carefree Spike Spiegel is balanced by his boisterous, pragmatic partner Jet Black as the pair makes a living chasing bounties and collecting rewards. Thrown off course by the addition of new members that they meet in their travels—Ein, a genetically engineered, highly intelligent Welsh Corgi; femme fatale Faye Valentine, an enigmatic trickster with memory loss; and the strange computer whiz kid Edward Wong—the crew embarks on thrilling adventures that unravel each member\'s dark and mysterious past little by little. Well-balanced with high density action and light-hearted comedy, Cowboy Bebo

In [8]:
# Write only the combined_info column, without the index, to the CSV that the
# LangChain loader reads later.
anime.to_csv('./data/anime_updated.csv', columns=['combined_info'], index=False)

In [9]:
# Re-read the file that was just saved to inspect what was written.
pd.read_csv('./data/anime_updated.csv')

Unnamed: 0,combined_info
0,Title: Cowboy Bebop. Overview: In the year 207...
1,Title: Cowboy Bebop: Tengoku no Tobira. Overvi...
2,Title: Trigun. Overview: Vash the Stampede is ...
3,Title: Witch Hunter Robin. Overview: ches are ...
4,Title: Bouken Ou Beet. Overview: It is the dar...
...,...
16201,Title: Daomu Biji Zhi Qinling Shen Shu. Overvi...
16202,Title: Mieruko-chan. Overview: ko is a typical...
16203,Title: Higurashi no Naku Koro ni Sotsu. Overvi...
16204,Title: Yama no Susume: Next Summit. Overview: ...


**Data Loader and Vector store using Langchain**

In [10]:
# api_key = 'OPENAI_API_KEY' #Put your open ai api key

import openai

# Take the key from the OPENAI_API_KEY environment variable; this lookup
# raises KeyError if the variable is not set.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [12]:
# Load the processed CSV into LangChain documents.
loader = CSVLoader(file_path="./data/anime_updated.csv")
data = loader.load()

# Split the documents with a 1000-character chunk size and no overlap.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
texts = text_splitter.split_documents(documents=data)

# Embedding model used to index the chunks (a local model could be used instead).
embeddings = OpenAIEmbeddings()

# `llm` is rebound to a ChatOpenAI instance in a later cell before any chain uses it.
llm = OpenAI()

# Build the Chroma vector store from the chunks and the embedding model.
docsearch = Chroma.from_documents(documents=texts, embedding=embeddings)

  warn_deprecated(


**Querying Vector DB Store for movie recommendation**

In [13]:
# Ask the vector store directly for the single closest document (k=1).
query = "I'm looking for an animated action movie. What could you suggest to me?"
docs = docsearch.similarity_search(query=query, k=1)
docs

[Document(metadata={'row': 6623, 'source': './data/anime_updated.csv'}, page_content='combined_info: Title: Super Samchongsa. Overview: Korean animated movie about three kids protecting humanity along with their giant robot. Genres: Action, Space, Mecha, Shounen')]

**Using QA Retrieval for movie recommendation**

In [14]:
import os

# os.environ['OPENAI_API_KEY'] = api_key

In [15]:
from langchain.chat_models import ChatOpenAI

# Rebind `llm` to a gpt-3.5-turbo chat model with temperature set to 0;
# the RetrievalQA chains built below use this instance.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

  warn_deprecated(


In [16]:
# Retrieval QA chain using the default "stuff" prompt; the retrieved source
# documents are returned together with the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
    chain_type="stuff",
)

In [17]:
query = "I'm looking for an action anime. What could you suggest to me?"
# Use .invoke() rather than calling the chain object directly: Chain.__call__
# is deprecated and emits a deprecation warning on every call.
result = qa.invoke({"query": query})
result['result']

  warn_deprecated(


'I would suggest "Ikkitousen: Shuugaku Toushi Keppuuroku" or "Mirai Kara Kita Shounen Super Jetter" based on the genres of action, comedy, and super power. Both of these anime series seem to have a good amount of action and entertainment.'

In [18]:
# Inspect the first source document returned alongside the answer
# (available because the chain was built with return_source_documents=True).
result['source_documents'][0]

Document(metadata={'row': 5692, 'source': './data/anime_updated.csv'}, page_content='combined_info: Title: Ikkitousen: Shuugaku Toushi Keppuuroku. Overview: Ikkitousen OVA. Genres: Action, Ecchi, Martial Arts, Super Power, School')

**Prompt Engineering**

**First Template**

In [19]:
from langchain.prompts import PromptTemplate

# Prompt for the recommender. It has two placeholders, {context} and
# {question}, which are declared as input variables on the PromptTemplate
# built from this string.
template = """You are a movie recommender system that help users to find anime that match their preferences. 
Use the following pieces of context to answer the question at the end. 
For each question, suggest three anime, with a short description of the plot and the reason why the user migth like it.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Your response:"""


# Wrap the template string in a PromptTemplate with its two input variables.
PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

# Extra arguments passed to the "stuff" chain: use the custom prompt.
chain_type_kwargs = dict(prompt=PROMPT)

llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')

# Rebuild the retrieval QA chain so that it answers with the custom prompt.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

query = "I'm looking for an action anime with animals, any suggestions?"
# Use .invoke() rather than calling the chain object directly: Chain.__call__
# is deprecated and emits a deprecation warning on every call.
result = qa.invoke({'query': query})
print(result['result'])

1. Daisetsusan no Yuusha Kibaou - This anime follows the story of Fang, a wild animal born to a hunting dog and a European wolf, as he faces off against a giant brown bear that killed his family. The naturalistic setting and intense animal action make it a thrilling watch for fans of action anime with animals.

2. Wan Wan Chuushingura - In this anime, the homeless dog Rock seeks revenge on a tiger named Killer who killed his mother. With the support of other dogs in town, Rock sets out on an action-packed journey of vengeance. The fantasy elements and intense drama make it a great choice for those looking for action anime with animals.

3. SOS Kochira Chikyuu - While not a traditional action anime, this film focuses on a group of animals coming together to save the world from human war and nuclear disaster. The drama and emotional depth of the story, combined with the unique puppet animation style, make it a compelling watch for fans of action anime with animals.


**Second Template** - Providing additional user info in the context

In [20]:
from langchain.prompts import PromptTemplate

# First part of the prompt: instructions followed by the {context} placeholder.
template_prefix = """You are a movie recommender system that help users to find anime that match their preferences. 
Use the following pieces of context to answer the question at the end. 
For each question, take into account the context and the personal information provided by the user.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}"""

# Middle part: user profile, with {age} and {gender} filled in via str.format
# further down in this cell.
user_info = """This is what we know about the user, and you can use this information to better tune your research:
Age: {age}
Gender: {gender}"""

# Last part: the {question} placeholder and the answer cue.
template_suffix= """Question: {question}
Your response:"""

# Fill in the user profile now; {context} and {question} in the other parts
# stay as placeholders.
user_info = user_info.format(gender='female', age=18)

# Join the three parts with single newlines into the final template string.
COMBINED_PROMPT = '\n'.join([template_prefix, user_info, template_suffix])
print(COMBINED_PROMPT)

You are a movie recommender system that help users to find anime that match their preferences. 
Use the following pieces of context to answer the question at the end. 
For each question, take into account the context and the personal information provided by the user.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}
This is what we know about the user, and you can use this information to better tune your research:
Age: 18
Gender: female
Question: {question}
Your response:


In [21]:
# Prompt built from the combined template, which includes the user profile.
PROMPT = PromptTemplate(input_variables=["context", "question"], template=COMBINED_PROMPT)

chain_type_kwargs = dict(prompt=PROMPT)

# Rebuild the retrieval QA chain with the new prompt.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

query = "I'm looking for an action anime with animals, any suggestions?"
# Use .invoke() rather than calling the chain object directly: Chain.__call__
# is deprecated and emits a deprecation warning on every call.
result = qa.invoke({'query': query})
print(result['result'])

I recommend checking out "Wan Wan Chuushingura." It's an action anime that follows the story of a homeless dog seeking revenge on a tiger who killed his mother, with other dogs in town joining forces to support him. It has elements of adventure and drama as well.


In [22]:
# List the source documents returned alongside the answer
# (available because the chain was built with return_source_documents=True).
result['source_documents']

[Document(metadata={'row': 15680, 'source': './data/anime_updated.csv'}, page_content='combined_info: Title: Anime Kapibara-san. Overview: The series follows the daily adventures of Kapibarasan and his animal friends as they chill out among the Midorino Grasslands. (Source: Crunchyroll) Genres: Comedy, Kids'),
 Document(metadata={'row': 9240, 'source': './data/anime_updated.csv'}, page_content="combined_info: Title: SOS Kochira Chikyuu. Overview: Based on the children's book Doubutsu Kaigi (Animal Conference). The film focuses on a human war (and possible nuclear war) is affecting the animals and how the animals have a conference to figure out how to save the world and get the humans to comply. The film is done entirely in puppet animation. Genres: Drama, Kids"),
 Document(metadata={'row': 3858, 'source': './data/anime_updated.csv'}, page_content='combined_info: Title: Daisetsusan no Yuusha Kibaou. Overview: The conflicting fates of human beings and wild animals are depicted in a natur