**Loading Libraries**

In [1]:
import pandas as pd
import tiktoken
import os
import openai

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_groq.chat_models import ChatGroq
from langchain.prompts import PromptTemplate

**Data Preprocessing**

In [2]:
# Read the raw anime catalogue (title, score, genres, synopsis) from disk
# and preview the first five rows.
anime = pd.read_csv(
    filepath_or_buffer='G:\\youtube_transcript\\anime_with_synopsis\\anime_with_synopsis.csv'
)
anime.head(n=5)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...


In [3]:
# Discard every row that has a missing value in any column.
anime = anime.dropna(axis='index', how='any')

In [4]:
# Build one free-text field per title by joining name, synopsis and genres.
# Vectorised string concatenation replaces the row-wise `apply`; `astype(str)`
# mirrors the implicit str() conversion an f-string would perform.
# Note: 'sypnopsis' is the column name as spelled in the dataset.
anime['combined_info'] = (
    "Title: " + anime['Name'].astype(str)
    + ". Overview: " + anime['sypnopsis'].astype(str)
    + " Genres: " + anime['Genres'].astype(str)
)

# Preview the first remaining row by position. A label lookup (`[0]`) would
# raise KeyError if the row labelled 0 had been removed by the earlier dropna().
anime['combined_info'].iloc[0]

'Title: Cowboy Bebop. Overview: In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to keep peace in the galaxy, aided in part by outlaw bounty hunters, referred to as "Cowboys." The ragtag team aboard the spaceship Bebop are two such individuals. Mellow and carefree Spike Spiegel is balanced by his boisterous, pragmatic partner Jet Black as the pair makes a living chasing bounties and collecting rewards. Thrown off course by the addition of new members that they meet in their travels—Ein, a genetically engineered, highly intelligent Welsh Corgi; femme fatale Faye Valentine, an enigmatic trickster with memory loss; and the strange computer whiz kid Edward Wong—the crew embarks on thrilling adventures that unravel each member\'s dark and mysterious past little by little. Well-balanced with high density action and light-hearted comedy, Cowboy Bebo

In [5]:
# Write only the combined_info column (no index) to a CSV for the LangChain loader.
anime.to_csv(
    'G:\\youtube_transcript\\anime_with_synopsis\\anime_updated.csv',
    columns=['combined_info'],
    index=False,
)

In [6]:
# Sanity check: read the exported file back and display it.
pd.read_csv(
    filepath_or_buffer='G:\\youtube_transcript\\anime_with_synopsis\\anime_updated.csv'
)

Unnamed: 0,combined_info
0,Title: Cowboy Bebop. Overview: In the year 207...
1,Title: Cowboy Bebop: Tengoku no Tobira. Overvi...
2,Title: Trigun. Overview: Vash the Stampede is ...
3,Title: Witch Hunter Robin. Overview: ches are ...
4,Title: Bouken Ou Beet. Overview: It is the dar...
...,...
16201,Title: Daomu Biji Zhi Qinling Shen Shu. Overvi...
16202,Title: Mieruko-chan. Overview: ko is a typical...
16203,Title: Higurashi no Naku Koro ni Sotsu. Overvi...
16204,Title: Yama no Susume: Next Summit. Overview: ...


**Data Loader and Vector store using Langchain**

In [7]:
# Turn the processed CSV into LangChain documents.
loader = CSVLoader(
    file_path="G:\\youtube_transcript\\anime_with_synopsis\\anime_updated.csv",
    encoding='UTF-8',
)
data = loader.load()

In [8]:
# 1) Load the processed CSV as LangChain documents.
loader = CSVLoader(
    file_path="G:\\youtube_transcript\\anime_with_synopsis\\anime_updated.csv",
    encoding='UTF-8',
)
data = loader.load()

# 2) Split the documents into chunks (target size 1000 characters, no overlap).
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(data)

# 3) Embedding model used to vectorise the chunks.
embeddings = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")

# 4) Index the embedded chunks in a Chroma collection.
docsearch = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    collection_name="recommendation_learning",
)

**Querying Vector DB Store for movie recommendation**

In [9]:
# Ask the vector store directly (no LLM involved) for the single closest match.
query = "I'm looking for an animated action movie. What could you suggest to me?"
docs = docsearch.similarity_search(query=query, k=1)
docs

[Document(page_content="combined_info: Title: Ragnarök The Animation. Overview: great evil is sweeping over the realm; an evil that the young swordsman Roan and his life-long companion, the acolyte Yufa, must face head on! For these two travel toward their destiny, from the highest towers to the depths of the underworld, through forest and desert alike. With an ever-growing cast of fellow heroes, fate will grasp these travelers by their very souls and propel the band of skilled adventurers towards a noble end. Or ignoble, if they don't watch their step! Monsters are afoot and the way rife with danger and magic, the path forward may be unclear... But where will is strong, there is a way! Lessons wait in the depths of darkness, and good must prevail. The journey starts now! (Source: FUNimation Entertainment) Genres: Action, Magic, Fantasy", metadata={'row': 379, 'source': 'G:\\youtube_transcript\\anime_with_synopsis\\anime_updated.csv'})]

**Using QA Retrieval for movie recommendation**

In [None]:
# import os
# os.environ['OPENAI_API_KEY'] = api_key

In [10]:
# Read the Groq credential from the environment instead of hard-coding it in
# the notebook, and fail early with a clear message when it is missing.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before creating the Groq chat model."
    )

# Chat model used by the retrieval chains below.
llm = ChatGroq(api_key=groq_api_key)

In [11]:
# Retrieval QA chain: retrieved documents are passed to the LLM with the
# "stuff" chain type, and the source documents are returned with the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
)

In [12]:
query = "I'm looking for an action anime. What could you suggest to me?"

# Calling the chain object directly (`qa({...})`) goes through the deprecated
# `Chain.__call__`; `invoke` takes the same input dict and returns the same
# output dict ('result' and 'source_documents').
result = qa.invoke({"query": query})
result['result']

  warn_deprecated(


"Based on the information provided, I can suggest a few action-packed anime that match your interest.\n\n1. Nintama Rantarou no Koutsuu Anzen - Although this anime is targeted at kids and has a focus on traffic safety, it does involve martial arts, which could include action scenes.\n2. Sakura Kakumei: Hanasaku Otome-tachi - Given its genres of Action and Mecha, this anime could offer the action you're looking for. It's based on a popular RPG and should provide an engaging experience.\n3. Arad Senki: Slap Up Party - This anime combines Action, Adventure, Comedy, and Fantasy genres, making it a strong candidate for action-filled entertainment. Its unique premise and possessed arm element should offer an exciting watch.\n\nGive these anime a try and see if they fulfill your action anime cravings!"

In [13]:
# Inspect the first retrieved document that was passed to the LLM.
result["source_documents"][0]

Document(page_content='combined_info: Title: Nintama Rantarou no Koutsuu Anzen. Overview: affic safety anime starring the cast of Nintama Rantarou . Genres: Kids, Martial Arts', metadata={'row': 14293, 'source': 'G:\\youtube_transcript\\anime_with_synopsis\\anime_updated.csv'})

**Prompt Engineering**

**First Template**

In [15]:
# Prompt for the "stuff" chain. {context} receives the retrieved documents and
# {question} the user query. The wording (including trailing spaces) is kept
# verbatim so answers stay comparable with earlier runs.
template = (
    "You are a movie recommender system that help users to find anime that match their preferences. \n"
    "Use the following pieces of context to answer the question at the end. \n"
    "For each question, suggest three anime, with a short description of the plot and the reason why the user migth like it.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Your response:"
)

PROMPT = PromptTemplate(
    template=template, input_variables=["context", "question"])

chain_type_kwargs = {"prompt": PROMPT}

# Read the Groq credential from the environment instead of hard-coding it in
# the notebook, and fail early with a clear message when it is missing.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before creating the Groq chat model."
    )
llm = ChatGroq(api_key=groq_api_key)

# Same retrieval chain as before, now driven by the custom prompt.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

query = "I'm looking for an action anime with animals, any suggestions?"
# `invoke` replaces the deprecated direct call `qa({...})`; input and output
# dictionaries are unchanged.
result = qa.invoke({'query': query})
print(result['result'])

Based on your interest in an action anime with animals, here are three suggestions:

1. Hamuko Mairu! - This anime follows the adventures of a shinobi who sets out to rescue kidnapped rare animals. With a mix of action and comedy, this anime could be a great fit for you.

2. Sengoku Choujuu Giga: Kou - This anime describes the history of Sengoku warriors using animals. With genres such as historical, demons, supernatural, and samurai, this anime offers a unique take on action and history.

3. Fushigina Ano Ko wa Sutekina Kono Ko - While this anime is primarily targeted towards toddlers, it follows animals and their adventures. With the adventure genre and animal theme, this anime could be a good fit for you.


**Second Template** - Providing additional user info in the context

In [16]:
# Fixed opening of the prompt: role, instructions and the {context} slot.
# {context} is left unformatted here so it remains a placeholder.
template_prefix = (
    "You are a movie recommender system that help users to find anime that match their preferences. \n"
    "Use the following pieces of context to answer the question at the end. \n"
    "For each question, take into account the context and the personal information provided by the user.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "\n"
    "{context}"
)

# User-profile section; {age} and {gender} are filled in below.
user_info = (
    "This is what we know about the user, and you can use this information to better tune your research:\n"
    "Age: {age}\n"
    "Gender: {gender}"
)

# Closing part of the prompt; {question} stays a placeholder as well.
template_suffix = "Question: {question}\nYour response:"

# Substitute the concrete user details into the profile section.
user_info = user_info.format(age=18, gender='female')

# Assemble the three sections, one newline between each, and show the result.
COMBINED_PROMPT = '\n'.join([template_prefix, user_info, template_suffix])
print(COMBINED_PROMPT)

You are a movie recommender system that help users to find anime that match their preferences. 
Use the following pieces of context to answer the question at the end. 
For each question, take into account the context and the personal information provided by the user.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}
This is what we know about the user, and you can use this information to better tune your research:
Age: 18
Gender: female
Question: {question}
Your response:


In [17]:
# Wrap the combined prompt (instructions + user profile) in a PromptTemplate;
# only {context} and {question} remain as variables.
PROMPT = PromptTemplate(
    template=COMBINED_PROMPT, input_variables=["context", "question"])

chain_type_kwargs = {"prompt": PROMPT}

# Rebuild the retrieval chain so it uses the user-aware prompt.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

query = "I'm looking for an action anime with animals, any suggestions?"
# `invoke` replaces the deprecated direct call `qa({...})`; input and output
# dictionaries are unchanged.
result = qa.invoke({'query': query})
print(result['result'])

Based on the information you've provided, I would recommend "Hamuko Mairu!". It's an anime that features a shinobi on a mission to rescue kidnapped rare animals, which seems to match your preference for action and animals. The genres for this anime are Adventure and Comedy, and it's suitable for viewers aged 18 and above.


In [18]:
# List every retrieved document that was supplied to the LLM as context.
result["source_documents"]

[Document(page_content='combined_info: Title: Hamuko Mairu!. Overview: shinobi sets out to rescue kidnapped rare animals. Genres: Adventure, Comedy', metadata={'row': 7143, 'source': 'G:\\youtube_transcript\\anime_with_synopsis\\anime_updated.csv'}),
 Document(page_content='combined_info: Title: Anime Kapibara-san. Overview: The series follows the daily adventures of Kapibarasan and his animal friends as they chill out among the Midorino Grasslands. (Source: Crunchyroll) Genres: Comedy, Kids', metadata={'row': 15680, 'source': 'G:\\youtube_transcript\\anime_with_synopsis\\anime_updated.csv'}),
 Document(page_content="combined_info: Title: Fushigina Ano Ko wa Sutekina Kono Ko. Overview: NHK toddler's anime following animals and their adventures. Genres: Adventure, Kids", metadata={'row': 9309, 'source': 'G:\\youtube_transcript\\anime_with_synopsis\\anime_updated.csv'}),
 Document(page_content='combined_info: Title: Sengoku Choujuu Giga: Kou. Overview: The history, either daily life or l