In [1]:
import re

import pandas as pd

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

from langchain.document_loaders.csv_loader import CSVLoader

from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import numpy as np

### Anime name DB


In [2]:
def create_anime_name_csv():
    df = pd.read_csv("../data/anime_list.csv")
    df = df[["Name"]]

    for col in df.columns:
        df[col] = df[col].str.lower()

    df.to_csv("../data/anime_name_db.csv", index=False)

In [3]:
def create_anime_name_db():
    loader = CSVLoader(file_path="../data/anime_name_db.csv")
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    docs = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()
    db = FAISS.from_documents(docs, embeddings)
    db.save_local("../data/anime_name_db")

In [4]:
def check_similarity_item(db, input):
    input = input.lower()
    sim = db.similarity_search_with_score(query=input, k=1)

    anime_name = sim[0][0].page_content.split("\n")[0]
    match = re.search(r"Name: (.*)", anime_name)
    # match = re.search(r"Japanese_name: (.*)", anime_name)

    if match:
        anime_name = match.group(1)
    else:
        anime_name = np.nan
    sim_value = sim[0][1]

    print(anime_name, sim_value)

In [5]:
create_anime_name_csv()

In [6]:
create_anime_name_db()

In [7]:
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
anime_name_db = FAISS.load_local("../data/anime_name_db", embeddings)

In [8]:
check_anime_list = [
    "Angel Beats",
    "魔法使いプリキュア",
    "Mahoutsukai Precure!",
    "狼と香辛料",
    "Ookami to Koushinryo",
    "鬼滅の刃",
    "Kimetsu no Yaiba",
]

In [9]:
for anime in check_anime_list:
    check_similarity_item(anime_name_db, anime)

angel beats! 0.2101283
mahou shoujo madoka★magica 0.30650908
mahoutsukai precure! 0.099646196
ikkitousen: western wolves 0.36210635
ookami to koushinryou 0.16843092
queen's blade: gyokuza wo tsugu mono 0.31180137
kimetsu no yaiba 0.12329282


### Anime Information DB


In [10]:
def create_anime_info_text():
    df = pd.read_csv("../data/anime_list.csv")
    # df = df[["Name", "English_name", "Japanese_name", "Genres", "sypnopsis"]]
    df = df[["Name", "Genres", "sypnopsis"]]

    for col in df.columns:
        df[col] = df[col].str.lower()
    df.to_csv("../data/anime_info_db.csv", index=False)

    # # 名前と概要欄を分け、1つのテキストにまとめる
    # # Nameと対にする
    # anime_info_list = []
    # en = df[["Name", "English_name"]].to_dict(orient="records")
    # ja = df[["Name", "Japanese_name"]].to_dict(orient="records")
    # genere = df[["Name", "Genres"]].to_dict(orient="records")
    # sypnopsis = df[["Name", "sypnopsis"]].to_dict(orient="records")

    # anime_info_list.extend(en)
    # anime_info_list.extend(ja)
    # anime_info_list.extend(genere)
    # anime_info_list.extend(sypnopsis)

    # with open("../data/anime_info_db.txt", "w") as file:
    #     for item in anime_info_list:
    #         for key, value in item.items():
    #             file.write(f"{key}: {value}, ")
    #         file.write("\n")

In [11]:
def create_anime_info_db():
    # loader = TextLoader(file_path="../data/anime_info_db.txt")
    loader = CSVLoader(file_path="../data/anime_info_db.csv")
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=100)
    docs = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()
    db = FAISS.from_documents(docs, embeddings)
    db.save_local("../data/anime_info_db")

In [12]:
create_anime_info_text()

In [13]:
create_anime_info_db()

In [6]:
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
anime_info_db = FAISS.load_local("../data/anime_info_db", embeddings)

In [29]:
llm = ChatOpenAI(model="gpt-4")
chain = RetrievalQA.from_chain_type(
    llm,
    retriever=anime_info_db.as_retriever(),
)

In [30]:
# template_test = """Genres of {"Mahoutsukai Precure!".lower()}"""
template_test = """Tell me about {"魔法つかいプリキュア".lower()}"""
# template_test = """魔法つかいプリキュア！のジャンルはなんですか。"""

In [31]:
result = chain(template_test)

In [32]:
result

{'query': 'Tell me about {"魔法つかいプリキュア".lower()}',
 'result': '"魔法つかいプリキュア" is a Japanese anime that falls under the genres of action, slice of life, magic, fantasy, school, and shoujo. The story revolves around a 13-year-old girl named Ai who lives in the human world. One day, she witnesses a mysterious object fall from the sky into the park. She brings her stuffed bear Mofurun with her to see what it was and encounters a girl named Riko flying on a broom. Riko is the same age as Ai and comes from the magical world. Riko is searching for a powerful jewel called "link stone emerald." However, they are confronted by Batti, an ally of the dark magic users Dokurokushi, who demands the "link stone emerald." Batti uses dark magic to create a monster known as a Yokubaru. To combat the threat, Ai and Riko join hands and with the magic words "cure-up rapapa," they transform into the legendary Pretty Cure.'}

In [33]:
anime_info_db.similarity_search_with_score(query=template_test, k=10)

[(Document(page_content="Japanese_name: 魔法のスターマジカルエミ 雲光る\nGenres: comedy, shoujo, slice of life\nsypnopsis: kazuki mai, an elementary schoolgirl, wanted to be a magician but her skills weren't good enough for here to join her grandparents' magiccarat troupe. one day she sees a light enter a strange heart-shaped mirror. the light is actually a mirror fairy named topo, who takes over her favourite stuffed toy, a flying squirrel and gives her a bracelet with the 4 card suits. the bracelet can turn into a bubble wand which transforms mai into the 18 year old magician magical emi, who uses her magic to help people and participate in her grandparents' shows.", metadata={'source': '../data/anime_info_db.csv', 'row': 2754}),
  0.31583548),
 (Document(page_content='Japanese_name: ドキドキ! プリキュア\nGenres: action, magic, fantasy, shoujo\nsypnopsis: ddle schooler mana aida is known for her vivid passion and kind-hearted disposition, especially by her friends rikka hishikawa and alice yotsuba. while th