In [1]:
import openai
import re
import json

import pandas as pd

from langchain import PromptTemplate
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
import numpy as np

import pickle
import datetime
from river import stream, metrics

In [11]:
def get_prompt(template, input_text):
    """Fill the ``{input}`` placeholder of ``template`` with ``input_text``.

    Args:
        template: Prompt template text whose only variable is ``input``.
        input_text: Text substituted for that variable.

    Returns:
        The formatted prompt text.
    """
    return PromptTemplate(template=template, input_variables=["input"]).format(
        input=input_text
    )


def get_anime_name(anime_name):
    """Split newline-separated text into a list of its non-empty lines.

    Args:
        anime_name: String holding one entry per line.

    Returns:
        The lines of ``anime_name`` in their original order, with empty
        lines dropped. Lines are not stripped, so whitespace-only lines
        are kept.
    """
    # filter(None, ...) discards exactly the falsy (empty-string) entries.
    return list(filter(None, anime_name.split("\n")))


def get_similarity_item(db, input, threshold=0.3):
    """Look up the stored anime name closest to ``input``.

    The query is lower-cased and sent to ``db.similarity_search_with_score``
    with ``k=1``. The first line of the returned document's ``page_content``
    is expected to look like ``"Name: <title>"``; the ``<title>`` part is
    returned.

    Args:
        db: Vector store exposing ``similarity_search_with_score(query=, k=)``
            and returning ``(document, score)`` pairs.
        input: Free-text anime title to resolve.
        threshold: Scores at or above this value are rejected. The score is
            treated as a distance (smaller means closer).
            NOTE(review): presumably an L2 distance from the FAISS store;
            confirm before tuning.

    Returns:
        The matched name, or ``None`` when the search returns nothing, the
        best score is at or above ``threshold``, or the document's first line
        carries no ``"Name: "`` field.
    """
    hits = db.similarity_search_with_score(query=input.lower(), k=1)
    # An empty index or store can legitimately return no hits at all.
    if not hits:
        return None

    document, score = hits[0]
    if score >= threshold:
        return None

    first_line = document.page_content.split("\n")[0]
    match = re.search(r"Name: (.*)", first_line)
    # Use a single "no result" sentinel (None) for every failure path so that
    # callers only have one value to test for.
    return match.group(1) if match else None

### Load DB


In [3]:
# Embedding client handed to FAISS.load_local when opening a saved index.
embeddings = OpenAIEmbeddings()
# Saved vector index of anime names, loaded from ../data/anime_name_db.
anime_name_db = FAISS.load_local("../data/anime_name_db", embeddings)

### Prompt


In [4]:
# List the functions the AI is allowed to use.
# A single function is declared. It takes one required argument, "name",
# which is an array of strings.
functions = [
    {
        "name": "get_anime_name_list",
        # Description shown to the model (Japanese): "build a list of anime
        # names from the input text".
        "description": "入力文章から、アニメ名のリストを作成する。",
        "parameters": {
            "type": "object",
            "properties": {
                "name": {
                    "type": "array",
                    # Property description (Japanese): "anime name".
                    "description": "アニメ名",
                    "items": {"type": "string"},
                }
            },
            "required": ["name"],
        },
    }
]

In [5]:
# Sample user message (Japanese): asks for recommendations and lists the
# user's favourite anime as a bulleted list of titles.
user_input = """おすすめのアニメを教えてください。私が好きなアニメは以下の通りです。

- Angel Beats!
- Mahoutsukai Precure!
- Ookami to Koushinryou
- Shinsekai yori
- Shugo Chara!
- Uma Musume: Pretty Derby (TV)
- Fate/Zero
- Uchuu Senkan Yamato 2199
"""

# Extraction instruction (Japanese): "the text below lists anime the user
# likes; extract the names and output them comma-separated". The user message
# is substituted for {input}.
# NOTE(review): the template asks for comma-separated text, while the request
# cell reads the names from the function-call JSON arguments — confirm the
# wording is still what is wanted.
template_user_input = """以下の文章は、ユーザが好きなアニメの名前を列挙しています。
アニメの名前を抽出し、カンマ区切りで出力しなさい。

----
{input}
"""

# Final prompt text sent to the model.
prompt = get_prompt(template_user_input, user_input)

In [6]:
prompt

'以下の文章は、ユーザが好きなアニメの名前を列挙しています。\nアニメの名前を抽出し、カンマ区切りで出力しなさい。\n\n----\nおすすめのアニメを教えてください。私が好きなアニメは以下の通りです。\n\n- Angel Beats!\n- Mahoutsukai Precure!\n- Ookami to Koushinryou\n- Shinsekai yori\n- Shugo Chara!\n- Uma Musume: Pretty Derby (TV)\n- Fate/Zero\n- Uchuu Senkan Yamato 2199\n\n'

### Get candidate anime name using function calling


In [7]:
# Force the model to call the declared function. The parsing below reads
# message["function_call"] unconditionally, and with function_call="auto" the
# model may reply in plain text instead, leaving that key absent (KeyError).
response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
        {"role": "user", "content": prompt},
    ],
    functions=functions,
    function_call={"name": functions[0]["name"]},
    temperature=0,
)
message = response["choices"][0]["message"]
# The arguments arrive as a JSON string shaped by the function's "parameters"
# schema; its "name" property is the array of extracted titles.
candidate_list = json.loads(message["function_call"]["arguments"])["name"]

In [8]:
candidate_list

['Angel Beats!',
 'Mahoutsukai Precure!',
 'Ookami to Koushinryou',
 'Shinsekai yori',
 'Shugo Chara!',
 'Uma Musume: Pretty Derby (TV)',
 'Fate/Zero',
 'Uchuu Senkan Yamato 2199']

### Online learning


In [12]:
# Resolve every extracted title against the name index: one result per
# candidate, in the same order as candidate_list.
rated_anime = [get_similarity_item(anime_name_db, title) for title in candidate_list]

In [13]:
rated_anime

['angel beats!',
 'mahoutsukai precure!',
 'ookami to koushinryou',
 'shinsekai yori',
 'shugo chara!',
 'uma musume: pretty derby (tv)',
 'fate/zero',
 'uchuu senkan yamato 2199']

In [14]:
# Name the column at construction time. pd.DataFrame([]) has no column 0 to
# rename, so an empty match list would otherwise produce a frame without
# "Name_lower" and break the merge on that key. dtype=object keeps the key
# column string-compatible even when the list is empty.
rated_anime_df = pd.DataFrame({"Name_lower": rated_anime}, dtype=object)

In [15]:
# Load the anime catalogue and append a lower-cased copy of "Name" as
# "Name_lower", the key used to join against the matched titles.
anime_list = pd.read_csv("../data/anime_list.csv").assign(
    Name_lower=lambda frame: frame["Name"].str.lower()
)

In [16]:
# Attach catalogue ids to the matched titles.
# - Only "Name_lower" is taken from the left frame, so re-running this cell
#   does not collide with an "anime_id" column added by a previous run.
# - Null keys are dropped first: unmatched titles arrive as None/NaN, and
#   pandas joins null keys to each other.
# - An inner join keeps only titles that exist in the catalogue. With a left
#   join, misses become NaN ids (turning the column into float) that would
#   later be fed to the model as items. Left-key order is preserved.
rated_anime_df = (
    rated_anime_df[["Name_lower"]]
    .dropna()
    .merge(
        anime_list[["anime_id", "Name_lower"]],
        on="Name_lower",
        how="inner",
    )
)
rated_anime_ids = rated_anime_df["anime_id"]

In [17]:
def create_user_uid():
    """Build an integer user id from the current local time.

    Returns:
        ``datetime.datetime.now()`` rendered as ``YYYYMMDDHHMMSS`` and
        converted to ``int``. The id has one-second resolution, so two calls
        within the same second yield the same value.
    """
    return int(f"{datetime.datetime.now():%Y%m%d%H%M%S}")

In [18]:
# Timestamp-derived integer id used for this run's user.
user_id = create_user_uid()

In [19]:
# Tag every matched title with this run's user id and a constant rating of 10.
rated_anime_df = rated_anime_df.assign(user_id=user_id, rating=10)

In [20]:
rated_anime_df

Unnamed: 0,Name_lower,anime_id,user_id,rating
0,angel beats!,6547,20230728141734,10
1,mahoutsukai precure!,31884,20230728141734,10
2,ookami to koushinryou,2966,20230728141734,10
3,shinsekai yori,13125,20230728141734,10
4,shugo chara!,2923,20230728141734,10
5,uma musume: pretty derby (tv),35249,20230728141734,10
6,fate/zero,10087,20230728141734,10
7,uchuu senkan yamato 2199,12029,20230728141734,10


In [21]:
# Features are the (user_id, anime_id) pair and the target is the rating.
# y is kept as a one-column frame, so each streamed target is keyed by
# "rating" when the dataset is iterated.
X = rated_anime_df.loc[:, ["user_id", "anime_id"]]
y = rated_anime_df.loc[:, ["rating"]]
dataset = stream.iter_pandas(X, y)

In [22]:
# Load the previously pickled recommender model from disk.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only open model files that come from a trusted source.
with open("../data/model.pkl", "rb") as f:
    model = pickle.load(f)

In [23]:
# Update model
# For every new rating: score it with the model as it stood before seeing the
# rating, record the error, then let the model learn from it.
metric = metrics.MAE() + metrics.RMSE()
for features, target in dataset:
    y_pred = model.predict_one(user=features["user_id"], item=features["anime_id"])
    # update() and learn_one() mutate their receiver in place. Their return
    # values are deliberately not re-bound: River releases in which these
    # methods return None would otherwise replace metric/model with None.
    metric.update(target["rating"], y_pred)
    model.learn_one(
        user=features["user_id"], item=features["anime_id"], y=target["rating"]
    )

In [24]:
metric

MAE: 9.368767, RMSE: 9.387725

In [25]:
def predict(user_id, anime_list, rated_anime_ids, top_n=10):
    """Return the top recommendations for ``user_id``, excluding rated titles.

    Reads the module-level ``model`` and calls its ``rank`` method with every
    ``anime_id`` in ``anime_list``.

    Args:
        user_id: Identifier of the user to rank items for.
        anime_list: DataFrame with an ``anime_id`` column plus the descriptive
            columns to include in the result.
        rated_anime_ids: Iterable of ``anime_id`` values the user has already
            rated; these are removed from the result.
        top_n: Maximum number of rows to return (default 10, the previously
            hard-coded value).

    Returns:
        DataFrame of at most ``top_n`` catalogue rows in the order produced by
        ``model.rank`` (presumably best first — confirm against the model),
        with a fresh 0-based index.
    """
    # Rank every catalogue item for this user.
    ranked_ids = model.rank(user=user_id, items=anime_list["anime_id"])

    # Attach catalogue details. Putting the ranking on the left of the inner
    # merge keeps the rows in ranking order.
    ranked_df = pd.DataFrame(ranked_ids, columns=["anime_id"]).merge(
        anime_list, on="anime_id", how="inner"
    )

    # Remove items the user has already rated.
    unseen_df = ranked_df[~ranked_df["anime_id"].isin(rated_anime_ids)]

    return unseen_df.head(top_n).reset_index(drop=True)

In [26]:
# Ranked recommendations for this run's user, with already-rated ids excluded.
recommend_result = predict(user_id, anime_list, rated_anime_ids)

In [27]:
recommend_result

Unnamed: 0,anime_id,Name,English_name,Japanese_name,Genres,Aired,Score,Ranked,sypnopsis,Name_lower
0,39587,Re:Zero kara Hajimeru Isekai Seikatsu 2nd Season,Re:ZERO -Starting Life in Another World- Season 2,Re：ゼロから始める異世界生活,"Psychological, Drama, Thriller, Fantasy","Jul 8, 2020 to Sep 30, 2020",8.5,104.0,union that was supposed to spell the arrival o...,re:zero kara hajimeru isekai seikatsu 2nd season
1,263,Hajime no Ippo,Fighting Spirit,はじめの一歩 THE FIGHTING!,"Comedy, Sports, Drama, Shounen","Oct 4, 2000 to Mar 27, 2002",8.75,32.0,akunouchi Ippo has been bullied his entire lif...,hajime no ippo
2,30484,Steins;Gate 0,Steins;Gate 0,シュタインズ・ゲート ゼロ,"Sci-Fi, Thriller","Apr 12, 2018 to Sep 27, 2018",8.51,103.0,"The eccentric, self-proclaimed mad scientist R...",steins;gate 0
3,35760,Shingeki no Kyojin Season 3,Attack on Titan Season 3,進撃の巨人 Season3,"Action, Military, Mystery, Super Power, Drama,...","Jul 23, 2018 to Oct 15, 2018",8.59,76.0,"Still threatened by the ""Titans"" that rob them...",shingeki no kyojin season 3
4,2904,Code Geass: Hangyaku no Lelouch R2,Code Geass:Lelouch of the Rebellion R2,コードギアス 反逆のルルーシュ 続編,"Action, Military, Sci-Fi, Super Power, Drama, ...","Apr 6, 2008 to Sep 28, 2008",8.91,18.0,"One year has passed since the Black Rebellion,...",code geass: hangyaku no lelouch r2
5,28171,Shokugeki no Souma,Food Wars! Shokugeki no Soma,食戟のソーマ,"Ecchi, School, Shounen","Apr 4, 2015 to Sep 26, 2015",8.29,230.0,"Ever since he was a child, fifteen-year-old So...",shokugeki no souma
6,35788,Shokugeki no Souma: San no Sara,Food Wars! The Third Plate,食戟のソーマ 餐ノ皿,"Ecchi, School, Shounen","Oct 4, 2017 to Dec 20, 2017",8.14,370.0,The Moon Festival is Tootsuki Academy's annual...,shokugeki no souma: san no sara
7,37450,Seishun Buta Yarou wa Bunny Girl Senpai no Yum...,Rascal Does Not Dream of Bunny Girl Senpai,青春ブタ野郎はバニーガール先輩の夢を見ない,"Comedy, Supernatural, Drama, Romance, School","Oct 4, 2018 to Dec 27, 2018",8.38,166.0,The rare and inexplicable Puberty Syndrome is ...,seishun buta yarou wa bunny girl senpai no yum...
8,9863,SKET Dance,SKET Dance,スケットダンス,"Comedy, School, Shounen","Apr 7, 2011 to Sep 27, 2012",8.25,262.0,Kaimei High School there is a special club ded...,sket dance
9,5040,One Outs,One Outs,ワンナウツ,"Sports, Psychological, Seinen","Oct 8, 2008 to Apr 1, 2009",8.35,180.0,"Toua Tokuchi is an athlete by profession, but ...",one outs


In [29]:
# Saved vector index loaded from ../data/anime_info_db, reusing the same
# embedding client as the name index.
anime_info_db = FAISS.load_local("../data/anime_info_db", embeddings)

In [30]:
# Chat model configured with temperature 0, wrapped in a RetrievalQA chain
# whose retriever is backed by the anime_info_db vector store.
llm = ChatOpenAI(model="gpt-4", temperature=0)
chain = RetrievalQA.from_chain_type(
    llm,
    retriever=anime_info_db.as_retriever(),
)

In [31]:
def get_prompt(template, input_text):
    """Fill the ``{input}`` placeholder of ``template`` with ``input_text``.

    NOTE(review): this re-defines ``get_prompt`` with the same behaviour as
    the definition near the top of the notebook; one of the two can be
    removed.

    Args:
        template: Prompt template text whose only variable is ``input``.
        input_text: Text substituted for that variable.

    Returns:
        The formatted prompt text.
    """
    return PromptTemplate(template=template, input_variables=["input"]).format(
        input=input_text
    )

In [32]:
# Prompt template for introducing one recommended anime; the title is
# substituted for {input}.
# NOTE(review): "genere" and "You are anime recommender system" look like
# typos. They are left untouched here because the text is sent to the model
# verbatim and changing it may change the generated output.
template_recommend = """You are anime recommender system.
Follow the Output rules and introduce the anime in the Recommend Lists.

# Recommend Lists
- {input}

# Output rules
- Show anime name, genere and descriptions.
"""

In [35]:
# Ask the retrieval chain to introduce each of the first three recommendations.
# Loop-local names are chosen so this cell neither shadows the builtin `input`
# at module level nor overwrites `prompt`, which the function-calling request
# cell reads when it is re-run.
result_list = []
for anime_title in recommend_result["Name"][0:3]:
    recommend_prompt = get_prompt(template_recommend, anime_title.lower())
    recommend_answer = chain(recommend_prompt)
    result_list.append(recommend_answer["result"])

In [36]:
result_list

['Anime Name: Re:Zero kara Hajimeru Isekai Seikatsu 2nd Season\nGenres: Psychological, Drama, Thriller, Fantasy\nDescription: This is the second season of Re:Zero kara Hajimeru Isekai Seikatsu. The union that was supposed to',
 "Anime Name: Hajime no Ippo\nGenres: Comedy, Sports, Drama, Shounen\nDescription: The story revolves around Makunouchi Ippo, who has been bullied his entire life. One day, he is saved by a boxer, Takamura Mamoru, and brought to a boxing gym to recover from his injuries. As he regains consciousness, he is amazed at his new surroundings in the gym. Takamura places a photo of Ippo's classmate on a punching bag and forces him to punch it. It is only then that Ippo feels something stir inside him and eventually asks Takamura to train him in boxing. With a sudden desire to get stronger, for himself and his hard working mother, Ippo trains relentlessly to accomplish a task given by Takamura within a time limit. Thus begins Ippo's journey to the top of the boxing world.

In [37]:
# Translation prompt (Japanese): "You are a recommender system that suggests
# anime. The English text below introduces an anime; translate it into
# natural Japanese." The English introduction is substituted for {input}.
template_translate = """あなたはアニメをおすすめするレコメンドシステムです。
以下の英語の文章は、アニメの紹介をしています。日本語に翻訳、意訳してください。

-- 
- {input}
"""

In [38]:
# Translate each English introduction into Japanese.
# Loop-local names avoid overwriting `prompt`, which the function-calling
# request cell reads when it is re-run, and replace the ambiguous name `l`.
# NOTE(review): `chain` is the RetrievalQA chain, so every translation request
# also goes through document retrieval — confirm that is intended rather than
# calling the chat model directly.
result_translate = []
for introduction in result_list:
    translate_prompt = get_prompt(template_translate, introduction)
    translated = chain(translate_prompt)
    result_translate.append(translated["result"])

In [39]:
result_translate

['- アニメ名：Re:Zero kara Hajimeru Isekai Seikatsu 第2シーズン\nジャンル：心理、ドラマ、スリラー、ファンタジー\n説明：これは「Re:Zero kara Hajimeru Isekai Seikatsu」の第2シーズンです。平和な時代の到来を予定していた連合は、スバル・ナツキとエミリアがイーラム村に戻ったときにすぐに破壊されます。罪の大司教として知られる災害によって残された荒廃を目の当たりにしたスバルは、やり直す能力が無駄であることに絶望します。グループが答えを求めて聖域へと進む中、スバルは貪欲の魔女エキドナと予期せぬ出会いをします。彼女の制御不能なリズムに従わざるを得なくなった彼は、過去と未来の渦に飛び込むことを強いられます。同時に、いくつかの謎の脅威が聖域を狙い、閉じ込められた無力な人々に恐ろしい運命を予告します。永遠の契約、過去の罪、報われない愛が衝突し、血の川に沈む「Re:Zero kara Hajimeru Isekai Seikatsu」の第2シーズン。絶望の淵に追い詰められたスバルの、愛する人々を救う決意はどれほど続くのでしょうか？',
 '- アニメ名：はじめの一歩\nジャンル：コメディ、スポーツ、ドラマ、少年\nあらすじ：物語は、生涯いじめられ続けてきた幕之内一歩を中心に展開します。ある日、彼はボクサーの鷹村守に助けられ、怪我から回復するためにボクシングジムに連れて行かれます。意識を取り戻すと、ジムの新しい環境に驚きます。鷹村は一歩のクラスメートの写真をパンチングバッグに貼り、一歩にパンチを打つよう強制します。それがきっかけで一歩の中に何かが芽生え、最終的には鷹村にボクシングのトレーニングを頼むようになります。自分自身と働き者の母親のために強くなるという突然の願望から、一歩は鷹村から与えられた時間制限内に課題を達成するために必死にトレーニングします。こうして一歩のボクシング界での頂点への旅が始まります。',
 '- アニメ名：Steins;Gate 0\nジャンル：SF、スリラー\n説明：自称マッドサイエンティストの奇人、岡部倫太郎はかつての自分の影になってしまいました。友人の牧瀬紅莉栖を救出することに失敗した後、彼は自身のマッドサイエンティストという別の人格を捨て、普通の大学生として生活することを決意