In [1]:
import sys

# The notebook lives one level below the project code, so the parent directory
# is added to the import path; the `src.*` imports in later cells rely on it.
sys.path.append('../')

# For loading subtitle files
import pysubs2

# For printing
import pprint

from langchain_openai import ChatOpenAI

In [2]:
import json

# Read API keys and model settings from the config file one directory up.
# The context manager closes the file handle even if JSON parsing raises;
# the original bare `open()` left it open for the lifetime of the kernel.
# JSON is read explicitly as UTF-8 so the platform default encoding is not used.
with open("../config.json", encoding="utf-8") as json_file:
    json_data = json.load(json_file)

# OpenAI key used when constructing the chat model in a later cell.
api_key = json_data['openai_api_key']

## Scene Structure

In [3]:
# Parse the sample .ass subtitle file and flatten it into one transcript string:
# one subtitle event per line, in file order. The raw event text is kept as-is,
# so ASS override tags such as {\pos(...)} and \N line breaks remain in it.
ass_pass = "../sample/sample2.ass"
subs = pysubs2.load(ass_pass)
lines = "\n".join([str(event.text) for event in subs])

# Display the transcript as the cell output.
lines

'{\\pos(1166,385)}今日も今日とて\n{\\pos(1166,415)}俺は文芸部のホームページの\\N写真を撮るために\n{\\pos(1166,385)}豊橋駅前に来ていた\n{\\pos(1166,415)}小鞠は街中図書館にいると\\N聞いてきたのだが…\n{\\pos(1166,385)}お いたぞ\n{\\pos(1166,385)}何か調べ物をしているようだ\n{\\pos(1166,385)}おい 小鞠ー\n{\\pos(1166,385)}いや 待てよ\n{\\pos(1166,760)}なっ\n{\\pos(1166,385)}うん いい写真が撮れた\n{\\pos(1166,760)}んん…なんだお前…いきなり…\n{\\pos(1166,385)}ああ 文芸部のホームページ用の写真だ\n{\\pos(1166,415)}自然な部員の姿と\\N街の光景を写真に収めたくてな\n{\\pos(1166,760)}んん…温水…\n{\\pos(1166,760)}そこに座れ\n{\\pos(1166,385)}え\n{\\pos(1166,385)}ああ\u3000どうした？\n{\\pos(1166,760)}なっ！\n{\\pos(1166,760)}何で隣に来る？\n{\\pos(1166,760)}か…勝手に鞄を動かすな\n{\\pos(1166,415)}このテーブル大きいから\\N向かい合わせだと話しづらいだろう？\n{\\pos(1166,415)}ここは私語禁止じゃないとは言え\\N一定の節度は必要だ\n{\\pos(1166,760)}そ…そうか\n{\\pos(1166,760)}盗撮男の言葉とは思えないな\n{\\pos(1166,385)}盗撮男？\n{\\pos(1166,385)}そんな奴がいるのか？\n{\\pos(1166,385)}警察呼ばないと\n{\\pos(1166,790)}ぶ…文芸部から\\N逮捕者が出るのか\n{\\pos(1166,385)}ええ\n{\\pos(1166,385)}盗撮男って…\n{\\pos(1166,385)}ひょっとして俺のこと？\n{\\pos(1166,760)}そ…そうだ\n{\\pos(1166,760)}死んで償い\n{\\pos(1166,385)}ハ

In [15]:
from src.logic.context import determine_scene_structure

# Chat model handed to every context step in this notebook.
# The sampling temperature and the API key both come from the loaded config.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=json_data['temperature'],
    api_key=api_key,
)

In [16]:
# Ask the model to describe how the transcript is structured.
# 'ja' / 'en' are presumably the input and output language codes, matching the
# input_lang / output_lang keywords used by the later cells -- TODO confirm
# against the signature of determine_scene_structure.
format_results = determine_scene_structure(
    llm,
    'ja',
    'en',
    lines,
)

pprint.pp(format_results)

('The text is structured as an interactive dialogue between two main speakers, '
 'with occasional references to a third person. The dialogue is organized in a '
 "back-and-forth manner, with each character responding to the other's lines, "
 'indicating an interactive exchange rather than isolated monologues. The '
 'speakers are actively engaging with each other, often debating or discussing '
 'topics, and there is a clear progression of conversation. The format does '
 'not include narration or timestamped logs, but rather consists of direct '
 'exchanges between the characters, suggesting a conversational scene rather '
 'than separate reflections or monologues.')


## Web Search

In [None]:
from langchain_community.tools.tavily_search.tool import TavilySearchResults
from src.logic.context import gather_context_from_web

In [7]:
import os

# Export the Tavily key from the config file into the environment before the
# search tool is constructed.
os.environ["TAVILY_API_KEY"] = json_data["tavily_api_key"]

# Cap the search at three results. `max_results` is the field the tool uses for
# this; the previous `k=3` keyword is not a field of TavilySearchResults, so the
# limit was presumably ignored and the tool's default result count applied.
search_tool = TavilySearchResults(max_results=3)

In [8]:
# Gather background context about the series from the web, using the search
# tool and the chat model; the result is requested in English.
web_search_results = gather_context_from_web(
    llm,
    search_tool,
    output_lang="en",
    series_name="Too Many Losing Heroines!!!",
    keywords="Characters",
)

pprint.pp(web_search_results)

('- **Premise and Genre**: "Too Many Losing Heroines!!!" is an anime series '
 'produced by A-1 Pictures, which aired its first season from July to '
 'September 2024. The story revolves around Kazuhiko Nukumizu, a high school '
 'student who inadvertently becomes involved with several "losing heroines" '
 "after witnessing a classmate's romantic rejection.\n"
 '\n'
 '- **Setting**: The series is set in a high school environment, specifically '
 'Tsuwabuki High School, where the main characters navigate their teenage '
 'lives and romantic entanglements.\n'
 '\n'
 '- **Main Characters**: The protagonist, Kazuhiko Nukumizu, is a '
 'self-proclaimed "background character" who has no romantic experience. Key '
 'characters include Anna Yanami, who is rejected by her childhood friend, and '
 'other heroines like Chika Komari and Lemon Yakishio, who each have their own '
 'romantic struggles.\n'
 '\n'
 '- **Character Dynamics**: Anna Yanami is characterized by her unrequited '
 'love for he

## Character Identification

In [9]:
from src.logic.context import identify_characters

In [10]:
# Identify the characters in the transcript, passing along the scene-structure
# description and the web context produced by the earlier cells.
characters_output = identify_characters(
    llm,
    input_lang="ja",
    output_lang="en",
    transcript=lines,
    format_description=format_results,
    web_context=web_search_results,
)

pprint.pp(characters_output)

('- Name: 温水 — Traits: Uses first-person pronoun "俺," involved in taking '
 "photos for the literature club's website, casual tone, seems to "
 'misunderstand situations, has a playful interaction with 小鞠. [Narrative '
 'Focus]\n'
 '- Name: 小鞠 — Traits: Engages in research for writing a novel, uses '
 'first-person pronoun "私," reacts strongly to 温水\'s actions, shows a mix of '
 'annoyance and shyness, referenced as being at the library.\n'
 '- Name: 八奈見 — Traits: Mentioned in the context of being similar to 温水, '
 'appears to be a third character who is referenced but does not actively '
 'participate in the dialogue, associated with eating.')


## Summarize

In [11]:
from src.logic.context import summarize_scene

In [12]:
# Summarize the scene from the transcript, reusing everything gathered so far:
# the structure description, the web context and the character list.
summary_output = summarize_scene(
    llm,
    input_lang="ja",
    output_lang="en",
    transcript=lines,
    format_description=format_results,
    web_context=web_search_results,
    character_list=characters_output,
)

pprint.pp(summary_output)

('The scene takes place in a library, where Kazuhiko Nukumizu (referred to as '
 "温水) is taking photos for the literature club's website. He encounters Chika "
 'Komari (小鞠), who is at the library conducting research for a novel she is '
 "writing. Their interaction is marked by 温水's casual and somewhat oblivious "
 "behavior, as he takes photos without 小鞠's consent, leading to a playful yet "
 'tense exchange. 小鞠 expresses annoyance and shyness, particularly about being '
 'photographed. The conversation touches on topics such as the use of the '
 "library, research methods, and 温水's misunderstanding of 小鞠's reactions. 八奈見 "
 'is mentioned as a third character who shares similarities with 温水 but does '
 'not actively participate in the dialogue. The scene focuses on the dynamics '
 'between 温水 and 小鞠, highlighting their contrasting personalities.')


## Tone

In [13]:
from src.logic.context import determine_tone

In [14]:
# Determine the overall tone of the scene from the transcript, with the
# structure description and the web context as supporting input.
tone_output = determine_tone(
    llm,
    input_lang='ja',
    output_lang='en',
    transcript=lines,
    format_description=format_results,
    web_context=web_search_results,
)

pprint.pp(tone_output)

('The overall tone of the scene is casual and comedic, with a playful and '
 'teasing dynamic between the characters. The speech is informal, '
 'characterized by blunt exchanges, light-hearted banter, and occasional '
 'sarcasm, which suggests a close and familiar relationship between the '
 'speakers.')
