In [None]:
!pip install langchain_openai
!pip install langchain_community
!pip install openai
!pip install tiktoken
!pip install python-dotenv
!pip install faiss-cpu
!pip install tiktoken

import os
import json
import re
from collections import OrderedDict, defaultdict
import openai
from openai import OpenAI

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, AIMessage

# Prefer the OPENAI_API_KEY environment variable so the secret does not have
# to be written into the notebook; fall back to the original placeholder when
# the variable is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'set your own key')


Collecting langchain_openai
  Downloading langchain_openai-0.3.12-py3-none-any.whl.metadata (2.3 kB)
Collecting tiktoken<1,>=0.7 (from langchain_openai)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading langchain_openai-0.3.12-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 kB[0m [31m940.3 kB/s[0m eta [36m0:00:00[0m
[?25hDownloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken, langchain_openai
Successfully installed langchain_openai-0.3.12 tiktoken-0.9.0
Collecting langchain_community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.51 (from langchain_community)
  Downloading langchain_core-0.3.51-py3-n

依次读取多个剧集的剧情内容，通过GPT分析每集中对剧情有重要影响的物品，并将所有关键物品及其描述保存到一个JSON文件中。

In [None]:
### Extract key items ######
# OpenAI client used by the functions in this cell; built from the key stored
# on the `openai` module.
client = OpenAI(api_key=openai.api_key)

def process_episode(episode_number, episode_data, previous_items):
    """Ask GPT which items matter in one episode and parse its answer.

    Args:
        episode_number: Label of the episode; only used in the prompt and in
            log messages.
        episode_data: Mapping for the episode. Its ``initialRecords`` list of
            strings (empty when missing) is joined into the episode text.
        previous_items: Mapping ``item name -> description`` of the items
            collected so far; listed in the prompt as context.

    Returns:
        dict: ``item name -> description`` parsed from the reply. An empty
        dict is returned when the reply has no usable message or when any
        exception occurs (the error is printed, not raised).
    """
    # Use the content from initialRecords
    current_scene_text = '\n'.join(episode_data.get('initialRecords', []))

    # Convert previous important items into text
    previous_items_text = "\n".join(f"- {item}: {desc}" for item, desc in previous_items.items())

    prompt = f"""
As a careful reader, identify and track items that have a significant impact on the plot or character development in this episode.

For each important item, provide:
- **Item Name**
- **Description** (brief description of the item and its significance)

Only include items that truly impact the story. Avoid listing insignificant everyday items.

**Previous Important Items:**
{previous_items_text}

**Current Episode ({episode_number}):**
{current_scene_text}

Please provide your analysis of important items in the following format:

**Important Items:**

Item Name:
Description: [Brief description of the item and its significance]

[More items if applicable]

Ensure that your reasoning clearly supports the importance of each item to the story.
"""

    print(f"\n--- Prompt sent to GPT (Episode {episode_number}) ---\n")
    print(prompt)
    print("\n--- End of GPT Prompt ---\n")

    try:
        response = client.chat.completions.create(
            model="gpt-4o-2024-08-06",
            messages=[
                {"role": "system", "content": "You are a professional story analyst."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1
        )

        if not (response.choices and response.choices[0].message):
            print(f"Error: No valid response for episode {episode_number}")
            return {}

        # The message content can be None; treat that as an empty reply.
        content = (response.choices[0].message.content or "").strip()

        print(f"\n--- GPT generated important item analysis (Episode {episode_number}) ---\n")
        print(content)
        print("\n--- End of GPT generated important item analysis ---\n")

        return _parse_important_items(content)

    except Exception as e:
        print(f"Error processing episode {episode_number}: {str(e)}")
        return {}


def _strip_markdown(text):
    """Trim whitespace and the ``*`` emphasis markers wrapped around a value."""
    return text.strip().strip('*').strip()


def _parse_important_items(content):
    """Turn the ``Item Name:`` / ``Description:`` reply into a name -> description dict."""
    parsed = {}
    # Everything before the first "Item Name:" is preamble and is skipped.
    for chunk in content.split("Item Name:")[1:]:
        name_line, newline, remainder = chunk.strip().partition('\n')
        if not newline:
            # A name without any following line carries no description.
            continue
        # When the labels come back bolded ("**Item Name:** Sword"), the split
        # leaves stray asterisks around the values; strip them so the same
        # item always maps to the same key.
        item_name = _strip_markdown(name_line)
        if not item_name:
            continue
        parsed[item_name] = _strip_markdown(remainder.replace("Description:", ""))
    return parsed

def key_items():
    """Collect the important items of every episode and write them to JSON.

    Loads ``/content/storylines.json`` (storyline -> episode -> episode data),
    runs ``process_episode`` on each episode with the items gathered so far,
    merges each result into one dict (later descriptions overwrite earlier
    ones for the same name) and saves it under the ``important_items`` key of
    ``/content/storylines_importance.json``. Load and write failures are
    printed; the function always returns None.
    """
    origin_story = '/content/storylines.json'
    importance_path = '/content/storylines_importance.json'

    try:
        with open(origin_story, 'r', encoding='utf-8') as source:
            story_data = json.load(source)
        print("Loaded data from file")
    except Exception as load_error:
        print(f"Error loading data: {str(load_error)}")
        return

    all_important_items = {}

    # Flatten the storyline/episode nesting into a single stream of episodes.
    episode_stream = (
        (storyline, episode_number, episode_data)
        for storyline, episodes in story_data.items()
        for episode_number, episode_data in episodes.items()
    )
    for storyline, episode_number, episode_data in episode_stream:
        print(f"Processing {storyline} - {episode_number}")
        all_important_items.update(
            process_episode(episode_number, episode_data, all_important_items)
        )

    # Persist the merged result for the later summary step.
    payload = {"important_items": all_important_items}
    try:
        with open(importance_path, 'w', encoding='utf-8') as sink:
            json.dump(payload, sink, ensure_ascii=False, indent=2)
        print("Analysis complete. Results saved to storylines_importance.json")
        print(f"Total important items found: {len(all_important_items)}")
    except Exception as write_error:
        print(f"Error writing results to file: {str(write_error)}")


In [None]:
#### Summarize storylines #####
def load_data(file_path):
    """Parse the UTF-8 JSON file at *file_path*.

    Returns the decoded value, or None after printing the error when the
    file cannot be opened or decoded.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            parsed = json.loads(handle.read())
    except Exception as err:
        print(f"Error loading data from {file_path}: {str(err)}")
        return None
    return parsed

def save_data(data, file_path):
    """Write *data* to *file_path* as indented UTF-8 JSON.

    Non-ASCII characters are written as-is. Success and failure are both
    reported with a printed message; nothing is returned or raised.
    """
    dump_options = {"ensure_ascii": False, "indent": 2}
    try:
        with open(file_path, 'w', encoding='utf-8') as sink:
            json.dump(data, sink, **dump_options)
        print(f"Data successfully saved to {file_path}")
    except Exception as err:
        print(f"Error saving data to {file_path}: {str(err)}")

def extract_json_from_text(text):
    """Parse the JSON object embedded in *text*.

    The span from the first ``{`` to the last ``}`` of the stripped text is
    decoded, so surrounding prose or code fences are ignored. Returns the
    decoded value, or None (after printing the decode error) when that span
    is not valid JSON.
    """
    candidate = text.strip()
    first_brace = candidate.find('{')
    last_brace = candidate.rfind('}')
    try:
        return json.loads(candidate[first_brace:last_brace + 1])
    except json.JSONDecodeError as err:
        print(f"JSON Decode Error: {str(err)}")
        return None

def analyze_episode(storyline_name, episode_name, episode_data, important_items):
    """Call the OpenAI API to produce a per-character analysis of one episode.

    Args:
        storyline_name: Name of the storyline the episode belongs to. It is
            accepted for the caller's convenience but not used here.
        episode_name: Episode key; used as the top-level key of the requested
            JSON and in log messages.
        episode_data: Mapping for the episode. ``whatIf`` (default ``''``) and
            ``initialRecords`` (default ``[]``) are embedded in the prompt.
        important_items: JSON-serializable collection of key items the model
            is told to restrict itself to.

    Returns:
        The dictionary parsed from the model reply, or None when the request
        fails or the reply contains no parsable JSON object.
    """
    # Serialize every embedded value with json.dumps so that quotes, newlines
    # or backslashes in the data cannot break the JSON template, and a
    # missing 'initialRecords' key does not raise before the request is made.
    what_if_json = json.dumps(episode_data.get('whatIf', ''), ensure_ascii=False)
    records_json = json.dumps(episode_data.get('initialRecords', []), ensure_ascii=False, indent=2)
    items_json = json.dumps(important_items, ensure_ascii=False, indent=2)

    prompt = f"""
    You are a professional story analyst. Please analyze the following episode data and provide detailed analysis for each character.

    **Please note: All output should be in English, including character names and field names.**

    **Please strictly follow these requirements:**

    - **Output only JSON format data, do not add any additional text, explanations, or comments.**
    - **Ensure the output JSON format is correct and can be parsed by a JSON parser.**
    - **Only include items from the provided `important_items` list. Do not add or identify other items.**

    **For each episode, please output according to the following structure:**

    {{
      "{episode_name}": {{
        "whatIf": {what_if_json},
        "characters": {{
          "Character Name": {{
            "Interactions_with_Key_Items": {{
              "Item Name": "Description of interaction with the item [Status]"
            }},
            "Actions": "Overall description of character's actions",
            "Relationships": {{
              "Relationship with other character": "Description of relationship"
            }},
            "Emotions": {{
              "Emotion name": "Description of emotion"
            }}
          }},
          ...
        }}
      }}
    }}

    **Here is the episode data:**

    {records_json}

    **Here are the important items:**

    {items_json}

    **How to generate the Status field:**

    - Based on the events in `initialRecords`, determine the current status of the item. For example:
      - If an item is lost, `[Lost on cliff]`.
      - If an item is hidden, `[Hidden by principal]`.
      - For other necessary statuses, please judge based on the plot and note in the description.

    **Please ensure:**

    - **Output only JSON format data, do not add any additional text.**
    - **All text should be in English.**
    - **JSON format is strictly correct and can be parsed.**
    - **Only include interactions with items from the `important_items` list.**
    - **In `Interactions_with_Key_Items`, the item's status should be included in the description, such as `[Status]`.**
    - **If there are no interactions with important items, keep the field empty.**
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a professional story analyst."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1
        )

        # Get response content
        content = response.choices[0].message.content.strip()
        print(f"Response Content for {episode_name}:")
        print(content)

        # Extract JSON from response
        analysis = extract_json_from_text(content)
        if analysis:
            return analysis

        print(f"Failed to parse JSON for {episode_name}.")
        return None

    except Exception as e:
        print(f"Error analyzing {episode_name}: {str(e)}")
        return None

def summary():
    """Analyze every episode of every storyline and save one file per storyline.

    Loads ``/content/storylines.json`` and
    ``/content/storylines_importance.json``; if either is missing or empty the
    function prints a message and returns. For each storyline the
    per-episode analyses are merged into one dict and saved to
    ``/content/storylines<N>_summary.json`` with N counting from 1. The
    closing printout shows the results of the last storyline processed.
    """
    story_data = load_data('/content/storylines.json')
    important_items = load_data('/content/storylines_importance.json')

    if not (story_data and important_items):
        print("Failed to load necessary data. Exiting.")
        return

    for index, (storyline, episodes) in enumerate(story_data.items(), start=1):
        analysis_results = {}
        for episode_name, episode_data in episodes.items():
            print(f"Analyzing {episode_name}...")
            analysis = analyze_episode(storyline, episode_name, episode_data, important_items['important_items'])
            if not analysis:
                print(f"Failed to generate analysis for {episode_name}.")
                continue
            analysis_results.update(analysis)

        # One summary file per storyline, numbered in iteration order.
        save_data(analysis_results, f'/content/storylines{index}_summary.json')

    print("Analysis of all episodes has been completed and saved.")
    print("Analysis summary:")
    print(json.dumps(analysis_results, ensure_ascii=False, indent=2))


In [None]:
def get_episode_number(episode_key):
    """Return the integer that follows "Episode" in *episode_key*.

    The match is case-insensitive and may occur anywhere in the key, e.g.
    "Episode 1" gives 1. Returns None when no such number is present.
    """
    found = re.search(r'Episode\s*(\d+)', episode_key, re.IGNORECASE)
    return int(found.group(1)) if found else None

def load_summaries(summaries_path):
    """Build one Document per episode from a summaries JSON file.

    The file maps episode keys to dicts. Each Document carries the entry's
    ``whatIf`` text (empty string when absent) as page content and the
    episode key under ``metadata["episode_key"]``. File order is preserved.
    """
    with open(summaries_path, 'r', encoding='utf-8') as handle:
        summaries = json.load(handle, object_pairs_hook=OrderedDict)

    return [
        Document(page_content=entry.get('whatIf', ''), metadata={"episode_key": key})
        for key, entry in summaries.items()
    ]

def load_full_contents(full_contents_path, storyline_key='Storyline 3'):
    """Load the full episode texts of one storyline.

    Args:
        full_contents_path: Path to a JSON file mapping storyline keys to
            ``{episode key: episode data}`` dicts.
        storyline_key: Storyline to read; an unknown key yields empty results.

    Returns:
        tuple: ``(documents, episode_key_to_doc)``. ``documents`` holds one
        Document per episode whose text is the joined ``initialRecords``
        followed by a "Key Item Status" section. ``episode_key_to_doc`` maps
        each episode key to ``{"full_content": ..., "key_item_status": ...}``.
    """
    with open(full_contents_path, 'r', encoding='utf-8') as f:
        full_contents = json.load(f, object_pairs_hook=OrderedDict)

    episodes_data = full_contents.get(storyline_key, {})

    documents = []
    episode_key_to_doc = {}

    for episode_key, content in episodes_data.items():
        initial_records = "\n".join(content.get('initialRecords', []))

        # main() stores the tracked items under 'key_items_status' (plural
        # "items") while this loader historically read 'key_item_status'.
        # Accept both spellings so the status is not silently dropped; the
        # original key wins when it is present.
        if 'key_item_status' in content:
            key_item_status = content['key_item_status']
        else:
            key_item_status = content.get('key_items_status', '')

        doc_content = f"{initial_records}\n\n**Key Item Status:**\n{key_item_status}"
        documents.append(Document(page_content=doc_content, metadata={"episode_key": episode_key}))
        episode_key_to_doc[episode_key] = {
            "full_content": initial_records,
            "key_item_status": key_item_status
        }

    return documents, episode_key_to_doc


In [None]:
def create_vectorstore(documents):
    """Split *documents* into chunks and index them in a FAISS vector store.

    Chunks are produced by a RecursiveCharacterTextSplitter with
    ``chunk_size=2000`` and ``chunk_overlap=200`` and embedded with
    OpenAIEmbeddings using the module-level OpenAI key.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
    chunks = splitter.split_documents(documents)
    return FAISS.from_documents(chunks, OpenAIEmbeddings(openai_api_key=openai.api_key))

def categorize_feedback(feedback_list):
    """Group feedback comments by their category.

    Each entry is a dict; a missing ``category`` falls back to
    "Uncategorized" and a missing ``comment`` to an empty string. Returns a
    ``defaultdict(list)`` mapping category -> comments in input order.
    """
    grouped = defaultdict(list)
    for entry in feedback_list:
        grouped[entry.get("category", "Uncategorized")].append(entry.get("comment", ""))
    return grouped

def apply_feedback_with_coherence(categorized_feedback):
    """Map the feedback categories that are present to fixed guidelines.

    Only membership of the category name in *categorized_feedback* matters;
    the comments themselves are not read. Returns a dict keyed by guideline
    name (coherence, emotion, narration, interaction, tone) in that order.
    """
    # (feedback category, guideline key, guideline text)
    guideline_table = (
        (
            "Story Coherence",
            "coherence",
            "Ensure smooth transitions between events and clarify character motivations. Add additional context between story scenes to prevent abrupt jumps.",
        ),
        (
            "Needs More Emotion",
            "emotion",
            "Add more internal dialogue and detailed descriptions of character emotions to build emotional depth.",
        ),
        (
            "Better Narration",
            "narration",
            "Improve scene pacing with more narrations between dialogues to build tension and provide a better flow between story events.",
        ),
        (
            "Character Interaction",
            "interaction",
            "Increase the frequency and depth of interactions between main characters to strengthen their relationship and provide context for their motivations.",
        ),
        (
            "Tone Issues",
            "tone",
            "Ensure consistent tone across scenes; avoid dialogues that feel out of place in serious situations.",
        ),
    )

    return {
        guideline_key: advice
        for category, guideline_key, advice in guideline_table
        if category in categorized_feedback
    }

def generate_feedback_and_guidelines(evaluation_reasoning):
    """Ask GPT-4o to turn an evaluation text into improvement guidelines.

    Args:
        evaluation_reasoning: Free-text evaluation to extract categories and
            suggestions from.

    Returns:
        The value decoded from the model reply (requested as a dictionary of
        category -> suggestion), or ``{}`` when the reply contains no
        parsable JSON.
    """
    # NOTE(review): the label "evalution)reasoning" below looks like a typo
    # for "evaluation_reasoning"; it is left untouched so the prompt sent to
    # the model does not change.
    feedback_prompt = f"""
Based on the following evaluation reasons, categories and specific suggestions for improvement are extracted.

**evalution)reasoning:**
{evaluation_reasoning}

**Please return it in the form of a dictionary, with the key being the category and the value being the specific suggestion.**
"""

    llm = ChatOpenAI(model="gpt-4o", openai_api_key=openai.api_key)
    response_message = llm.invoke([HumanMessage(content=feedback_prompt)])
    response_text = response_message.content

    try:
        improvement_guidelines = json.loads(response_text)
    except json.JSONDecodeError:
        # Replies are often wrapped in a ```json fence or a sentence of
        # prose; retry on the outermost {...} span before giving up.
        improvement_guidelines = _parse_embedded_object(response_text)
        if improvement_guidelines is None:
            print("Error: Failed to parse improvement guidelines from model response.")
            improvement_guidelines = {}

    return improvement_guidelines


def _parse_embedded_object(text):
    """Decode the span from the first '{' to the last '}' of *text*, or return None."""
    start = text.find('{')
    end = text.rfind('}')
    if start == -1 or end < start:
        return None
    try:
        return json.loads(text[start:end + 1])
    except json.JSONDecodeError:
        return None

def parse_evaluation_response(response_text):
    """Extract the score and the evaluation text from a model reply.

    The first "Score" label (case-insensitive) that is followed by a digit is
    used. An optional ``:``, ``：`` or ``-`` separator and markdown bold
    markers around the label are tolerated, so ``Score: 4``, ``**Score:** 4``
    and ``**Score**: 4`` all yield 4. Only a single digit is captured.

    Returns:
        dict: ``{"score": int or None, "evaluation_reasoning": str}`` where
        the reasoning is the whole reply with surrounding whitespace removed.
    """
    # Allow '*' runs on either side of the separator so bolded labels match.
    score_match = re.search(
        r"Score\s*\**\s*[:：-]?\s*\**\s*(\d)", response_text, re.IGNORECASE
    )
    score = int(score_match.group(1)) if score_match else None

    return {
        "score": score,
        "evaluation_reasoning": response_text.strip()
    }


In [None]:
def evaluate_episode(episode_number, episode_key, summaries_vectorstore, episode_key_to_doc, show_retrieved_summaries=False):
    """Evaluate one episode against summaries of earlier episodes.

    The episode text is used as the query for a similarity search (k=15) in
    *summaries_vectorstore*; hits whose episode number is not lower than
    *episode_number* are discarded. The remaining summaries, the episode text
    and its key item status are sent to GPT-4o for a critique.

    Args:
        episode_number: Number of the episode being evaluated.
        episode_key: Key of the episode in *episode_key_to_doc*.
        summaries_vectorstore: Store exposing ``similarity_search(query, k)``.
        episode_key_to_doc: Mapping episode key ->
            ``{"full_content": ..., "key_item_status": ...}``.
        show_retrieved_summaries: When true, print each retained summary.

    Returns:
        dict: ``score``, ``evaluation_reasoning`` and
        ``improvement_guidelines``. When the episode text is empty or the
        key is unknown, only ``score`` (None) and an error message under
        ``evaluation_reasoning`` are returned.
    """
    llm = ChatOpenAI(model="gpt-4o", openai_api_key=openai.api_key)

    # Look up the episode text and the status of its key items.
    episode_record = episode_key_to_doc.get(episode_key)
    if not episode_record:
        print(f"Warning: {episode_key} not found in episodes data.")
        current_episode_text, key_item_status = '', ''
    else:
        current_episode_text = episode_record["full_content"]
        key_item_status = episode_record["key_item_status"]

    if not current_episode_text.strip():
        empty_message = f"Error: Current episode content for {episode_key} is empty."
        print(empty_message)
        return {
            "score": None,
            "evaluation_reasoning": empty_message
        }

    # The episode text itself is the query; fetch the 15 closest summary chunks.
    candidates = summaries_vectorstore.similarity_search(current_episode_text, k=15)

    def _is_earlier(doc):
        # Only summaries of episodes that precede the current one may be used.
        number = get_episode_number(doc.metadata.get('episode_key', ''))
        return number is not None and number < episode_number

    # Keep at most 15 summaries from earlier episodes.
    earlier_docs = [doc for doc in candidates if _is_earlier(doc)][:15]

    print("\nRetrieved Relevant Summaries:")
    summary_blocks = []
    for idx, doc in enumerate(earlier_docs, 1):
        ep_key = doc.metadata.get('episode_key', '')
        summary_blocks.append(f"{ep_key}:\n{doc.page_content}\n\n")
        if show_retrieved_summaries:
            print(f"Summary {idx}: {ep_key}")
            print(doc.page_content)
            print("-" * 50)

    previous_summaries = ''.join(summary_blocks)
    if not previous_summaries.strip():
        previous_summaries = "No relevant previous episodes."

    # Evaluation prompt, including the key item status.
    prompt = f"""
You are a meticulous literary critic specializing in narrative coherence.

As you read, pay special attention to the continuity and consistency of key items and their statuses.

**Relevant Previous Episodes Summaries:**
{previous_summaries}

**Current Episode ({episode_number}) Full Content:**
{current_episode_text}

**Key Item Status:**
{key_item_status}

Please provide a critical evaluation of the current episode, focusing on:

1. **Character Consistency** - Evaluate whether the actions and dialogues of main characters in this scene align with their established traits. Note any inconsistencies and assess if they are justified by new developments. Give a score based on the Score (0-5) and Justification.

2. **Plot Progression** - Analyze how this scene contributes to the overall story. Assess whether newly introduced elements logically extend the plot and effectively advance or resolve narrative threads. Give a score based on the Score (0-5) and Justification.

3. **Emotional and Psychological Realism** - Review the authenticity of the main characters' emotional and psychological responses. Evaluate whether these reactions are believable and consistent with their character development and the situation. Give a score based on the Score (0-5) and Justification.

4. **Continuity and Consistency in Story Elements** - Examine the episode for any inconsistencies or continuity errors, such as objects appearing or disappearing without explanation, conflicting information, or events that contradict prior established facts. Pay particular attention to items that were lost or destroyed in previous episodes but reappear without explanation. Assess how these issues impact the narrative coherence. Give a score based on the Score (0-5) and Justification.

**Also, please check the "Key Item Status" section specifically for any consistency errors as with this episode (key_item_status should be correct). Make sure the status of each key item is consistent with the status of the current episode. Report any discrepancies or errors. Give a score based on the Score (0-5) and Justification. **

Provide a balanced and critical evaluation, pointing out both strengths and weaknesses. Ensure that your reasoning clearly supports the score you assign.

**Score (0-5) and Justification:**

Scoring Guidelines:
- **5 (Excellent):** The episode is highly coherent, with consistent characters, strong plot development, realistic emotions, and no major errors.

- **4 (Good):** The episode is mostly coherent, but there are small issues with characters, plot, or minor inconsistencies.

- **3 (Fair):** The episode has some noticeable inconsistencies that affect the flow.

- **2 (Poor):** The episode has major issues with consistency and continuity.

- **1 (Very Poor):** The episode is incoherent, with severe flaws and continuity problems.

- **0 (Unacceptable):** The episode is completely incoherent, with critical errors that make it nonsensical, and Error in the status of key items.
"""

    # Run the evaluation and parse the reply.
    reply = llm.invoke([HumanMessage(content=prompt)])
    evaluation_result = parse_evaluation_response(reply.content)

    # Derive improvement suggestions from the evaluation text.
    evaluation_result['improvement_guidelines'] = generate_feedback_and_guidelines(
        evaluation_result['evaluation_reasoning']
    )

    return evaluation_result

In [None]:
# OpenAI client used by the summary/evaluation functions in this cell.
client = OpenAI(api_key=openai.api_key)

# Module-level caches. Nothing in the visible code reads or writes them.
# NOTE(review): possibly leftovers -- confirm no other cell uses them before removing.
summary_cache = {}
key_items_cache = {}

def update_summary_and_key_items(previous_summary, previous_key_items, current_scene, episode_number, continuity_analysis):
    """Ask GPT for an updated running summary and key-item state after one scene.

    Args:
        previous_summary: Summary carried over from the earlier scenes.
        previous_key_items: Mapping item name ->
            ``{'status', 'location', 'importance'}`` from the earlier scenes.
        current_scene: Scene dict; its ``initialRecords`` list is joined into
            the scene text.
        episode_number: Number of the current episode.
        continuity_analysis: Continuity notes from the scene evaluation.
            (The prompt body is elided in this file; presumably it
            interpolates the scene text, this value and the episode number.)

    Returns:
        tuple: ``(updated_summary, updated_key_items)``. When the reply lacks
        the "**Updated Key Items and Their Statuses:**" marker the whole
        reply becomes the summary and the previous items are kept. When the
        request fails or returns nothing usable, the previous summary and
        items are returned unchanged.
    """

    # Use content from initialRecords
    current_scene_text = '\n'.join(current_scene['initialRecords'])

    # Convert previous key item statuses to text
    key_items_text = "".join(
        f"- {item_name}: [Status: {item_info['status']}, Last Known Location/Owner: {item_info['location']}, Current Importance: {item_info['importance']}]\n"
        for item_name, item_info in previous_key_items.items()
    )

    # Output the key_items_text passed to GPT
    print("\n--- key_items_text passed to GPT ---\n")
    print(key_items_text)
    print("\n--- End of key_items_text ---\n")

    prompt = f""" ... """  # [unchanged prompt omitted for brevity]

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini-2024-07-18",
            messages=[
                {"role": "system", "content": "You are a professional story analyst."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1
        )

        if not (response and response.choices and response.choices[0].message):
            return previous_summary, previous_key_items

        content = response.choices[0].message.content.strip()

        # Print updated summary and key items generated by GPT
        print("\n--- Updated summary and key items generated by GPT ---\n")
        print(content)
        print("\n--- End of GPT generated content ---\n")

        marker = "**Updated Key Items and Their Statuses:**"
        if marker not in content:
            # If there's no clear split, return the whole content as summary, keep key items unchanged
            return content, previous_key_items

        summary_part, key_items_part = content.split(marker, 1)
        updated_summary = summary_part.replace("**Updated Summary:**", "").strip()

        # Items are separated by blank lines; each item is a name line
        # followed by status, location and importance lines, in that order.
        updated_key_items = {}
        for block in key_items_part.strip().split("\n\n"):
            lines = block.strip().split('\n')
            if len(lines) < 4:
                continue
            item_name = _clean_item_name(lines[0])
            if not item_name:
                continue
            updated_key_items[item_name] = {
                'status': _field_value(lines[1], "Current Status"),
                'location': _field_value(lines[2], "Last Known Location/Owner"),
                'importance': _field_value(lines[3], "Current Importance"),
            }
        return updated_summary, updated_key_items

    except Exception as e:
        print(f"Error updating summary and key items: {str(e)}")
        return previous_summary, previous_key_items


def _clean_item_name(raw_line):
    """Reduce an item heading such as ``5. **Name:**`` or ``Item Name: Name`` to ``Name``."""
    name = raw_line.replace("Item Name:", "")
    # Drop list numbering or bullets; the number changes from reply to reply
    # and would otherwise turn the same item into a different dict key.
    name = re.sub(r"^\s*(?:\d+[.)]|[-•])\s*", "", name)
    name = name.strip().strip("*").strip()
    return name.rstrip(":：").strip().strip("*").strip()


def _field_value(line, label):
    """Return the text after ``label:`` in *line*, ignoring bullets and bold markers."""
    pattern = r"^[\s\-•*]*" + re.escape(label) + r"[\s*]*[:：][\s*]*"
    return re.sub(pattern, "", line, count=1).strip()

def evaluate_scene(previous_summary, previous_key_items, current_scene, next_scene, episode_number):
    """Ask GPT to critique one scene and extract the score and continuity notes.

    Args:
        previous_summary: Running summary of the earlier scenes.
        previous_key_items: Mapping item name ->
            ``{'status', 'location', 'importance'}``.
        current_scene: Scene dict with an ``initialRecords`` list.
        next_scene: Following scene dict, or None for the last scene.
        episode_number: Number of the current episode.
            (The prompt body is elided in this file; presumably it
            interpolates the texts prepared below.)

    Returns:
        tuple: ``(score, reasoning, continuity_analysis)``. ``score`` is the
        integer after the first "Score:" label or None; ``reasoning`` is the
        full reply; ``continuity_analysis`` is the text of the section headed
        "5. **Continuity and Consistency in Story Elements**" or ``""``.
        ``(None, None, None)`` is returned when the request fails or yields
        no usable message.
    """

    # Use content from initialRecords
    current_scene_text = '\n'.join(current_scene['initialRecords'])
    next_scene_text = '\n'.join(next_scene['initialRecords']) if next_scene else "without next scene"

    # Convert previous key item statuses to text
    key_items_text = "".join(
        f"- {item_name}: [Status: {item_info['status']}, Last Known Location/Owner: {item_info['location']}, Current Importance: {item_info['importance']}]\n"
        for item_name, item_info in previous_key_items.items()
    )

    prompt = f""" ... """  # [unchanged prompt omitted for brevity]

    # Print evaluation prompt sent to GPT
    print("\n--- Evaluation prompt sent to GPT ---\n")
    print(prompt)
    print("\n--- End of evaluation prompt ---\n")

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini-2024-07-18",
            messages=[
                {"role": "system", "content": "You are a meticulous literary critic specializing in narrative coherence."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3
        )

        if not (response and response.choices and response.choices[0].message):
            return None, None, None

        content = response.choices[0].message.content.strip()

        # Print GPT's evaluation response
        print("\n--- GPT evaluation response ---\n")
        print(content)
        print("\n--- End of GPT evaluation response ---\n")

        # Extract score. '*' runs are allowed around the colon so bolded
        # labels such as "**Score:** 4" or "**Score**: 4" are recognized too.
        score_match = re.search(r"Score\s*\**\s*[:：]\s*\**\s*(\d+)", content, re.IGNORECASE)
        score = int(score_match.group(1)) if score_match else None

        # Extract analysis of point 5: everything up to the next line that
        # starts with a digit, or to the end of the reply.
        pattern = r"5\.\s*\*\*Continuity and Consistency in Story Elements\*\*([\s\S]*?)(?=\n\d|$)"
        section_match = re.search(pattern, content)
        continuity_analysis = section_match.group(1).strip() if section_match else ""

        return score, content, continuity_analysis

    except Exception as e:
        print(f"Error evaluating episode: {str(e)}")
        return None, None, None

def main():
    """Track key item statuses episode by episode and write them to JSON.

    Loads ``/content/Storyline_26.json``, collects every entry whose key is
    exactly "Episode <number>", sorts the scenes by episode number, and for
    each scene first evaluates its continuity and then updates the running
    summary and key-item state. The key-item state is stored under
    ``key_items_status`` on each episode, the bulky source fields are
    removed, and the result is written to a new JSON file. Load and write
    errors are printed; nothing is returned.
    """
    try:
        with open('/content/Storyline_26.json', 'r', encoding='utf-8') as file:
            data = json.load(file)
        print("Loaded data from file")
    except Exception as e:
        print(f"Error loading data: {str(e)}")
        return

    scenes = []
    for storyline_key, storyline_value in data.items():
        for episode_key, episode_value in storyline_value.items():
            match = re.match(r'^Episode\s+(\d+)$', episode_key)
            if not match:
                # Skip entries that are not plain "Episode N" keys.
                continue
            scenes.append({
                'storyline_key': storyline_key,
                'episode_key': episode_key,
                'episode_number': int(match.group(1)),
                'initialRecords': episode_value.get('initialRecords', [])
            })

    # Sort by episode number
    scenes.sort(key=lambda x: x['episode_number'])

    print(f"Processed {len(scenes)} episodes from data")

    # Initialize summary and key item states
    previous_summary = ""
    previous_key_items = {}

    # Iterate through each episode, updating summary and key item status step-by-step
    for i, scene in enumerate(scenes):
        storyline_key = scene['storyline_key']
        episode_key = scene['episode_key']
        episode_number = scene['episode_number']
        print(f"Processing {episode_key}")

        next_scene = scenes[i + 1] if i + 1 < len(scenes) else None

        # First, evaluate the continuity of the current scene. evaluate_scene
        # returns (score, reasoning, continuity_analysis); only the
        # continuity text is forwarded, not the whole tuple.
        _score, _reasoning, continuity_analysis = evaluate_scene(
            previous_summary, previous_key_items, scene, next_scene, episode_number
        )

        # Then update summary and key item status. A failed evaluation
        # yields None, which is passed on as an empty string.
        updated_summary, updated_key_items = update_summary_and_key_items(
            previous_summary, previous_key_items, scene, episode_number, continuity_analysis or ""
        )

        # Output key item status for each episode
        print(f"Key items status for {episode_key}:")
        for item_name, item_info in updated_key_items.items():
            print(f"Item Name: {item_name}")
            print(f"- Current Status: {item_info['status']}")
            print(f"- Last Known Location/Owner: {item_info['location']}")
            print(f"- Current Importance: {item_info['importance']}")
            print()

        # Update previous summary and key items
        previous_summary = updated_summary
        previous_key_items = updated_key_items

        # Update key item status in the original data structure for each episode
        data[storyline_key][episode_key]['key_items_status'] = updated_key_items

        # Remove unnecessary data
        for field in ['initialRecords', 'score', 'evaluation_reasoning', 'whatIf', 'characters']:
            data[storyline_key][episode_key].pop(field, None)

    try:
        with open('/content/stoeyline26每集的key_item.json', 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=2, ensure_ascii=False)
        print("Processing complete. Results written.")
    except Exception as e:
        print(f"Error writing results to file: {str(e)}")

# Run the key-item tracking pipeline when this code is executed directly.
if __name__ == "__main__":
    main()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m

5. **Suguha's Digital Wings:**
   - Current Status: Active (fully materialized)
   - Last Known Location/Owner: Kirigaya Suguha
   - Current Importance: Their reappearance signifies Suguha's regained confidence and ability to confront her fears.

6. **Holographic Rabbit:**
   - Current Status: Active
   - Last Known Location/Owner: Materialized in the digital landscape
   - Current Importance: Serves as a guide for Ruby, leading her towards Suguha and furthering the rescue mission.

7. **Data-Tether:**
   - Current Status: Active
   - Last Known Location/Owner: Attached to Ruby Rose
   - Current Importance: Crucial for maintaining a connection between Ruby and Monika, ensuring that Ruby can return safely with Suguha.

8. **Giant Glowing Door:**
   - Current Status: Opened (phased through)
   - Last Known Location/Owner: At the facility they are navigating
   - Current Importance: Successfully bypassed, allowing Ruby and 

In [None]:
# OpenAI client used by analyze_episode in this cell.
client = OpenAI(api_key=openai.api_key)

def load_data(file_path):
    """Read a UTF-8 JSON file and return the decoded object.

    Any failure while opening or parsing the file is reported on stdout and
    signalled to the caller with ``None`` instead of an exception.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as source:
            parsed = json.load(source)
    except Exception as err:
        print(f"Error loading data from {file_path}: {str(err)}")
        return None
    return parsed

def save_data(data, file_path):
    """Write ``data`` to ``file_path`` as indented, non-ASCII-preserving JSON.

    The outcome (success or the error text) is printed; nothing is returned
    and no exception escapes to the caller.
    """
    try:
        with open(file_path, mode='w', encoding='utf-8') as sink:
            json.dump(data, sink, ensure_ascii=False, indent=2)
    except Exception as err:
        print(f"Error saving data to {file_path}: {str(err)}")
    else:
        print(f"Data successfully saved to {file_path}")

def extract_json_from_text(text):
    """Parse the outermost ``{...}`` span found in ``text``.

    Everything before the first ``{`` and after the last ``}`` is discarded
    (for example Markdown code fences wrapped around a model reply).  Returns
    the decoded object, or ``None`` after printing the decoder error when the
    remaining span is not valid JSON (including when no braces are present).
    """
    stripped = text.strip()
    opening = stripped.find('{')
    closing = stripped.rfind('}')
    # A missing brace yields -1, which makes the slice empty and therefore
    # fails JSON decoding below.
    candidate = stripped[opening:closing + 1]
    try:
        return json.loads(candidate)
    except json.JSONDecodeError as err:
        print(f"JSON Decode Error: {str(err)}")
        return None

def analyze_episode(episode_name, episode_data, important_items):
    """Ask the chat model for a per-character analysis of one episode.

    Parameters:
        episode_name: Key of the episode; used as the top-level key of the
            JSON structure the model is asked to produce.
        episode_data: Mapping for the episode. Only ``'whatIf'`` and
            ``'initialRecords'`` are read; both are optional.
        important_items: JSON-serializable description of the items the model
            is allowed to mention.

    Returns:
        The parsed JSON object from the model reply, or ``None`` when the
        request fails or the reply cannot be parsed into a non-empty object.
    """
    # Serialize the inputs before building the prompt. Using .get() keeps an
    # episode without 'initialRecords' from raising KeyError outside the
    # try-block, and json.dumps() escapes quotes/newlines in 'whatIf' so the
    # JSON template shown to the model stays well-formed.
    what_if_json = json.dumps(episode_data.get('whatIf', ''), ensure_ascii=False)
    records_json = json.dumps(episode_data.get('initialRecords', []), ensure_ascii=False, indent=2)
    items_json = json.dumps(important_items, ensure_ascii=False, indent=2)

    prompt = f"""
    You are a professional story analyst. Please analyze the following episode data and provide detailed analysis for each character.

    **Please note: All output should be in English, including character names and field names.**

    **Please strictly follow these requirements:**

    - **Output only JSON format data, do not add any additional text, explanations, or comments.**
    - **Ensure the output JSON format is correct and can be parsed by a JSON parser.**
    - **Only include items from the provided `important_items` list. Do not add or identify other items.**

    **For each episode, please output according to the following structure:**

    {{
      "{episode_name}": {{
        "whatIf": {what_if_json},
        "characters": {{
          "Character Name": {{
            "Interactions_with_Key_Items": {{
              "Item Name": "Description of interaction with the item [Status]"
            }},
            "Actions": "Overall description of character's actions",
            "Relationships": {{
              "Relationship with other character": "Description of relationship"
            }},
            "Emotions": {{
              "Emotion name": "Description of emotion"
            }}
          }},
          ...
        }}
      }}
    }}

    **Here is the episode data:**

    {records_json}

    **Here are the important items:**

    {items_json}

    **How to generate the Status field:**

    - Based on the events in `initialRecords`, determine the current status of the item. For example:
      - If an item is lost, `[Lost on cliff]`.
      - If an item is hidden, `[Hidden by principal]`.
      - For other necessary statuses, please judge based on the plot and note in the description.

    **Please ensure:**

    - **Output only JSON format data, do not add any additional text.**
    - **All text should be in English.**
    - **JSON format is strictly correct and can be parsed.**
    - **Only include interactions with items from the `important_items` list.**
    - **In `Interactions_with_Key_Items`, the item's status should be included in the description, such as `[Status]`.**
    - **If there are no interactions with important items, keep the field empty.**
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a professional story analyst."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1
        )

        # Echo the raw reply so parsing problems can be diagnosed from the log.
        content = response.choices[0].message.content.strip()
        print(f"Response Content for {episode_name}:")
        print(content)

        # The reply may be wrapped in extra text; keep only the JSON object.
        analysis = extract_json_from_text(content)
        if analysis:
            return analysis
        else:
            print(f"Failed to parse JSON for {episode_name}.")
            return None

    except Exception as e:
        # Any API or response-shape problem is reported and turned into None
        # so the caller can continue with the next episode.
        print(f"Error analyzing {episode_name}: {str(e)}")
        return None

def main():
    """Analyze every episode of the storyline file and save the merged result.

    Loads the storyline JSON and the key-item JSON, runs analyze_episode() on
    each episode of each storyline, merges the returned dictionaries into one
    mapping, writes it to '/content/episodes27_analysis.json' and prints it.
    """
    story_data = load_data('/content/Storyline_26.json')
    important_items = load_data('/content/stoeyline26每集的key_item.json')

    # Both inputs are required; an empty or unreadable file aborts the run.
    if not (story_data and important_items):
        print("Failed to load necessary data. Exiting.")
        return

    analysis_results = {}

    for episodes in story_data.values():
        for episode_name, episode_data in episodes.items():
            print(f"Analyzing {episode_name}...")
            # NOTE(review): assumes the key-item file has a top-level
            # 'important_items' entry - confirm against the file actually used.
            analysis = analyze_episode(episode_name, episode_data, important_items['important_items'])
            if not analysis:
                print(f"Failed to generate analysis for {episode_name}.")
                continue
            # Each analysis is keyed by the top-level key(s) of the model's reply.
            analysis_results.update(analysis)

    # Persist the combined analysis of all episodes.
    save_data(analysis_results, '/content/episodes27_analysis.json')

    print("Analysis of all episodes has been completed and saved.")
    print("Analysis summary:")
    summary_text = json.dumps(analysis_results, ensure_ascii=False, indent=2)
    print(summary_text)

# Entry point for this cell: run the per-episode analysis main() defined
# above only when the code is executed directly rather than imported.
if __name__ == "__main__":
    main()

In [None]:
def merge_key_items_into_episodes(episodes_json_path, key_items_json_path, output_json_path):
    """
    Merge key item statuses into the existing episode JSON data,
    and insert 'key_item_status' right after 'whatIf' (i.e. before 'characters'
    in the usual layout).

    Episodes are matched by number: the key must look like ``Episode <n>`` and
    the status is looked up under ``str(n)`` in the key item data (an empty
    string is used when there is no entry). Keys that do not match the pattern
    are left untouched. When an episode has no 'whatIf' entry the status is
    appended at the end instead of being dropped, and a 'key_item_status'
    already present in the episode is replaced rather than allowed to
    overwrite the fresh value.

    Parameters:
        episodes_json_path (str): Path to the JSON file containing episode plot and character info.
        key_items_json_path (str): Path to the JSON file containing per-episode key item statuses.
        output_json_path (str): Path to save the merged output JSON data.

    Returns:
        None. Read/write failures are printed and end the function early.
    """
    try:
        # Load episode data, preserving key order for the re-insertion below.
        with open(episodes_json_path, 'r', encoding='utf-8') as f:
            episodes_data = json.load(f, object_pairs_hook=OrderedDict)
        print(f"Successfully loaded episode data from {episodes_json_path}")
    except Exception as e:
        print(f"Error reading episode JSON file: {str(e)}")
        return

    try:
        # Load key item data
        with open(key_items_json_path, 'r', encoding='utf-8') as f:
            key_items_data = json.load(f)
        print(f"Successfully loaded key item data from {key_items_json_path}")
    except Exception as e:
        print(f"Error reading key item JSON file: {str(e)}")
        return

    # Values are reassigned in place; the set of keys never changes, so
    # iterating the mapping while updating it is safe.
    for episode_key in episodes_data:
        match = re.match(r'^Episode\s+(\d+)$', episode_key)
        if not match:
            print(f"Skipping unmatched episode key: {episode_key}")
            continue
        episode_number = int(match.group(1))

        # NOTE(review): assumes the key item file is keyed by the episode
        # number as a string ("1", "2", ...) - confirm against the real file.
        key_items_text = key_items_data.get(str(episode_number), "")

        current_episode_data = episodes_data[episode_key]

        # Rebuild the episode so the new key lands in the intended position.
        merged_episode = OrderedDict()
        inserted = False
        for key, value in current_episode_data.items():
            if key == 'key_item_status':
                # Left over from an earlier merge; the fresh value replaces it.
                continue
            merged_episode[key] = value
            if key.lower() == 'whatif':
                merged_episode['key_item_status'] = key_items_text
                inserted = True

        if not inserted:
            # No 'whatIf' anchor: keep the status anyway, at the end.
            merged_episode['key_item_status'] = key_items_text

        episodes_data[episode_key] = merged_episode

    # Write the updated data to a new JSON file
    try:
        with open(output_json_path, 'w', encoding='utf-8') as f:
            json.dump(episodes_data, f, indent=2, ensure_ascii=False)
        print(f"Successfully merged data and saved to {output_json_path}")
    except Exception as e:
        print(f"Error writing merged JSON file: {str(e)}")

# Entry point for this cell: merge the per-episode analysis with the key item
# statuses when the code is executed directly rather than imported.
if __name__ == "__main__":
    # Define file paths. The first path is the file written by the analysis
    # cell's save_data() call; the output path is the summaries file that the
    # evaluation cell's main() reads.
    episodes_json_path = '/content/episodes27_analysis.json'  # Replace with your episode data JSON file path
    key_items_json_path = '/content/stoeyline26每集的key_item.json'  # Replace with your key item data JSON file path
    output_json_path = '/content/stoeyline26 summary_key_item.json'  # Define output path for merged data

    # Call merge function
    merge_key_items_into_episodes(episodes_json_path, key_items_json_path, output_json_path)


In [None]:
# OpenAI client shared by this cell: used directly for sentiment analysis, and
# its api_key is passed on to the LangChain chat models and embeddings below.
client = OpenAI(api_key=openai.api_key)

# Path of the JSON file used to cache sentiment scores between runs
# (read by load_sentiment_results, written by save_sentiment_results).
SENTIMENT_RESULTS_PATH = '/content/save_score_episode26.json'

# Running total of tokens reported by sentiment requests; incremented in
# analyze_sentiment_openai() and printed at the end of main().
total_token_count = 0

# Load existing sentiment results
def load_sentiment_results():
    """Return the cached sentiment scores stored at SENTIMENT_RESULTS_PATH.

    Returns:
        The decoded JSON content of the cache file, or an empty dict when the
        file does not exist yet or cannot be decoded as JSON (a warning is
        printed in the latter case so the cache is simply rebuilt).
    """
    try:
        # Explicit UTF-8 matches how every other JSON file in this notebook
        # is read and how save_sentiment_results() is expected to write it.
        with open(SENTIMENT_RESULTS_PATH, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return {}
    except json.JSONDecodeError as e:
        # A truncated or hand-edited cache should not abort the whole session.
        print(f"Warning: could not parse {SENTIMENT_RESULTS_PATH} ({e}); starting with an empty sentiment cache.")
        return {}

# Save sentiment results to file
def save_sentiment_results(results):
    """Write the sentiment cache ``results`` to SENTIMENT_RESULTS_PATH as JSON.

    The file is written as UTF-8 explicitly: ``ensure_ascii=False`` can emit
    non-ASCII characters, which must not depend on the platform's default
    encoding.
    """
    with open(SENTIMENT_RESULTS_PATH, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

# Fetch saved sentiment score for an episode
def get_saved_sentiment(episode_number, sentiment_results):
    """Return the cached score for ``episode_number`` or ``None`` if absent.

    The cache is keyed by the episode number converted to a string.
    """
    cache_key = str(episode_number)
    if cache_key in sentiment_results:
        return sentiment_results[cache_key]
    return None

# Wrapper for sentiment analysis or fetching saved result
def analyze_or_get_sentiment(episode_number, text, sentiment_results):
    """Return the sentiment score for an episode, using the cache when possible.

    Parameters:
        episode_number: Episode number; its string form is the cache key.
        text: Episode text to analyze when no cached score exists.
        sentiment_results: Mutable cache mapping ``str(episode_number)`` to a
            score. A freshly computed score is stored here.

    Returns:
        The cached or newly computed sentiment score.
    """
    saved_result = get_saved_sentiment(episode_number, sentiment_results)
    if saved_result is not None:
        print(f"Retrieved saved sentiment analysis for Episode {episode_number}: {saved_result}")
        return saved_result

    sentiment_score, token_usage = analyze_sentiment_openai(text)
    if token_usage > 0:
        sentiment_results[str(episode_number)] = sentiment_score
        print(f"Saved sentiment analysis for Episode {episode_number}: {sentiment_score}, Tokens used: {token_usage}")
    else:
        # analyze_sentiment_openai() reports a failed request as (0.5, 0).
        # Caching that placeholder would make the failure permanent, so it is
        # returned for this call only and retried next time.
        print(f"Sentiment analysis for Episode {episode_number} did not complete; using fallback score {sentiment_score} without caching it.")
    return sentiment_score

def get_episode_number(episode_key):
    """Extract the integer that follows the word 'Episode' in ``episode_key``.

    The search is case-sensitive and may match anywhere in the key. Returns
    ``None`` when the key contains no such number.
    """
    found = re.search(r'Episode\s*(\d+)', episode_key)
    return int(found.group(1)) if found else None

def format_characters(characters_dict):
    """Render the per-character analysis as a nested Markdown bullet list.

    Each character becomes a top-level bullet, each of its fields a second
    level bullet, and the entries of dict-valued fields a third level. Every
    bullet starts with a newline, so the result begins with ``"\\n"`` unless
    the input is empty (then the result is an empty string).
    """
    pieces = []
    for char_name, char_info in characters_dict.items():
        pieces.append(f"\n- **{char_name}:**")
        for field, detail in char_info.items():
            if not isinstance(detail, dict):
                pieces.append(f"\n  - **{field}:** {detail}")
                continue
            # Dict-valued fields get a heading bullet plus one bullet per entry.
            pieces.append(f"\n  - **{field}:**")
            pieces.extend(
                f"\n    - **{sub_key}:** {sub_value}"
                for sub_key, sub_value in detail.items()
            )
    return "".join(pieces)

def format_key_item_status(key_item_status):
    """Return the key item status as prompt-ready text.

    Strings are returned with surrounding whitespace removed (the original
    behavior). The value comes straight from JSON files, so other shapes are
    tolerated instead of raising AttributeError: ``None`` becomes an empty
    string, dicts and lists are rendered as indented JSON, and any other
    value is converted with ``str()``.
    """
    if key_item_status is None:
        return ""
    if isinstance(key_item_status, str):
        return key_item_status.strip()
    if isinstance(key_item_status, (dict, list)):
        # NOTE(review): structured statuses are presumably acceptable to the
        # prompt as JSON text - confirm the desired rendering.
        return json.dumps(key_item_status, ensure_ascii=False, indent=2)
    return str(key_item_status).strip()

def load_summaries(summaries_path):
    """Load the merged summaries file and turn each episode into a Document.

    Returns three parallel lists, in file order:
        documents: one Document per episode whose metadata holds the episode
            key and the raw 'key_item_status' value,
        episodes: the page text of each Document,
        episode_keys: the episode keys.
    """
    with open(summaries_path, 'r', encoding='utf-8') as f:
        summaries = json.load(f, object_pairs_hook=OrderedDict)

    documents, episodes, episode_keys = [], [], []

    for episode_key, content in summaries.items():
        # Missing sections fall back to empty values.
        what_if = content.get('whatIf', '')
        characters = content.get('characters', {})
        key_item_status = content.get('key_item_status', '')

        characters_str = format_characters(characters)
        key_item_status_str = format_key_item_status(key_item_status)

        # The three sections are separated by a blank line.
        page_content = "\n\n".join((
            f"**WhatIf:**\n{what_if}",
            f"**Characters:**\n{characters_str}",
            f"**Key Item Status:**\n{key_item_status_str}",
        ))

        metadata = {
            "episode_key": episode_key,
            "key_item_status": key_item_status
        }
        documents.append(Document(page_content=page_content, metadata=metadata))
        episodes.append(page_content)
        episode_keys.append(episode_key)

    return documents, episodes, episode_keys

def load_full_contents(full_contents_path, storyline_key='Storyline 2'):
    """Load the full episode texts of one storyline.

    Parameters:
        full_contents_path: Path to a JSON file mapping storyline keys to
            mappings of episode key -> episode data.
        storyline_key: Storyline to read. When it is absent a warning is
            printed; if the file holds exactly one storyline that one is used
            instead, otherwise no episodes are returned.

    Returns:
        A tuple ``(documents, episode_key_to_doc, episode_number_to_key)``:
        one Document per episode built from its joined 'initialRecords', a
        lookup from episode key to that Document, and a lookup from episode
        number to episode key (only for keys with a recognizable number).
    """
    with open(full_contents_path, 'r', encoding='utf-8') as f:
        full_contents = json.load(f, object_pairs_hook=OrderedDict)

    episodes_data = full_contents.get(storyline_key)
    if episodes_data is None:
        # Previously a wrong key silently produced zero episodes, which later
        # surfaced only as "Episode N does not exist".
        available = list(full_contents)
        if len(available) == 1:
            fallback_key = available[0]
            print(f"Warning: storyline '{storyline_key}' not found in {full_contents_path}; "
                  f"using the only storyline present: '{fallback_key}'.")
            episodes_data = full_contents[fallback_key]
        else:
            print(f"Warning: storyline '{storyline_key}' not found in {full_contents_path}. "
                  f"Available storylines: {available}")
            episodes_data = {}

    documents = []
    episode_key_to_doc = {}
    episode_number_to_key = {}

    for episode_key, content in episodes_data.items():
        # The episode text is its records joined one per line.
        initial_records = "\n".join(content.get('initialRecords', []))
        doc = Document(page_content=initial_records, metadata={"episode_key": episode_key})
        documents.append(doc)
        episode_key_to_doc[episode_key] = doc
        episode_number = get_episode_number(episode_key)
        if episode_number is not None:
            episode_number_to_key[episode_number] = episode_key

    return documents, episode_key_to_doc, episode_number_to_key

def create_vectorstore(documents):
    """Chunk ``documents`` and index the chunks in a FAISS vector store.

    Documents are split into pieces of up to 2000 characters with a 200
    character overlap and embedded with OpenAIEmbeddings using the shared
    client's API key.
    """
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
    ).split_documents(documents)

    embedder = OpenAIEmbeddings(openai_api_key=client.api_key)
    return FAISS.from_documents(chunks, embedder)

def parse_evaluation_response(response_text):
    """Extract the 0-5 score from an evaluation reply.

    The first "Score" label that is directly followed by a digit is used.
    Markdown emphasis (``**Score:** 4``), an optional parenthetical such as
    ``(0-5)`` and the separators ``:``, ``：`` or ``-`` may sit between the
    label and the digit. Only a single digit from 0 to 5 is accepted, so an
    echoed heading like "Score (0-5) and Justification" or an out-of-range
    number is not mistaken for the score.

    Returns:
        dict with ``'score'`` (int, or ``None`` when no score was found) and
        ``'evaluation_reasoning'`` (the whole reply, stripped).
    """
    score_match = re.search(
        r"\bScore\b[\s*_]*(?:\([^)\n]*\))?[\s*_:：\-]*([0-5])(?!\d)",
        response_text,
        re.IGNORECASE,
    )
    score = int(score_match.group(1)) if score_match else None

    return {
        "score": score,
        "evaluation_reasoning": response_text.strip()
    }
def analyze_sentiment_openai(text):
    """Ask the chat model for a sentiment score of ``text``.

    Returns:
        A tuple ``(sentiment_score, token_usage)``. The score is a float in
        [0, 1]; 0.5 is used when the reply contains no usable number. On any
        error the function prints it and returns ``(0.5, 0)``.

    Side effects:
        Adds the request's token usage to the module-level
        ``total_token_count``.
    """
    global total_token_count
    try:
        # `client` is an openai>=1.0 `OpenAI` instance: the request goes
        # through client.chat.completions and the response is an object with
        # attributes, not a dict. The legacy `client.ChatCompletion` /
        # `response['usage']` forms raise on this client.
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a sentiment analysis tool that provides only a numerical sentiment score between 0 (very negative) and 1 (very positive)."},
                {"role": "user", "content": f"Analyze the sentiment of the following text and provide a score between 0 (very negative) and 1 (very positive):\n\n{text}\n\nSentiment score:"}
            ],
            temperature=0
        )
        sentiment_text = (response.choices[0].message.content or "").strip()

        # Take the first standalone number starting with 0 or 1; the
        # lookarounds keep digits inside larger numbers (e.g. "10") from
        # being read as a score.
        match = re.search(r"(?<![\d.])([01](?:\.\d+)?)(?!\d)", sentiment_text)
        sentiment_score = float(match.group(1)) if match else 0.5
        # Keep the documented 0..1 range even for replies such as "1.5".
        sentiment_score = min(max(sentiment_score, 0.0), 1.0)

        usage = response.usage
        token_usage = usage.total_tokens if usage is not None else 0
        total_token_count += token_usage  # Add to global token count

        print(f"Tokens used in this request: {token_usage}")
        return sentiment_score, token_usage
    except Exception as e:
        print(f"Error during sentiment analysis: {e}")
        return 0.5, 0

def evaluate_episode(episode_number, episode_key, summaries_vectorstore, episode_key_to_doc, summaries_data, episodes, episode_keys, sentiment_results):
    """Score the narrative coherence of one episode with the chat model.

    Steps:
      1. Keep the summaries of all episodes numbered up to ``episode_number``.
      2. Rank them by TF-IDF cosine similarity to the current episode and
         take up to 15 (excluding the current one).
      3. Obtain a sentiment score for every kept episode (cached through
         ``sentiment_results``) and keep the similar episodes whose sentiment
         differs from the current episode's by less than 0.4.
      4. Build a prompt from those summaries, the current episode's full text
         and its key item status, send it to the model and parse the reply.

    Parameters:
        episode_number: Number of the episode to evaluate.
        episode_key: Key of that episode in the summaries and full contents.
        summaries_vectorstore: Not used by this function; kept for interface
            compatibility.
        episode_key_to_doc: Mapping episode key -> Document with the full text.
        summaries_data: Mapping ``(episode_number, episode_key)`` -> dict with
            ``'content'`` and ``'key_item_status'``.
        episodes: Summary texts, parallel to ``episode_keys``.
        episode_keys: Episode keys of the summaries.
        sentiment_results: Mutable sentiment cache (see analyze_or_get_sentiment).

    Returns:
        dict with ``'score'`` and ``'evaluation_reasoning'``. ``'score'`` is
        ``None`` when the episode cannot be evaluated (missing summary, empty
        content, model error) or no score could be parsed.

    NOTE(review): TfidfVectorizer and cosine_similarity are not imported in
    the visible part of this notebook; presumably scikit-learn is imported in
    another cell - confirm.
    """
    llm = ChatOpenAI(model_name="gpt-4o-mini", openai_api_key=client.api_key)

    # Only episodes up to the one being evaluated may serve as context.
    # Walking the two parallel lists together avoids a list.index() lookup
    # per episode.
    filtered_episodes = []
    filtered_episode_keys = []
    for ep_key, ep_text in zip(episode_keys, episodes):
        ep_num = get_episode_number(ep_key)
        if ep_num is not None and ep_num <= episode_number:
            filtered_episodes.append(ep_text)
            filtered_episode_keys.append(ep_key)

    # The key comes from the full-contents file while the summaries come from
    # a different file; report a mismatch instead of letting list.index()
    # raise ValueError and abort the interactive session.
    if episode_key not in filtered_episode_keys:
        message = f"Error: No summary found for {episode_key} (Episode {episode_number}); cannot evaluate."
        print(message)
        return {
            "score": None,
            "evaluation_reasoning": message
        }

    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(filtered_episodes)

    current_index = filtered_episode_keys.index(episode_key)
    episode_vector = tfidf_matrix[current_index]
    cosine_similarities = cosine_similarity(episode_vector, tfidf_matrix)[0]

    # Take n+1 candidates because the current episode itself is always among
    # the most similar and is removed afterwards.
    n = 15
    similar_indices = cosine_similarities.argsort()[-(n+1):][::-1]
    similar_indices = [i for i in similar_indices if i != current_index][:n]

    print(f"Episodes most similar to Episode {episode_number} ({episode_key}):")
    for idx in similar_indices:
        sim_episode_key = filtered_episode_keys[idx]
        sim_episode_number = get_episode_number(sim_episode_key)
        print(f"Episode {sim_episode_number} ({sim_episode_key}) - Similarity: {cosine_similarities[idx]:.2f}")

    print("\nPerforming sentiment analysis...")
    sentiment_scores = []
    for idx, text in enumerate(filtered_episodes):
        ep_number = get_episode_number(filtered_episode_keys[idx])
        if ep_number is not None:
            score = analyze_or_get_sentiment(ep_number, text, sentiment_results)
            sentiment_scores.append(score)
            print(f"Episode {filtered_episode_keys[idx]} Sentiment Score: {score}")
        else:
            sentiment_scores.append(0.5)
            print(f"Episode {filtered_episode_keys[idx]} Sentiment Score: 0.5 (Default due to missing episode number)")

    episode_sentiment = sentiment_scores[current_index]
    print(f"\nSentiment analysis result for Episode {episode_number} ({episode_key}): {episode_sentiment}")

    # Keep only similar episodes whose sentiment is close to the current one.
    selected_episodes = []
    threshold = 0.4
    for idx in similar_indices:
        sim_score = sentiment_scores[idx]
        if abs(episode_sentiment - sim_score) < threshold:
            sim_episode_key = filtered_episode_keys[idx]
            sim_episode_number = get_episode_number(sim_episode_key)
            sim_similarity = cosine_similarities[idx]
            selected_episodes.append({
                'episode_number': sim_episode_number,
                'episode_key': sim_episode_key,
                'similarity': sim_similarity,
                'sentiment_score': sim_score
            })

    # Present the context in story order.
    selected_episodes.sort(key=lambda x: x['episode_number'] if x['episode_number'] is not None else float('inf'))

    print("\nFinal selected episodes (sorted by episode number):")
    for ep in selected_episodes:
        print(f"Episode {ep['episode_number']} ({ep['episode_key']}) - Similarity: {ep['similarity']:.2f}, Sentiment Score: {ep['sentiment_score']}")

    previous_summaries = ''

    for ep in selected_episodes:
        sim_episode_number = ep['episode_number']
        sim_episode_key = ep['episode_key']
        sim_similarity = ep['similarity']
        sim_sentiment = ep['sentiment_score']
        summary_data = summaries_data.get((sim_episode_number, sim_episode_key), {})
        content = summary_data.get('content', '')
        previous_summaries += f"**Episode {sim_episode_number} ({sim_episode_key}) - Similarity: {sim_similarity:.2f}, Sentiment Score: {sim_sentiment}:**\n{content}\n\n"

    if not previous_summaries.strip():
        previous_summaries = "No similar episodes found."

    current_episode_doc = episode_key_to_doc.get(episode_key)
    if current_episode_doc:
        current_episode_text = current_episode_doc.page_content
    else:
        print(f"Warning: {episode_key} not found in episodes data.")
        current_episode_text = ''

    if not current_episode_text.strip():
        print(f"Error: Current episode content for {episode_key} is empty.")
        return {
            "score": None,
            "evaluation_reasoning": f"Error: Current episode content for {episode_key} is empty."
        }

    key_item_status = summaries_data.get((episode_number, episode_key), {}).get('key_item_status', 'No key item status.')
    key_item_status_str = format_key_item_status(key_item_status)

    prompt = f"""
You are a meticulous literary critic specializing in narrative coherence.

As you read, pay special attention to the continuity and consistency of key items and their statuses.

**Similar Episodes Summaries:**
{previous_summaries}

**Current Episode ({episode_number}) Full Content:**
{current_episode_text}

**Current Episode ({episode_number}) Key Item Status:**
{key_item_status_str}

Please provide a critical evaluation of the current episode, focusing on:

1. **Character Consistency** - Evaluate whether the actions and dialogues of main characters in this scene align with their established traits. Note any inconsistencies and assess if they are justified by new developments.

2. **Plot Progression** - Analyze how this scene contributes to the overall story. Assess whether newly introduced elements logically extend the plot and effectively advance or resolve narrative threads.

3. **Emotional and Psychological Realism** - Review the authenticity of the main characters' emotional and psychological responses. Evaluate whether these reactions are believable and consistent with their character development and the situation.

4. **Foreshadowing and Setup for the Next Episode** - Examine how this scene prepares for subsequent developments. Consider whether it hints at future twists or sets the groundwork for upcoming narrative shifts.

5. **Continuity and Consistency in Story Elements** - Examine the episode for any inconsistencies or continuity errors, such as objects appearing or disappearing without explanation, conflicting information, or events that contradict prior established facts. Pay particular attention to items that were lost or destroyed in previous episodes but reappear without explanation. Assess how these issues impact the narrative coherence.

Provide a balanced and critical evaluation, pointing out both strengths and weaknesses. Ensure that your reasoning clearly supports the score you assign.

**Score (0-5) and Justification:**

Scoring Guidelines:
- **5 (Excellent):** The episode is highly coherent, with consistent characters, strong plot development, realistic emotions, and no major errors.

- **4 (Good):** The episode is mostly coherent, but there are small issues with characters, plot, or minor inconsistencies.

- **3 (Fair):** The episode has some noticeable inconsistencies that affect the flow.

- **2 (Poor):** The episode has major issues with consistency and continuity.

- **1 (Very Poor):** The episode is incoherent, with severe flaws and continuity problems.

- **0 (Unacceptable):** The episode is completely incoherent, with critical errors that make it nonsensical, and Error in the status of key items.
"""

    print("\nFull GPT Prompt:")
    print(prompt)
    print("-" * 50)

    try:
        response = llm.invoke([HumanMessage(content=prompt)])
        response_text = response.content
    except Exception as e:
        print(f"Error during GPT evaluation: {e}")
        return {
            "score": None,
            "evaluation_reasoning": f"Error during GPT evaluation: {e}"
        }

    evaluation_result = parse_evaluation_response(response_text)

    print(f"Score: {evaluation_result['score']}")
    print("Evaluation Reasoning:")
    print(evaluation_result['evaluation_reasoning'])

    return evaluation_result
def analyze_complex_question(characters, episodes, summaries_data, episode_key_to_doc, episode_number_to_key, question_text, summaries_vectorstore, k=10):
    """Answer a free-form question about characters or plot across episodes.

    Every number found in the question is treated as an episode number; the
    largest one becomes the cut-off, so later episodes are excluded from the
    context. Relevant summary chunks are retrieved from the vector store, and
    any mentioned episode that retrieval missed is added from the full episode
    texts.

    Parameters:
        characters, episodes, summaries_data: Not used by this function; kept
            for interface compatibility.
        episode_key_to_doc: Mapping episode key -> Document with the full text.
        episode_number_to_key: Mapping episode number -> episode key.
        question_text: The user's question.
        summaries_vectorstore: Object providing ``similarity_search(query, k=...)``
            over the summary chunks.
        k: Number of chunks to retrieve.

    Returns:
        The model's answer text, or ``None`` if the model call fails.
    """
    # Extract episode numbers mentioned in the question
    episode_numbers = re.findall(r'\b(?:episode)?\s*(\d+)\b', question_text.lower())
    specified_episodes = {int(num) for num in episode_numbers}

    has_cutoff = bool(specified_episodes)
    max_episode = max(specified_episodes) if has_cutoff else float('inf')

    # Perform similarity search on the question
    docs = summaries_vectorstore.similarity_search(question_text, k=k)
    relevant_summaries = []
    retrieved_episode_numbers = set()

    print("\nRetrieved relevant episodes and their content:")
    for doc in docs:
        episode_key = doc.metadata.get('episode_key', 'Unknown Episode')
        episode_number = get_episode_number(episode_key)
        if episode_number is not None and episode_number <= max_episode:
            relevant_summaries.append((episode_number, episode_key, doc.page_content))
            retrieved_episode_numbers.add(episode_number)

    # Manually add specified episodes if they are missing from the similarity search results
    for ep_num in specified_episodes:
        if ep_num not in retrieved_episode_numbers:
            episode_key = episode_number_to_key.get(ep_num)
            if episode_key:
                doc = episode_key_to_doc.get(episode_key)
                if doc:
                    relevant_summaries.append((ep_num, episode_key, doc.page_content))
                    print(f"Manually added Episode {ep_num} ({episode_key})")

    # Sort by episode number
    relevant_summaries.sort(key=lambda x: x[0])

    formatted_summaries = ""
    for episode_number, episode_key, content in relevant_summaries:
        print(f"Episode {episode_number} ({episode_key})")
        formatted_summaries += f"**Episode {episode_number} ({episode_key}):**\n{content}\n\n"

    # The cut-off wording is only meaningful when the question names an
    # episode; otherwise the prompt used to read "Episode inf". With a
    # cut-off, the assembled prompt is identical to the original text.
    if has_cutoff:
        question_scope = f"about events up to and including Episode {max_episode}"
        summaries_scope = f" (which only include information up to Episode {max_episode})"
        cutoff_rule = f" Do not consider or mention any events that occur after Episode {max_episode}."
        closing_scope = f", ensuring you only discuss events and character development up to Episode {max_episode}"
    else:
        question_scope = "about the story"
        summaries_scope = ""
        cutoff_rule = ""
        closing_scope = ""

    prompt = f"""
You are an expert in literary analysis.

The user has the following question {question_scope}:

"{question_text}"

Based on the following episode summaries{summaries_scope}, please provide a detailed analysis to answer the user's question. Focus on character motivations, emotional changes, and plot development across the relevant episodes.{cutoff_rule}

**Relevant Episode Summaries:**
{formatted_summaries}

Provide a clear and concise response that directly addresses the user's inquiry{closing_scope}.
"""

    llm = ChatOpenAI(model_name="gpt-4o-mini", openai_api_key=client.api_key)

    print("\nFull GPT Prompt for Complex Question:")
    print(prompt)
    print("-" * 50)

    try:
        response = llm.invoke([HumanMessage(content=prompt)])
        return response.content
    except Exception as e:
        print(f"Error during complex analysis: {e}")
        return None
def main():
    """Interactive loop: evaluate episodes or answer plot/character questions.

    Loads the merged summaries and the full storyline, builds the vector
    store, then repeatedly reads a request from the user. Each request is
    classified by the chat model as either an episode evaluation or a
    character/plot analysis and dispatched accordingly. Typing 'exit' ends
    the loop, after which the sentiment cache, the token total and any
    evaluation results are written out.
    """
    summaries_path = '/content/stoeyline26 summary_key_item.json'
    full_contents_path = '/content/Storyline_26.json'
    output_path = '/content/evaluation_results.json'

    # Load the summaries and full contents data
    summaries_documents, episodes, episode_keys = load_summaries(summaries_path)
    summaries_vectorstore = create_vectorstore(summaries_documents)

    full_contents_documents, episode_key_to_doc, episode_number_to_key = load_full_contents(full_contents_path)

    # Load sentiment results from previous runs
    sentiment_results = load_sentiment_results()

    # Initialize summaries_data to map episode numbers and keys to content and key item status
    summaries_data = {}
    for doc in summaries_documents:
        episode_key = doc.metadata.get('episode_key', '')
        key_item_status = doc.metadata.get('key_item_status', '')
        episode_number = get_episode_number(episode_key)
        if episode_number is not None:
            summaries_data[(episode_number, episode_key)] = {
                'content': doc.page_content,
                'key_item_status': key_item_status
            }
        else:
            print(f"Warning: Could not extract episode number from key '{episode_key}'")

    evaluation_results = OrderedDict()

    # The classifier model does not depend on the request, so build it once.
    llm = ChatOpenAI(model_name="gpt-4o", openai_api_key=client.api_key)

    while True:
        # Get user input for the question or command
        user_input = input("Please enter your question (type 'exit' to quit): ")

        if user_input.lower().strip() == 'exit':
            print("Program exited.")
            break

        # Every number in the request is treated as an episode number.
        episode_numbers = re.findall(r'\b(?:episode)?\s*(\d+)\b', user_input.lower())
        max_episode = max(int(num) for num in episode_numbers) if episode_numbers else None

        # Only requests that name an episode can exceed the available data.
        # Comparing an "infinite" default here used to reject every question
        # that mentioned no episode at all.
        if max_episode is not None and max_episode > len(summaries_documents):
            print(f"Warning: The question involves Episode {max_episode}, but we only have data for {len(summaries_documents)} episodes.")
            continue

        # Use LLM (ChatOpenAI) to classify whether it's an evaluation or analysis request
        classification_prompt = f"""
The user has input the following question or request:

"{user_input}"

Please determine whether the user wants to:

1. **Evaluate an episode** - The user wants a critical evaluation of a specific episode.

2. **Analyze characters or plot** - The user wants an analysis of characters, behaviors, or plot elements across one or more episodes.

Please respond with only one of the following options: "Evaluate Episode" or "Analyze Characters/Plot". Do not include any additional text.
"""
        try:
            response = llm.invoke([HumanMessage(content=classification_prompt)])
            # Compare case-insensitively and by containment so quotes, a
            # trailing period or a leading "1." do not defeat the match.
            classification = response.content.strip().lower()
        except Exception as e:
            print(f"Error during classification: {e}")
            continue

        # Handle the classified request
        if "evaluate episode" in classification:
            if episode_numbers:
                # The first number mentioned is the episode to evaluate.
                episode_number = int(episode_numbers[0])
                episode_key = episode_number_to_key.get(episode_number)

                if episode_key:
                    print(f"Evaluating {episode_key}...")

                    # Evaluate the episode using the provided vectorstore and data
                    evaluation_result = evaluate_episode(
                        episode_number, episode_key, summaries_vectorstore,
                        episode_key_to_doc, summaries_data, episodes, episode_keys, sentiment_results
                    )
                    if evaluation_result:
                        print(f"Score: {evaluation_result['score']}")
                        print("Evaluation Reasoning:")
                        print(evaluation_result['evaluation_reasoning'])
                    else:
                        print("No evaluation result found.")

                    evaluation_results[episode_key] = evaluation_result

                    # Persist newly computed sentiment scores right away so
                    # they survive an interrupted session.
                    save_sentiment_results(sentiment_results)
                else:
                    print(f"Episode {episode_number} does not exist.")
            else:
                print("Unable to extract episode number from your request. Please retry with a valid episode number.")

        elif "analyze characters/plot" in classification:
            # Handle the complex question analysis across episodes
            result = analyze_complex_question([], [], summaries_data,
                                              episode_key_to_doc, episode_number_to_key,
                                              user_input, summaries_vectorstore, k=15)
            if result:
                print(f"\nAnalysis result:\n{result}\n")
            else:
                print("Unable to generate analysis result. Please try again.")

        else:
            print("Unable to understand your request. Please try again.")

    # Save sentiment results back to the file
    save_sentiment_results(sentiment_results)

    # Output total token usage at the end
    print(f"Total tokens consumed: {total_token_count}")

    # Save evaluation results to a file if necessary
    if evaluation_results:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(evaluation_results, f, indent=2, ensure_ascii=False)
        print(f"All evaluation results have been saved to {output_path}")


# Entry point for this cell: start the interactive evaluation/analysis loop
# only when the code is executed directly rather than imported.
if __name__ == "__main__":
    main()