In [3]:
import pandas as pd
import glob
import os
import math
from google import genai  # para la API de Gemini
from google.genai import types
import time

# Folder that is searched recursively for the books' .txt files.
root_path = "books"
# Fail fast with a KeyError when the GOOGLE_API_KEY environment variable is unset.
GEMINI_API_KEY = os.environ["GOOGLE_API_KEY"]

# Model that is queried, and the label embedded in the output file names.
model_name = 'gemini-2.5-flash'
generator = 'gemini-2.5-flash'
temperature = 0.9

# glob returns matches in arbitrary order; sort them so the books are processed
# (and logged) in the same order on every run and platform.
txt_files = sorted(glob.glob(os.path.join(root_path, "**", "*.txt"), recursive=True))

def _first_attr(obj, *names):
    """Return the first non-None attribute of ``obj`` among ``names``, else None."""
    for name in names:
        value = getattr(obj, name, None)
        if value is not None:
            return value
    return None


def _empty_response_debug(response, model_name):
    """Collect diagnostics describing a Gemini response that carried no text."""
    debug = {
        "model": model_name,
        "finish_reasons": [],
        "safety_ratings": [],
        "usage": None,
        "prompt_feedback": None,
        "candidates": None,
    }

    candidates = getattr(response, "candidates", None)
    if candidates is not None:
        infos = []
        for candidate in candidates:
            finish_reason = getattr(candidate, "finish_reason", None)
            safety_ratings = getattr(candidate, "safety_ratings", None)
            infos.append({
                "finish_reason": finish_reason,
                "token_count": getattr(candidate, "token_count", None),
                "safety_ratings": safety_ratings,
            })
            # Surface the per-candidate stop reasons and safety ratings at the
            # top level so the cause is visible without digging into "candidates".
            if finish_reason is not None:
                debug["finish_reasons"].append(finish_reason)
            if safety_ratings is not None:
                debug["safety_ratings"].append(safety_ratings)
        debug["candidates"] = infos

    # Token usage metadata.
    usage = getattr(response, "usage_metadata", None)
    if usage is not None:
        debug["usage"] = {
            "prompt_tokens": getattr(usage, "prompt_token_count", None),
            "total_tokens": getattr(usage, "total_token_count", None),
            "candidates_tokens": getattr(usage, "candidates_token_count", None),
        }

    # Prompt feedback / block information. The Python SDK exposes snake_case
    # attribute names; the camelCase spellings are kept only as a fallback.
    prompt_feedback = getattr(response, "prompt_feedback", None)
    if prompt_feedback is not None:
        debug["prompt_feedback"] = {
            "blockReason": _first_attr(prompt_feedback, "block_reason", "blockReason"),
            "blockReasonMessage": _first_attr(
                prompt_feedback, "block_reason_message", "blockReasonMessage"
            ),
            "safetyRatings": _first_attr(prompt_feedback, "safety_ratings", "safetyRatings"),
        }

    return debug


def call_gemini(prompt, temperature, model_name = 'gemini-2.5-flash'):
    """
    Call a Gemini model through the google-genai package and return its text.

    Args:
        prompt: Text sent as the ``contents`` of the request.
        temperature: Sampling temperature placed in the generation config.
        model_name: Identifier of the Gemini model to query.

    Returns:
        The response text with surrounding whitespace stripped, or ``None``
        when the response carries no text. In the latter case a dictionary of
        diagnostics (finish reasons, safety ratings, token usage, prompt
        feedback) is printed before returning.

    Raises:
        Exception: Any error raised while building the client or calling the
            API, re-raised with a "Gemini error: " prefix and the original
            exception chained as its cause.
    """
    try:
        client = genai.Client(api_key=GEMINI_API_KEY)
        response = client.models.generate_content(
            model=model_name,
            contents=prompt,
            config=types.GenerateContentConfig(temperature=temperature,
                                              max_output_tokens=None)
        )
        text_response = getattr(response, "text", None)

        if text_response:
            return text_response.strip()

        # No text came back (for example a blocked prompt or an early stop):
        # report why instead of failing, so the caller can retry later.
        print("Gemini returned empty text. Debug info:",
              _empty_response_debug(response, model_name))
        return None

    except Exception as e:
        raise Exception(f"Gemini error: {str(e)}") from e
        


# Prompt sent for every book; "{title}" and "{author}" are filled in with str.format.
prompt_template = (
    'Provide a very detailed summary of the plot for the book "{title}" by {author}. '
    'The summary must be of the original book, NOT any adaptation like a film or TV show. '
    'Include all main events and the complete storyline, detailing every key development, '
    'situations, events with characters, and the conclusion. '
    'Do NOT include any historical context, literary analysis, or philosophical discussion, '
    'only the plot.'
)


In [4]:
# Produce five independent sets of summaries, one CSV per run. The CSV is
# rewritten after every book, so an interrupted run resumes where it stopped.
for file_index in range(5):
    output_csv = f"Results_internal/books_summaries_generator_{generator}_temperature_{temperature}_{file_index+1}.csv"
    print(output_csv)
    # Make sure the target folder exists before any API call is made, so the
    # first save cannot fail and throw away a generated summary.
    os.makedirs(os.path.dirname(output_csv), exist_ok=True)
    if os.path.exists(output_csv):
        # Keep the summary column as object: when every saved summary is empty
        # it would otherwise load as float64 and conflict with storing text.
        df = pd.read_csv(output_csv, dtype={"summary_llm": object})
    else:
        df = pd.DataFrame(columns=["book_title", "summary_llm"])

    for file_path in txt_files:
        book_title = os.path.splitext(os.path.basename(file_path))[0]

        # File names are expected to look like "<title>-<author>"; the title may
        # use "_s" in place of "'s".
        title_part, separator, author = book_title.partition("-")
        if not separator:
            print(f"skipped (no '-' between title and author): {book_title}")
            continue
        book_title_only = title_part.replace("_s", "'s")

        # Rows already stored for this book; a row with an empty summary is a
        # previous failure and is retried.
        row_mask = df["book_title"] == book_title
        already_listed = bool(row_mask.any())
        if already_listed and not pd.isna(df.loc[row_mask, "summary_llm"].values[0]):
            print(f"skipped: {book_title}")
            continue

        prompt = prompt_template.format(title = book_title_only, author=author)
        print(prompt)
        response = call_gemini(prompt, temperature=temperature, model_name = model_name)
        print(response)
        if already_listed:
            df.loc[row_mask, "summary_llm"] = response
        else:
            df = pd.concat([df, pd.DataFrame([{
                "book_title": book_title,
                "summary_llm": response
            }])], ignore_index=True)

        # Save after every book so progress survives an interruption.
        df.to_csv(output_csv, index=False, encoding="utf-8-sig")

        if response is None:
            print(f"something wrong: {file_path}")
            continue
        # Pause between successful requests.
        time.sleep(5)

    print(f"saved: {output_csv}")


Results_internal/books_summaries_generator_gemini-2.5-flash_temperature_0.9_1.csv
skipped: A Doll_s House-Henrik Ibsen
skipped: Alice_s Adventures in Wonderland-Lewis Carroll
skipped: Anna Karenina-Leo Tolstoy
skipped: Crime and Punishment-Fyodor Dostoyevsky
skipped: Death in Venice-Thomas Mann
skipped: Don Quixote-Miguel de Cervantes
skipped: Dracula-Bram Stoker
skipped: Frankenstein-Mary Shelley
skipped: Great Expectations-Charles Dickens
skipped: Hamlet-William Shakespeare
skipped: Jane Eyre-Charlotte Brontë
skipped: Madame Bovary-Gustave Flaubert
skipped: Mrs. Dalloway-Virginia Woolf
skipped: Pride and Prejudice-Jane Austen
skipped: Robinson Crusoe-Daniel Defoe
skipped: Six Characters in Search of an Author-Luigi Pirandello
skipped: The Adventures of Huckleberry Finn-Mark Twain
skipped: The Count of Monte Cristo-Alexandre Dumas
skipped: The Decameron-Giovanni Boccaccio
skipped: The Divine Comedy-Dante Alighieri
skipped: The Great Gatsby-F. Scott Fitzgerald
skipped: The Iliad-Homer
