In [1]:
import pandas as pd
import glob
import os
import math
from google import genai  # para la API de Gemini
from google.genai import types
import time

# Directory that is searched recursively for the book ``.txt`` files.
root_path = "books"
# Read eagerly so a missing key fails here (KeyError) instead of mid-run.
GEMINI_API_KEY = os.environ["GOOGLE_API_KEY"]

# Model identifier passed to the Gemini API.
model_name = 'gemini-2.5-flash'
# Label embedded in the output CSV file names. Derived from model_name so the
# label can never disagree with the model that actually produced the text.
generator = model_name
# Sampling temperature; also embedded in the output CSV file names.
temperature = 0.4

# glob yields matches in arbitrary, filesystem-dependent order; sorting makes
# the processing order (and the row order of newly created CSVs) reproducible.
txt_files = sorted(glob.glob(os.path.join(root_path, "**", "*.txt"), recursive=True))

def _describe_empty_response(response, model_name):
    """Gather diagnostic fields from a response that carried no text.

    Every attribute is read defensively with ``getattr`` so a missing field
    shows up as ``None`` instead of raising while building the report.

    Args:
        response: Object returned by ``client.models.generate_content``.
        model_name: Model identifier, echoed into the report.

    Returns:
        A dict with the keys ``model``, ``finish_reasons``, ``safety_ratings``,
        ``usage``, ``prompt_feedback`` and ``candidates``.
    """
    debug = {
        "model": model_name,
        "finish_reasons": [],
        "safety_ratings": [],
        "usage": None,
        "prompt_feedback": None,
        "candidates": None,
    }

    candidates = getattr(response, "candidates", None)
    if candidates is not None:
        infos = []
        for c in candidates:
            finish_reason = getattr(c, "finish_reason", None)
            infos.append({
                "finish_reason": finish_reason,
                "token_count": getattr(c, "token_count", None),
                "safety_ratings": getattr(c, "safety_ratings", None),
            })
            # Also surface the finish reasons at the top level of the report;
            # this list was declared but never filled before.
            debug["finish_reasons"].append(finish_reason)
            if hasattr(c, "safety_ratings"):
                debug["safety_ratings"].append(c.safety_ratings)
        debug["candidates"] = infos

    usage = getattr(response, "usage_metadata", None)
    if usage is not None:
        debug["usage"] = {
            "prompt_tokens": getattr(usage, "prompt_token_count", None),
            "total_tokens": getattr(usage, "total_token_count", None),
            "candidates_tokens": getattr(usage, "candidates_token_count", None),
        }

    prompt_feedback = getattr(response, "prompt_feedback", None)
    if prompt_feedback is not None:
        # The SDK objects expose snake_case attributes; the camelCase names are
        # only the wire-format spelling, so reading them via getattr always
        # produced None. The report keys keep their original spelling.
        debug["prompt_feedback"] = {
            "blockReason": getattr(prompt_feedback, "block_reason", None),
            "blockReasonMessage": getattr(prompt_feedback, "block_reason_message", None),
            "safetyRatings": getattr(prompt_feedback, "safety_ratings", None),
        }

    return debug


def call_gemini(prompt, temperature, model_name = 'gemini-2.5-flash'):
    """
    Calls the Gemini model using the Google genai package.

    Args:
        prompt: Text sent as the request contents.
        temperature: Sampling temperature for the generation config.
        model_name: Gemini model identifier.

    Returns:
        The response text with surrounding whitespace stripped, or ``None``
        when the response contains no text. In the ``None`` case a diagnostic
        report is printed first.

    Raises:
        Exception: Any error raised while calling the API or reading the
            response is re-raised as ``Exception("Gemini error: ...")`` with
            the original error attached as ``__cause__``.
    """
    try:
        client = genai.Client(api_key=GEMINI_API_KEY)
        response = client.models.generate_content(
            model=model_name,
            contents=prompt,
            config=types.GenerateContentConfig(temperature=temperature,
                                              max_output_tokens=None)
        )
        text_response = getattr(response, "text", None)

        if not text_response:
            debug = _describe_empty_response(response, model_name)
            print("Gemini returned empty text. Debug info:", debug)
            return None

        return text_response.strip()

    except Exception as e:
        # Chain explicitly so the original traceback stays attached.
        raise Exception(f"Gemini error: {e}") from e
        


# Prompt sent to the model for every book. ``{book_text}`` is the only
# placeholder and is filled in with ``str.format``. The wording is kept exactly
# as is: it is runtime text, and altering it would alter the model's output.
prompt_template = (
    "Provide a very detailed summary of the plot for the following book: "
    "The summary must be of the text provided, do NOT use any internal or "
    "previous knowledge that you may have on that book to create the summary. "
    "Include all main events and the complete storyline, detailing every key "
    "development, situations, events with characters, and the conclusion. "
    "Do NOT include any historical context, literary analysis, or "
    "philosophical discussion- only the plot.\n"
    'Full text of the book: "{book_text}"'
)



In [2]:
# Produce five independent result files. Each pass summarises every book in
# txt_files, resumes from an existing CSV, and saves after every book so an
# interrupted run loses at most the book in progress.
for file_index in range(5):
    output_csv = f"Results_external/books_summaries_generator_{generator}_temperature_{temperature}_{file_index+1}.csv"
    # Create the results directory up front; otherwise the first to_csv fails
    # only after an API call has already been made.
    os.makedirs(os.path.dirname(output_csv), exist_ok=True)

    if os.path.exists(output_csv):
        df = pd.read_csv(output_csv)
        # A column holding only missing values is read back as float; keep it
        # as object so a text summary can be assigned without an
        # incompatible-dtype write.
        df["summary_llm"] = df["summary_llm"].astype(object)
    else:
        df = pd.DataFrame(columns=["book_title", "summary_llm"])

    for file_path in txt_files:
        # The book title is the file name without its extension.
        book_title = os.path.splitext(os.path.basename(file_path))[0]
        # Boolean mask of rows for this title; df is not modified between here
        # and the update below, so the mask can be reused.
        title_rows = df["book_title"] == book_title
        has_row = bool(title_rows.any())

        # Skip books that already have a non-missing summary. Rows whose
        # summary is missing (an earlier empty response) are retried.
        if has_row and not pd.isna(df.loc[title_rows, "summary_llm"].values[0]):
            print(f"skip: {book_title}")
            continue

        with open(file_path, "r", encoding="utf-8") as file:
            content = file.read()

        prompt = prompt_template.format(book_text = content)

        response = call_gemini(prompt, temperature=temperature, model_name = model_name)
        print(response)
        if has_row:
            df.loc[title_rows, "summary_llm"] = response
        else:
            df = pd.concat([df, pd.DataFrame([{
                "book_title": book_title,
                "summary_llm": response
            }])], ignore_index=True)

        # Persist after every book so progress survives a crash.
        df.to_csv(output_csv, index=False, encoding="utf-8-sig")

        if response is None:
            print(f"something wrong: {file_path}")
        # Pause between requests whether or not the call produced text, so a
        # run of empty responses does not send requests back-to-back.
        time.sleep(5)

    print(f"saved to: {output_csv}")


skip: A Doll_s House-Henrik Ibsen
skip: Alice_s Adventures in Wonderland-Lewis Carroll
skip: Anna Karenina-Leo Tolstoy
skip: Crime and Punishment-Fyodor Dostoyevsky
skip: Death in Venice-Thomas Mann
skip: Don Quixote-Miguel de Cervantes
skip: Dracula-Bram Stoker
skip: Frankenstein-Mary Shelley
skip: Great Expectations-Charles Dickens
skip: Hamlet-William Shakespeare
skip: Jane Eyre-Charlotte Brontë
skip: Madame Bovary-Gustave Flaubert
skip: Mrs. Dalloway-Virginia Woolf
skip: Pride and Prejudice-Jane Austen
skip: Robinson Crusoe-Daniel Defoe
skip: Six Characters in Search of an Author-Luigi Pirandello
skip: The Adventures of Huckleberry Finn-Mark Twain
skip: The Count of Monte Cristo-Alexandre Dumas
skip: The Decameron-Giovanni Boccaccio
skip: The Divine Comedy-Dante Alighieri
skip: The Great Gatsby-F. Scott Fitzgerald
skip: The Iliad-Homer
skip: The Scarlet Letter-Nathaniel Hawthorne
skip: Twenty Thousand Leagues under the Sea-Jules Verne
skip: Ulysses-James Joyce
saved to: Results_ext