#### Import packages

In [22]:
from google import genai

In [23]:
from google.genai import types

In [24]:
import os
import json
import time
import re
import pandas as pd

#### Gemini API key

In [25]:
# Read the key from the environment so it never has to be pasted into (and
# saved with) the notebook. Falls back to the empty string when the variable is
# unset, which matches the previous hard-coded value.
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', '')

In [26]:
# Shared Gemini client, authenticated with GEMINI_API_KEY; the scoring loops
# below send their generate_content requests through it.
client = genai.Client(api_key=GEMINI_API_KEY)

#### Function to build prompt with each story

In [27]:
def build_prompt(story):
    """Return the evaluation prompt for a single short story.

    The story text is interpolated verbatim into a fixed template that asks
    the model to rate it on fourteen creative-writing dimensions (0-3 each)
    and to answer, per dimension, with a "Reasoning:" line followed by a
    "<Dimension Name> Score: <n>" line.

    Args:
        story: Full text of the story to evaluate.

    Returns:
        The complete prompt string. Every template line keeps the leading
        indentation (and trailing whitespace) of the literal below.
    """
    return f"""
    You are a creative writing expert.  
    Read the following short story:
    
    {story}
    
    Evaluate the story across fourteen dimensions of creative writing quality.  
    For each dimension, provide both (1) reasoning and (2) a score on a scale from 0 to 3.  
    
    === DIMENSIONS ===
    
    **1. Narrative Ending**  
    - 0 = Abrupt, arbitrary, unearned.  
    - 1 = Some closure but rushed.  
    - 2 = Mostly satisfying and natural.  
    - 3 = Fully earned, resonant closure.  
    *Guidance:* Endings should feel purposeful, not like the writer “got tired.” 

    **2. Understandability & Coherence**  
    - 0 = Disjointed, confusing.  
    - 1 = Some logic but uneven coherence.  
    - 2 = Mostly unified and logical.  
    - 3 = Fully coherent; beginning, middle, and end flow deliberately.  
    *Guidance:* Strong writing holds together as an intentional, unified piece.

    **3. Scene vs. Exposition **  
    - 0 = Reliance on exposition only.  
    - 1 = Some scenes, but balance off.  
    - 2 = Mostly balanced with minor unevenness.  
    - 3 = Excellent balance of scene & exposition.  
    *Guidance:* Strong writing balances dramatized scenes with necessary summary/backstory. 

    **4. Narrative Pacing**  
    - 0 = Poor control; rushed or dragging.  
    - 1 = Some control but uneven.  
    - 2 = Mostly appropriate manipulation of time.  
    - 3 = Excellent pacing; compression/stretching used for dramatic effect.  
    *Guidance:* Skilled pacing balances story time and narrative time for engagement.

    **5. Language Proficiency & Literary Devices**  
    - 0 = Flat, literal, or clumsy language.  
    - 1 = Some figurative language but simplistic.  
    - 2 = Mostly sophisticated use of idiom/metaphor.  
    - 3 = Highly skilled, complex use of idiom, metaphor, allusion.  
    *Guidance:* Strong use of literary devices deepens meaning and resonance. 

    **6. Emotional Flexibility**  
    - 0 = Shallow or overly introspective only.  
    - 1 = Some balance but skewed.  
    - 2 = Mostly balanced interiority/exteriority.  
    - 3 = Excellent emotional range; realistic spectrum of inner/outer life.  
    *Guidance:* Balance between inner life (thoughts/feelings) and outer life (actions/dialogue). 

    **7. Structural Flexibility**  
    - 0 = Predictable, no meaningful turns.  
    - 1 = Forced or predictable twists.  
    - 2 = Mostly surprising and appropriate.  
    - 3 = Organic, coherent, surprising twists.  
    *Guidance:* Good twists surprise yet remain consistent with the story’s logic. 

    **8. Perspective & Voice Flexibility**  
    - 0 = Limited, one-note perspective.  
    - 1 = Some variation but mostly narrow.  
    - 2 = Mostly flexible across characters/perspectives.  
    - 3 = Fully flexible voice; convincingly inhabits diverse viewpoints, even unlikable ones.  
    *Guidance:* Strong writing can inhabit multiple consciousnesses authentically.  

    **9. Originality in Thought**  
    - 0 = Heavy reliance on clichés/stereotypes.  
    - 1 = Some originality but noticeable clichés.  
    - 2 = Mostly original thinking.  
    - 3 = Entirely free of clichés; inventive ideas.  
    *Guidance:* Strong writing avoids lazy tropes and presents fresh, surprising thoughts. 

    **10. Originality in Form & Structure**  
    - 0 = Conventional, unoriginal form.  
    - 1 = Minor novelty but mostly standard.  
    - 2 = Mostly fresh structural choices.  
    - 3 = Highly original form/structure that challenges conventions.  
    *Guidance:* Formal innovation (genre-blending, letters, stream of consciousness, etc.) enriches storytelling.  
    
    **11. Originality of Theme and Content**  
    - 0 = Common/overused themes.  
    - 1 = Slightly fresh but familiar.  
    - 2 = Distinctive, thought-provoking.  
    - 3 = Highly original, transformative.  
    *Guidance:* Original themes reframe familiar ideas, explore rarely addressed concepts, or offer new insights.  
    
     **12. Rhetorical Complexity**  
    - 0 = Pure surface-level narrative.  
    - 1 = Minimal depth beyond surface.  
    - 2 = Mostly layered; some subtext.  
    - 3 = Operates at multiple levels; rich interplay of surface and subtext.  
    *Guidance:* Strong writing engages with both literal meaning and deeper symbolic/subtextual layers.  

    **13. World-Building and Setting**  
    - 0 = Vague/inconsistent world.  
    - 1 = Thin or generic setting.  
    - 2 = Mostly vivid and believable.  
    - 3 = Rich, immersive, self-consistent.  
    *Guidance:* Uses sensory detail to create a tangible, integral environment.  
    
    **14. Character Development**  
    - 0 = Flat, stereotypical characters.  
    - 1 = Some depth but many underdeveloped.  
    - 2 = Mostly complex characters with growth.  
    - 3 = Fully realized, 3D, evolving characters.  
    *Guidance:* Characters should have depth, multiple traits, and capacity to change.  
    
    === OUTPUT FORMAT ===
    
    For each dimension, respond in the format:
    
    [Dimension Name]  
    Reasoning: [your analysis]  
    [Dimension Name] Score: [0–3]  
    
    Then proceed to the next dimension.  
    """

#### Load the stories

In [28]:
# JSON file holding the stories to evaluate. The loops below read the
# 'title', 'story' and 'av_rating' keys of each entry.
file_path = 'goodreadsRatings_fullStories.json'

# Decode explicitly as UTF-8 so that non-ASCII characters in the story text do
# not depend on the platform's default encoding.
with open(file_path, 'r', encoding='utf-8') as f:
    stories = json.load(f)

#### Loop through all stories and save scores

In [29]:
# Send every story to Gemini once (no retry), parse the per-dimension scores
# out of the answer, and persist both the raw response (one JSON per story)
# and a running score table (one column per story, one row per dimension).
output_dir = "instruction_tests/Gemini/Goodreads/prompt_02"
os.makedirs(output_dir, exist_ok=True)
scores_df = pd.DataFrame(columns=['Dimension'])

# Matches answer lines shaped like "<dimension> Score: <number>". Compiled once
# because it is identical for every story.
score_pattern = re.compile(
    r'^\s*(?P<dimension>.*?)\s+Score:\s*(?P<score>\d+(?:\.\d+)?)\s*$',
    flags=re.MULTILINE,
)

i = 0
# NOTE(review): the [0:] slice looks like a knob for resuming part-way through
# the list - confirm before removing it.
for i, story in enumerate(stories[0:], start=1):
    title = story['title']
    thought_chunks = []
    answer_chunks = []

    print('---------Story:', title, '--------')

    # Build prompt
    prompt = build_prompt(story['story'])

    # Send prompt to model
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt,
        config=types.GenerateContentConfig(
            thinking_config=types.ThinkingConfig(
                include_thoughts=True
            )
        )
    )

    # Get response from model
    if response.candidates:
        # A candidate can come back without content/parts; treat that as an
        # empty answer instead of failing on a None attribute.
        content = response.candidates[0].content
        parts = (content.parts if content else None) or []
        for part in parts:
            if not part.text:
                continue
            if part.thought:
                print("Thought summary:")
                print(part.text)
                thought_chunks.append(part.text)
                print()
            else:
                print("Answer:")
                print(part.text)
                answer_chunks.append(part.text)
                print()

        # Keep the text of every part rather than only the last one.
        thoughts = "".join(thought_chunks)
        answer = "".join(answer_chunks)

        # Parse after the loop so df_story is always rebuilt for this story,
        # even when the response carried no answer part at all (previously the
        # frame of the preceding story would have been reused).
        rows = []
        for m in score_pattern.finditer(answer):
            # Drop anything in parentheses from the dimension name
            dim = re.sub(r'\s*\([^)]*\)', '', m.group('dimension')).strip()
            if not dim:
                continue

            sc = float(m.group('score'))
            rows.append({"Dimension": dim, title: sc})
            print(f"Dimension: {dim}, Score: {sc}")

        # Final row: the story's 'av_rating' value, stored next to the model scores
        rows.append({"Dimension": 'Av_Score', title: story['av_rating']})
        df_story = pd.DataFrame(rows)
    else:
        print("Didn't return content")
        feedback = response.prompt_feedback
        print("Block Reason:", feedback.block_reason if feedback else None)
        thoughts = 'N/A'
        answer = 'N/A'
        df_story = pd.DataFrame(columns=["Dimension", title])

    # Add this story's column to the table. "No story column yet" is checked
    # via the column count: DataFrame.empty would also be true after a blocked
    # first story and would then discard that story's column.
    if len(scores_df.columns) == 1:
        scores_df = df_story
    else:
        scores_df = pd.merge(scores_df, df_story, on='Dimension', how='outer')

    # Store it after every story so a crash mid-run does not lose the scores
    scores_df.to_csv(os.path.join(output_dir, 'scores_temp.csv'), index=False)

    # Save the raw response; characters that are not valid in file names are
    # replaced so that titles such as "A/B" or "What?" can still be written.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', title)
    output_path = os.path.join(output_dir, f"{safe_title}.json")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump({"story_name": title, 'thought': thoughts, 'answer': answer}, f, indent=2)

# Dimensions a story has no score for are written as "#N/A"
scores_df = scores_df.fillna("#N/A")

scores_df.to_csv(os.path.join(output_dir, 'scores.csv'), index=False)

---------Story: The Wicked Prince --------
Thought summary:
**My Assessment: A Fable of Hubris and Divine Justice**

Having carefully reviewed the short story, I've approached this analysis as a seasoned observer of creative writing, with a particular eye for the nuanced details.  It presents itself as a classic fable, employing familiar tropes to convey a moral lesson. My initial impression, reinforced through this dimensional breakdown, is that while it effectively delivers its central theme, it doesn't venture far beyond the well-trodden paths of the genre.

The story's strength lies in its clear and resonant moral. The ending is particularly well-executed, offering a satisfying and ironic conclusion to the prince's arrogant ambition. The pacing is also commendable; the compression of time effectively highlights the rise and fall of the prince. The language is rich and evocative, with particularly strong imagery that evokes the prince's evil and God's interventions. Furthermore, the

ServerError: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The model is overloaded. Please try again later.', 'status': 'UNAVAILABLE'}}

In [30]:
# Same pipeline as the previous cell, but the API call is retried while the
# service reports 503 / UNAVAILABLE. Writes one JSON per story plus a running
# score table (one column per story, one row per dimension).
output_dir = "instruction_tests/Gemini/Goodreads/prompt_02_02"
os.makedirs(output_dir, exist_ok=True)
scores_df = pd.DataFrame(columns=['Dimension'])

# Retry policy for overloaded-server errors. The attempt cap keeps a lasting
# outage from hanging the notebook forever; the last error is re-raised.
max_attempts = 30
retry_wait_seconds = 60

# Matches answer lines shaped like "<dimension> Score: <number>". Compiled once
# because it is identical for every story.
score_pattern = re.compile(
    r'^\s*(?P<dimension>.*?)\s+Score:\s*(?P<score>\d+(?:\.\d+)?)\s*$',
    flags=re.MULTILINE,
)

i = 0
# NOTE(review): the [0:] slice looks like a knob for resuming part-way through
# the list - confirm before removing it.
for i, story in enumerate(stories[0:], start=1):
    title = story['title']
    thought_chunks = []
    answer_chunks = []

    print('---------Story:', title, '--------')

    prompt = build_prompt(story['story'])

    # --- bounded retry loop for the API call ---
    for attempt in range(1, max_attempts + 1):
        try:
            response = client.models.generate_content(
                model="gemini-2.5-flash",
                contents=prompt,
                config=types.GenerateContentConfig(
                    thinking_config=types.ThinkingConfig(
                        include_thoughts=True
                    )
                )
            )
            break  # success, stop retrying
        except Exception as e:
            # Broad catch on purpose: anything that is not the overloaded
            # error is re-raised untouched. The status code attribute is
            # checked first; the message match is kept as a fallback.
            overloaded = (
                getattr(e, "code", None) == 503
                or "503" in str(e)
                or "UNAVAILABLE" in str(e)
            )
            if not overloaded or attempt == max_attempts:
                raise
            print(f"Server overloaded (503). Waiting {retry_wait_seconds}s before retry...")
            time.sleep(retry_wait_seconds)

    # --- process response ---
    if response.candidates:
        # A candidate can come back without content/parts; treat that as an
        # empty answer instead of failing on a None attribute.
        content = response.candidates[0].content
        parts = (content.parts if content else None) or []
        for part in parts:
            if not part.text:
                continue
            if part.thought:
                print("Thought summary:")
                print(part.text)
                thought_chunks.append(part.text)
                print()
            else:
                print("Answer:")
                print(part.text)
                answer_chunks.append(part.text)
                print()

        # Keep the text of every part rather than only the last one.
        thoughts = "".join(thought_chunks)
        answer = "".join(answer_chunks)

        # Parse after the loop so df_story is always rebuilt for this story,
        # even when the response carried no answer part at all (previously the
        # frame of the preceding story would have been reused).
        rows = []
        for m in score_pattern.finditer(answer):
            # Drop anything in parentheses from the dimension name
            dim = re.sub(r'\s*\([^)]*\)', '', m.group('dimension')).strip()
            if not dim:
                continue
            sc = float(m.group('score'))
            rows.append({"Dimension": dim, title: sc})
            print(f"Dimension: {dim}, Score: {sc}")

        # Final row: the story's 'av_rating' value, stored next to the model scores
        rows.append({"Dimension": 'Av_Score', title: story['av_rating']})
        df_story = pd.DataFrame(rows)
    else:
        print("Didn't return content")
        feedback = response.prompt_feedback
        print("Block Reason:", feedback.block_reason if feedback else None)
        thoughts = 'N/A'
        answer = 'N/A'
        df_story = pd.DataFrame(columns=["Dimension", title])

    # --- save response ---
    # "No story column yet" is checked via the column count: DataFrame.empty
    # would also be true after a blocked first story and would then discard
    # that story's column.
    if len(scores_df.columns) == 1:
        scores_df = df_story
    else:
        scores_df = pd.merge(scores_df, df_story, on='Dimension', how='outer')

    # Written after every story so a crash mid-run does not lose the scores
    scores_df.to_csv(os.path.join(output_dir, 'scores_temp.csv'), index=False)

    # Characters that are not valid in file names are replaced so that titles
    # such as "A/B" or "What?" can still be written.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', title)
    output_path = os.path.join(output_dir, f"{safe_title}.json")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump({"story_name": title, 'thought': thoughts, 'answer': answer}, f, indent=2)

    # Progress indicator: number of stories processed so far
    print(i)

# Dimensions a story has no score for are written as "#N/A"
scores_df = scores_df.fillna("#N/A")
scores_df.to_csv(os.path.join(output_dir, 'scores.csv'), index=False)


---------Story: The Wicked Prince --------
Thought summary:
**My Assessment: A Fable of Hubris**

Alright, let's break this down. My first impression is that we're dealing with a classic fable structure here—the "once upon a time" opening tells us that much. The core story revolves around a "wicked prince" who embodies the familiar sin of hubris, setting his sights on conquering not just lands but also the very heavens.

I'm starting to see how each dimension applies. The ending? It's purposeful. The gnat's victory provides a clear and ironic punishment. Coherence? Absolutely. It's a straight line from ambition to downfall. But the scenes vs. exposition? Leans heavily toward the latter, which is common in fables. Pacing serves the moral purpose: Build the problem, accelerate the downfall. Language? It's proficient, maybe not dazzlingly complex, but it works to convey the imagery needed. Emotional flexibility? Limited. It's more about the external actions, the suffering *caused*, not th