## **Set Runtime**

Before we get started, let's set the runtime to A100 GPU and enable high RAM (up there in the "Runtime" tab)

## Load poetry data

In [1]:
# imports
import pandas as pd
import gdown


In [2]:
# download human-authored poems from LK's gdrive
url = "https://drive.google.com/file/d/19RXOfv3MgMxF_f-fxwOnwhOKlbYSsERO/view?usp=drive_link"

output = "human_poems_sample.csv"

gdown.download(url=url, output=output, fuzzy=True)


Downloading...
From: https://drive.google.com/uc?id=19RXOfv3MgMxF_f-fxwOnwhOKlbYSsERO
To: /content/human_poems_sample.csv
100%|██████████| 84.2k/84.2k [00:00<00:00, 32.0MB/s]


'human_poems_sample.csv'

In [3]:
poems_df = pd.read_csv("human_poems_sample.csv")

poems_df

Unnamed: 0,poem_title,poem_text,author,form
0,[Weaned from life and flown away],Weaned from life and flown away\nIn the mornin...,Emily Brontë,sonnet
1,Of Love: A Sonnet,"How love came in I do not know,\nWhether by th...",Robert Herrick,sonnet
2,"Sonnet XLIV [For Thee the Sun Doth Daily Rise,...","For thee the sun doth daily rise, and set \nBe...",George Santayana,sonnet
3,That you were once unkind befriends me now (So...,"That you were once unkind befriends me now,\nA...",William Shakespeare,sonnet
4,Sonnet 18: Shall I compare thee to a summer’s ...,Shall I compare thee to a summer’s day?\nThou ...,William Shakespeare,sonnet
...,...,...,...,...
89,Blind Boone’s Apparitions,C\n\nmy motto for life\n\n ...,Tyehimba Jess,blues poem
90,Spring 1931,Northern black\nboy traveling\nwith empty bell...,Sam Cornish,blues poem
91,Riverbank Blues,"A man git his feet set in a sticky mudbank,\nA...",Sterling A. Brown,blues poem
92,"Go Down, Death","(A Funeral Sermon)\n\nWeep not, weep not,\nShe...",James Weldon Johnson,blues poem


In [4]:
# take a look at the forms
poems_df['form'].value_counts(dropna=False)

Unnamed: 0_level_0,count
form,Unnamed: 1_level_1
sonnet,50
ghazal,24
blues poem,15
duplex,5


## Query the model

In [5]:
# set up the prompt template
# this is borrowed from the "Sonnet or Not" paper

prompt_template_start = '''
Read the following poem and then respond with the form of the poem based on the following verse forms: [’sonnet’, ’ghazal’, ’duplex’, 'blues poem'].
All of the poems have been tagged by experts as one of these forms. You must pick one of these options.
Please also provide an elaborated rationale for why you think the poem is in this form, a one-word summary rationale, and a score ranking your confidence in your answer from 0 to 1. Please report the single poetic form, elaborated rationale, and one-word rationale in the following format.
1. Poetic Form: Sonnet
2. Elaborated Rationale: This poem is a sonnet because...
3. One-Word Summary: Meter
4. Confidence Score : 0.73

1. Poetic Form: Ghazal
2. Elaborated Rationale: This poem is a ballad because...
3. One-Word Summary: Rhyme
4. Confidence Score : 0.91

Poem Text (in full):

'''

prompt_template_end = '''

Pick ONE of these possible verse forms: [’sonnet’, ’ghazal’, ’duplex’, 'blues poem'].
'''

## Try a sample query

Before we try this at scale, let's try it with one example to see how it does

In [6]:
# import random module
import random

# generate a random number betweeen 0 and the lenght of poems_df
num = random.randint(0, len(poems_df) - 1)

# piece together the prompt
test_prompt = prompt_template_start + poems_df.iloc[num]['poem_text'] + prompt_template_end

# just take a look before we use it
print(test_prompt)


Read the following poem and then respond with the form of the poem based on the following verse forms: [’sonnet’, ’ghazal’, ’duplex’, 'blues poem'].
All of the poems have been tagged by experts as one of these forms. You must pick one of these options.
Please also provide an elaborated rationale for why you think the poem is in this form, a one-word summary rationale, and a score ranking your confidence in your answer from 0 to 1. Please report the single poetic form, elaborated rationale, and one-word rationale in the following format.
1. Poetic Form: Sonnet
2. Elaborated Rationale: This poem is a sonnet because...
3. One-Word Summary: Meter
4. Confidence Score : 0.73

1. Poetic Form: Ghazal
2. Elaborated Rationale: This poem is a ballad because...
3. One-Word Summary: Rhyme
4. Confidence Score : 0.91

Poem Text (in full):

Then hate me when thou wilt; if ever, now;
Now, while the world is bent my deeds to cross,
Join with the spite of fortune, making me bow,
And do not drop in for a

## Model-specific setup
### This one for Emory's AzureOpenAI instance and GPT4-32k

In [7]:
# openAI version
!pip install openai

import getpass
from openai import AzureOpenAI

AZURE_ENDPOINT = "https://cail-training-canadaeast.openai.azure.com/"
# AZURE_API_KEY = getpass.getpass(prompt='Enter AzureOpenAI API key:')
AZURE_MODEL_KEY = "gpt432k"
AZURE_API_VERSION = "2024-03-01-preview"



In [8]:
client = AzureOpenAI(
    azure_endpoint=AZURE_ENDPOINT,
    api_key=AZURE_API_KEY,
    api_version=AZURE_API_VERSION
)

NameError: name 'AZURE_API_KEY' is not defined

In [None]:
# test model

response = client.chat.completions.create( # Use the chat completions endpoint
  model=AZURE_MODEL_KEY,
  messages=[
        {"role": "user", "content": "Say this is a test"}, # Use messages parameter for chat models
    ],
  max_tokens=7,
  temperature=0
)

print(response.choices[0].message.content)

In [None]:
# define the actual query function

def query_chatGPT (prompt):
  response = client.chat.completions.create( # Use the chat completions endpoint
    model=AZURE_MODEL_KEY,
    messages=[
      {"role": "user", "content": prompt}, # Use messages parameter for chat models
    ],
    max_tokens=400, # up the number of tokens for a longer answer, remove additional responses below
    temperature=1
  )

  response_text = response.choices[0].message.content

  return(response_text)

In [None]:
# try once before we do it all

# test model

response = client.chat.completions.create( # Use the chat completions endpoint
  model=AZURE_MODEL_KEY,
  messages=[
        {"role": "user", "content": test_prompt}, # Use messages parameter for chat models
    ],
  max_tokens=400,
  temperature=1
)

print(response.choices[0].message.content)

In [None]:
# response for testing function
response = response.choices[0].message.content

response

In [None]:
def extract_response_fields(response):
    """
    Extract structured fields from a response with the format:
    1. Poetic Form: ...
    2. Elaborated Rationale: ...
    3. One-Word Summary: ...
    4. Confidence Score: ...

    Args:
        response (str): The full model output string

    Returns:
        dict: Dictionary with keys 'poetic_form', 'rationale', 'summary', 'confidence'
    """
    lines = response.strip().split('\n')

    result = {
        'poetic_form': None,
        'rationale': None,
        'summary': None,
        'confidence': None
    }

    for line in lines:
        if line.startswith('1. Poetic Form:'):
            result['poetic_form'] = line.split(':', 1)[1].strip()
        elif line.startswith('2. Elaborated Rationale:'):
            result['rationale'] = line.split(':', 1)[1].strip()
        elif line.startswith('3. One-Word Summary:'):
            result['summary'] = line.split(':', 1)[1].strip()
        elif line.startswith('4. Confidence Score:'):
            result['confidence'] = line.split(':', 1)[1].strip()

    return result


# Usage:
fields = extract_response_fields(response)
print(fields)
# Output: {'poetic_form': '...', 'rationale': '...', 'summary': '...', 'confidence': '...'}

# Access individual values:
response1 = fields['poetic_form']
response2 = fields['rationale']
response3 = fields['summary']
response4 = fields['confidence']

## Run the whole thing

In [None]:
# iterate through every row in poems_df
for index, row in poems_df.iterrows():
  # construct prompt from that row's ['poem_text']
  prompt = prompt_template_start + row['poem_text'] + prompt_template_end

  # get response from prompt
  response = query_chatGPT(prompt)

  # extract response fields using extract_response_fields
  fields = extract_response_fields(response)

  # add fields to poems_df
  poems_df.at[index, 'poetic_form'] = fields['poetic_form']
  poems_df.at[index, 'rationale'] = fields['rationale']
  poems_df.at[index, 'summary'] = fields['summary']
  poems_df.at[index, 'confidence'] = fields['confidence']

poems_df

In [None]:
# export poems_df as csv
poems_df.to_csv('gpt4_poems_and_tags_df.csv', index=False)

## Assess accuracy


In [None]:
# replace any NaN in poems_df["poetic_form"] with "Unclassified"
poems_df['poetic_form'] = poems_df['poetic_form'].fillna('unclassified')

In [None]:
# turn all values in poems_df["poetic_form"] to lowercase
poems_df['poetic_form'] = poems_df['poetic_form'].str.lower()


In [None]:
# view value counts of poems_df['poetic_form']
poems_df['poetic_form'].value_counts()

In [None]:
# this is additional text cleaning from the original paper
# not sure if we need it but can't hurt!

import re

def strip_non_letters_and_conditionally_remove_s(input_string):
    # match any non-letter character
    pattern = re.compile(r'[^a-zA-Z\s]')
    cleaned_string = pattern.sub('', input_string)

    # Remove 's' at the end of the phrase unless the phrase is "ekphrasis" or "ars poetic"
    if cleaned_string.lower().endswith('s') and not cleaned_string.lower().endswith('ekphrasis') and not cleaned_string.lower().endswith('ars poetica'):
        cleaned_string = cleaned_string[:-1]

    return cleaned_string

In [None]:
# define a function to evaluate whether the model got the form "correct" or not

def evaluate(row):
    # Convert columns to string type, handling potential NaN values
    form_str = str(row['form']).strip().lower()
    poetic_form_str = str(row['poetic_form']).strip().lower()

    if form_str == strip_non_letters_and_conditionally_remove_s(poetic_form_str):
        return 1
    else:
        return 0

In [None]:
# apply it to determine true correctness
poems_df['true_correctness'] = poems_df.apply(evaluate, axis = 'columns')

poems_df

In [None]:
# plot confusion matrix for poems_df["form"] and poems_df["poetic_form"]
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

df = pd.DataFrame({'y_true': poems_df['form'], 'y_pred': poems_df['poetic_form']})

# Calculate the confusion matrix
cm = confusion_matrix(df['y_true'], df['y_pred'])

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=np.unique(df['y_pred']),
            yticklabels=np.unique(df['y_true']),
            cbar_kws={'label': 'Count'})
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.tight_layout()
plt.show()

## Calculate accuracy measures

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

# Using the same data
y_true = poems_df['form']
y_pred = poems_df['poetic_form']

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Calculate precision and recall with different averaging methods
# 'macro' - unweighted mean (treats all classes equally)
precision_macro = precision_score(y_true, y_pred, average='macro', zero_division=0)
recall_macro = recall_score(y_true, y_pred, average='macro', zero_division=0)

print(f"\nMacro-averaged:")
print(f"Precision: {precision_macro:.4f}")
print(f"Recall: {recall_macro:.4f}")

# 'weighted' - weighted by support (accounts for class imbalance)
precision_weighted = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall_weighted = recall_score(y_true, y_pred, average='weighted', zero_division=0)

print(f"\nWeighted-averaged:")
print(f"Precision: {precision_weighted:.4f}")
print(f"Recall: {recall_weighted:.4f}")

# Get detailed per-class metrics
print("\n" + "="*60)
print("Detailed Classification Report:")
print("="*60)
print(classification_report(y_true, y_pred, zero_division=0))