In [1]:
import os 


# Step up one directory so the relative paths used by later cells
# (e.g. "LLM_writing/...") resolve from the parent directory.
# NOTE(review): not idempotent -- re-running this cell climbs one more level
# each time; restart the kernel before re-running the notebook.
os.chdir('..')

In [2]:
# List the contents of the current working directory (shell escape).
!ls

AV_models			      README.md
create_summaries_for_eval_samples.py  requirements.txt
create_summaries.sh		      scripts
dataset_prepare			      train_and_eval_an_AV_model.py
notebooks			      train_AV_classifiers.sh


In [22]:
%%writefile generate_llm_writing.py
import os
import argparse
import pandas as pd
from tqdm import tqdm
from scripts.utils import get_completion

from scripts.utils import (
    count_words,
    round_up_to_nearest_10,
    list_writing_samples
)

from scripts.prompt_templates import (
 get_prompt_template_for_writing_setting1, 
 get_prompt_template_for_writing_setting4   
)


def get_args():
    """Parse the command-line options of the writing-generation script.

    ``--evaluation_df_fp`` and ``--llm`` are mandatory; every other option
    falls back to the default listed in its help text.

    Returns:
        argparse.Namespace: the parsed options, read from ``sys.argv``.
    """
    # (flag, add_argument keyword arguments), registered in this exact order
    # so the generated ``--help`` listing keeps its layout.
    option_table = [
        ("--evaluation_df_fp", dict(type=str, required=True,
                                    help="Path to the evaluation DataFrame.")),
        ("--llm", dict(type=str, required=True,
                       help="LLM model to use for generation. Use litellm name convention.")),
        ("--training_df_fp", dict(type=str, default=None,
                                  help="Path to the training DataFrame. Default is None.")),
        ("--setting", dict(type=int, choices=[1, 2, 3, 4, 5], default=1,
                           help="Prompt setting (1-5). Default is 1.")),
        ("--genre", dict(type=str, default=None,
                         help="Genre of the writing samples. Default is None (auto infer from dataset if possible).")),
        ("--author_col", dict(type=str, default="author",
                              help="Column name for author in the DataFrame. Default is 'author'.")),
        ("--text_col", dict(type=str, default="text",
                            help="Column name for text in the DataFrame. Default is 'text'.")),
        ("--summary_col", dict(type=str, default="summary",
                               help="Column name for summary in the DataFrame. Default is 'summary'.")),
        ("--num_exemplars", dict(type=int, default=5,
                                 help="Number of exemplars per author. Default is 5.")),
        ("--temperature", dict(type=float, default=0,
                               help="Temperature for the LLM. Default is 0.")),
        ("--max_tries", dict(type=int, default=5,
                             help="Number of tries for LLM completion. Default is 5.")),
        ("--save_freq", dict(type=int, default=10,
                             help="Frequency of saving LLM outputs. Default is 10.")),
    ]

    cli = argparse.ArgumentParser(
        description="Create writing prompts and prompt LLMs to generate writing."
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    return cli.parse_args()


def create_writing_prompts_setting1(training_df_fp,
                                    evaluation_df_fp,
                                    genre,
                                    author_col="author",
                                    text_col="text",
                                    summary_col="summary",
                                    num_exemplars=5,
                                    random_state=None):
    """Build one setting-1 prompt per evaluation row, with exemplars from the same author.

    For every row of the evaluation CSV, ``num_exemplars`` training texts by
    the same author are sampled and substituted into the setting-1 template
    together with the genre, a target word count and the row's summary.
    Two columns are added to the evaluation frame: ``"training sample indices"``
    (comma-separated training-frame index labels of the sampled texts) and
    ``"prompt"``.

    Side effect: the evaluation CSV at ``evaluation_df_fp`` is overwritten
    with the augmented frame.

    Args:
        training_df_fp: Path of the training CSV (needs ``author_col`` and ``text_col``).
        evaluation_df_fp: Path of the evaluation CSV (needs ``author_col``,
            ``text_col`` and ``summary_col``).
        genre: Genre string substituted into the template.
        author_col: Name of the author column.
        text_col: Name of the text column.
        summary_col: Name of the summary column.
        num_exemplars: Number of training texts sampled per evaluation row.
        random_state: Seed for exemplar sampling. ``None`` (default) keeps the
            previous unseeded behaviour. An int seeds a single generator that is
            shared by all rows, so the whole run is reproducible while different
            rows still draw different exemplars. Any other value is handed to
            ``Series.sample`` unchanged.

    Returns:
        pandas.DataFrame: the evaluation frame with the two added columns.

    Raises:
        AssertionError: if some training author has fewer than ``num_exemplars``
            texts, or ``summary_col`` is missing from the evaluation frame.
        ValueError: if an evaluation author does not occur in the training frame.
    """
    # Local import: the script does not import numpy at module level, and it
    # is always installed alongside pandas.
    import numpy as np

    training_df = pd.read_csv(training_df_fp)
    evaluation_df = pd.read_csv(evaluation_df_fp)

    assert training_df[author_col].value_counts().min() >= num_exemplars, \
        f"Each author must have at least {num_exemplars} samples in the training set."

    assert summary_col in evaluation_df.columns, \
        f"Summary column '{summary_col}' not found in evaluation DataFrame."

    # Fail fast with a readable message instead of the opaque error that
    # sampling from an empty selection would raise halfway through the loop.
    has_training_data = evaluation_df[author_col].isin(training_df[author_col])
    if not has_training_data.all():
        missing = sorted(evaluation_df.loc[~has_training_data, author_col].astype(str).unique())
        raise ValueError(
            f"Evaluation authors without training samples: {', '.join(missing)}"
        )

    # Passing the same int to every .sample() call would reseed each time and
    # select the same positions for an author on every row, so an int seed is
    # turned into one shared generator instead.
    if isinstance(random_state, (int, np.integer)):
        sampler_state = np.random.RandomState(random_state)
    else:
        sampler_state = random_state

    prompt_tmp = get_prompt_template_for_writing_setting1()

    print("Generating prompts...")
    for ix, row in tqdm(evaluation_df.iterrows(), total=len(evaluation_df)):

        author = row[author_col]
        summary = row[summary_col]

        # Target length is derived from the original text's word count via the
        # project helpers (presumably rounded up to a multiple of 10 -- see scripts.utils).
        num_words = round_up_to_nearest_10(count_words(row[text_col]))

        author_texts = training_df.loc[training_df[author_col] == author, text_col]
        samples = author_texts.sample(num_exemplars, random_state=sampler_state)
        writing_samples = list_writing_samples(samples)

        prompt = prompt_tmp.substitute(writing_samples=writing_samples,
                                       genre=genre, num_words=num_words,
                                       summary=summary)

        # Record which training rows were shown, for later traceability.
        evaluation_df.at[ix, "training sample indices"] = ",".join(
            str(sample_ix) for sample_ix in samples.index
        )
        evaluation_df.at[ix, "prompt"] = prompt

    # NOTE: this overwrites the input evaluation file with the augmented frame.
    evaluation_df.to_csv(evaluation_df_fp, index=False)

    return evaluation_df


def create_writing_prompts_setting4(evaluation_df_fp, 
                                    genre,
                                    text_col="text", 
                                    summary_col="summary"):
    """Build one setting-4 prompt per evaluation row (no author exemplars).

    Each prompt is the setting-4 template filled with the genre, a target
    word count derived from the row's text, and the row's summary. The
    prompts are stored in a new ``"prompt"`` column.

    Side effect: the CSV at ``evaluation_df_fp`` is overwritten with the
    augmented frame.

    Args:
        evaluation_df_fp: Path of the evaluation CSV.
        genre: Genre string substituted into the template.
        text_col: Name of the text column.
        summary_col: Name of the summary column.

    Returns:
        pandas.DataFrame: the evaluation frame including the ``"prompt"`` column.

    Raises:
        AssertionError: if ``summary_col`` is not a column of the evaluation frame.
    """
    eval_frame = pd.read_csv(evaluation_df_fp)

    assert summary_col in eval_frame.columns, \
        f"Summary column '{summary_col}' not found in evaluation DataFrame."

    template = get_prompt_template_for_writing_setting4()

    print("Generating prompts...")
    progress = tqdm(eval_frame.iterrows(), total=len(eval_frame))
    for label, record in progress:
        fields = {"summary": record[summary_col]}
        # Target length comes from the original text's word count, passed
        # through the project's rounding helper.
        fields["num_words"] = round_up_to_nearest_10(count_words(record[text_col]))
        fields["genre"] = genre
        eval_frame.at[label, "prompt"] = template.substitute(**fields)

    # NOTE: writes back to the same path the frame was read from.
    eval_frame.to_csv(evaluation_df_fp, index=False)

    return eval_frame


def generate_or_load_writing_prompts(args, dire):
    """Return the prompt frame for this run, reusing ``prompts.csv`` when present.

    If ``<dire>/prompts.csv`` already exists it is loaded and returned
    without looking at ``args.setting``. Otherwise the prompts are built
    for setting 1 or 4, written to ``<dire>/prompts.csv`` and returned.

    Args:
        args: Parsed options; reads ``setting``, ``training_df_fp``,
            ``evaluation_df_fp``, ``genre``, ``author_col``, ``text_col``,
            ``summary_col`` and ``num_exemplars``.
        dire: Output directory for this setting/dataset.

    Returns:
        pandas.DataFrame: the prompt frame.

    Raises:
        AssertionError: setting 1 requested without ``args.training_df_fp``.
        ValueError: a setting other than 1 or 4 is requested and no cached
            prompts exist.
    """
    cached_fp = os.path.join(dire, "prompts.csv")

    # Cached prompts short-circuit everything else.
    if os.path.exists(cached_fp):
        print(f"Prompts already exist in {dire}/prompts.csv")
        return pd.read_csv(cached_fp)

    if args.setting not in (1, 4):
        raise ValueError("Setting not implemented yet.")

    if args.setting == 4:
        prompts = create_writing_prompts_setting4(
            evaluation_df_fp=args.evaluation_df_fp,
            genre=args.genre,
            text_col=args.text_col,
            summary_col=args.summary_col
        )
    else:
        # Setting 1 draws exemplars from the training set, so its path is mandatory.
        assert args.training_df_fp is not None, \
            "Training DataFrame path is required for setting 1."

        prompts = create_writing_prompts_setting1(
            training_df_fp=args.training_df_fp,
            evaluation_df_fp=args.evaluation_df_fp,
            genre=args.genre,
            author_col=args.author_col,
            text_col=args.text_col,
            summary_col=args.summary_col,
            num_exemplars=args.num_exemplars
        )

    prompts.to_csv(cached_fp, index=False)
    print(f"Prompts saved to {dire}/prompts.csv")

    return prompts


def prompt_llm_to_generate_writing(df, save_dir, model,
                                   temperature=0, max_tries=5,
                                   save_freq=10):
    """Query the LLM once per prompt and store the completions as a CSV, resumably.

    Output goes to ``<save_dir>/<model name>.csv`` (the part of ``model``
    after the last ``/``) with a single ``"writing"`` column whose n-th row
    belongs to the n-th row of ``df``. If that file already exists, its
    row count is taken as the number of prompts already processed: a complete
    file makes the call a no-op, a shorter one is continued from where it
    stopped.

    Progress is written every ``save_freq`` completions and also whenever
    the loop exits, including on an exception or keyboard interrupt, so an
    aborted run keeps every completion obtained so far.

    Args:
        df: Frame with a ``"prompt"`` column; rows are addressed by position,
            so any index is accepted.
        save_dir: Directory receiving the output CSV.
        model: Model identifier handed to ``get_completion``.
        temperature: Passed through to ``get_completion``.
        max_tries: Passed through to ``get_completion``.
        save_freq: Write the CSV after every ``save_freq`` completions;
            ``0`` or ``None`` disables the periodic writes (the final write
            still happens).

    Returns:
        None.
    """
    model_name = model.split("/")[-1]
    fp = os.path.join(save_dir, model_name + ".csv")

    if os.path.exists(fp):
        llm_df = pd.read_csv(fp)

        if len(llm_df) == len(df):
            print(f"Writing already generated for {model_name}.")
            return
        else:
            print(f"Writing generation interrupted for {model_name}. Continuing from {len(llm_df)}.")
            indices = list(range(len(llm_df), len(df)))
    else:
        indices = list(range(len(df)))
        llm_df = pd.DataFrame(columns=["writing"])

    # ``indices`` are row positions (derived from lengths), so the prompts are
    # read positionally; label lookup would fail on a non-default index.
    prompts = df["prompt"]
    periodic_saves = bool(save_freq) and save_freq > 0

    try:
        for j, ix in tqdm(enumerate(indices), total=len(indices)):
            prompt = prompts.iloc[ix]
            completion = get_completion(prompt, model=model,
                                        temperature=temperature,
                                        max_tries=max_tries)
            llm_df.at[ix, "writing"] = completion

            if periodic_saves and (j + 1) % save_freq == 0:
                llm_df.to_csv(fp, index=False)
    finally:
        # Always flush what was collected so an aborted run can be resumed
        # without repeating completions made since the last periodic save.
        llm_df.to_csv(fp, index=False)

    print(f"Writing generated and saved to {fp}")


def _dataset_name_from_path(fp):
    """Return the dataset tag of a file path: its base name up to the first '.' or '_'."""
    return os.path.basename(fp).split(".")[0].split("_")[0]


def main():
    """Entry point: parse options, build or load the prompts, then generate the writing.

    The dataset name is taken from the evaluation file name and, together
    with the setting, determines the output directory
    ``LLM_writing/Setting<setting>/<dataset>``. When ``--genre`` is not
    given it is inferred from the dataset-name prefix.

    All validation happens before the output directory is created, so a
    rejected invocation leaves nothing behind on disk.

    Raises:
        AssertionError: the training and evaluation file names encode
            different dataset names.
        ValueError: no genre was given and none can be inferred.
    """
    args = get_args()
    print(args)

    dataset = _dataset_name_from_path(args.evaluation_df_fp)

    if args.training_df_fp is not None:
        dataset_ = _dataset_name_from_path(args.training_df_fp)
        assert dataset == dataset_, \
            f"Training and evaluation datasets must be the same. {dataset} != {dataset_}"

    if args.genre is None:
        # Ordered (dataset-name prefix, genre) pairs; first match wins.
        genre_by_prefix = (
            ("blog", "blog post"),
            ("enron", "email"),
            ("reddit", "reddit post"),
            ("CCAT50", "news article"),
        )
        for prefix, genre in genre_by_prefix:
            if dataset.startswith(prefix):
                args.genre = genre
                break
        else:
            raise ValueError(f"Unknown dataset: {dataset}. Please specify a genre.")

    dire = f"LLM_writing/Setting{args.setting}/{dataset}"
    os.makedirs(dire, exist_ok=True)

    #### generating or loading prompts
    df = generate_or_load_writing_prompts(args, dire)

    #### prompting llm to generate writing
    prompt_llm_to_generate_writing(
        df,
        save_dir=dire,
        model=args.llm,
        temperature=args.temperature,
        max_tries=args.max_tries,
        save_freq=args.save_freq
    )


# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

Overwriting generate_llm_writing.py


In [18]:
# List the files written for Setting1 under the "toy_test_with_summaries" output directory (shell escape).
!ls LLM_writing/Setting1/toy_test_with_summaries/

gpt-4.1-mini-2025-04-14  gpt-4.1-mini-2025-04-14.csv  prompts.csv


In [None]:
import pandas as pd

# Load the prompt file saved under LLM_writing/Setting1/blog and display it.
p = pd.read_csv("LLM_writing/Setting1/blog/prompts.csv")
p

Unnamed: 0,dataset,author,text,topic,gender,age,sign,date,file_name,subject,index,subreddit,summary,training sample indices,prompt
0,blog,15365,"'Bathrooms, hallway corners, laundr...",indUnk,female,34.0,Cancer,"28,July,2004",,,,,"Choose accessible locations like bathrooms, ha...",31420,"Given the following summary, your task is to g..."
1,blog,15365,urlLink June 2003 Outlook from Moonsur...,indUnk,female,34.0,Cancer,"07,June,2004",,,,,"The Gemini New Moon on May 30th, 2003, marks a...",41023,"Given the following summary, your task is to g..."
2,blog,15365,urlLink SAGITTARIUS LUNAR CYCLE by Cl...,indUnk,female,34.0,Cancer,"07,June,2004",,,,,"The Sagittarius lunar cycle, marked by a total...",40123,"Given the following summary, your task is to g..."
3,blog,15365,"The Beatles Title: Let It Be (Lennon, M...",indUnk,female,34.0,Cancer,"12,October,2002",,,,,"""Let It Be"" by The Beatles is a song expressin...",10432,"Given the following summary, your task is to g..."
4,blog,15365,THE MOON WAS STILL UP Anger and pain I c...,indUnk,female,34.0,Cancer,"14,September,2003",,,,,The text explores deep emotions of anger and p...,31024,"Given the following summary, your task is to g..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,reddit,wonderfuldog,Occam's Razor]( \n A guy is found dead of a g...,,,,,,,,446467.0,atheism,The text explains Occam's Razor by comparing t...,399396395397398,"Given the following summary, your task is to g..."
396,reddit,wonderfuldog,"If we lived side-by-side with dinosaurs, don't...",,,,,,,,849514.0,atheism,"The text questions why, if humans lived alongs...",399395398397396,"Given the following summary, your task is to g..."
397,reddit,wonderfuldog,it means different things to different people ...,,,,,,,,1242640.0,atheism,The text emphasizes the importance of a shared...,397399398395396,"Given the following summary, your task is to g..."
398,reddit,wonderfuldog,Do you think there is any truth to people sayi...,,,,,,,,609221.0,atheism,The text addresses the criticism that r/atheis...,396398399397395,"Given the following summary, your task is to g..."


In [44]:
# Print the full prompt text of one randomly drawn row of `p`.
# The draw is unseeded, so a different row may be shown on every run.
print(p.sample(1)["prompt"].values[0])

Given the following summary, your task is to generate a writing sample around 1200 words. The genre of the writing is mixed. Do not output anything other than the writing.

### Writing Task Summary

The writer reflects on a mixed day, celebrating a small personal victory but frustrated by a broker’s fax error that wasted time. They enjoyed a longer lunch and took advantage of a book sale. The writer comments on a news story about a woman fraudulently posing as a missing girl and criticizes the family’s insensitive reaction caught on camera. They mention Adam’s risky bullfighting job and hope for good worker’s compensation. The writer updates friends on various topics: emails sent to Jan, a funny but sad falling squirrel incident, hair loss advice from Kevin, and sympathy for Jen’s protein issues. They express enthusiasm for Swedish meatballs and possibly having stroganoff for dinner, and congratulate Jen on weight loss. The writer discusses TV show updates, TiVo equipment challenges, a

In [49]:
# Load the gpt-4o-mini generations saved under LLM_writing/Setting4/blog and display them.
d = pd.read_csv("LLM_writing/Setting4/blog/gpt-4o-mini-2024-07-18.csv")
d

Unnamed: 0,writing
0,**Title: A Surprising Visit to Alltel: More Th...
1,"As we look back at the early 2000s, the landsc..."
2,**The Fine Line Between Safety and Overreactio...
3,"Title: Cats, Communism, and the Conservative C..."
4,"Ah, the beach! There’s something magical about..."
...,...
195,"As I sit here, the pages of my book blur toget..."
196,**The Lesson of the Stolen Bike: A Journey of ...
197,**Homecoming: The Return of Cardinal John Henr...
198,"Hey there, fabulous readers! \n\nFirst off, a ..."


In [50]:
# Load the gpt-4o-mini generations saved under LLM_writing/Setting1/blog and display them.
# NOTE: rebinds `d`, so any frame previously held under that name is no longer reachable.
d = pd.read_csv("LLM_writing/Setting1/blog/gpt-4o-mini-2024-07-18.csv")
d

Unnamed: 0,writing
0,"so it’s that time again, my monthly pilgrimage..."
1,The Future of IT Jobs: A Growing Concern \n\n...
2,The recent nightclub fire has everyone buzzing...
3,"Oh wow, here we go again! *sigh* So, I’ve been..."
4,"ah, the beach! what a delightful escape! the w..."
...,...
95,"Girlchick911: so like, can you believe Jen wan..."
96,Breaking News: A Harvard Law Student Wins Miss...
97,"So, last night was a whirlwind, and I’m feelin..."
98,"Oh my gosh, you guys! You will NOT believe wha..."


- LLM_writing

    - Setting1
        - dataset1
            - prompts.csv
            - llm1.csv
            - llm2.csv
            - ...
        - dataset2
            - prompts.csv
            - llm1.csv
            - llm2.csv
            - ...
        - dataset3
            - prompts.csv
            - llm1.csv
            - llm2.csv
            - ...
        - dataset4
            - prompts.csv
            - llm1.csv
            - llm2.csv
            - ...
    - Setting2
    - Setting3
    - Setting4
    - Setting5
    - Setting1_followup