In [2]:
import pandas as pd


# Load the toy test CSV; the relative path is resolved against the kernel's working directory.
df = pd.read_csv("../dataset_prepare/toy_test.csv")

In [7]:
from string import Template


# Summarization prompt; `$text` is the only placeholder and is filled in with
# Template.substitute(text=...). The literal is stripped so the prompt starts
# and ends on real content rather than on the quote-delimiter whitespace.
prompt_tmp = Template(
    '''
You will be given a piece of text. Your task is to summarize the text in a concise and clear manner, capturing the main ideas and key points while maintaining the original meaning.


### Text to Summarize


$text


### Instructions


- Provide a summary that is brief yet comprehensive.
- Ensure that the summary accurately reflects the content of the original text.
- Avoid adding any personal opinions or interpretations.
- Do not output anything other than the summary.


Begin your response below:
    '''.strip()
)

In [4]:
# Show the loaded frame as the cell output.
df

Unnamed: 0,dataset,author,text,topic,gender,age,sign,date,file_name,subject,index,subreddit
0,blog,15365,"'Bathrooms, hallway corners, laundr...",indUnk,female,34.0,Cancer,"28,July,2004",,,,
1,blog,15365,urlLink June 2003 Outlook from Moonsur...,indUnk,female,34.0,Cancer,"07,June,2004",,,,
2,blog,15365,urlLink SAGITTARIUS LUNAR CYCLE by Cl...,indUnk,female,34.0,Cancer,"07,June,2004",,,,
3,blog,15365,"The Beatles Title: Let It Be (Lennon, M...",indUnk,female,34.0,Cancer,"12,October,2002",,,,
4,blog,15365,THE MOON WAS STILL UP Anger and pain I c...,indUnk,female,34.0,Cancer,"14,September,2003",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
395,reddit,wonderfuldog,Occam's Razor]( \n A guy is found dead of a g...,,,,,,,,446467.0,atheism
396,reddit,wonderfuldog,"If we lived side-by-side with dinosaurs, don't...",,,,,,,,849514.0,atheism
397,reddit,wonderfuldog,it means different things to different people ...,,,,,,,,1242640.0,atheism
398,reddit,wonderfuldog,Do you think there is any truth to people sayi...,,,,,,,,609221.0,atheism


In [5]:
# Draw a 10-row sample for a quick trial run. The fixed random_state makes the
# same rows come back on every Restart & Run All; reset_index gives the sample
# a clean 0..9 index.
sub = df.sample(n=10, random_state=42).reset_index(drop=True)

In [8]:
from tqdm import tqdm
from litellm import completion


def get_completion(prompt, temperature=0, max_tries=5,
                   model="openai/gpt-4.1-mini-2025-04-14",
                   retry_delay=1.0):
    """Send ``prompt`` as a single user message and return the reply text.

    Any exception raised by the call is printed and the call is retried, up to
    ``max_tries`` attempts in total. Between attempts the function sleeps with
    exponential backoff so transient failures are not retried instantly.

    Args:
        prompt: Text sent as the content of the one user message.
        temperature: Sampling temperature forwarded to ``completion``.
        max_tries: Maximum number of attempts before giving up.
        model: Model identifier forwarded to ``completion``.
        retry_delay: Base delay in seconds; the wait after failed attempt
            ``k`` (0-based) is ``retry_delay * 2 ** k``. Use 0 to retry
            immediately.

    Returns:
        The ``content`` of the first choice's message, or the literal string
        ``"SOMETHING_WRONG"`` when every attempt raised.
    """
    import time  # local import keeps this cell runnable on its own

    for attempt in range(max_tries):
        try:
            # Call the completion function with the provided parameters
            response = completion(
                model=model, temperature=temperature,
                messages=[{"role": "user", "content": prompt}]
            )
            return response["choices"][0]["message"]["content"]

        except Exception as e:
            print(f"Error: {e}")
            # Back off before the next attempt; no point sleeping after the last one.
            is_last_attempt = attempt == max_tries - 1
            if retry_delay > 0 and not is_last_attempt:
                time.sleep(retry_delay * 2 ** attempt)

    return "SOMETHING_WRONG"


# iterrows() is a generator with no length, so pass total= to get a real
# progress bar (percentage and ETA) instead of a bare iteration counter.
for ix, row in tqdm(sub.iterrows(), total=len(sub)):
    text = row["text"]
    prompt = prompt_tmp.substitute(text=text)

    # Get the completion (get_completion returns "SOMETHING_WRONG" if every try fails)
    summary = get_completion(prompt, model="gpt-4.1-2025-04-14",
                             temperature=0, max_tries=5)

    # Save the summary back to the DataFrame
    sub.at[ix, "summary"] = summary

# NOTE: the summaries now live only in `sub`; this cell does not write them to disk.

10it [00:50,  5.02s/it]


In [None]:
def summarize_df(df_fp_path, text_col, 
                 model="gpt-4.1-2025-04-14", 
                 temperature=0, max_tries=5, 
                 save_freq=10):
    """Fill in the ``summary`` column of a CSV file, in place and resumably.

    The file is read, every row without a usable summary is sent through
    ``get_completion`` using ``prompt_tmp``, and the frame is written back to
    the same path. A row counts as pending when its summary is missing or is
    the ``"SOMETHING_WRONG"`` marker that ``get_completion`` returns after
    exhausting its retries, so failed rows are retried on the next run.

    Progress is written every ``save_freq`` processed rows and again when the
    loop ends for any reason (normal completion, exception, or a keyboard
    interrupt), so an interrupted run keeps everything generated so far.

    Args:
        df_fp_path: Path of the CSV to read and overwrite.
        text_col: Name of the column holding the text to summarize.
        model: Model identifier forwarded to ``get_completion``.
        temperature: Sampling temperature forwarded to ``get_completion``.
        max_tries: Retry budget forwarded to ``get_completion``.
        save_freq: Write a checkpoint after this many processed rows; a falsy
            value disables intermediate checkpoints.

    Returns:
        None. Results are written to ``df_fp_path``.
    """
    failure_marker = "SOMETHING_WRONG"  # returned by get_completion when all tries fail

    df = pd.read_csv(df_fp_path)

    had_summary_col = "summary" in df.columns
    if had_summary_col:
        # An all-empty column is parsed as float64; cast so strings can be stored in it.
        df["summary"] = df["summary"].astype(object)
    else:
        df["summary"] = None

    pending = df["summary"].isna() | df["summary"].eq(failure_marker)
    indices = df.index[pending]

    if had_summary_col:
        print(f"{len(df) - len(indices)} summaries already generated.")

    if len(indices) == 0:
        print("All summaries are already generated.")
        return

    try:
        # Count processed rows rather than relying on index labels for checkpoints.
        for n_done, ix in enumerate(tqdm(indices), start=1):
            text = df.at[ix, text_col]
            prompt = prompt_tmp.substitute(text=text)

            # Get the completion
            summary = get_completion(prompt, model=model, 
                                     temperature=temperature, 
                                     max_tries=max_tries)

            # Save the summary back to the DataFrame
            df.at[ix, "summary"] = summary

            if save_freq and n_done % save_freq == 0:
                df.to_csv(df_fp_path, index=False)
    finally:
        # Always persist what we have, including on KeyboardInterrupt.
        df.to_csv(df_fp_path, index=False)

    n_failed = int(df.loc[indices, "summary"].eq(failure_marker).sum())
    if n_failed:
        print(f"{n_failed} summaries failed and will be retried on the next run.")
    else:
        print("All summaries generated and saved in place.")

In [17]:
# Fill in missing summaries for the CSV copy; summarize_df writes the results
# back to this same path.
summarize_df(
    "../dataset_prepare/toy_test copy.csv", 
    text_col="text", 
    model="gpt-4.1-2025-04-14", 
    temperature=0, 
    max_tries=5, 
    save_freq=10
)

 21%|██▏       | 85/400 [05:41<21:05,  4.02s/it]


KeyboardInterrupt: 

In [18]:
# Re-read the CSV that summarize_df was pointed at, to inspect what was saved to disk.
test = pd.read_csv("../dataset_prepare/toy_test copy.csv")

In [20]:
# Allow up to 100 characters per cell so more of each text/summary value is visible.
pd.set_option('display.max_colwidth', 100)
# Show the frame as the cell output.
test

Unnamed: 0,dataset,author,text,topic,gender,age,sign,date,file_name,subject,index,subreddit,summary
0,blog,15365,"'Bathrooms, hallway corners, laundry rooms, even open closets work well. Make sure yo...",indUnk,female,34.0,Cancer,"28,July,2004",,,,,"Choose accessible locations like bathrooms, hallways, or laundry rooms for your kitten's litter ..."
1,blog,15365,"urlLink June 2003 Outlook from Moonsurfing.com Dear Moonsurfers, The Moon is New in ...",indUnk,female,34.0,Cancer,"07,June,2004",,,,,"The June 2003 Outlook from Moonsurfing.com highlights the New Moon in Gemini on May 30th, which,..."
2,blog,15365,urlLink SAGITTARIUS LUNAR CYCLE by Claudia @ www.moonsurfing.com Nov. 23 - Dec. 22 It'...,indUnk,female,34.0,Cancer,"07,June,2004",,,,,"The Sagittarius lunar cycle, beginning with a total solar eclipse, encourages letting go of outd..."
3,blog,15365,"The Beatles Title: Let It Be (Lennon, Mccartney) Album: Let It Be When I find myself i...",indUnk,female,34.0,Cancer,"12,October,2002",,,,,"The song ""Let It Be"" by The Beatles expresses finding comfort and wisdom in difficult times, wit..."
4,blog,15365,THE MOON WAS STILL UP Anger and pain I can not understand because they are so far undernea...,indUnk,female,34.0,Cancer,"14,September,2003",,,,,"The text reflects on deep, unexpressed emotions that remain unresolved despite attempts at relea..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,reddit,wonderfuldog,Occam's Razor]( \n A guy is found dead of a gunshot wound in the woods. \n Which more likely: \...,,,,,,,,446467.0,atheism,
396,reddit,wonderfuldog,"If we lived side-by-side with dinosaurs, don't you think someone in some religious or ancient te...",,,,,,,,849514.0,atheism,
397,reddit,wonderfuldog,it means different things to different people \n Obviously we can't stop people from using words...,,,,,,,,1242640.0,atheism,
398,reddit,wonderfuldog,Do you think there is any truth to people saying it is hypocritical for r/atheism to loathe Chri...,,,,,,,,609221.0,atheism,


In [27]:
# Keep only the rows that already have a summary.
has_summary = test["summary"].notna()
sub = test[has_summary]

# Spot-check one randomly chosen summary.
print(sub.sample(1)["summary"].iloc[0])

The writer argues that allowing guns on airplanes is a bad idea, as it could make it easier for terrorists to take control of a flight and use air marshals' weapons against them. They question whether air marshals would be identifiable and point out that focusing security only on transatlantic flights is ineffective, since local flights have also been targeted in the past. The message ends with a note about meeting at the cinema, clarifying the film's start time.
