In [1]:
import utils
import pandas as pd
import tqdm
import openai
import nltk

In [2]:
api_path = '/Users/psachdeva/openai/api.txt'

In [3]:
with open(api_path, 'r') as f:
    openai.api_key = f.read().strip()

In [4]:
df = pd.read_csv('aita_full.csv')

In [5]:
to_drop_flairs = ['UPDATE',
                  'TL;DR',
                  'Best of 2022',
                  'Open Forum',
                  'META',
                  'Upcoming Talk!!!']

In [6]:
df = df[~df['flair_text'].isin(to_drop_flairs)].reset_index(drop=True).copy()

In [7]:
df['true_label'] = df['flair_text'].replace({
    'Asshole': 'YTA',
    'Not the A-hole': 'NTA',
    'Everyone Sucks': 'ESH',
    'No A-holes here': 'NAH',
    'Not enough info': 'INFO'})

In [8]:
df['gpt_dilemma'] = ''
df['gpt_label'] = ''
df['gpt_reason'] =  ''

In [9]:
n_posts = 1500

In [10]:
failed = []

In [11]:
for post in tqdm.tqdm(range(1500)):
    n_sentences = len(nltk.sent_tokenize(df.iloc[post]['top_comment']))
    system_message = utils.create_system_message(identity="", length=f"{n_sentences} sentences")
    try:
        response = openai.ChatCompletion.create(
            model='gpt-3.5-turbo',
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": df['selftext'].iloc[post]}
            ],
            temperature=0.4)
    except openai.error.APIError as e:
        print(f"Post {post}, OpenAI API returned an API Error: {e}")
        failed.append(post)
        continue
    except openai.error.APIConnectionError as e:
        print(f"Post {post}, OpenAI API request failed to connect: {e}")
        failed.append(post)
        continue
    except openai.error.ServiceUnavailableError as e:
        print(f"Post {post}, OpenAI API returned a Service Unavailable Error: {e}")
        failed.append(post)
        continue
        
    answer = response['choices'][0]['message']['content']
    gpt_dilemma = answer.split('Verdict')[0].replace('Dilemma:', '').strip()
    gpt_label = answer.split('Verdict: ')[1][:3]
    gpt_reason = answer.split('Reasoning:')[-1].strip()
    df.loc[post, 'gpt_dilemma'] = gpt_dilemma
    df.loc[post, 'gpt_label'] = gpt_label
    df.loc[post, 'gpt_reason'] = gpt_reason

100%|█████████████████████████████████████████████████████████| 1500/1500 [2:00:24<00:00,  4.82s/it]


In [19]:
yta_guess = df['true_label'].isna() & df['top_comment'].str.contains('YTA|yta|Yta')
nta_guess = df['true_label'].isna() & df['top_comment'].str.contains('NTA|nta|Nta')
nah_guess = df['true_label'].isna() & df['top_comment'].str.contains('NAH|nah')
esh_guess = df['true_label'].isna() & df['top_comment'].str.contains('ESH|esh|Esh')
info_guess = df['true_label'].isna() & df['top_comment'].str.contains('INFO |info ')

0        0
1        1
2        0
3        0
4        1
        ..
13060    0
13061    0
13062    0
13063    0
13064    1
Length: 13065, dtype: int64

In [165]:
df.loc[yta_guess, 'true_label'] = 'YTA'
df.loc[nta_guess, 'true_label'] = 'NTA'
df.loc[esh_guess, 'true_label'] = 'ESH'
df.loc[info_guess, 'true_label'] = 'INF'
df.loc[nah_guess, 'true_label'] = 'NAH'

In [170]:
df['true_label'] = df['true_label'].replace({'INFO': 'INF'})

In [171]:
sub = df.iloc[:1500]
sub = sub[~sub['true_label'].isna()]

In [176]:
sub['gpt_reason'].iloc[0]

"Both the OP and Anthony are at fault in this situation. While it is understandable that the OP's wife may feel uncomfortable with the picture, it is not fair for her to demand that Anthony remove it completely or move it to a place where she cannot see it. It is Anthony's house, and he has the right to display sentimental items that hold meaning to him and his wife. However, Anthony could have been more empathetic and understanding towards the OP's wife's feelings and offered to find a compromise, such as moving the picture to a less prominent location. The OP is also at fault for not effectively communicating with both parties and finding a solution that respects everyone's boundaries and feelings. Ultimately, all parties involved should have been more considerate and open to finding a middle ground."

In [196]:
print(df.iloc[0].gpt_dilemma)

The OP and his wife are temporarily living with the OP's friend, Anthony. The wife is uncomfortable with a framed picture of Anthony and his wife kissing hanging on the wall and asks the OP to talk to Anthony about it. Anthony refuses to move the picture, causing tension between the OP, his wife, and Anthony.


In [None]:
df.iloc

In [185]:
sub.to_excel('aita.xlsx', index=False)

In [199]:
(sub['true_label'] == sub['gpt_label']).mean()

0.6136690647482015

In [178]:
sub[sub['true_label'] == 'NTA']['gpt_label'].value_counts(normalize=True)

NTA    0.687259
YTA    0.156371
ESH    0.108108
INF    0.040541
NAH    0.007722
Name: gpt_label, dtype: float64

In [179]:
sub[sub['true_label'] == 'YTA']['gpt_label'].value_counts(normalize=True)

YTA    0.556561
NTA    0.244344
ESH    0.122172
INF    0.076923
Name: gpt_label, dtype: float64

In [180]:
sub['true_label'].value_counts(normalize=True)

NTA    0.745324
YTA    0.158993
NAH    0.033813
ESH    0.033813
INF    0.028058
Name: true_label, dtype: float64

In [181]:
sub['gpt_label'].value_counts(normalize=True)

NTA    0.594964
YTA    0.235252
ESH    0.113669
INF    0.048921
NAH    0.007194
Name: gpt_label, dtype: float64

In [172]:
sub[['true_label', 'gpt_label']].value_counts(normalize=True)

true_label  gpt_label
NTA         NTA          0.512230
            YTA          0.116547
YTA         YTA          0.088489
NTA         ESH          0.080576
YTA         NTA          0.038849
NTA         INF          0.030216
YTA         ESH          0.019424
NAH         NTA          0.017266
INF         NTA          0.017266
ESH         YTA          0.013669
YTA         INF          0.012230
NAH         YTA          0.010072
ESH         ESH          0.010072
            NTA          0.009353
INF         YTA          0.006475
NTA         NAH          0.005755
NAH         INF          0.004317
INF         ESH          0.002878
NAH         NAH          0.001439
INF         INF          0.001439
NAH         ESH          0.000719
ESH         INF          0.000719
dtype: float64

In [97]:
df[(df[df['true_label'].isna()]['top_comment'].str.contains('NTA')) & df[df['true_label'].isna()]['top_comment'].str.contains('YTA'))]

  df[(df[df['true_label'].isna()]['top_comment'].str.contains('NTA') & df[df['true_label'].isna()]['top_comment'].str.contains('YTA'))]


IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).

In [94]:
df[df['true_label'].isna()]['top_comment'].str.contains('INFO').sum()

185

In [76]:
df.iloc[:1500].to_csv('aita_w_gpt.csv', index=False)

In [None]:
print(answer)

In [35]:
answer.split('Verdict')[0].replace('Dilemma:', '').strip()

"OP's mother slept with OP's friend without knowing it was OP's mother, and OP is upset and has cut off communication with both their mother and friend."

In [44]:
answer.split('Verdict: ')[1][:3]

'YTA'

In [49]:
df.iloc[:500].to_csv('aita_w_gpt.csv', index=False)

In [50]:
df

Unnamed: 0,submission_id,title,selftext,created_utc,permalink,score,flair_text,top_comment,true_label,gpt_dilemma,gpt_label,gpt_reason
0,yiplwk,AITA for asking my friend to move a picture of...,"\n\nMe (M32) and my wife, Dahlia (F28) lost ou...",1667251988,/r/AmItheAsshole/comments/yiplwk/aita_for_aski...,16582,Asshole,"YTA Let me guess, the issues you had staying w...",YTA,The OP and his wife are temporarily living wit...,ESH,Both the OP and Anthony are at fault in this s...
1,yiv572,AITA for asking my husband to stay with me whi...,Throwaway my family knows my account. I'll get...,1667266450,/r/AmItheAsshole/comments/yiv572/aita_for_aski...,4079,,"YTA you’re pregnant, not made of glass. You’ll...",,The OP is 7 months pregnant and cannot travel ...,NTA,The OP is not the asshole in this scenario. Be...
2,yimgaf,AITA for telling my SIL to stop talking about ...,My (37M) wife (37F) is pregnant with our first...,1667245059,/r/AmItheAsshole/comments/yimgaf/aita_for_tell...,9728,Not the A-hole,"I'm going with NTA, if it wasn't for her remar...",NTA,The OP's wife is pregnant after years of ferti...,NTA,The OP tried to address the issue with their b...
3,yin7pf,"AITA for wanting to meet my ""daughter"" after g...",Long story short: in my (40f) twenties I had a...,1667246573,/r/AmItheAsshole/comments/yin7pf/aita_for_want...,6889,Asshole,Honestly I think YTA. You decided not to be in...,YTA,The OP had a one-night stand and became pregna...,YTA,While the OP may have had good intentions in w...
4,yipi15,AITA for making my roommate replace my garlic ...,"Last week, I came home and my roommate told me...",1667251720,/r/AmItheAsshole/comments/yipi15/aita_for_maki...,2927,,Do I understand this correctly:\n\nShe's norma...,,The OP is upset with their roommate for using ...,YTA,While it is understandable that the OP was ups...
...,...,...,...,...,...,...,...,...,...,...,...,...
13060,1228c6b,AITA for complimenting people?,"So background, i 19f struggle with depression,...",1679797854,/r/AmItheAsshole/comments/1228c6b/aita_for_com...,61,Not the A-hole,\nNTA!! This is the post I needed to read toda...,NTA,,,
13061,122b9tz,AITA for telling my girlfriend that her mom's ...,My girlfriend had a dog for about 6 months bef...,1679805817,/r/AmItheAsshole/comments/122b9tz/aita_for_tel...,26,No A-holes here,I’m going with NAH but you are a little bit of...,NAH,,,
13062,122bek6,AITA for calling my girlfriend a “filet-o-fish?”,My (M24) girlfriend (F24) and I are often invi...,1679806211,/r/AmItheAsshole/comments/122bek6/aita_for_cal...,56,,"Happy meal, everything you need in one package?",,,,
13063,122csfx,AITA for showing my brother how much he’s over...,"My [21m] brother [20m] is overweight, about 20...",1679810416,/r/AmItheAsshole/comments/122csfx/aita_for_sho...,40,Not the A-hole,NTA\n\nPortion sizes matter when wanting to g...,NTA,,,
