In [1]:
import pandas as pd
import tqdm
import openai
import nltk
import os

from pyprojroot import here
from moral_foundations_llms import utils
from textwrap import dedent

In [2]:
# Location of the text file holding the OpenAI API key, under the user's home
# directory. expanduser('~') is used instead of os.environ['HOME'] so the cell
# does not raise KeyError on systems where HOME is not set (e.g. Windows).
api_path = os.path.join(os.path.expanduser('~'), 'openai', 'api.txt')

In [3]:
# Read the API key from disk and hand it to the openai client; surrounding
# whitespace (such as a trailing newline) is stripped from the file contents.
with open(api_path) as key_file:
    raw_key = key_file.read()
    openai.api_key = raw_key.strip()

In [4]:
# Load the AITA comments CSV; the path is resolved by pyprojroot's `here`.
df = pd.read_csv(
    filepath_or_buffer=here("data/aita_comments_Aug17_labels_update_Aug31.csv"),
)

In [5]:
# Show the column names only. The saved output of this cell is the column
# Index, and listing columns avoids rendering the entire frame.
df.columns

Index(['id', 'flair_text_x', 'comment', 'score_x', 'comment_author',
       'comment_time', 'comment_parent_id', 'comment_depth',
       'comment_permalink', 'author_comment_karma', 'author_link_karma',
       'submission_id', 'title', 'selftext', 'created_utc', 'permalink',
       'score_y', 'flair_text_y', 'top_comment', 'true_label', 'gpt_dilemma',
       'gpt_label', 'gpt_reason', 'care_p', 'fairness_p', 'loyalty_p',
       'authority_p', 'sanctity_p', 'care_sent', 'fairness_sent',
       'loyalty_sent', 'authority_sent', 'sanctity_sent',
       'moral_nonmoral_ratio', 'f_var', 'sent_var', 'comment_label'],
      dtype='object')

In [25]:
# Keep every row whose author is anything other than the AutoModerator bot.
df = df.loc[df["comment_author"].ne("AutoModerator")]

In [29]:
# Rows that still have no value in `comment_label`.
df_nan = df.loc[pd.isna(df["comment_label"])]

In [31]:
# System prompt sent with each labelling request. It defines the six labels
# the model may return (YTA, NTA, ESH, NAH, INFO, NONE) and tells it to prefer
# NONE when the judgement is unclear or conditional.
# The opening line break after the triple quote is kept, so the dedented text
# still starts with a newline.
instructions = dedent("""
    You are a tool for labeling social media posts.

    Please evaluate the following comment from the subreddit "Am I the Asshole".

    Determine whether the following comment is expressing one of the following judgements:

    - YTA or "You're the Asshole" is for scenarios where the OP is at fault in their situation.
    - NTA or "Not the Asshole" is for scenarios where the OP is NOT to blame and the other party described in their scenario is to blame.
    - ESH or "Everyone Sucks Here" is for scenarios where both parties are to blame- both people involved in the scenario should be held responsible.
    - NAH or "No Assholes Here" is for scenarios where neither party is to blame. All parties actions are justified. Nobody needs to be held accountable. Shit happens.
    - INFO or "Not Enough Info" is for situations where the OP never clarifies details that would determine the true judgment.
    - NONE when it seems like no specific judgement is being rendered.

    You should lean toward assigning "NONE" if it is not clear what the judgement is.
    Usually, commenters will use one of the labels in their post, but sometimes they imply it.
    A comment simply expressing a strong sentiment is not enough to guarantee a YTA assignment, for example.
    Additionally, a comment that implies multiple judgements, usually conditioned on unknown information, should also be assigned NONE.

    Please return the label you think should be assigned to the comment.""")

In [43]:
# Send a small batch of unlabelled comments (positions 31-39 of df_nan) to the
# chat model, using `instructions` as the system prompt, and print each answer.
#
# `failed` collects the positions whose request raised one of the handled
# OpenAI errors. It is initialised here because every except branch appends to
# it; without this, the first API error on a fresh kernel would raise NameError
# inside the handler. NOTE: this resets the list on every run of the cell.
failed = []

for post in tqdm.tqdm(range(31, 40)):
    try:
        response = openai.ChatCompletion.create(
            model='gpt-3.5-turbo',
            messages=[
                {"role": "system", "content": instructions},
                {"role": "user", "content": df_nan['comment'].iloc[post]}
            ],
            temperature=0.4)
    except openai.error.APIError as e:
        print(f"Post {post}, OpenAI API returned an API Error: {e}")
        failed.append(post)
        continue
    except openai.error.APIConnectionError as e:
        print(f"Post {post}, OpenAI API request failed to connect: {e}")
        failed.append(post)
        continue
    except openai.error.ServiceUnavailableError as e:
        print(f"Post {post}, OpenAI API returned a Service Unavailable Error: {e}")
        failed.append(post)
        continue

    # Only reached when the request succeeded: show the model's reply text.
    answer = response['choices'][0]['message']['content']
    print(answer)

        

 11%|███████                                                        | 1/9 [00:00<00:07,  1.06it/s]

YTA


 22%|██████████████                                                 | 2/9 [00:01<00:04,  1.46it/s]

NONE


 33%|█████████████████████                                          | 3/9 [00:01<00:03,  1.65it/s]

NONE


 44%|████████████████████████████                                   | 4/9 [00:02<00:03,  1.37it/s]

YTA


 56%|███████████████████████████████████                            | 5/9 [00:03<00:02,  1.58it/s]

NTA


 67%|██████████████████████████████████████████                     | 6/9 [00:03<00:01,  1.64it/s]

NTA


 78%|█████████████████████████████████████████████████              | 7/9 [00:04<00:01,  1.41it/s]

YTA


 89%|████████████████████████████████████████████████████████       | 8/9 [00:05<00:00,  1.54it/s]

NONE


100%|███████████████████████████████████████████████████████████████| 9/9 [00:05<00:00,  1.56it/s]

NONE





In [46]:
# Print the raw text of the comment at position 35, one of the positions in
# the range(31, 40) batch sent to the model.
print(df_nan["comment"].iat[35])

NTA: tell her and her flying monkeys that she abandoned them for 10 years. They don’t know her anymore and don’t want anything to do with her. If that changes you’ll inform her if not, she needs to leave you alone. In the meantime, I suggest you inform the court that she showed up so she can start paying you child support.
