In [1]:
import pandas as pd
import tqdm
import openai
import nltk
import os

from pyprojroot import here
from moral_foundations_llms import utils

In [2]:
# Location of the plain-text file holding the OpenAI API key, inside the
# current user's home directory.
# expanduser('~') resolves to $HOME when it is set (the previous behaviour) and
# still finds the home directory on platforms where HOME is not defined,
# instead of raising KeyError.
api_path = os.path.join(os.path.expanduser('~'), 'openai', 'api.txt')

In [3]:
# Read the API key from disk and hand it to the openai client; strip() drops
# surrounding whitespace such as a trailing newline.
with open(api_path, 'r') as key_file:
    openai.api_key = key_file.read().strip()

In [4]:
# Load the processed AITA posts; here() resolves the path against the project root.
aita_csv = here('data/aita_processed.csv')
df = pd.read_csv(aita_csv)

In [5]:
# Bookkeeping for the request loop:
#   failed    - positions of posts the loop could not complete
#   responses - raw model answer for each completed post, keyed by position
failed, responses = [], {}

In [None]:
# Ask gpt-3.5-turbo about every post and store the parsed parts of its answer
# in the `gpt_dilemma`, `gpt_label` and `gpt_reason` columns of `df`.
#
# The cell can be re-run after an interruption: posts whose raw answer is
# already in `responses` are re-parsed from that cache instead of being sent to
# the API again, and posts listed in `failed` are attempted again.
for post in tqdm.tqdm(range(df.shape[0])):
    # `post` is a 0-based position. Translate it to the matching index label
    # once so that positional reads (.iloc) and label-based writes (.loc)
    # address the same row even when `df` does not carry a default RangeIndex.
    # NOTE(review): assumes index labels are unique - confirm.
    row_label = df.index[post]

    answer = responses.get(post)
    if answer is None:
        # The sentence count of the post's top comment is passed on as the
        # `length` argument of the system message.
        n_sentences = len(nltk.sent_tokenize(df['top_comment'].iloc[post]))
        system_message = utils.create_system_message(identity="", length=f"{n_sentences} sentences")

        # Any per-request error is logged and the post is skipped, so a single
        # bad request cannot abort the whole batch.
        error_note = None
        try:
            response = openai.ChatCompletion.create(
                model='gpt-3.5-turbo',
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": df['selftext'].iloc[post]}
                ],
                temperature=0.4)
        except openai.error.APIError as e:
            error_note = f"OpenAI API returned an API Error: {e}"
        except openai.error.APIConnectionError as e:
            error_note = f"OpenAI API request failed to connect: {e}"
        except openai.error.ServiceUnavailableError as e:
            error_note = f"OpenAI API returned a Service Unavailable Error: {e}"
        except openai.error.RateLimitError as e:
            error_note = f"OpenAI API returned a Rate Limit Error: {e}"
        except openai.error.Timeout as e:
            error_note = f"OpenAI API request timed out: {e}"
        except openai.error.InvalidRequestError as e:
            error_note = f"OpenAI API rejected the request: {e}"

        if error_note is not None:
            print(f"Post {post}, {error_note}")
            if post not in failed:
                failed.append(post)
            continue

        answer = response['choices'][0]['message']['content']
        responses[post] = answer

    # Without a 'Verdict: ' marker there is no label to extract. Record the
    # post for manual inspection (its raw answer stays in `responses`) rather
    # than raising IndexError and stopping the run.
    verdict_parts = answer.split('Verdict: ')
    if len(verdict_parts) < 2:
        print(f"Post {post}, response has no 'Verdict: ' marker; raw answer kept in responses[{post}]")
        if post not in failed:
            failed.append(post)
        continue

    # Dilemma: the text before the first 'Verdict', minus the 'Dilemma:' tag.
    df.loc[row_label, 'gpt_dilemma'] = answer.split('Verdict')[0].replace('Dilemma:', '').strip()
    # Label: the first three characters that follow 'Verdict: '.
    df.loc[row_label, 'gpt_label'] = verdict_parts[1][:3]
    # Reason: the text after the last 'Reasoning:'. If that marker is absent,
    # the whole answer is stored.
    df.loc[row_label, 'gpt_reason'] = answer.split('Reasoning:')[-1].strip()

    # A post that failed on an earlier run and succeeded now is no longer failed.
    if post in failed:
        failed.remove(post)

  1%|▍                                                      | 93/11355 [17:41<574:41:01, 183.70s/it]

Post 92, OpenAI API returned an API Error: Bad gateway. {"error":{"code":502,"message":"Bad gateway.","param":null,"type":"cf_bad_gateway"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} {'Date': 'Wed, 09 Aug 2023 03:35:19 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '7f3ce424c8db1739-SJC', 'alt-svc': 'h3=":443"; ma=86400'}


  4%|██▎                                                 | 495/11355 [1:00:12<551:12:57, 182.72s/it]

Post 494, OpenAI API returned an API Error: Bad gateway. {"error":{"code":502,"message":"Bad gateway.","param":null,"type":"cf_bad_gateway"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} {'Date': 'Wed, 09 Aug 2023 04:17:50 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '7f3d226e0fd71563-SJC', 'alt-svc': 'h3=":443"; ma=86400'}


  6%|███                                                   | 634/11355 [1:11:08<14:27:42,  4.86s/it]

In [8]:
# Show the positions of the posts that the request loop recorded as failed.
failed

[92, 494]

In [11]:
# Number of posts for which a raw model answer has been stored.
len(responses)

632

In [12]:
# Count the rows that still have no GPT label.
pd.isna(df['gpt_label']).sum()

10723