# Analyze the OpenAI flagging and clean out any remotely offensive data

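OpenAI is very strict about fine-tuning data, so we have to remove anything even remotely offensive from the training set. This notebook runs every message through the OpenAI moderation endpoint (`omni-moderation-latest`) and stores the `flagged` verdict and the per-category scores on each row.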

In [12]:
import pandas as pd

In [31]:
# Load the persona/tweet table and give every row an empty (None) `flagged`
# value, which the moderation cells use to tell which rows still need scoring.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
df = pd.read_pickle('personas_and_tweets.df.pkl').assign(flagged=None)


In [36]:
# Show the per-category moderation scores of the first result.
# NOTE(review): in the visible notebook `responsejson` is only assigned in
# cells further down, so this cell relies on out-of-order execution and will
# fail on a fresh kernel unless one of those cells has been run first.
responsejson['results'][0]['category_scores']

{'harassment': 0.04012942841051969,
 'harassment_threatening': 0.000957539514024588,
 'hate': 0.007229441470329221,
 'hate_threatening': 1.0889691002655445e-05,
 'illicit': 3.459916031782155e-05,
 'illicit_violent': 1.1061159714638084e-05,
 'self_harm': 0.0005058990298734508,
 'self_harm_instructions': 0.00022597546832214694,
 'self_harm_intent': 0.00023231015172245714,
 'sexual': 0.0007293448983675677,
 'sexual_minors': 9.028039015031105e-06,
 'violence': 0.016203406374785012,
 'violence_graphic': 8.349627818261147e-06,
 'harassment/threatening': 0.000957539514024588,
 'hate/threatening': 1.0889691002655445e-05,
 'illicit/violent': 1.1061159714638084e-05,
 'self-harm/intent': 0.00023231015172245714,
 'self-harm/instructions': 0.00022597546832214694,
 'self-harm': 0.0005058990298734508,
 'sexual/minors': 9.028039015031105e-06,
 'violence/graphic': 8.349627818261147e-06}

In [38]:
# PARALLELIZED
import pandas as pd
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor, as_completed

# Initialize the OpenAI client.
# No key is hardcoded in the notebook: when `api_key` is omitted the client
# reads it from the OPENAI_API_KEY environment variable.
openaiclient = OpenAI()

# Function to process each row
def process_row(index, row):
    """Run one row's message through the OpenAI moderation endpoint.

    Args:
        index: Index label of the row; echoed back in the result so the caller
            can write the values to the right place.
        row: Mapping-like row (e.g. a ``pd.Series`` from ``df.iterrows()``)
            with a ``'message'`` entry holding the text to moderate and,
            optionally, a ``'flagged'`` entry from an earlier run.

    Returns:
        ``None`` when the row already has a non-null ``'flagged'`` value,
        otherwise a dict with the keys ``'index'``, ``'flagged'`` and
        ``'category_scores'``.
    """
    # Decide from the row itself instead of consulting the global `df`:
    # `row.get` yields None when there is no 'flagged' entry, so the check is
    # equivalent for rows of `df` but no longer depends on notebook state.
    if pd.notnull(row.get('flagged')):
        return None  # Already moderated; skip the API call

    response = openaiclient.moderations.create(
        model="omni-moderation-latest",
        input=row['message'],
    )

    return {
        'index': index,
        'flagged': response.results[0].flagged,
        'category_scores': response.model_dump()['results'][0]['category_scores'],
    }

# Using ThreadPoolExecutor for parallel processing
checkpoint_path = 'personas_and_tweets_flagged2.df.pkl'
checkpoint_every = 100  # Save after this many newly written rows
rows_written = 0
failed_indices = []

with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
    # Map every future back to its row label so a failure can be attributed.
    futures = {
        executor.submit(process_row, index, row): index
        for index, row in df.iterrows()
    }

    for future in as_completed(futures):
        try:
            result = future.result()
        except Exception as exc:
            # A single failed request must not abort the loop and discard the
            # results of all other requests. The row's 'flagged' value is left
            # untouched, so a later run can pick it up again.
            failed_indices.append(futures[future])
            print(f"Moderation failed for row {futures[future]!r}: {exc!r}")
            continue

        if not result:
            continue  # Row was skipped by process_row

        df.at[result['index'], 'flagged'] = result['flagged']
        for k, v in result['category_scores'].items():
            df.at[result['index'], k] = v

        # Save progress periodically. Counting written rows (rather than
        # testing the index label) works for any index type.
        rows_written += 1
        if rows_written % checkpoint_every == 0:
            df.to_pickle(checkpoint_path)

# Final save
df.to_pickle(checkpoint_path)

if failed_indices:
    print(f"{len(failed_indices)} row(s) failed moderation and were not updated.")


In [None]:
# NON-PARALLELIZED

from openai import OpenAI
# With no `api_key` argument the client reads OPENAI_API_KEY from the
# environment, so no key is hardcoded in the notebook.
openaiclient = OpenAI()
for index, row in df.iterrows():
    # Skip rows that already have a moderation verdict to avoid redundant API calls
    if 'flagged' in df.columns and pd.notnull(row.get('flagged')):
        continue

    # Score this row's message with the moderation endpoint
    response = openaiclient.moderations.create(
        model="omni-moderation-latest",
        input=row['message'],
    )
    # Outer double quotes: reusing the same quote character inside an f-string
    # replacement field is a SyntaxError before Python 3.12.
    print(f"{row['message']}: {response.results[0].flagged}")

    df.at[index, 'flagged'] = response.results[0].flagged

    # Store every per-category score in a column of the same name
    responsejson = response.model_dump()
    for k, v in responsejson['results'][0]['category_scores'].items():
        df.at[index, k] = v

    # Checkpoint after every row (rewrites the whole pickle each time)
    df.to_pickle('personas_and_tweets_flagged2.df.pkl')


df.to_pickle('personas_and_tweets_flagged2.df.pkl')



In [14]:
from openai import OpenAI
# With no `api_key` argument the client reads OPENAI_API_KEY from the
# environment, so no key is hardcoded in the notebook.
openaiclient = OpenAI()

In [9]:

# Smoke test: moderate only the message stored at row label 0.
response = openaiclient.moderations.create(
    input=df.at[0, 'message'],
    model="omni-moderation-latest",
)


In [27]:
# Dump the moderation response into a plain dictionary so it can be indexed
# with ['results'][0][...] for inspection.
responsejson = response.model_dump()

In [29]:
# Show the per-category moderation scores of the first result; these are the
# keys that the moderation loops write into `df` as columns.
responsejson['results'][0]['category_scores']

{'harassment': 0.04012942841051969,
 'harassment_threatening': 0.000957539514024588,
 'hate': 0.007229441470329221,
 'hate_threatening': 1.0889691002655445e-05,
 'illicit': 3.459916031782155e-05,
 'illicit_violent': 1.1061159714638084e-05,
 'self_harm': 0.0005058990298734508,
 'self_harm_instructions': 0.00022597546832214694,
 'self_harm_intent': 0.00023231015172245714,
 'sexual': 0.0007293448983675677,
 'sexual_minors': 9.028039015031105e-06,
 'violence': 0.016203406374785012,
 'violence_graphic': 8.349627818261147e-06,
 'harassment/threatening': 0.000957539514024588,
 'hate/threatening': 1.0889691002655445e-05,
 'illicit/violent': 1.1061159714638084e-05,
 'self-harm/intent': 0.00023231015172245714,
 'self-harm/instructions': 0.00022597546832214694,
 'self-harm': 0.0005058990298734508,
 'sexual/minors': 9.028039015031105e-06,
 'violence/graphic': 8.349627818261147e-06}

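This notebook only computes and saves the moderation flags and category scores; the actual removal of offensive rows is done elsewhere.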