In [26]:
import asyncio
import json
import openai
import os
import re 



async def call_openai_moderation(message_content):
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY environment variable not set")
    # client = openai.OpenAI()
    client = openai.AsyncClient(api_key=api_key)

    try:
        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are an AI content moderator specialized in identifying pig butchering scams. Identify if the following message is a pig butchering scam. Give a confidence score between 0 and 1."},
                {"role": "user", "content": message_content}
            ], 
            temperature=0.0,
        )
        # print("response: ", response)
        moderation_result = response.choices[0].message.content
        # parse out the confidence score
        confidence_score = re.findall(r'[0-1]\.[0-9]' ,moderation_result)[0]
        return [float(confidence_score), moderation_result]
    except Exception as e:
        print(f"Error calling OpenAI API: {e}")
        return "Error in moderation"
    
results = []
async def process_dataset():
    THRESHOLD = float(0.5)
    true_positives = 0
    true_negatives = 0
    false_positives = 0
    false_negatives = 0

    # Load the dataset
    with open('../dataset.json', 'r') as f:
        dataset = json.load(f)
    
    counter = 0

    for example in dataset:
        print("Processing example: ", example)
        true_label = example["1"]  # The true/false label
        message_content = list(example.keys())[1]  # The example message
        print("Message content: ", message_content)
        # return 
        
        # Call the OpenAI moderation API
        response = await call_openai_moderation(message_content)
        confidence_score = response[0]
        justification = response[1]

        if confidence_score > THRESHOLD:
            predicted_label = 1
            if true_label == 1:
                true_positives += 1
            else:
                false_positives += 1
        else:
            predicted_label = 0
            if true_label == 0:
                true_negatives += 1
            else:
                false_negatives += 1
        
        if confidence_score:
            results.append({
                "true_label": true_label,
                "predicted_label": predicted_label,
                "confidence_score": confidence_score,
                "message_content": message_content,
                "Chat_justification": justification,
            })
        counter +=1 
        if counter >= 5:
            break 

    # return 
    print("True positives: ", true_positives)
    print("True negatives: ", true_negatives)
    print("False positives: ", false_positives)
    print("False negatives: ", false_negatives)
    print("Accuracy: ", (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives))
    print("Precision: ", true_positives / (true_positives + false_positives))
    print("Recall: ", true_positives / (true_positives + false_negatives))
    print("F1 Score: ", 2 * (true_positives / (true_positives + false_positives)) * (true_positives / (true_positives + false_negatives)) / ((true_positives / (true_positives + false_positives)) + (true_positives / (true_positives + false_negatives))))


# Run the async function
await process_dataset()

# Save the results to a file for later evaluation
with open('moderation_results.json', 'w') as f:
    json.dump(results, f, indent=4)


Processing example:  {'1': 1, 'What i do here is Bitcoin mining and binary options trading, we manage the account and trader on your behalf with our ethical trading software that enables you to earn (10x) your investment capital in 24 hours from the trading section. Here you invest and make a profit, no loss and no internal cheating for you 100% beneficiary': 'itake a line of coke and buy shitcoins', 'train': 'train'}
Message content:  What i do here is Bitcoin mining and binary options trading, we manage the account and trader on your behalf with our ethical trading software that enables you to earn (10x) your investment capital in 24 hours from the trading section. Here you invest and make a profit, no loss and no internal cheating for you 100% beneficiary
Processing example:  {'1': 1, 'What i do here is Bitcoin mining and binary options trading, we manage the account and trader on your behalf with our ethical trading software that enables you to earn (10x) your investment capital in

In [30]:
with open('../cryptods.json' , 'r') as f:
    dataset = json.load(f)
print(len(dataset))


FileNotFoundError: [Errno 2] No such file or directory: '../cryptods.json'