In [43]:
import asyncio
import json
import openai
import os
import re 



# A decimal in [0, 1] such as "0.85" or "1.0" that is not a fragment of a longer
# number (the lookbehind rejects the "0.5" inside "10.5"; "1.5" never matches).
_DECIMAL_SCORE_RE = re.compile(r'(?<![\d.])(?:0\.\d+|1\.0+)(?!\d)')
# Fallback for replies that give a bare integer directly after a label, e.g.
# "Confidence score: 1". Requiring the label keeps numbered-list markers
# ("1. ...") from being mistaken for a score.
_LABELLED_INT_SCORE_RE = re.compile(
    r'(?:confidence|score)[\s:=*-]*(?:(?:is|of)\s*)?([01])(?!\.?\d)',
    re.IGNORECASE,
)


def _extract_confidence_score(text):
    """Return the first confidence score in [0, 1] found in ``text``, or None."""
    if not text:
        return None
    decimal_match = _DECIMAL_SCORE_RE.search(text)
    if decimal_match:
        return float(decimal_match.group(0))
    labelled_match = _LABELLED_INT_SCORE_RE.search(text)
    if labelled_match:
        return float(labelled_match.group(1))
    return None


async def call_openai_moderation(message_content):
    """Ask the chat model whether a message is a pig butchering scam.

    Args:
        message_content: Text of the message to classify; sent as the user turn.

    Returns:
        ``[confidence_score, moderation_result]`` where ``confidence_score`` is a
        float in [0, 1] parsed from the reply and ``moderation_result`` is the
        full reply text. Returns ``False`` when the API call fails or the reply
        contains no recognisable score (a message is printed in both cases).

    Raises:
        ValueError: If the ``OPENAI_API_KEY`` environment variable is not set.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY environment variable not set")
    client = openai.AsyncClient(api_key=api_key)

    try:
        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are an AI content moderator specialized in identifying pig butchering scams. Identify if the following message is a pig butchering scam. Give a confidence score between 0 and 1."},
                {"role": "user", "content": message_content}
            ],
            temperature=0.0,
        )
        moderation_result = response.choices[0].message.content
    except Exception as e:
        # Best-effort: one failed request must not abort a whole dataset run.
        print(f"Error calling OpenAI API: {e}")
        return False

    confidence_score = _extract_confidence_score(moderation_result)
    if confidence_score is None:
        # Report parse failures separately so they are not mistaken for API errors.
        print(f"No confidence score found in model response: {moderation_result!r}")
        return False
    return [confidence_score, moderation_result]
    
# Per-example records from the most recent process_dataset() run; the notebook
# dumps this list to JSON after the run.
results = []


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


async def process_dataset(dataset_path='../cryptods.json', threshold=0.7):
    """Score every dataset message and print confusion-matrix metrics.

    Each example is sent to ``call_openai_moderation``. A confidence score
    strictly greater than ``threshold`` counts as a positive prediction.
    Examples for which the moderation call returns a falsy value are skipped
    and reported in the "Skipped" line instead of silently disappearing.

    Args:
        dataset_path: Path to a JSON list of objects with "TRUELABEL" (compared
            against 1 and 0) and "MESSAGE" keys.
        threshold: Decision threshold applied to the confidence score.

    Returns:
        A dict with the confusion-matrix counts, the number of skipped examples
        and the accuracy/precision/recall/f1 values. A ratio whose denominator
        is zero is reported as 0.0 rather than raising ZeroDivisionError.

    Side effects:
        Clears and refills the module-level ``results`` list, so repeated runs
        do not accumulate duplicate records.
    """
    threshold = float(threshold)
    true_positives = 0
    true_negatives = 0
    false_positives = 0
    false_negatives = 0
    skipped = 0

    # Mutate in place (no rebinding) so existing references to `results` stay valid.
    results.clear()

    # Load the dataset
    with open(dataset_path, 'r', encoding='utf-8') as f:
        dataset = json.load(f)

    for example in dataset:
        print("Processing example: ", example)
        true_label = example["TRUELABEL"]  # The true/false label
        message_content = example["MESSAGE"]  # The example message
        print("Message content: ", message_content)

        response = await call_openai_moderation(message_content)
        if not response:
            skipped += 1
            continue

        confidence_score, justification = response
        print("Confidence score: ", confidence_score)
        print("true_label: ", true_label)

        if confidence_score > threshold:
            predicted_label = 1
            if true_label == 1:
                true_positives += 1
            else:
                false_positives += 1
        else:
            predicted_label = 0
            if true_label == 0:
                true_negatives += 1
            else:
                false_negatives += 1

        # Record every scored example. A score of exactly 0.0 is a valid result
        # and must not be dropped by a truthiness check.
        results.append({
            "true_label": true_label,
            "predicted_label": predicted_label,
            "confidence_score": confidence_score,
            "message_content": message_content,
            "Chat_justification": justification,
        })

    scored = true_positives + true_negatives + false_positives + false_negatives
    accuracy = _safe_ratio(true_positives + true_negatives, scored)
    precision = _safe_ratio(true_positives, true_positives + false_positives)
    recall = _safe_ratio(true_positives, true_positives + false_negatives)
    f1_score = _safe_ratio(2 * precision * recall, precision + recall)

    print("True positives: ", true_positives)
    print("True negatives: ", true_negatives)
    print("False positives: ", false_positives)
    print("False negatives: ", false_negatives)
    print("Skipped (no usable response): ", skipped)
    print("Accuracy: ", accuracy)
    print("Precision: ", precision)
    print("Recall: ", recall)
    print("F1 Score: ", f1_score)

    return {
        "threshold": threshold,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "false_positives": false_positives,
        "false_negatives": false_negatives,
        "skipped": skipped,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1_score,
    }


# Run the async function
await process_dataset()

# Save the results to a file for later evaluation
with open('moderation_results.json', 'w') as f:
    json.dump(results, f, indent=4)


Processing example:  {'TRUELABEL': 1, 'MESSAGE': 'What i do here is Bitcoin mining and binary options trading, we manage the account and trader on your behalf with our ethical trading software that enables you to earn (10x) your investment capital in 24 hours from the trading section. Here you invest and make a profit, no loss and no internal cheating for you 100% beneficiary', 'TRAINTEST': 'train'}
Message content:  What i do here is Bitcoin mining and binary options trading, we manage the account and trader on your behalf with our ethical trading software that enables you to earn (10x) your investment capital in 24 hours from the trading section. Here you invest and make a profit, no loss and no internal cheating for you 100% beneficiary
Confidence score:  0.9
true_label:  1
Processing example:  {'TRUELABEL': 1, 'MESSAGE': 'itake a line of coke and buy shitcoins', 'TRAINTEST': 'train'}
Message content:  itake a line of coke and buy shitcoins
Confidence score:  0.1
true_label:  1
Proc

In [40]:
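Total = 196 examples in the dataset (the counts below sum to 191; presumably the remaining examples returned no parseable score and were skipped)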
threshold = 0.7
True positives:  47
True negatives:  86
False positives:  7
False negatives:  51
Accuracy:  0.6963350785340314
Precision:  0.8703703703703703
Recall:  0.47959183673469385
F1 Score:  0.6184210526315789

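Total = 196 examples in the dataset (the counts below sum to 193; presumably the remaining examples returned no parseable score and were skipped)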
threshold = 0.6
True positives:  57
True negatives:  88
False positives:  6
False negatives:  42
Accuracy:  0.7512953367875648
Precision:  0.9047619047619048
Recall:  0.5757575757575758
F1 Score:  0.7037037037037038


threshold = 0.5
True positives:  63
True negatives:  89
False positives:  8
False negatives:  36
Accuracy:  0.7755102040816326
Precision:  0.8873239436619719
Recall:  0.6363636363636364
F1 Score:  0.7411764705882353



threshold = 0.4
True positives:  57
True negatives:  90
False positives:  6
False negatives:  41
Accuracy:  0.7577319587628866
Precision:  0.9047619047619048
Recall:  0.5816326530612245
F1 Score:  0.7080745341614907

SyntaxError: invalid syntax (1688410041.py, line 2)

[2024-06-03 14:37:54] [ERROR   ] discord.client: Attempting a reconnect in 0.30s
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniconda/base/lib/python3.10/site-packages/discord/client.py", line 659, in connect
    await self.ws.poll_event()
  File "/opt/homebrew/Caskroom/miniconda/base/lib/python3.10/site-packages/discord/gateway.py", line 646, in poll_event
    raise ConnectionClosed(self.socket, shard_id=self.shard_id, code=code) from None
discord.errors.ConnectionClosed: Shard ID None WebSocket closed with 1000
[2024-06-03 14:37:54] [ERROR   ] discord.client: Attempting a reconnect in 0.30s
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniconda/base/lib/python3.10/site-packages/discord/client.py", line 659, in connect
    await self.ws.poll_event()
  File "/opt/homebrew/Caskroom/miniconda/base/lib/python3.10/site-packages/discord/gateway.py", line 646, in poll_event
    raise ConnectionClosed(self.socket, shard_id=self.shard_id, code=code) 

Group 21 Bot has connected to Discord! It is these guilds:
 - Trust and Safety - Spring 2024
Press Ctrl-C to quit.
