## Example Selection

In [None]:
import pandas as pd
import json

label_column = 'Class'


train_df = pd.read_csv(f'../../Tasks/hs_cs/train.csv')
train_df['length'] = train_df['text'].apply(lambda x: len(x))
mapping = {0: "hatespeech", 1: "counterspeech", 2: "neither"}


train_df[label_column] = train_df[label_column].apply(lambda x: mapping[x])

context = ""
for j in train_df[label_column].unique():
	print(f'Class: {j}')
	sample = train_df[train_df['length'] < 180]
	sample = sample[sample[label_column] == j].sample(5)
	for index, row in sample.iterrows():
		print(f'  {row["text"]}')
		context += f'<text_icl_begin> CONTEXT: {row["context"]}\nCOMMENT: {row["text"]} <text_icl_end>\nLABEL: {j}\n'

#save the context into a json file
context = f"You are an expert in social psychology.\nAlways read the CONTEXT before the COMMENT and decide whether the comment is  CONTERSPEECH , HATESPEECH or NEITHER. Reply with the single label on its own line — no extra words.\n\n### EXAMPLES\n{context}### END EXAMPLES"
with open('../icl_promtps/hs_cs.json', 'w') as f:
	json.dump([context], f, indent=4)

Class: neither
  > Simply pointing out that women just get shafted in general.  Biased much?
  Find one example of a woman getting punished for consensual sex.  I'll wait.
  The poor mail carrier in that neighborhood doesn't deserve that
  Let's not lump Ohioans in a large ignorant group just because some Ohioans are this way.
  Source please? I'd love to use that in the future.
Class: hatespeech
  The radicals run the feminist movement. Always have.
  YiKeS! ThIs ThReAd iS nOw LoCkEd BeCaUsE y'All CaN't Be CiViL
  I would prefer to continue recognizing fat people as fat and disgusting, thanks.
  For the majority of them, that "different" feeling they have after transitioning is called regret. 
  Thank god for that. Church ain't nothing but evil power hungry thugs
Class: counterspeech
  No, this IS inequality, men have to pass a harder fitness for the same job.
  What is a Rubin sentence?
  But why does she feel the need to broadcast her life and choices on social media?
  It is disgus

### Pompt GPT-4o model and save results for each fold

In [1]:
import json 

with open('../icl_promtps/hs_cs.json', 'r') as f:
	data = json.load(f)[0]

In [None]:
import pandas as pd
import openai, json
import pandas as pd
from tqdm import tqdm
import traceback, pickle

import os, sys, dotenv
dotenv.load_dotenv()

client = openai.OpenAI(api_key = os.getenv("OPENAI_API_KEY"))

def get_response( message ): 
  
    response = client.chat.completions.create(
      model="chatgpt-4o-latest",
      # logprobs = True,
      messages=[
        {"role": "system", "content": "You are an expert in social psychology."},
        {"role": "user", "content": message},
      ],
       max_tokens = 10,

    )
    # print(response.choices[0].message.content)
    return response.choices[0].message.content


def get_inference(message, label_set):
    
	pred = "unknown"
	for i in range(6):
		try:
			z = get_response(message)
			if z.strip().lower() in label_set:
				pred = z
				break
		except:
			print(traceback.format_exc())
			pass

	return pred

df = pd.read_csv(f'../../Tasks/hs_cs/test.csv')
df = df.dropna(subset=['text'])
mapping = {0: "hatespeech", 1: "counterspeech", 2: "neither"}

label_column = 'Class'
df[label_column] = df[label_column].apply(lambda x: mapping[x])

for i in range(4):

	for index, row in tqdm(df.iterrows(), total=len(df)):

		prompt = f"{data}\n\nCONTEXT: {row['context']}\nTEXT:{row['text']}\nLABEL:"
		
		response = get_inference(prompt, label_set=list(mapping.values()))
		df.at[index, 'predicted_label'] = response

	df.to_csv(f'../outputs/hs_cs/test_{i}.csv', index=False)
		

In [None]:
from sklearn.metrics import f1_score
import pandas as pd
import numpy as np

f1_scores = []
for i in range(4):
	df = pd.read_csv(f'../outputs/hs_cs/test_{i}.csv')
	f1_scores += [f1_score(df['Class'], df['predicted_label'], average='macro')]
	
print('Avg', np.mean(f1_scores))
print('Std', np.std(f1_scores))

Avg 0.3900911180942862
Std 0.00603082381133632
