## Save folding

In [None]:
from sklearn.model_selection import StratifiedKFold
import pandas as pd


# Fixed seed for the shuffled split: without it every run of this cell writes a
# different set of fold files, so downstream results cannot be reproduced.
FOLD_SEED = 42

# Rows without a label cannot be stratified, so they are dropped before splitting.
df = pd.read_csv('../../Tasks/tsnh/TSNH_uniform.csv').dropna(subset=['Class'])
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=FOLD_SEED)

# NOTE(review): `performance` is never read in this cell; kept only in case a later cell uses it.
performance = {}
for i, (train_index, test_index) in enumerate(skf.split(df['text'], df['Class'])):
    # split() yields positional indices, hence iloc rather than loc.
    train_df = df.iloc[train_index]
    test_df = df.iloc[test_index]

    # One train/test CSV pair per fold, numbered from 0.
    train_df.to_csv(f'../../Tasks/tsnh/dataset_5folding/train_fold_{i}.csv', index=False)
    test_df.to_csv(f'../../Tasks/tsnh/dataset_5folding/test_fold_{i}.csv', index=False)


## Example Selection

In [None]:
import pandas as pd
import json

label_column = 'Class'

# Few-shot selection settings.
MAX_EXAMPLE_CHARS = 180   # only texts shorter than this are eligible as examples
EXAMPLES_PER_CLASS = 5    # number of examples drawn per class in each fold
SAMPLE_SEED = 42          # fixed so the generated prompts are the same on every run

# Maps fold index -> prompt text (first the raw examples, later the full prompt).
context = {}

for i in range(5):
    # NOTE(review): the fold files are written under '../../Tasks/tsnh/dataset_5folding/'
    # by the splitting cell; confirm this relative path points at the same directory.
    train_df = pd.read_csv(f'../tsnh/dataset_5folding/train_fold_{i}.csv')
    # .str.len() gives NaN for missing text instead of raising the way len(NaN) does;
    # NaN < MAX_EXAMPLE_CHARS is False, so such rows are simply never selected.
    train_df['length'] = train_df['text'].str.len()
    # The length filter does not depend on the class, so compute it once per fold.
    short_df = train_df[train_df['length'] < MAX_EXAMPLE_CHARS]

    context[i] = ""
    print(f'Fold {i+1}')

    for j in train_df[label_column].unique():
        print(f'Class: {j}')
        sample = short_df[short_df[label_column] == j].sample(
            EXAMPLES_PER_CLASS, random_state=SAMPLE_SEED
        )
        for index, row in sample.iterrows():
            print(f'  {row["text"]}')
            context[i] += f'TEXT: <text_icl_begin> {row["text"]} <text_icl_end>\nLABEL: {j}\n'

# Wrap each fold's examples in the task instruction.
# NOTE(review): the instruction names the negative class "noncounter-speech" while the
# examples are labelled with the dataset value (printed above as "noncounter"); a reply of
# "noncounter-speech" will not match the label set downstream. Left unchanged so existing
# results stay comparable - confirm whether the wording should be aligned.
for i in range(5):
    context[i] = f"You are an expert in social psychology.Decide whether the **TEXT** is counterspeech or noncounter-speech. Reply with the single label on its own line — no extra words.\n\n### EXAMPLES\n{context[i]}### END EXAMPLES"

# Save the prompts to a JSON file; json.dump writes the integer fold keys as strings.
with open('icl_promtps/tsnh.json', 'w') as f:
    json.dump(context, f, indent=4)

Fold 1
Class: noncounter
  Hell yes don't support these Fucking jews
  State your name and innocence. It's shitty but if it's getting to the point they want to put you in cuffs just say your name and do what they say
  It's OK to be white
  God hates gay poeple god says they have no place in heaven with god or with jesus
  @audi940 Spoken like the Losing Muslim you are.  The more reactions I get from you Muslims the more convinced I am you are really very vile and evil people...
Class: counterspeech
  "your not a practicing Jew" how does she know that? he seems dignified more than I can say for this crazy lady.
  your an idiot and no i am not jewish but you need to see the hypocrisy and bigotry in what you say, you cant paint everyone with the same brush.
  I still don't get what they arrested him for.
  If we truly want a Wonderful World, then we must Create Wonderful individuals.
  😂😂😂 what an idiot
Fold 2
Class: counterspeech
  doO naT tAwLK tO MAi kWidZ
  Jeeez what a brain dead ro

### Prompt GPT-4o model and save results for each fold

In [2]:
import json 

# Load the saved in-context prompts. The inference loop looks them up with
# data[str(i)], i.e. keyed by the fold index as a string.
with open('../icl_promtps/tsnh.json', 'r') as prompt_file:
    data = json.load(prompt_file)

In [None]:
import pandas as pd
import openai, json
import pandas as pd
from tqdm import tqdm
import traceback, pickle

import os, sys, dotenv
# Load variables from a .env file (if any) into the process environment, so that
# OPENAI_API_KEY can be read below without hard-coding it in the notebook.
dotenv.load_dotenv()

# Shared OpenAI client used by get_response.
client = openai.OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
)

def get_response(message):
    """Send ``message`` to the chat model as a single user turn and return the reply text.

    Uses the module-level ``client``. The request carries a fixed system prompt
    and limits the completion to 10 tokens.

    Args:
        message: Text placed in the user turn.

    Returns:
        The ``content`` of the first choice in the completion.
    """
    chat_messages = [
        {"role": "system", "content": "You are an expert in social psychology."},
        {"role": "user", "content": message},
    ]
    completion = client.chat.completions.create(
        model="chatgpt-4o-latest",
        messages=chat_messages,
        max_tokens=10,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def get_inference(message, label_set, max_attempts=6):
    """Query the model until it answers with one of the labels in ``label_set``.

    Args:
        message: Full prompt text forwarded to ``get_response``.
        label_set: Iterable of accepted labels.
        max_attempts: Maximum number of calls to ``get_response``. Defaults to 6,
            the retry count that was previously hard-coded.

    Returns:
        The matching label exactly as it appears in ``label_set``, or the string
        ``"unknown"`` if no attempt produced an accepted label.
    """
    # Index the labels by their normalised form so the value returned is the
    # canonical label, not the raw reply. A reply such as "Counterspeech\n"
    # passes validation but would not compare equal to the gold label if it
    # were stored verbatim.
    canonical = {str(label).strip().lower(): label for label in label_set}

    for _ in range(max_attempts):
        try:
            reply = get_response(message)
        except Exception:
            # Best-effort retry: report the failure and try again. Catching
            # Exception (not a bare except) lets KeyboardInterrupt stop a long run.
            print(traceback.format_exc())
            continue

        if reply is None:
            # No text came back; treat it like any other unusable answer.
            continue

        key = reply.strip().lower()
        if key in canonical:
            return canonical[key]

    return "unknown"

# Directory that receives one predictions CSV per fold.
output_dir = '../outputs/tsnh'
# Create it before any model call: otherwise a missing directory is only discovered
# by to_csv after every request for the fold has already been made.
os.makedirs(output_dir, exist_ok=True)

for i in range(5):
    df = pd.read_csv(f'../../Tasks/tsnh/dataset_5folding/test_fold_{i}.csv')
    df = df.dropna(subset=['text'])

    # Prompt for this fold (the JSON file is keyed by the fold index as a string),
    # followed by the text to classify.
    fold_prompt = data[str(i)]
    df['text+template'] = df['text'].apply(
        lambda x: f"{fold_prompt}\n\nTEXT: <text_begin> {x} <text_end>\nLABEL:"
    )

    # The accepted labels are the same for every row, so compute them once per fold.
    label_set = df['Class'].unique()

    predictions = []
    for message in tqdm(df['text+template'], total=len(df)):
        predictions.append(get_inference(message, label_set=label_set))
    # Assign in one step so the column exists even when the fold has no rows.
    df['predicted_label'] = predictions

    df.to_csv(f'{output_dir}/test_fold_{i}.csv', index=False)

In [3]:
import numpy as np
from sklearn.metrics import f1_score
import pandas as pd

# Macro-averaged F1 per fold, computed from the saved prediction files.
# NOTE(review): the inference cell writes to '../outputs/tsnh/'; confirm this relative
# path resolves to the same directory.
f1_scores = []
for i in range(5):
    df = pd.read_csv(f'outputs/tsnh/test_fold_{i}.csv')
    y_true, y_pred = df['Class'], df['predicted_label']
    # Any predicted value that is not a gold label (e.g. "unknown") is presumably
    # counted as an extra class in the macro average - TODO confirm this is intended.
    f1_scores.append(f1_score(y_true, y_pred, average='macro'))

# np.std uses its default ddof=0 here, i.e. the population standard deviation.
print('Avg', np.mean(f1_scores))
print('Std', np.std(f1_scores))

Avg 0.6415591750866574
Std 0.025543425311282716
