# Preparation

In [1]:
# Mount Google Drive under /content/drive; the dataset and the prediction
# files used below live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
import subprocess
import threading

# Install Ollama by piping its install script into sh
!curl -fsSL https://ollama.com/install.sh | sh

>>> Downloading ollama...
############################################################################################# 100.0%
>>> Installing ollama to /usr/local/bin...
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [3]:
def start_ollama():
    """Launch ``ollama serve`` from a background daemon thread.

    The call returns immediately; the thread blocks inside ``subprocess.run``
    for as long as the server process is alive.
    """
    server_thread = threading.Thread(
        target=subprocess.run,
        args=(["ollama", "serve"],),
        daemon=True,
    )
    server_thread.start()

In [4]:
def pull_model(local_llm):
    """Download the Ollama model named ``local_llm``.

    The previous implementation ran ``!ollama pull local_llm``, which passes
    the literal text ``local_llm`` to the shell instead of the argument's
    value, so the requested model was never pulled by this function.

    The command is run with an argument list (no shell), so the model name is
    never interpreted by a shell. A failed pull is reported but does not
    raise, matching the best-effort behaviour of the original shell magic.

    Args:
        local_llm: model name understood by ``ollama pull`` (e.g. "mistral").
    """
    completed = subprocess.run(["ollama", "pull", local_llm])
    if completed.returncode != 0:
        print(f"warning: 'ollama pull {local_llm}' exited with code {completed.returncode}")

In [5]:
def start_model(local_llm):
    """Run ``ollama run <local_llm>`` from a background daemon thread.

    Args:
        local_llm: name of the model handed to ``ollama run``.
    """
    runner_thread = threading.Thread(
        target=subprocess.run,
        args=(["ollama", "run", local_llm],),
        daemon=True,
    )
    runner_thread.start()

In [6]:
%%capture --no-stderr
# Install/upgrade the notebook's dependencies; only scikit-learn is pinned (1.3),
# the other packages are upgraded to whatever version is current (-U).
%pip install -U scikit-learn==1.3 langchain-ai21 langchain_community tiktoken langchainhub langchain

In [7]:
import os
from getpass import getpass

# SECURITY: an API key used to be hardcoded in this cell. Any key that was
# committed that way should be considered leaked and be revoked/rotated.
# The key is now taken from the environment, or prompted for without echo.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass("LANGCHAIN_API_KEY: ")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
#os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
#os.environ["LANGCHAIN_PROJECT"]="hate-speech-project"

In [8]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tqdm import tqdm
from langchain_community.llms import Ollama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

In [9]:
# TODO: roughly 7,000 test texts

# Load the full dataset from Drive; the "Content" and "Label" columns are used below.
df = pd.read_csv("/content/drive/MyDrive/predictions_hate/HateSpeechDatasetBalanced.csv")

print(len(df))

#label_0 = df[df["Label"]==0] #no hate speech
#label_1 = df[df["Label"]==1] #hate speech
#print(f"Label con valore 0: {len(label_0)}")
#print(f"Label con valore 1: {len(label_1)}")

# Sample 1% of the DataFrame; the fixed random_state keeps the sample
# (and therefore the test set) the same across runs.
sampled_df = df.sample(frac=0.01, random_state=42)

# Texts to classify and their ground-truth labels.
x_test = sampled_df["Content"]
y_test = sampled_df["Label"]

print(len(x_test))
print(len(y_test))

# Class balance of the sampled test set.
label_0 = sampled_df[sampled_df["Label"]==0] #no hate speech
label_1 = sampled_df[sampled_df["Label"]==1] #hate speech
print(f"Label con valore 0: {len(label_0)}")
print(f"Label con valore 1: {len(label_1)}")

726119
7261
7261
Label con valore 0: 3584
Label con valore 1: 3677


In [10]:
# Few-shot demonstrations: "input" is the text shown as the user turn and
# "output" the expected assistant answer ("yes" = hate speech, "no" = not).
# create_prompt() takes the first `shot` entries, so the order of this list
# determines which demonstrations the 2-/4-/8-shot prompts contain.
examples = [
    {"input": "We need to keep our culture pure.", "output": "yes"},
    {"input": "Fuck you.", "output": "yes"},
    {"input": "It’s important to respect other people's choices.", "output": "no"},
    {"input": "Some groups of people are bastards", "output": "yes"},
    {"input": "I think the weather is nice today.", "output": "no"},
    {"input": "Go back to where you came from, asshole.", "output": "yes"},
    {"input": "Have a great day!.", "output": "no"},
    {"input": "They're just different, and that's not a bad thing, but they shouldn't try to change us.", "output": "yes"},
]

In [11]:
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate

def create_prompt(shot):
    """Build the chat prompt used to classify one text as hate speech or not.

    Args:
        shot: how many leading entries of the module-level ``examples`` list
            to include as demonstrations. When ``shot`` is not greater than 0
            the prompt contains no demonstrations.

    Returns:
        A ``ChatPromptTemplate`` made of the system instruction, the optional
        demonstrations, and a final user message with an ``input`` placeholder.
    """
    # Same instruction text for the zero-shot and few-shot variants.
    system_text = """You are an helpful assistant who has to detect the presence of hate speech.
                Hate speech is speech that attacks a person or group based on attributes such as race, religion, ethnic origin, national origin, sex, disability, sexual orientation, or gender identity.
                You have to answer "yes" if it contains hate speech, or "no" if it doesn't contain hate speech.
                Give only the answer "yes" or "no", NO PREAMBLE, NO EXPLANATIONS."""

    # Each demonstration is rendered as a user turn followed by the assistant answer.
    demo_template = ChatPromptTemplate.from_messages(
        [("user", "{input}"), ("assistant", "{output}")]
    )

    # Constructed unconditionally (as before), even when it is not used below.
    demos = FewShotChatMessagePromptTemplate(
        example_prompt=demo_template,
        examples=examples[:shot],  # :2 two-shot, :4 four-shot, :8 eight-shot
    )

    messages = [("system", system_text)]
    if shot > 0:
        messages.append(demos)
    messages.append(("user", "{input}"))

    return ChatPromptTemplate.from_messages(messages)

In [12]:
#import functools
import sys
import io

def hate_speech_detection(llm, shot):
    """Return the classification chain: the ``shot``-shot prompt piped into ``llm``."""
    return create_prompt(shot) | llm

In [13]:
def predict(llm,x_test,shot):
    """Classify every text in ``x_test`` with the ``shot``-shot chain.

    The model's answer is parsed by looking for the first whole word "yes" or
    "no" (case-insensitive). The previous check, ``"no" in answer.lower()``,
    was a substring test, so any answer merely containing the letters "no"
    (e.g. "Yes, this denotes ...", "I do not know", "cannot") was counted as
    "no hate speech".

    Args:
        llm: the language model to pipe the prompt into.
        x_test: iterable of texts to classify.
        shot: number of few-shot demonstrations to put in the prompt.

    Returns:
        A list with one int per text: 0 = no hate speech, 1 = hate speech.
        Answers containing neither word fall back to 1, which is the same
        default the original else-branch applied.
    """
    import re  # local import: the notebook's import cell does not provide it

    verdict_pattern = re.compile(r"\b(yes|no)\b")
    chain = hate_speech_detection(llm,shot)

    y_pred = []
    for x in tqdm(x_test):
        answer = chain.invoke({"input": x})
        verdict = verdict_pattern.search(answer.lower())
        if verdict is not None and verdict.group(1) == "no":
            y_pred.append(0) #no hate speech
        else:
            y_pred.append(1) #hate speech (or unparseable answer)
    return y_pred

# Testing

In [None]:
import time
import json

# Models and few-shot sizes to generate predictions for in this run.
# NOTE(review): the evaluation cells further down also read 0-shot files and
# files for "llama31" and "gemma2"; this configuration does not produce those,
# so they presumably come from earlier runs with different settings — confirm.
models = ["mistral"]
shots = [2,4,8]

def write_file(filename,content):
    """Serialize ``content`` as JSON (4-space indent) into ``filename``, overwriting it."""
    with open(filename, 'w') as handle:
        json.dump(content, handle, indent=4)

# For every model: launch the Ollama server, fetch and start the model, then
# produce and save one prediction file per few-shot setting.
for model in models:
    start_ollama()
    pull_model(model)
    start_model(model)
    # Fixed pause before the first request (presumably to let the server and
    # the model finish starting up).
    time.sleep(500)
    llm = Ollama(model=model, temperature=0)
    for shot in shots:
        y_pred = predict(llm,x_test,shot)
        # Predictions for "llama3.1" are stored under the dot-free name "llama31".
        if model=="llama3.1":
            out_path = f"/content/drive/MyDrive/predictions_hate/prediction_hate_speech_llama31_{shot}_shot.json"
        else:
            out_path = f"/content/drive/MyDrive/predictions_hate/prediction_hate_speech_{model}_{shot}_shot.json"
        write_file(out_path, y_pred)

# Evaluation

In [32]:
import json

def load_json(filename):
    """Open ``filename`` for reading and return its parsed JSON content."""
    with open(filename, 'r') as handle:
        parsed = json.load(handle)
    return parsed

# Label 0: no hate speech
# Label 1: hate speech

# Choose which saved prediction file to evaluate.
model = "llama31" # alternatives: gemma2, mistral, llama31
shot = 0 # alternatives: 2, 4, 8
y_pred = load_json(f"/content/drive/MyDrive/predictions_hate/prediction_hate_speech_{model}_{shot}_shot.json")

# Compare the loaded predictions against the ground-truth labels of the sampled test set.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred, labels=[0,1], output_dict=True)

# Flatten the 2x2 matrix row by row; these four names are not used again in this cell.
TN, FP, FN, TP = conf_matrix.ravel()

print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{class_report}")

Accuracy: 0.6607905247211128
Confusion Matrix:
[[3027  557]
 [1906 1771]]
Classification Report:
{'0': {'precision': 0.613622542063653, 'recall': 0.8445870535714286, 'f1-score': 0.7108136667840789, 'support': 3584.0}, '1': {'precision': 0.7607388316151202, 'recall': 0.4816426434593419, 'f1-score': 0.589841798501249, 'support': 3677.0}, 'accuracy': 0.6607905247211128, 'macro avg': {'precision': 0.6871806868393866, 'recall': 0.6631148485153853, 'f1-score': 0.650327732642664, 'support': 7261.0}, 'weighted avg': {'precision': 0.6881228308228796, 'recall': 0.6607905247211128, 'f1-score': 0.6495530195349445, 'support': 7261.0}}


In [33]:
import json

def load_json(filename):
    """Open ``filename`` for reading and return its parsed JSON content."""
    with open(filename, 'r') as handle:
        parsed = json.load(handle)
    return parsed

def write_file(filename,content):
    """Serialize ``content`` as JSON (4-space indent) into ``filename``, overwriting it."""
    with open(filename, 'w') as handle:
        json.dump(content, handle, indent=4)

In [45]:
def compute_result(model, shot, y_test) -> pd.DataFrame:
    """Evaluate one saved prediction file and return its metrics as a one-row frame.

    Args:
        model: model name as it appears in the prediction file name (e.g. "llama31").
        shot: few-shot size as it appears in the prediction file name.
        y_test: ground-truth labels (0 = no hate speech, 1 = hate speech),
            aligned with the saved predictions.

    Returns:
        A single-row DataFrame with the columns model, shots, precision,
        recall, f1 (all three for class 1), accuracy, TP, FP, TN, FN and the
        weighted-average w_precision, w_recall, w_f1.
    """
    y_pred = load_json(f"/content/drive/MyDrive/predictions_hate/prediction_hate_speech_{model}_{shot}_shot.json")

    accuracy = accuracy_score(y_test, y_pred)
    # labels=[0, 1] forces a 2x2 matrix even when one class never occurs, so
    # the four-way unpacking below cannot fail on a degenerate prediction file.
    TN, FP, FN, TP = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    class_report = classification_report(y_test, y_pred, labels=[0,1], output_dict=True)

    positive = class_report["1"]
    weighted = class_report["weighted avg"]

    # Build the row in one go; key order defines the column order.
    return pd.DataFrame([{
        "model": model,
        "shots": shot,
        "precision": positive["precision"],
        "recall": positive["recall"],
        "f1": positive["f1-score"],
        # Use the directly computed score rather than the report's "accuracy"
        # entry, which the report only includes in some configurations.
        "accuracy": accuracy,
        "TP": TP,
        "FP": FP,
        "TN": TN,
        "FN": FN,
        "w_precision": weighted["precision"],
        "w_recall": weighted["recall"],
        "w_f1": weighted["f1-score"],
    }])

In [48]:
import openpyxl

# Model/shot combinations whose saved prediction files are evaluated.
models = ["llama31","gemma2","mistral"]
shots = [0,2,4,8]

excel_file = "/content/drive/MyDrive/predictions_hate/Hate_speech_test.xlsx"

# Collect the one-row frames first and concatenate once: growing a DataFrame
# with pd.concat inside the loop re-copies all previous rows on every step and
# starts from an empty frame, which newer pandas versions warn about.
rows = []
for model in models:
    for shot in shots:
        rows.append(compute_result(model, shot, y_test))
results = pd.concat(rows, ignore_index=True)

# Round once and reuse the same frame for display and export.
results_rounded = results.round(3)
print(results_rounded)

results_rounded.to_excel(excel_file, index=False, engine='openpyxl')

      model  shots  precision  recall     f1  accuracy    TP    FP    TN  \
0   llama31      0      0.761   0.482  0.590     0.661  1771   557  3027   
1   llama31      2      0.723   0.722  0.723     0.719  2656  1018  2566   
2   llama31      4      0.677   0.821  0.742     0.711  3018  1437  2147   
3   llama31      8      0.644   0.883  0.745     0.694  3248  1795  1789   
4    gemma2      0      0.728   0.682  0.704     0.710  2508   937  2647   
5    gemma2      2      0.725   0.710  0.717     0.717  2611   992  2592   
6    gemma2      4      0.718   0.767  0.742     0.729  2820  1108  2476   
7    gemma2      8      0.703   0.787  0.743     0.724  2895  1222  2362   
8   mistral      0      0.657   0.742  0.697     0.673  2728  1425  2159   
9   mistral      2      0.556   0.948  0.701     0.590  3487  2787   797   
10  mistral      4      0.545   0.887  0.675     0.568  3262  2723   861   
11  mistral      8      0.576   0.793  0.667     0.599  2917  2150  1434   

      FN  w