In [None]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
# `%%capture` has to stay on the first line of the cell; it silences the installer output.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps: install these packages without resolving their own dependencies.
!pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes
!pip install sentence-similarity
# Italian spaCy pipeline; it is loaded further down with spacy.load("it_core_news_sm").
!python -m spacy download it_core_news_sm

In [None]:
!pip install sentence-transformers

# First task, classification
In this section we load the manually labeled dataset, perform the first preprocessing steps, and carry out the first task (classification).

In [None]:
from unsloth import FastLanguageModel, is_bfloat16_supported
import re

import colorsys
import string
import torch

import pandas as pd
import numpy as np

import spacy
from spacy.lang.it import Italian

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Schikit learn import
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split


from sklearn.base import TransformerMixin
from spacy.lang.it.stop_words import STOP_WORDS
from spacy.lang.it import Italian
from sklearn import metrics
from sklearn import svm
from sklearn.model_selection import StratifiedKFold

import seaborn as sns
from matplotlib import pyplot as plt
import transformers
import warnings
import numpy as np
from tqdm import tqdm
from keras.models import Sequential
from keras import layers
from keras.backend import clear_session
from sklearn import neural_network

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import json
from datasets import Dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from dotenv import load_dotenv

import os

# Runtime detection: Colab sets COLAB_RELEASE_TAG; elsewhere the Hugging Face
# token is read from the environment after loading a .env file.
colab = False
if not os.getenv("COLAB_RELEASE_TAG"):
    load_dotenv()
    hf_key = os.getenv("KEY_HF")
    print("NOT in Colab")
else:
    from google.colab import userdata, files
    colab = True
    print("Running in Colab")
    hf_key = userdata.get('hf')

# Silence every warning for the rest of the notebook.
warnings.filterwarnings('ignore')

plt.style.use('seaborn-v0_8-paper')

# Globals shared by the cells below: placeholder for the dataset and the
# number of cross-validation repetitions.
df = None
n_iteration = 10

Paths of all the folders used by the notebook; customize them to match your environment.

In [None]:
# Folder prefixes (note the trailing slash: they are concatenated with file names).
path_df = 'toxic_relationship_dataset/'  # input CSV files and augmented datasets
path_result = 'Result/'  # metric tables written by the classification sections
path_model = 'Models/'

In [None]:
# Italian language resources used by the tokenizer defined below.
punctuations = string.punctuation
stop_words = STOP_WORDS  # same object as spacy.lang.it.stop_words.STOP_WORDS
parser = Italian()
nlp = spacy.load("it_core_news_sm")

# Stateless pipeline step that normalises the raw sentences
class predictors(TransformerMixin):
    """Pipeline step that applies `clean_text` to every input sentence."""

    def fit(self, X, y=None, **fit_params):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X, **transform_params):
        """Return a list holding `clean_text(text)` for each text in X."""
        cleaned = []
        for text in X:
            cleaned.append(clean_text(text))
        return cleaned

    def get_params(self, deep=True):
        """The step exposes no hyper-parameters."""
        return dict()

# Basic text normalisation
def clean_text(text):
    """Strip leading/trailing whitespace from `text` and convert it to lowercase."""
    trimmed = text.strip()
    return trimmed.lower()

# Tokenizer used by the TF-IDF vectorizer
def spacy_tokenizer(sentence):
    """Split `sentence` with the Italian parser and drop stop words and punctuation.

    Returns the list of surviving token strings. `punctuations` is a string,
    so the punctuation test is a substring check against it.
    """
    kept = []
    for token in parser(sentence):
        word = token.text
        if word in stop_words or word in punctuations:
            continue
        kept.append(word)
    return kept

vector = TfidfVectorizer(tokenizer = spacy_tokenizer)

In [None]:
# Parse the ';'-separated dataset by hand. The first field of every line
# (header included) is discarded and every data line is lowercased.
# Rows are collected in a list and the DataFrame is built once, instead of
# enlarging it row by row.
columns = None
rows = []
line_numbers = []
with open(path_df+'toxic_relationship_dataset.csv', 'r', encoding='utf-8') as f:
    for i, line in enumerate(f):
        if i == 0:
            columns = line.strip().split(';')[1:]
            continue
        if not line.strip():
            continue  # blank line, e.g. an extra newline at the end of the file
        row = line.lower().strip().split(';')[1:]
        if len(row) != len(columns):
            raise ValueError(f"line {i + 1}: expected {len(columns)} fields, found {len(row)}")
        rows.append(row)
        line_numbers.append(i)

# The index keeps the line number of each row in the file.
df = pd.DataFrame(rows, columns=columns, index=line_numbers)
df.head()

The dataset was labeled so that each sentence has a single type of violence: a sentence is never labeled as physical violence and cyberviolence at the same time. Given this, we create a single feature indicating the type of violence.

In [None]:
print(df['Type of physical violence'].value_counts())

In [None]:
print(df['Type of Cyberviolence'].value_counts())

In [None]:
# Merge the two label columns into one by string concatenation.
# NOTE(review): assumes at most one of the two fields is non-empty per row, as the labeling note says — confirm.
violence_type = df['Type of physical violence'] + df['Type of Cyberviolence']
df.insert(1, column='Type of violence', value=violence_type)
df.drop(columns=['Type of physical violence', 'Type of Cyberviolence'], inplace=True)
df.head()

In [None]:
print(df['Type of violence'].value_counts())

During labeling, some sentences turned out not to contain any aggressive communication. We therefore replace the missing (NaN) values with "Nessuna", which lets us remove the feature "Aggressive communication (YES/NO)".

In [None]:
print(df['Type of aggressive communication'].value_counts())

In [None]:
# Empty strings become the explicit class "nessuna"; the replacement is applied
# in place to every column of df. The YES/NO flag column is then dropped.
df.replace("", "nessuna", inplace=True)
df.drop(columns=['Aggressive communication (YES/NO)'], inplace=True)
df.head()

We split the two classification tasks so that the dataset can be balanced differently for each of them. Before data augmentation, the dataset has the following distribution with respect to the target features "Type of violence" and "Type of aggressive communication".

In [None]:
# Sentence/label view for the first task and the class distribution as a bar chart.
violence_columns = ['sentence', 'Type of violence']
df_violence = df[violence_columns]
df_violence['Type of violence'].value_counts().plot(kind="bar")

Before manually performing the oversampling, we try the classification on the unbalanced dataset and evaluate the results.

In [None]:
# Baseline on the unbalanced data: text cleaner -> TF-IDF -> linear SVM,
# evaluated with repeated 10-fold cross-validation on "Type of violence".
classifier_violence = svm.LinearSVC()

pipeline_violence = Pipeline([("cleaner", predictors()),
 ('vectorizer', vector),
  ('classifier', classifier_violence)
  ])

#K-fold
# NOTE(review): random_state is a fixed integer, so every repetition below
# presumably iterates over the same folds — confirm that this is intended.
kf = KFold(n_splits=10, random_state=42, shuffle=True)
fmacro = 0
fmicro = 0
facc = 0
fprecision = 0
frecall = 0
X = df_violence['sentence'].to_numpy()
y = df_violence['Type of violence'].to_numpy()

# Number of (repetition, fold) evaluations summed into the accumulators.
n_evals = 0
for _ in tqdm(range(n_iteration)):
  for train_index, test_index in kf.split(X):
    pipeline_violence.fit(X[train_index], y[train_index])
    predicted = pipeline_violence.predict(X[test_index])

    fmacro += metrics.f1_score(y[test_index], predicted, average='macro')
    fmicro += metrics.f1_score(y[test_index], predicted, average='micro')
    facc += metrics.accuracy_score(y[test_index], predicted)
    fprecision += metrics.precision_score(y[test_index], predicted, average='macro')
    frecall += metrics.recall_score(y[test_index], predicted, average='macro')
    n_evals += 1

# Average over the evaluations actually performed (the divisor used to be a
# hard-coded 100); max() only guards against an empty run.
n_evals = max(n_evals, 1)
print("\n======================================================================================================")
print("Accuracy:", facc/n_evals)
# {3} is the micro F1; the previous format string printed the macro F1 twice.
print("P={0}, R={1}, F1 Macro={2}, F1 Micro={3}".format(fprecision/n_evals, frecall/n_evals, fmacro/n_evals, fmicro/n_evals))
print("========================================================================================================")

In [None]:
# Sentence/label view for the second task and the class distribution as a bar chart.
comunication_columns = ['sentence', 'Type of aggressive communication']
df_comunication = df[comunication_columns]
df_comunication['Type of aggressive communication'].value_counts().plot(kind="bar")

In [None]:
# Baseline on the unbalanced data: text cleaner -> TF-IDF -> linear SVM,
# evaluated with repeated 10-fold cross-validation on
# "Type of aggressive communication".
classifier_comunication = svm.LinearSVC()

pipeline_comunication = Pipeline([("cleaner", predictors()),
 ('vectorizer', vector),
  ('classifier', classifier_comunication)
  ])

#K-fold
# NOTE(review): random_state is a fixed integer, so every repetition below
# presumably iterates over the same folds — confirm that this is intended.
kf = KFold(n_splits=10, random_state=42, shuffle=True)
fmacro = 0
fmicro = 0
facc = 0
fprecision = 0
frecall = 0
# Read from the task-specific frame (same rows as df), like the violence baseline does.
X = df_comunication['sentence'].to_numpy()
y = df_comunication['Type of aggressive communication'].to_numpy()

# Number of (repetition, fold) evaluations summed into the accumulators.
n_evals = 0
for _ in tqdm(range(n_iteration)):
  for train_index, test_index in kf.split(X):
    pipeline_comunication.fit(X[train_index], y[train_index])
    predicted = pipeline_comunication.predict(X[test_index])

    fmacro += metrics.f1_score(y[test_index], predicted, average='macro')
    fmicro += metrics.f1_score(y[test_index], predicted, average='micro')
    facc += metrics.accuracy_score(y[test_index], predicted)
    fprecision += metrics.precision_score(y[test_index], predicted, average='macro')
    frecall += metrics.recall_score(y[test_index], predicted, average='macro')
    n_evals += 1

# Average over the evaluations actually performed (the divisor used to be a
# hard-coded 100); max() only guards against an empty run.
n_evals = max(n_evals, 1)
print("\n======================================================================================================")
print("Accuracy:", facc/n_evals)
# {3} is the micro F1; the previous format string printed the macro F1 twice.
print("P={0}, R={1}, F1 Macro={2}, F1 Micro={3}".format(fprecision/n_evals, frecall/n_evals, fmacro/n_evals, fmicro/n_evals))
print("======================================================================================================")

## First classification
Now we load the augmented dataset for the first classification and perform the classification.

In [None]:
# table of metrics
metrics_model = pd.DataFrame(columns=['model','accuracy','precision','recall'])

In [None]:
# Parse the augmented ';'-separated violence dataset by hand: the header line
# is skipped and every data line is lowercased. Rows are collected in a list
# and the DataFrame is built once, instead of enlarging it row by row.
violence_columns = ['sentence','Type of violence']
rows = []
line_numbers = []
with open(path_df+'toxic_relationship_violence_dataset.csv', 'r', encoding='utf-8') as f:
    for i, line in enumerate(f):
        if i == 0 or not line.strip():
            continue  # header line, or a blank line (e.g. at the end of the file)
        row = line.lower().strip().split(';')
        if len(row) != len(violence_columns):
            raise ValueError(f"line {i + 1}: expected {len(violence_columns)} fields, found {len(row)}")
        rows.append(row)
        line_numbers.append(i)

# The index keeps the line number of each row in the file.
df_violence = pd.DataFrame(rows, columns=violence_columns, index=line_numbers)
df_violence.tail()

In [None]:
df_violence['Type of violence'].value_counts().plot(kind="bar")

In [None]:
df_violence['Type of violence'].value_counts()

### First model at low level
Let's make a first classification to recognize the type of violence. The dataset is already augmented, so we only perform the classification.

Since we do not have many examples, we perform repeated stratified K-fold cross-validation. We choose the stratified K-fold so that every test fold keeps the same class proportions.

We use two different classifiers, an SVM and a deep learning model, and keep the best-performing one.

#### SVM

In [None]:
# Text cleaner -> TF-IDF -> linear SVM (hinge loss) on the augmented violence
# dataset, evaluated with repeated stratified 10-fold cross-validation.
classifier_violence = svm.LinearSVC(loss='hinge')

pipeline_violence = Pipeline([("cleaner", predictors()),
 ('vectorizer', vector),
  ('classifier', classifier_violence)
  ])


fmacro = 0
fmicro = 0
facc = 0
fprecision = 0
frecall = 0
X = df_violence['sentence'].to_numpy()
y = df_violence['Type of violence'].to_numpy()

# all the y predicted
y_pred = []
# all the ground truth of y
gt_y = []
# Number of (repetition, fold) evaluations summed into the accumulators.
n_evals = 0
for _ in tqdm(range(n_iteration)):
  #K-fold (no random_state: a new shuffle is drawn at every repetition)
  kf = StratifiedKFold(n_splits=10, shuffle=True)
  for train_index, test_index in kf.split(X,y):
    pipeline_violence.fit(X[train_index], y[train_index])
    predicted = pipeline_violence.predict(X[test_index])
    y_pred.extend(predicted)
    gt_y.extend(y[test_index])

    # Compute the evaluation metrics
    fmacro += metrics.f1_score(y[test_index], predicted, average='macro')
    fmicro += metrics.f1_score(y[test_index], predicted, average='micro')
    facc += metrics.accuracy_score(y[test_index], predicted)
    fprecision += metrics.precision_score(y[test_index], predicted, average='macro')
    frecall += metrics.recall_score(y[test_index], predicted, average='macro')
    n_evals += 1

# Average over the evaluations actually performed (the divisor used to be a
# hard-coded 100); max() only guards against an empty run.
n_evals = max(n_evals, 1)
print("\n================================================================================================================================")
print("Accuracy:", facc/n_evals)
# {3} is the micro F1; the previous format string printed the macro F1 twice.
print("P={0}, R={1}, F1 Macro={2}, F1 Micro={3}".format(fprecision/n_evals, frecall/n_evals, fmacro/n_evals, fmicro/n_evals))
print("=================================================================================================================================")
print(metrics.classification_report(gt_y, y_pred, digits=4, labels=classifier_violence.classes_))
# save mean metrics
metrics_model.loc[len(metrics_model)] = {
    'model': 'SVM',
    'accuracy': facc/n_evals,
    'precision': fprecision/n_evals,
    'recall': frecall/n_evals
}

#### Deep Learning model

In [None]:
# Bag-of-words features and label encoder for the Keras violence classifier.
vectorizer_violence = CountVectorizer()
y = df_violence['Type of violence'].to_numpy()
X = vectorizer_violence.fit_transform(df_violence['sentence'].to_numpy())

# Number of features
input_dim = X.shape[1]
print("Number of features: ",input_dim)

# binarize labels (the encoder is only fitted here; y keeps the raw labels)
encoder_violence = LabelBinarizer().fit(y)
nc = encoder_violence.classes_.size
print("Number of classes: ",nc)

In [None]:
# Feed-forward classifier: two ReLU hidden layers and a softmax output with
# one unit per class.
model_violence = Sequential([
    layers.Dense(512, input_dim=input_dim, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(nc, activation='softmax'),
])

# compile the model
model_violence.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_violence.summary()

In [None]:
clear_session() # Make sure to call clear_session() before you start training the model again

fmacro = 0
fmicro = 0
facc = 0
fprecision = 0
frecall = 0
y_gt = []
y_pred = []
# The binarized labels do not depend on the fold, so they are computed once.
y_fold = encoder_violence.transform(y)
# Snapshot of the weights the model has when this cell starts. They are
# restored before every fold: otherwise the network keeps training across
# folds and is tested on samples it has already been trained on.
# Re-run the model-definition cell before re-running this one to start from
# freshly initialised weights.
# NOTE(review): only the weights are restored, not the optimizer state.
initial_weights = model_violence.get_weights()
# Number of folds summed into the accumulators.
n_folds = 0
#K-fold
kf = StratifiedKFold(n_splits=10, shuffle=True)
for train_index, test_index in kf.split(X,y):
  model_violence.set_weights(initial_weights)
  X_f, X_test, y_f, y_test = X[train_index], X[test_index], y_fold[train_index], y_fold[test_index]

  # 10% of the training fold is held out as validation data
  X_train, X_val, y_train, y_val = train_test_split(X_f, y_f, test_size=0.1)

  model_violence.fit(X_train.toarray(), y_train, epochs=20, verbose=False, validation_data=(X_val.toarray(), y_val))
  pred = model_violence.predict(X_test.toarray(), verbose=False)

  # back from class scores / one-hot rows to the original label strings
  pred = encoder_violence.inverse_transform(pred)
  y_test = encoder_violence.inverse_transform(y_test)

  y_gt.extend(y_test)
  y_pred.extend(pred)

  # Valutation metrics
  fmacro += metrics.f1_score(y_test, pred, average='macro')
  fmicro += metrics.f1_score(y_test, pred, average='micro')
  facc += metrics.accuracy_score(y_test, pred)
  fprecision += metrics.precision_score(y_test, pred, average='macro')
  frecall += metrics.recall_score(y_test, pred, average='macro')
  n_folds += 1

# The model is used later in the notebook to label new sentences, so after the
# evaluation it is trained once more, from the initial weights, on all the data.
model_violence.set_weights(initial_weights)
model_violence.fit(X.toarray(), y_fold, epochs=20, verbose=False)

# Average over the folds actually evaluated (the divisor used to be a hard-coded 10).
n_folds = max(n_folds, 1)
print("\n========================================================================================================================================")
print("Accuracy:", facc/n_folds)
# {3} is the micro F1; the previous format string printed the macro F1 twice.
print("P={0}, R={1}, F1 Macro={2}, F1 Micro={3}".format(fprecision/n_folds, frecall/n_folds, fmacro/n_folds, fmicro/n_folds))
print("========================================================================================================================================")
print(metrics.classification_report(y_gt, y_pred, digits=4))
# save mean metrics
metrics_model.loc[len(metrics_model)] = {
    'model': 'DL model',
    'accuracy': facc/n_folds,
    'precision': fprecision/n_folds,
    'recall': frecall/n_folds
}

In [None]:
metrics_model.to_csv(path_result + 'metric_violence.csv', index=False)

## Second classification
We load the second augmented dataset and perform the second classification, which predicts the type of aggressive communication.

In [None]:
# table of metrics
metrics_model = pd.DataFrame(columns=['model','accuracy','precision','recall'])

In [None]:
# Parse the augmented ';'-separated communication dataset by hand: the header
# line is skipped and every data line is lowercased. Rows are collected in a
# list and the DataFrame is built once, instead of enlarging it row by row.
comunication_columns = ['sentence','Type of aggressive communication']
rows = []
line_numbers = []
with open(path_df+'toxic_relationship_comunication_dataset.csv', 'r', encoding='utf-8') as f:
    for i, line in enumerate(f):
        if i == 0 or not line.strip():
            continue  # header line, or a blank line (e.g. at the end of the file)
        row = line.lower().strip().split(';')
        if len(row) != len(comunication_columns):
            raise ValueError(f"line {i + 1}: expected {len(comunication_columns)} fields, found {len(row)}")
        # A sentence that starts with a double quote has its first and last
        # characters removed and doubled quotes ("") collapsed to one.
        if row[0].startswith('"'):
            row[0] = row[0][1:-1].replace('""','"')
        rows.append(row)
        line_numbers.append(i)

# The index keeps the line number of each row in the file.
df_comunication = pd.DataFrame(rows, columns=comunication_columns, index=line_numbers)
df_comunication.tail()

In [None]:
df_comunication['Type of aggressive communication'].value_counts().plot(kind="bar")

In [None]:
df_comunication['Type of aggressive communication'].value_counts()

#### MLPClassifier

In [None]:
# TF-IDF + single-hidden-layer MLP on the augmented communication dataset,
# evaluated with repeated stratified 10-fold cross-validation.
classifier_comunication = neural_network.MLPClassifier(hidden_layer_sizes=(1024,),verbose=False,max_iter=50)

fmacro = 0
fmicro = 0
facc = 0
fprecision = 0
frecall = 0
X = df_comunication['sentence'].to_numpy()
y = df_comunication['Type of aggressive communication'].to_numpy()

# all the y predicted
y_pred = []
# all the ground truth of y
gt_y = []
# Number of (repetition, fold) evaluations summed into the accumulators.
n_evals = 0
for _ in tqdm(range(n_iteration)):
  #K-fold (no random_state: a new shuffle is drawn at every repetition)
  kf = StratifiedKFold(n_splits=10, shuffle=True)
  for train_index, test_index in kf.split(X,y):
    # Fit the TF-IDF vocabulary and weights on the training fold only, then
    # project the test fold with them. Fitting on all of X (as was done
    # before, once per fold) lets the test sentences influence the features.
    X_train = vector.fit_transform(X[train_index])
    X_test = vector.transform(X[test_index])

    classifier_comunication.fit(X_train, y[train_index])
    predicted = classifier_comunication.predict(X_test)

    y_pred.extend(predicted)
    gt_y.extend(y[test_index])

    # Valutation metrics
    fmacro += metrics.f1_score(y[test_index], predicted, average='macro')
    fmicro += metrics.f1_score(y[test_index], predicted, average='micro')
    facc += metrics.accuracy_score(y[test_index], predicted)
    fprecision += metrics.precision_score(y[test_index], predicted, average='macro')
    frecall += metrics.recall_score(y[test_index], predicted, average='macro')
    n_evals += 1

# Average over the evaluations actually performed (the divisor used to be a
# hard-coded 100); max() only guards against an empty run.
n_evals = max(n_evals, 1)
print("\n==========================================================================================================")
print("Accuracy:", facc/n_evals)
# {3} is the micro F1; the previous format string printed the macro F1 twice.
print("P={0}, R={1}, F1 Macro={2}, F1 Micro={3}".format(fprecision/n_evals, frecall/n_evals, fmacro/n_evals, fmicro/n_evals))
print("==========================================================================================================")
print(metrics.classification_report(gt_y, y_pred, digits=4))
# save mean metrics
metrics_model.loc[len(metrics_model)] = {
    'model': 'MLPClassifier',
    'accuracy': facc/n_evals,
    'precision': fprecision/n_evals,
    'recall': frecall/n_evals
}

#### Deep Learning model

In [None]:
# Bag-of-words features and label encoder for the Keras communication classifier.
vectorizer_comunication = CountVectorizer()
y = df_comunication['Type of aggressive communication'].to_numpy()
X = vectorizer_comunication.fit_transform(df_comunication['sentence'].to_numpy())

# Number of features
input_dim = X.shape[1]
print("Number of features: ",input_dim)

# binarize labels (the encoder is only fitted here; y keeps the raw labels)
encoder_comunication = LabelBinarizer().fit(y)
nc = encoder_comunication.classes_.size
print("Number of classes: ",nc)

In [None]:
# Feed-forward classifier: two ReLU hidden layers and a softmax output with
# one unit per class.
model_comunication = Sequential([
    layers.Dense(512, input_dim=input_dim, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(nc, activation='softmax'),
])

# compile the model
model_comunication.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_comunication.summary()

In [None]:
clear_session() # Make sure to call clear_session() before you start training the model again

fmacro = 0
fmicro = 0
facc = 0
fprecision = 0
frecall = 0
gt_y = []
y_pred = []
# The binarized labels do not depend on the fold, so they are computed once.
y_fold = encoder_comunication.transform(y)
# Snapshot of the weights the model has when this cell starts. They are
# restored before every fold: otherwise the network keeps training across
# folds and is tested on samples it has already been trained on.
# Re-run the model-definition cell before re-running this one to start from
# freshly initialised weights.
# NOTE(review): only the weights are restored, not the optimizer state.
initial_weights = model_comunication.get_weights()
# Number of folds summed into the accumulators.
n_folds = 0
#K-fold
kf = StratifiedKFold(n_splits=10, shuffle=True)
for train_index, test_index in kf.split(X,y):
  model_comunication.set_weights(initial_weights)
  X_f, X_test, y_f, y_test = X[train_index], X[test_index], y_fold[train_index], y_fold[test_index]

  # 10% of the training fold is held out as validation data
  X_train, X_val, y_train, y_val = train_test_split(X_f, y_f, test_size=0.1)

  model_comunication.fit(X_train.toarray(), y_train, epochs=20, verbose=False, validation_data=(X_val.toarray(), y_val))
  pred = model_comunication.predict(X_test.toarray(), verbose=False)

  # back from class scores / one-hot rows to the original label strings
  pred = encoder_comunication.inverse_transform(pred)
  y_test = encoder_comunication.inverse_transform(y_test)

  gt_y.extend(y_test)
  y_pred.extend(pred)

  # Valutation metrics
  fmacro += metrics.f1_score(y_test, pred, average='macro')
  fmicro += metrics.f1_score(y_test, pred, average='micro')
  facc += metrics.accuracy_score(y_test, pred)
  fprecision += metrics.precision_score(y_test, pred, average='macro')
  frecall += metrics.recall_score(y_test, pred, average='macro')
  n_folds += 1

# The model is used later in the notebook to label new sentences, so after the
# evaluation it is trained once more, from the initial weights, on all the data.
model_comunication.set_weights(initial_weights)
model_comunication.fit(X.toarray(), y_fold, epochs=20, verbose=False)

# Average over the folds actually evaluated (the divisor used to be a hard-coded 10).
n_folds = max(n_folds, 1)
print("\n==========================================================================================================")
print("Accuracy:", facc/n_folds)
# {3} is the micro F1; the previous format string printed the macro F1 twice.
print("P={0}, R={1}, F1 Macro={2}, F1 Micro={3}".format(fprecision/n_folds, frecall/n_folds, fmacro/n_folds, fmicro/n_folds))
print("==========================================================================================================")
print(metrics.classification_report(gt_y, y_pred, digits=4))
# save mean metrics
metrics_model.loc[len(metrics_model)] = {
    'model': 'DL model',
    'accuracy': facc/n_folds,
    'precision': fprecision/n_folds,
    'recall': frecall/n_folds
}

In [None]:
metrics_model.to_csv(path_result + 'metric_toxic_comunication.csv', index=False)

# Second task, Generation
In this section, we take the two classification models trained above, use them to classify new sentences, and generate an explanation for each sentence.

In [None]:
# Change this with the quantized model do you prefer from unsloth
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"

In [None]:
# Loading options shared by both from_pretrained calls in this notebook.
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Load the model selected by `model_id` together with its tokenizer; the
# Hugging Face token read in the setup cell is passed as `token`.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_id,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = hf_key,
)

In [None]:
df = pd.read_csv(path_df+'toxic_relationship_filtered_dataset.csv')
df.head()

## Generation of explainability using pre trained LLM
Now we use LLaMa 3 to generate the explanation of each sentence, using the classification produced by the models developed above.

In [None]:
# Sentence to explain
sentences = [
    # Sentences inside the domain
    "Il mio ragazzo mi dice che sono una troia perché pubblico foto in bikini.",
    "Tu non sei niente senza di me.",
    "Il post di una ragazza è stato segnalato e cancellato solo perché ha scritto una frase rivolta agli uomini sessisti.",
    "Chiede chi mi scrive su WhatsApp a quest'ora e perché.",
    # Sentences ouside the domain
    "Sono un buono a nulla, non so fare nulla",
    "Mi ha preso il telefono con la forza e non me lo ha voluto restituire",
    "Mi ha preso con forza e mi ha portata via"
]

In [None]:
# End-of-sequence token of the loaded tokenizer; it is baked into the template below.
EOS_TOKEN = tokenizer.eos_token

# Prompt template with two worked examples. Because this is an f-string, each
# `{{}}` becomes a literal `{}` placeholder that is filled later with
# str.format, in this order: sentence, type of violence, type of aggressive
# communication, response.
# The two examples use "## Response:" while the input section uses
# "### Response:"; the generation code splits the decoded text on
# '### Response:', so that marker has to appear only once in the template.
prompt_gen = f"""
Below is an instruction that describes a task, paired with examples and input that provide further context.
Write a response that appropriately completes the request.

### Instruction:
Given a sentence, generate an explanation of why the sentence is toxic, considering the type of violence and
the type of aggressive communication.
The response MUST BE IN ITALIAN and in the THIRD PERSON.
You should explain WHY the sentence is toxic.

### Example:
Sentence: "Il mio ragazzo mi dice che sono una troia perché pubblico foto in bikini."
Type of violence: "Violenza verbale"
Type of aggressive communication: "Insulti"
## Response:
Questa frase è tossica perché rappresenta un esempio di violenza verbale che utilizza insulti per denigrare e umiliare l'altra persona, creando un ambiente di aggressività e mancanza di rispetto.

### Example:
Sentence: "Mi ha detto che nessuno mi ama e che sarò sempre solo."
Type of violence: "Violenza psicologica"
Type of aggressive communication: "Manipolazione emotiva"
## Response:
Questa frase è tossica perché rappresenta un esempio di violenza psicologica, poiché cerca di manipolare le emozioni dell'altra persona facendola sentire non amata e isolata.

### Input:
Sentence: "{{}}"
Type of violence: "{{}}"
Type of aggressive communication: "{{}}"
### Response:
{{}}
{EOS_TOKEN}
"""

### Generation without finetuning

In [None]:
# Explanations produced by the base (not fine-tuned) model, one per test sentence.
first_gen = []

for sentence in tqdm(sentences):

  # Compute the first classification (type of violence)
  X = vectorizer_violence.transform([sentence])
  y_violence = model_violence.predict(X.toarray(), verbose=False)
  y_violence = str(encoder_violence.inverse_transform(y_violence)[0]).lower()

  # Compute the second classification (type of aggressive communication)
  X = vectorizer_comunication.transform([sentence])
  y_comunication = model_comunication.predict(X.toarray(), verbose=False)
  y_comunication = str(encoder_comunication.inverse_transform(y_comunication)[0]).lower()

  # Fill the template; the response slot is left empty so the model writes it.
  inputs = tokenizer(
      prompt_gen.format(
          sentence,
          y_violence, # Type of violence
          y_comunication, # Type of communication
          "",
      ),

  return_tensors="pt",
  ).to('cuda')

  generated = model.generate(inputs.input_ids, max_new_tokens=100, temperature=0.2, do_sample=True, top_p=0.3, num_return_sequences=1)
  generated = tokenizer.decode(generated[0], skip_special_tokens=True).strip()
  # The decoded text is split on the input section's response marker and the part after it is kept.
  generated = generated.split('### Response:')[1]
  generated = generated.replace("_"," ")

  first_gen.append(generated)

In [None]:
first_gen[6]

### Data augmentation for fine tuning
Now we perform the data augmentation using the 4-bit quantized version of LLaMa 3.

We use BERT to compute the similarity between the generated instances and the manually labeled instances, and keep the explanation that was written together with the psychologist.

We had a limited amount of GPU time for fine-tuning and for generating the descriptions, so we generate 3000 examples.

Before augmenting the dataset, we use the dataset previously used for the violence classification to oversample the original dataset.

In [None]:
# Initializing the Sentence Transformer model using BERT with mean-tokens pooling
model_similarity = SentenceTransformer('bert-base-nli-mean-tokens')

In [None]:
# Re-read the augmented violence dataset, keeping only the lines whose number
# is at least len(df); the lines before that are treated as already present
# in df. Every kept line is lowercased. Rows are collected in a list and the
# DataFrame is built once, instead of enlarging it row by row.
# NOTE(review): data lines are numbered from 1, so `i < len(df)` still keeps
# line number len(df) — confirm whether `<=` was intended.
violence_columns = ['sentence','Type of violence']
rows = []
line_numbers = []
with open(path_df+'toxic_relationship_violence_dataset.csv', 'r', encoding='utf-8') as f:
    for i, line in enumerate(f):
        # skip the header, all the data inside the original df, and blank lines
        if i == 0 or i < len(df) or not line.strip():
            continue
        row = line.lower().strip().split(';')
        if len(row) != len(violence_columns):
            raise ValueError(f"line {i + 1}: expected {len(violence_columns)} fields, found {len(row)}")
        rows.append(row)
        line_numbers.append(i)

# The index keeps the line number of each row in the file.
df_violence = pd.DataFrame(rows, columns=violence_columns, index=line_numbers)
df_violence.head()

In [None]:
# Encoding the sentences of the original df to obtain their embeddings
sentence_embeddings = model_similarity.encode(df['sentence'].to_numpy())

In [None]:
# Oversample the original dataset with the extra sentences in df_violence.
# Each new sentence receives the description of the most similar original
# sentence of the same violence class, and its communication class is
# predicted with the Keras model trained above.
explaination = df['Description'].to_numpy()
all_violence = df['Type of violence'].to_numpy()
# convert the type of all value in string and lowercase the class
all_violence = np.char.lower(all_violence.astype('str'))

# Rows are collected here and appended to the frame in a single step.
new_rows = []
for sentence, violence in df_violence[['sentence','Type of violence']].to_numpy():

  # positions (in df) of the examples that belong to the same category
  indexes = np.where(all_violence == violence)[0]
  if indexes.size == 0:
    # no original example with this class: search the whole dataset instead
    indexes = np.arange(len(all_violence))
  # embedding of the new sentence
  sentence_en = model_similarity.encode(sentence)
  # position of the most similar element INSIDE the subset
  index_ms = np.argmax(cosine_similarity([sentence_en], sentence_embeddings[indexes]))
  # map the subset position back to a position in df before reading the
  # description (indexing `explaination` with the subset position picked the
  # description of an unrelated row)
  description = explaination[indexes[index_ms]]

  # Compute the second classification
  X = vectorizer_comunication.transform([sentence])
  y_comunication = model_comunication.predict(X.toarray(), verbose=False)
  y_comunication = str(encoder_comunication.inverse_transform(y_comunication)[0]).lower()

  new_rows.append({
      'sentence': sentence,
      'Type of violence': violence,
      'Type of aggressive communication': y_comunication,
      'Description': description
    })

df_oversampled = df.copy()
if new_rows:
  # columns=df.columns keeps exactly the columns of the original frame
  df_oversampled = pd.concat(
      [df_oversampled, pd.DataFrame(new_rows, columns=df.columns)],
      ignore_index=True,
  )

# shuffle the rows
df_oversampled = df_oversampled.sample(frac=1).reset_index(drop=True)
# We save the oversampled dataset in local
df_oversampled.to_json('dataset_oversampled.jsonl', orient='records',lines=True, force_ascii=False)
# `files` is only imported when running in Colab
if colab:
  files.download('dataset_oversampled.jsonl')
# we print the tail for verify that the data is oversampled
df_oversampled.tail()

In [None]:
# number of iteration for the data augmented
n_aug = 0
# we set a number of backup for auto download the file and if some problem occure, we don't lost the data
n_backup = 2

In [None]:
# Instruction used to ask the model for a new, similar toxic sentence. `{}` is
# filled with the source sentence through str.format, and EOS_TOKEN is
# appended to the end of the template.
prompt = """
Given a sentence, generate a NEW sentence that simulates toxic behavior within a relationship.
The response must be SIMILAR to the input sentence but DIFFERENT. The response must be ONLY the new sentence in Italian
without explanation or additional content. DO NOT REPEAT THE INPUT SENTENCE AND NOT TRANSLATE IN OUTPUT.
Input: {}
"""+EOS_TOKEN

In [None]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# Reference data used to attach a description to every generated sentence.
sentence_embeddings = model_similarity.encode(df_oversampled['sentence'].to_numpy())
explaination = df_oversampled['Description'].to_numpy()
all_violence = df_oversampled['Type of violence'].to_numpy()
all_violence = np.char.lower(all_violence.astype('str'))

# Frames with the generated examples, one per iteration. They used to be
# appended to `df_oversampled.head()`, which put 5 real rows into the
# "generated" dataset and made the final `len(new_df) > 0` check always pass.
fold_frames = []

model.generation_config.pad_token_id = tokenizer.pad_token_id

for it in tqdm(range(n_aug)):
  fold_rows = []
  # generations without an "Output:" marker cannot be parsed and are skipped
  n_skipped = 0
  for _ in range(n_backup):
    for sentence in df_oversampled['sentence'].to_numpy():

      inputs = tokenizer(
          prompt.format(
              sentence, # input
          ),
          return_tensors="pt",
      ).to('cuda')

      generated = model.generate(inputs.input_ids, max_length=200, temperature=0.7)
      generated = tokenizer.decode(generated[0], skip_special_tokens=True).strip()
      if "Output:" not in generated:
        # previously this raised IndexError and aborted the whole run
        n_skipped += 1
        continue
      # keep the text after "Output:", up to the first parenthesis
      generated = generated.split("Output:")[1].split('(')[0]
      # cut trailing notes/examples added by the model (the 'Example' case
      # used to split on "Note:" again, so it never removed anything)
      generated = generated.split("Note:")[0]
      generated = generated.split("Example")[0]
      generated = generated.replace("\n","").replace('"', "")
      if not generated.strip():
        n_skipped += 1
        continue

      # Compute the first classification
      X = vectorizer_violence.transform([generated])
      y_violence = model_violence.predict(X.toarray(), verbose=False)
      y_violence = str(encoder_violence.inverse_transform(y_violence)[0]).lower()

      # positions (in df_oversampled) of the examples of the same category
      indexes = np.where(all_violence == y_violence)[0]
      if indexes.size == 0:
        # no reference example with this class: search the whole dataset
        indexes = np.arange(len(all_violence))
      # embedding of the generated sentence
      g_encoded = model_similarity.encode(generated)
      # position of the most similar element INSIDE the subset
      index_ms = np.argmax(cosine_similarity([g_encoded], sentence_embeddings[indexes]))
      # map the subset position back to a dataset position before reading the description
      description = explaination[indexes[index_ms]]

      # Compute the second classification
      X = vectorizer_comunication.transform([generated])
      y_comunication = model_comunication.predict(X.toarray(), verbose=False)
      y_comunication = str(encoder_comunication.inverse_transform(y_comunication)[0]).lower()

      fold_rows.append({
          'sentence': generated,
          'Type of violence': y_violence,
          'Type of aggressive communication': y_comunication,
          'Description': description
        })

  if n_skipped:
    print(f'Iteration {it}: skipped {n_skipped} unusable generations')

  # one frame per iteration, with the same columns as the source dataset
  fold_setence = pd.DataFrame(fold_rows, columns=df_oversampled.columns)
  fold_setence.to_json(path_df + f'dataset_augmented_{it}.jsonl', orient='records',lines=True, force_ascii=False)
  if colab:
      print('Download Locally')
      # If you want to download all the augmented dataset locally un comment the following line
      #files.download(f'dataset_augmented_{it}.jsonl')
  fold_frames.append(fold_setence)

# df that contain all example generated (empty when nothing was generated)
if fold_frames:
  new_df = pd.concat(fold_frames, ignore_index=True)
else:
  new_df = df_oversampled.head(0).copy()

if len(new_df) > 0:
  new_df.to_json(path_df + 'dataset_augmented.jsonl', orient='records',lines=True, force_ascii=False)
  if colab:
      print('Download from Colab')
      # If you want to download all the augmented dataset locally un comment the following line
      #files.download(path_df + f'dataset_augmented.jsonl')

### First Fine Tuning of LLaMa3
To adapt LLaMa 3 to our task we fine-tune it with two datasets: a balanced dataset created by hand and a dataset created with the previous code, which contains about a thousand artificially generated examples.

So that the previously instantiated model can still be reused, we load a separate model instance for fine-tuning.

In [None]:
# Second instance of the same checkpoint, loaded with the same options as the
# inference model; this one is the instance that gets fine-tuned.
model_ft, tokenizer_ft = FastLanguageModel.from_pretrained(
    model_name = model_id,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = hf_key,
)


# Wrap the model with LoRA adapters on the listed projection modules.
model_ft = FastLanguageModel.get_peft_model(
    model_ft,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Prepare the dataset for fine tuning

In [None]:
# Get the 80% of dataset for train and 20% for test
train = df_oversampled.sample(n=int(len(df_oversampled)*0.8))
eval = df_oversampled.drop(train.index)

len(train), len(eval)

Create train dataset

In [None]:
# Training texts: the prompt template filled with each training row, response included.
df_train = {'text': []}
# Assumes the frame has exactly four columns, in the order sentence, violence,
# communication, description — TODO confirm.
for sentence, violence, comunication, description in train.to_numpy():
  # NOTE(review): this rebinds the global template without newlines, so every
  # cell executed afterwards formats the flattened prompt — confirm it is intended.
  prompt_gen = prompt_gen.replace('\n', '')
  df_train['text'].append(
      prompt_gen.format(sentence, violence, comunication, description)
  )

Create eval dataset

In [None]:
# Build the evaluation corpus: one fully formatted prompt string per eval row.
# The newlines are stripped from the template once, before the loop. `prompt_gen`
# stays rebound to the newline-free template (as the original loop left it), so
# later cells that call prompt_gen.format(...) see the same template.
prompt_gen = prompt_gen.replace('\n', '')

df_eval = {
    'text': [],
}
# assumes every row of `eval` has exactly these four columns, in this order
for sentence, violence, comunication, description in eval.to_numpy():
  gen = prompt_gen.format(
          sentence,
          violence, # Type of violence
          comunication, # Type of communication
          description
      )
  # append the string itself (no trailing-comma tuple, no list-extension by accident)
  df_eval['text'].append(gen)

In [None]:
# Convert the {'text': [...]} dictionaries into Dataset objects for the trainer.
# NOTE(review): this rebinds `train` / `eval` from DataFrames to Datasets, so the
# cells that call train.to_numpy() / eval.to_numpy() presumably cannot be re-run
# after this one without redoing the split first.
train = Dataset.from_dict(df_train)
eval = Dataset.from_dict(df_eval)
len(train), len(eval)

In [None]:
# First supervised fine-tuning run: train the LoRA-wrapped model on the "text"
# field of the train split.
trainer = SFTTrainer(
    model=model_ft,
    tokenizer=tokenizer_ft,
    train_dataset=train,
    eval_dataset=eval,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,  # packing is off; turning it on can make training 5x faster for short sequences
    args=TrainingArguments(
        output_dir="outputs",
        seed=3407,
        num_train_epochs=5,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        lr_scheduler_type="linear",
        warmup_steps=5,
        weight_decay=0.01,
        optim="adamw_8bit",
        # Mixed precision: bf16 when supported, fp16 otherwise.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
    ),
)

In [None]:
# Run the first fine-tuning with the trainer configured in the previous cell.
trainer.train()

In [None]:
# Save the fine-tuned model and its tokenizer locally, in the directory `name_model`.
name_model = 'pretrained_model_500_example'
model_ft.save_pretrained(name_model)
tokenizer_ft.save_pretrained(name_model)

In [None]:
# this code is for create a zip file that contain the model
!zip -r /content/pretrained_model_500_example.zip /content/pretrained_model_500_example
# automatically download the file
files.download(f'{name_model}.zip')

In [None]:
# Generate one response per sentence with the model obtained from the first
# fine-tuning. The violence / communication labels predicted by the two
# classifiers are injected into the prompt, and the response slot is left empty
# so that the model completes it.
# NOTE(review): Unsloth recommends FastLanguageModel.for_inference(model_ft) before
# generating; if that is enabled here, FastLanguageModel.for_training(model_ft) is
# probably needed before the second fine-tuning — confirm before adding it.
af_train = []

for sentence in tqdm(sentences):

  # Predict the type of violence
  X = vectorizer_violence.transform([sentence])
  y_violence = model_violence.predict(X.toarray(), verbose=False)
  y_violence = str(encoder_violence.inverse_transform(y_violence)[0]).lower()

  # Predict the type of communication
  X = vectorizer_comunication.transform([sentence])
  y_comunication = model_comunication.predict(X.toarray(), verbose=False)
  y_comunication = str(encoder_comunication.inverse_transform(y_comunication)[0]).lower()

  inputs = tokenizer_ft(
      prompt_gen.format(
          sentence,
          y_violence, # Type of violence
          y_comunication, # Type of communication
          ""
      ),

  return_tensors="pt",
  ).to('cuda')

  # Generate and decode with the fine-tuned model and its own tokenizer (not the
  # base `model` / `tokenizer`), so these outputs really reflect the fine-tuning.
  generated = model_ft.generate(inputs.input_ids, max_new_tokens=100, temperature=0.2, do_sample=True, top_p=0.3, num_return_sequences=1)
  generated = tokenizer_ft.decode(generated[0], skip_special_tokens=True).strip()
  # Keep only what follows the response marker of the prompt
  generated = generated.split('### Response:')[1]
  generated = generated.replace("_"," ")
  if 'assistant' in generated:
    generated = generated.split('assistant')[1]

  af_train.append(generated)

In [None]:
# Side-by-side comparison of the generations, one row per input sentence.
# The frame is built in a single call from a list of records instead of being
# grown row by row. Positional indexing is kept on purpose: a generation list
# shorter than `sentences` still raises IndexError instead of being truncated.
result = pd.DataFrame(
    [
        {
            'sentence': sentence,
            'LLaMa 3 base': first_gen[index],
            'LLaMa 3 with finetuning': af_train[index],
        }
        for index, sentence in enumerate(sentences)
    ],
    columns=['sentence', 'LLaMa 3 base', 'LLaMa 3 with finetuning'],
)

file_name = 'generated_sentence_with_LLaMa3_500_example_finetuning'
result.to_csv(path_result + f'{file_name}.csv')
if colab:
    print('Download from Colab')
    # Uncomment to download the CSV from where it was just written.
    # files.download(path_result + f'{file_name}.csv')

In [None]:
# Display the comparison table built in the previous cell.
result

### Second fine tuning
We continue training the fine-tuned model, this time on the generated (augmented) dataset.

In [None]:
# Load the augmented examples used for the second fine-tuning round.
# NOTE(review): only the '_0' file is read here, while the augmentation cell also
# writes a combined 'dataset_augmented.jsonl' — confirm which file is intended.
df_augmented = pd.read_json(path_df + 'dataset_augmented_0.jsonl', lines=True)
df_augmented.tail()

In [None]:
# Use 80% of the augmented dataset for training and the remaining 20% for evaluation.
# A fixed random_state makes the split reproducible across kernel restarts
# (3407 is the same seed already used for the LoRA setup and the trainer).
# NOTE(review): drop() removes rows by index label — confirm that the index of
# df_augmented is unique, otherwise the eval split shrinks.
train = df_augmented.sample(n=int(len(df_augmented)*0.8), random_state=3407)
eval = df_augmented.drop(train.index)

len(train), len(eval)

In [None]:
# Build the training corpus for the second round: one formatted prompt string
# per row of the augmented train split.
# The newlines are stripped from the template once, before the loop. `prompt_gen`
# stays rebound to the newline-free template (as the original loop left it), so
# later cells that call prompt_gen.format(...) see the same template.
prompt_gen = prompt_gen.replace('\n', '')

df_train = {
    'text': [],
}
# assumes every row of `train` has exactly these four columns, in this order
for sentence, violence, comunication, description in train.to_numpy():
  gen = prompt_gen.format(
          sentence,
          violence, # Type of violence
          comunication, # Type of communication
          description
      )
  # append the string itself (no trailing-comma tuple, no list-extension by accident)
  df_train['text'].append(gen)

In [None]:
# Build the evaluation corpus for the second round: one formatted prompt string
# per row of the augmented eval split.
# The newlines are stripped from the template once, before the loop. `prompt_gen`
# stays rebound to the newline-free template (as the original loop left it), so
# later cells that call prompt_gen.format(...) see the same template.
prompt_gen = prompt_gen.replace('\n', '')

df_eval = {
    'text': [],
}
# assumes every row of `eval` has exactly these four columns, in this order
for sentence, violence, comunication, description in eval.to_numpy():
  gen = prompt_gen.format(
          sentence,
          violence, # Type of violence
          comunication, # Type of communication
          description
      )
  # append the string itself (no trailing-comma tuple, no list-extension by accident)
  df_eval['text'].append(gen)

In [None]:
# Convert the {'text': [...]} dictionaries into Dataset objects for the trainer.
# NOTE(review): this rebinds `train` / `eval` from DataFrames to Datasets, so the
# cells that call train.to_numpy() / eval.to_numpy() presumably cannot be re-run
# after this one without redoing the split first.
train = Dataset.from_dict(df_train)
eval = Dataset.from_dict(df_eval)
len(train), len(eval)

In [None]:
# Second supervised fine-tuning run: the same `model_ft` keeps training, now on
# the "text" field of the augmented train split.
# NOTE(review): output_dir is the same "outputs" folder used by the first run —
# confirm that sharing it is intended.
trainer = SFTTrainer(
    model=model_ft,
    tokenizer=tokenizer_ft,
    train_dataset=train,
    eval_dataset=eval,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,  # packing is off; turning it on can make training 5x faster for short sequences
    args=TrainingArguments(
        output_dir="outputs",
        seed=3407,
        num_train_epochs=5,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        lr_scheduler_type="linear",
        warmup_steps=5,
        weight_decay=0.01,
        optim="adamw_8bit",
        # Mixed precision: bf16 when supported, fp16 otherwise.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
    ),
)

In [None]:
# Run the second fine-tuning with the trainer configured in the previous cell.
trainer.train()

In [None]:
# Save the further-trained model and its tokenizer locally, under `path_model`.
name_model = path_model + 'continue_learning_model_1000_example'
model_ft.save_pretrained(name_model)
tokenizer_ft.save_pretrained(name_model)

In [None]:
# IF YOU WORK WITH COLAB AND YOU WANT TO SAVE THE MODEL, UNCOMMENT THE LINES BELOW
# Create a zip file that contains the model
#!zip -r /content/continue_learning_model_1000_example.zip /content/continue_learning_model_1000_example
# Automatically download the file
#files.download(f'{name_model}.zip')

In [None]:
# Generate one response per sentence with the model obtained after the second
# fine-tuning round. The violence / communication labels predicted by the two
# classifiers are injected into the prompt, and the response slot is left empty
# so that the model completes it.
# NOTE(review): Unsloth recommends FastLanguageModel.for_inference(model_ft) before
# generating — confirm whether it should be enabled here.
c_train = []

for sentence in tqdm(sentences):

  # Predict the type of violence
  X = vectorizer_violence.transform([sentence])
  y_violence = model_violence.predict(X.toarray(), verbose=False)
  y_violence = str(encoder_violence.inverse_transform(y_violence)[0]).lower()

  # Predict the type of communication
  X = vectorizer_comunication.transform([sentence])
  y_comunication = model_comunication.predict(X.toarray(), verbose=False)
  y_comunication = str(encoder_comunication.inverse_transform(y_comunication)[0]).lower()

  inputs = tokenizer_ft(
      prompt_gen.format(
          sentence,
          y_violence, # Type of violence
          y_comunication, # Type of communication
          ""
      ),

  return_tensors="pt",
  ).to('cuda')

  # Generate and decode with the fine-tuned model and its own tokenizer (not the
  # base `model` / `tokenizer`), so these outputs really reflect the fine-tuning.
  generated = model_ft.generate(inputs.input_ids, max_new_tokens=100, temperature=0.2, do_sample=True, top_p=0.3, num_return_sequences=1)
  generated = tokenizer_ft.decode(generated[0], skip_special_tokens=True).strip()
  # Keep only what follows the response marker of the prompt
  generated = generated.split('### Response:')[1]
  generated = generated.replace("_"," ")
  if 'assistant' in generated:
    generated = generated.split('assistant')[1]

  c_train.append(generated)

In [None]:
# Inspect the first response generated after the second fine-tuning.
c_train[0]

In [None]:
# Final comparison table: base model vs. first fine-tuning vs. continued
# fine-tuning, one row per input sentence.
# The frame is built in a single call from a list of records instead of being
# grown row by row. Positional indexing is kept on purpose: a generation list
# shorter than `sentences` still raises IndexError instead of being truncated.
result = pd.DataFrame(
    [
        {
            'sentence': sentence,
            'LLaMa 3 base': first_gen[index],
            'LLaMa 3 with 500 example of finetuning': af_train[index],
            'LLaMa 3 with other 1000 example of finetuning': c_train[index],
        }
        for index, sentence in enumerate(sentences)
    ],
    columns=[
        'sentence',
        'LLaMa 3 base',
        'LLaMa 3 with 500 example of finetuning',
        'LLaMa 3 with other 1000 example of finetuning',
    ],
)

file_name = 'generated_sentence_with_LLaMa3_continue_training'
# Write next to the other result files: use the configured `path_result`
# (as the first comparison cell does) instead of a hard-coded 'Result/' folder.
result.to_csv(path_result + f'{file_name}.csv')

# If you work with Colab, uncomment this line to download the result automatically
#files.download(path_result + f'{file_name}.csv')