<a href="https://colab.research.google.com/github/jlopetegui98/Literary-Fine-Tuning-of-LLM/blob/main/ClassifierWildeVsMistral/clf_wild_vs_mistral.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Classifier Wilde vs Mistral7B (baseline model)

The idea is to train a classifier on texts written by Oscar Wilde and texts generated by Mistral7B. Once trained, the classifier should discriminate correctly between the two sources. Our working hypothesis is that, after fine-tuning Mistral7B, its generated texts will be able to fool this classifier.

In [None]:
# Mount Google Drive at /content/drive; the data paths configured later point into it.
from google.colab import drive
drive.mount('/content/drive')

Classifier Wilde vs Mistral7B


In [None]:
# Install (or upgrade to) the latest simpletransformers release, which provides ClassificationModel.
!pip install -U simpletransformers

In [None]:
import json

import torch
from pandas import DataFrame
from simpletransformers.classification import ClassificationModel
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [None]:
# Locations of the input corpora on the mounted Drive, plus display names for the two classes.
dir_data = './drive/MyDrive/DL-ENS/dataset'
authors_paths = [dir_data + '/wilde_complete.txt']
mistral_gen_texts = dir_data + '/dataset_mistral7B_gen_texts.json'
# Position in this list is the numeric class label (0 -> Wilde, 1 -> Mistral7B).
authors_names = ["Wilde", "Mistral7B"]

In [None]:
def read_texts(path: str, label, len_to_read=None, max_length=350, encoding='utf-8'):
    """Read one author's corpus and cut it into fixed-size word chunks.

    Args:
        path: Path of the plain-text file to read.
        label: Class label attached to every chunk produced from this file.
        len_to_read: If not None, keep only the first `len_to_read` characters
            of the file. This is a character budget, so the last word kept
            may be cut in the middle.
        max_length: Maximum number of whitespace-separated words per chunk;
            the final chunk may be shorter.
        encoding: Text encoding used to decode the file.

    Returns:
        A dict with two parallel lists: 'text' (the chunks, with words
        re-joined by single spaces) and 'label' (`label` repeated once per
        chunk). Both lists are empty when the file contains no words.

    Raises:
        ValueError: If `max_length` is not positive.
    """
    if max_length <= 0:
        raise ValueError(f'max_length must be positive, got {max_length!r}')
    # Read-only mode: the file is never modified, so 'r+' would only add a
    # needless write-permission requirement.
    with open(path, 'r', encoding=encoding) as fd:
        text = fd.read()
    if len_to_read is not None:
        text = text[:len_to_read]
    words = text.split()
    # Slicing clamps at the end of the list, so the last chunk needs no special case.
    chunks = [
        ' '.join(words[start:start + max_length])
        for start in range(0, len(words), max_length)
    ]
    return {'text': chunks, 'label': [label] * len(chunks)}

In [None]:
# get wilde texts
# Character budget per author file, forwarded to read_texts as `len_to_read`.
# It was never defined in the notebook, so a fresh "Restart & Run All" raised
# NameError here. None reads each file in full; set an int to truncate the
# corpus (e.g. to balance it against the Mistral7B texts).
len_wilde_texts = None
dt = {'text': [], 'label': []}
for i, path in enumerate(authors_paths):
  # the position in authors_paths doubles as the numeric class label (0 -> Wilde)
  dt_i = read_texts(path, i, len_wilde_texts)
  dt['text'].extend(dt_i['text'])
  dt['label'].extend(dt_i['label'])

In [None]:
# get mistral generated texts
# Read-only mode is enough: the JSON file is only parsed, never written.
with open(mistral_gen_texts, 'r', encoding='utf-8') as fd:
  mistral_texts = json.load(fd)

# list.extend needs an iterable, so `extend(1)` raised TypeError. Append one
# label 1 (Mistral7B) per generated text so 'text' and 'label' stay aligned.
# NOTE: this cell appends to `dt`; re-running it without first re-running the
# cell that builds `dt` duplicates the Mistral7B rows.
dt['text'].extend(mistral_texts['texts'])
dt['label'].extend([1] * len(mistral_texts['texts']))

In [None]:
# Turn the {'text': [...], 'label': [...]} dict into a two-column table and preview it.
dt = DataFrame.from_dict(data=dt, orient='columns')
dt.head()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the shuffled split reproducible.
dt_train, dt_test = train_test_split(
    dt,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [None]:
# Preview the first rows of the training split.
dt_train.head()

In [None]:
# Preview the first rows of the held-out test split.
dt_test.head()

In [None]:
# Histogram of the numeric columns (here `label`), i.e. the class balance of the training split.
dt_train.hist()

In [None]:
# Histogram of the numeric columns (here `label`), i.e. the class balance of the test split.
dt_test.hist()

In [None]:
# define model for classifier and initial weights
# Both values are passed, in this order, as the first two arguments of
# ClassificationModel when the classifier is built.
model_name = 'bert'
model_weights =  'bert-base-cased'

In [None]:
# Fraction of training rows labelled 1 (Mistral7B); labels are 0/1, so sum/len is that fraction.
mistral_fraction = sum(dt_train['label']) / len(dt_train['label'])
# Loss weights indexed by class label. Each class is weighted by the frequency
# of the *other* class so the rarer class contributes more to the loss.
# The previous [1 - p, p] weighted each class by its own frequency, which
# amplified the majority class instead of compensating for it.
class_weights = [mistral_fraction, 1 - mistral_fraction]

model = ClassificationModel(
    model_name,
    model_weights,
    num_labels=2,
    weight=class_weights,
    args={
        'reprocess_input_data': True,
        'overwrite_output_dir': True,
        'num_train_epochs': 5,
    },
    # Use the GPU when the runtime has one; fall back to CPU instead of failing.
    use_cuda=torch.cuda.is_available(),
)
model.train_model(dt_train)

In [None]:
# Classify the held-out texts; `predictions` holds the predicted labels and
# `raw_out` the raw model outputs returned alongside them.
test_texts = list(dt_test['text'])
predictions, raw_out = model.predict(test_texts)

In [None]:
# Per-class precision, recall and F1 on the held-out split; target_names labels the rows with the author names.
print(classification_report(dt_test['label'], predictions, target_names = authors_names))