<a href="https://colab.research.google.com/github/jlopetegui98/RebornToBeWilde/blob/main/5-Experiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Experiments with Mistral7B-Instruct (baseline) and the model fine-tuned on Oscar Wilde texts**

The idea of the experiments is to use the classifier trained for authorship attribution (BertAA_Wilde_vs_Mistral) to test the ability of the fine-tuned model to mimic Wilde's writing style.

Then, for each model, we generate fiction stories from the same starting lines, assign a label to each story with the classifier, and compare the results between the two models. The hypothesis is that, for the texts generated by the fine-tuned model, the classifier's ability to distinguish between Wilde and Mistral will decay.

In [1]:
# mount Google Drive at /content/drive (only needed in google colab; comment these lines out otherwise)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# install the required packages if working in google colab
# NOTE(review): none of these installs pins a version (transformers, peft and accelerate are
# taken from their GitHub repositories), so a later re-run may resolve to different package
# versions than the ones these results were produced with -- consider pinning them.
!pip install -U simpletransformers
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q trl xformers wandb datasets einops gradio sentencepiece

Collecting simpletransformers
  Downloading simpletransformers-0.70.0-py3-none-any.whl (315 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/315.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━[0m [32m174.1/315.5 kB[0m [31m5.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.5/315.5 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Collecting datasets (from simpletransformers)
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tensorboardx (from simpl

In [1]:
# import the required packages
import torch
import simpletransformers
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging, TextStreamer
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os, platform, gradio, warnings
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from huggingface_hub import notebook_login
import json
from tqdm import tqdm
import utils

In [2]:
from utils import *

In [3]:
# locations of the data, the models and the evaluation prompts
# dir_root = '.' # use this root instead when NOT running in google colab
dir_root = './drive/MyDrive/RebornToBeWilde' # project root inside the mounted drive (google colab)
dir_data = dir_root + '/data'
models_path = dir_root + '/models'
# NOTE: the classifier checkpoint is read from a different drive folder (DL-ENS), not from dir_root
clf_path = './drive/MyDrive/DL-ENS/models/BertClassifier(BERTAA)_wilde_vs_mistral7B.pt'
# text file with the starting lines used as generation prompts
list_to_generate_path = dir_data + '/story_prompts_for_evaluation.txt'
# checkpoint of the Mistral7B model fine-tuned on Oscar Wilde texts
ft_model = models_path + '/Mistral7B_fine_tuned_OscarWilde.pt'

In [4]:
# load classifier (wilde vs mistral7B-Instruct)
# The checkpoint stores a whole pickled object (a simpletransformers ClassificationModel, see the
# cell output), not just a state dict, so it has to be loaded with weights_only=False; recent
# torch versions default to weights_only=True and would refuse to load it.
# WARNING: unpickling can execute arbitrary code -- only load checkpoints from a trusted source.
clf = torch.load(clf_path, weights_only=False)
clf

<simpletransformers.classification.classification_model.ClassificationModel at 0x781f12f00be0>

In [5]:
# base model: identifier passed to load_model and load_tokenizer for both the baseline and the fine-tuned run
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

In [6]:
# load the lines to start the generation (one prompt per line of the file)
# the file is only read, so it is opened read-only
with open(list_to_generate_path, 'r', encoding='utf-8') as fd:
  # strip only the line terminator: a last line without a trailing newline keeps its final character
  texts_to_generation = [line.rstrip('\n') for line in fd]

**Experiments with baseline model**

In [7]:
# load the baseline model and its tokenizer from the base model identifier
# (load_model / load_tokenizer are not defined in this notebook -- presumably they come from utils)
model = load_model(model_name)
tokenizer = load_tokenizer(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [10]:
from utils import *

In [11]:
from tqdm import tqdm

In [12]:
import regex as re

In [8]:
def clf_exp_(model, tokenizer, clf, texts):
    """
    Function for the experiments: for every line in the texts list it generates a text with the
    model and then predicts the label of the generated text using the classifier.

    Each line is tokenized with the `tokenize` helper (not defined in this notebook, presumably
    provided by utils), up to 500 new tokens are sampled with `model.generate`, the decoded output
    is cleaned from the prompt markup and the cleaned text is passed to `clf.predict`.
    inputs:
      model: model: model to use (must expose `generate`)
      tokenizer: tokenizer: tokenizer to use (passed to `tokenize`, used for `batch_decode`)
      clf: classifier: classifier to use (`clf.predict(list_of_texts)`; its first return value is kept)
      texts: list: list of strings with the lines to generate and predict
    outputs:
        label_predictions: list: list of predictions of the classifier
        generated_texts: list: list of the generated (and cleaned) texts
    """

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # pattern to remove the prompt tokens after generation (compiled once, reused for every text)
    patt = re.compile(r'\[INST]|\[\/INST]|\<s>|\</s>|This are the first lines of a work of fiction. Continue it.')

    generated_texts = [] # list of generated texts
    label_predictions = [] # list of predictions of the classifier
    for prompt in tqdm(texts):
        model_inputs = tokenize(tokenizer, prompt).to(device)
        generated_ids = model.generate(
            max_new_tokens=500,
            do_sample=True,
            # generate() falls back to the eos token as pad token anyway; passing it explicitly
            # avoids the "Setting `pad_token_id` to `eos_token_id`" warning on every iteration
            pad_token_id=tokenizer.eos_token_id,
            **model_inputs,
        )
        # decode and clean the generated texts
        decoded = [patt.sub('', text) for text in tokenizer.batch_decode(generated_ids)]
        preds, _ = clf.predict(decoded) # predict the label of the generated text with clf
        label_predictions.extend(preds)
        generated_texts.extend(decoded)
        # drop the references to the per-prompt objects before the next iteration
        del model_inputs, generated_ids, decoded
    return label_predictions, generated_texts

In [16]:
# baseline run: generate one text per prompt with the currently loaded model and classify each one
author_preds, generated_texts = clf_exp_(model, tokenizer, clf, texts_to_generation)

  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  1%|          | 1/100 [00:11<18:09, 11.00s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  2%|▏         | 2/100 [00:26<21:48, 13.35s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  3%|▎         | 3/100 [01:01<38:05, 23.56s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  4%|▍         | 4/100 [01:37<45:24, 28.38s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  5%|▌         | 5/100 [02:01<42:15, 26.69s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  6%|▌         | 6/100 [02:36<46:39, 29.78s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  7%|▋         | 7/100 [03:11<48:35, 31.35s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  8%|▊         | 8/100 [03:18<36:11, 23.60s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  9%|▉         | 9/100 [03:54<41:32, 27.39s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 10%|█         | 10/100 [04:10<35:45, 23.84s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 11%|█         | 11/100 [04:17<28:03, 18.92s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 12%|█▏        | 12/100 [04:39<28:58, 19.75s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 13%|█▎        | 13/100 [04:47<23:21, 16.11s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 14%|█▍        | 14/100 [05:07<24:39, 17.20s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 15%|█▌        | 15/100 [05:29<26:26, 18.66s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 16%|█▌        | 16/100 [06:03<32:38, 23.31s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 17%|█▋        | 17/100 [06:27<32:49, 23.73s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 18%|█▊        | 18/100 [06:53<33:06, 24.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 19%|█▉        | 19/100 [07:13<31:13, 23.13s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 20%|██        | 20/100 [07:48<35:16, 26.46s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 21%|██        | 21/100 [08:14<34:59, 26.57s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 22%|██▏       | 22/100 [08:32<31:06, 23.92s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 23%|██▎       | 23/100 [09:08<35:19, 27.52s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 24%|██▍       | 24/100 [09:21<29:18, 23.14s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 25%|██▌       | 25/100 [09:31<23:52, 19.10s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 26%|██▌       | 26/100 [09:49<23:13, 18.83s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 27%|██▋       | 27/100 [10:24<28:44, 23.62s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 28%|██▊       | 28/100 [10:55<31:14, 26.04s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 29%|██▉       | 29/100 [11:21<30:36, 25.86s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 30%|███       | 30/100 [11:39<27:36, 23.66s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 31%|███       | 31/100 [12:03<27:08, 23.59s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 32%|███▏      | 32/100 [12:28<27:20, 24.12s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 33%|███▎      | 33/100 [12:36<21:32, 19.29s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 34%|███▍      | 34/100 [12:43<17:17, 15.72s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 35%|███▌      | 35/100 [13:19<23:34, 21.76s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 36%|███▌      | 36/100 [13:41<23:04, 21.64s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 37%|███▋      | 37/100 [13:47<18:01, 17.17s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 38%|███▊      | 38/100 [13:56<14:58, 14.49s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 39%|███▉      | 39/100 [14:09<14:27, 14.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 40%|████      | 40/100 [14:31<16:25, 16.42s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 41%|████      | 41/100 [14:47<16:01, 16.30s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 42%|████▏     | 42/100 [15:11<18:08, 18.77s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 43%|████▎     | 43/100 [15:34<18:52, 19.87s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 44%|████▍     | 44/100 [15:55<18:53, 20.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 45%|████▌     | 45/100 [16:31<22:56, 25.03s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 46%|████▌     | 46/100 [16:54<22:04, 24.53s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 47%|████▋     | 47/100 [17:16<20:54, 23.68s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 48%|████▊     | 48/100 [17:27<17:08, 19.79s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 49%|████▉     | 49/100 [17:48<17:04, 20.09s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 50%|█████     | 50/100 [18:00<14:44, 17.69s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 51%|█████     | 51/100 [18:30<17:28, 21.39s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 52%|█████▏    | 52/100 [19:06<20:39, 25.83s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 53%|█████▎    | 53/100 [19:27<19:03, 24.32s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 54%|█████▍    | 54/100 [19:51<18:37, 24.29s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 55%|█████▌    | 55/100 [20:22<19:49, 26.43s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 56%|█████▌    | 56/100 [20:59<21:33, 29.39s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 57%|█████▋    | 57/100 [21:19<19:05, 26.63s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 58%|█████▊    | 58/100 [21:29<15:12, 21.74s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 59%|█████▉    | 59/100 [21:45<13:37, 19.93s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 60%|██████    | 60/100 [22:01<12:36, 18.90s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 61%|██████    | 61/100 [22:20<12:13, 18.81s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 62%|██████▏   | 62/100 [22:43<12:39, 20.00s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 63%|██████▎   | 63/100 [23:03<12:21, 20.04s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 64%|██████▍   | 64/100 [23:09<09:30, 15.85s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 65%|██████▌   | 65/100 [23:24<09:05, 15.59s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 66%|██████▌   | 66/100 [23:51<10:43, 18.91s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 67%|██████▋   | 67/100 [24:08<10:13, 18.58s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 68%|██████▊   | 68/100 [24:37<11:27, 21.50s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 69%|██████▉   | 69/100 [25:01<11:29, 22.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 70%|███████   | 70/100 [25:09<08:58, 17.95s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 71%|███████   | 71/100 [25:14<06:50, 14.14s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 72%|███████▏  | 72/100 [25:21<05:33, 11.92s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 73%|███████▎  | 73/100 [25:40<06:19, 14.07s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 74%|███████▍  | 74/100 [26:01<06:59, 16.15s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 75%|███████▌  | 75/100 [26:20<07:07, 17.10s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 76%|███████▌  | 76/100 [26:41<07:17, 18.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 77%|███████▋  | 77/100 [27:17<09:00, 23.52s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 78%|███████▊  | 78/100 [27:50<09:43, 26.52s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 79%|███████▉  | 79/100 [28:12<08:45, 25.03s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 80%|████████  | 80/100 [28:34<08:02, 24.10s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 81%|████████  | 81/100 [29:10<08:44, 27.60s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 82%|████████▏ | 82/100 [29:34<08:01, 26.72s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 83%|████████▎ | 83/100 [29:44<06:07, 21.63s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 84%|████████▍ | 84/100 [29:58<05:11, 19.47s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 85%|████████▌ | 85/100 [30:29<05:42, 22.81s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 86%|████████▌ | 86/100 [30:43<04:40, 20.05s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 87%|████████▋ | 87/100 [31:13<05:02, 23.24s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 88%|████████▊ | 88/100 [31:42<04:59, 24.94s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 89%|████████▉ | 89/100 [31:53<03:46, 20.63s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 90%|█████████ | 90/100 [32:16<03:32, 21.28s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 91%|█████████ | 91/100 [32:46<03:36, 24.01s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 92%|█████████▏| 92/100 [33:12<03:17, 24.72s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 93%|█████████▎| 93/100 [33:28<02:33, 21.94s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 94%|█████████▍| 94/100 [33:47<02:07, 21.17s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 95%|█████████▌| 95/100 [34:02<01:36, 19.24s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 96%|█████████▌| 96/100 [34:09<01:02, 15.57s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 97%|█████████▋| 97/100 [34:20<00:43, 14.34s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 98%|█████████▊| 98/100 [34:52<00:39, 19.50s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 99%|█████████▉| 99/100 [34:59<00:15, 15.66s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 100/100 [35:35<00:00, 21.35s/it]


In [17]:
# share of the generated texts assigned to each label (0 is reported as Wilde, 1 as Mistral7B)
n_preds = len(author_preds)
wilde_pct = author_preds.count(0)/n_preds * 100
mistral_pct = author_preds.count(1)/n_preds * 100
print(f"Predictions:\n Wilde: {wilde_pct} % \n Mistral7B: {mistral_pct} %")

Predictions:
 Wilde: 0.0 % 
 Mistral7B: 100.0 %


In [9]:
def save_generated_texts_and_labels_(texts, labels, model = 'baseline', data_path = 'data'):
    """
    Save the generated texts and labels in a json file

    The file is written to `{data_path}/{model}_generated_texts.json` and contains a dictionary
    with two parallel lists, 'text' and 'label' (every label is stored as a string).
    The output directory is created when it does not exist yet.
    inputs:
      texts: list: list of generated texts
      labels: list: list of labels of the generated texts (at least one label per text,
                    otherwise an IndexError is raised)
      model: str: name of the model used to generate the texts (prefix of the file name)
      data_path: str: directory where the json file is written
    """
    dict_text_to_author = {
        'text': list(texts),
        # index by position so that a missing label fails loudly instead of being skipped silently
        'label': [str(labels[i]) for i in range(len(texts))],
    }

    # make sure the target directory exists before opening the file for writing
    if data_path:
        os.makedirs(data_path, exist_ok=True)

    with open(f"{data_path}/{model}_generated_texts.json", 'w', encoding='utf-8') as fd:
        json.dump(dict_text_to_author, fd)

In [None]:
# save the results of the baseline run (generated texts and predicted labels) as json in dir_data
import json
save_generated_texts_and_labels_(generated_texts, author_preds, model = 'baseline', data_path = dir_data)

**Experiments with fine-tuned model**

In [10]:
# load the fine-tuned model from the ft_model checkpoint, reusing the base model identifier
# NOTE(review): the exact meaning of adapt / from_finetuned is defined by load_model, which is not
# visible in this notebook -- presumably they make it apply the fine-tuned weights; confirm there
model = load_model(model_name, adapt = True, from_finetuned = True, model_path = ft_model)
tokenizer = load_tokenizer(model_name)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
import re

In [14]:
# fine-tuned run: same prompts and classifier as before, results kept in separate *_ft variables
author_preds_ft, generated_texts_ft = clf_exp_(model, tokenizer, clf, texts_to_generation)

  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  1%|          | 1/100 [00:49<1:22:01, 49.71s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  2%|▏         | 2/100 [01:38<1:20:31, 49.30s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  3%|▎         | 3/100 [02:27<1:19:29, 49.17s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  4%|▍         | 4/100 [03:16<1:18:29, 49.06s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  5%|▌         | 5/100 [04:05<1:17:46, 49.12s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  6%|▌         | 6/100 [04:55<1:17:02, 49.17s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  7%|▋         | 7/100 [05:44<1:16:12, 49.17s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  8%|▊         | 8/100 [06:33<1:15:19, 49.12s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  9%|▉         | 9/100 [07:22<1:14:36, 49.19s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 10%|█         | 10/100 [08:11<1:13:51, 49.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 11%|█         | 11/100 [09:01<1:13:05, 49.28s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 12%|█▏        | 12/100 [09:50<1:12:12, 49.24s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 13%|█▎        | 13/100 [10:39<1:11:23, 49.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 14%|█▍        | 14/100 [11:28<1:10:29, 49.18s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 15%|█▌        | 15/100 [12:18<1:09:48, 49.28s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 16%|█▌        | 16/100 [13:07<1:09:04, 49.34s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 17%|█▋        | 17/100 [13:57<1:08:19, 49.39s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 18%|█▊        | 18/100 [14:46<1:07:36, 49.47s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 19%|█▉        | 19/100 [15:36<1:06:50, 49.51s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 20%|██        | 20/100 [16:26<1:06:02, 49.53s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 21%|██        | 21/100 [17:16<1:05:21, 49.64s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 22%|██▏       | 22/100 [18:05<1:04:31, 49.63s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 23%|██▎       | 23/100 [18:55<1:03:44, 49.67s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 24%|██▍       | 24/100 [19:44<1:02:52, 49.64s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 25%|██▌       | 25/100 [20:34<1:02:05, 49.67s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 26%|██▌       | 26/100 [21:24<1:01:09, 49.59s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 27%|██▋       | 27/100 [22:13<1:00:21, 49.61s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 28%|██▊       | 28/100 [23:03<59:33, 49.63s/it]  Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 29%|██▉       | 29/100 [23:52<58:41, 49.61s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 30%|███       | 30/100 [24:42<57:59, 49.70s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 31%|███       | 31/100 [25:32<57:09, 49.71s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 32%|███▏      | 32/100 [26:22<56:24, 49.77s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 33%|███▎      | 33/100 [27:12<55:37, 49.82s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 34%|███▍      | 34/100 [28:02<54:52, 49.88s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 35%|███▌      | 35/100 [28:52<53:56, 49.79s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 36%|███▌      | 36/100 [29:41<53:08, 49.82s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 37%|███▋      | 37/100 [30:31<52:19, 49.83s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 38%|███▊      | 38/100 [31:21<51:24, 49.75s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 39%|███▉      | 39/100 [32:11<50:38, 49.80s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 40%|████      | 40/100 [33:01<49:55, 49.92s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 41%|████      | 41/100 [33:51<49:03, 49.89s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 42%|████▏     | 42/100 [34:41<48:16, 49.94s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 43%|████▎     | 43/100 [35:30<47:20, 49.83s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 44%|████▍     | 44/100 [36:20<46:29, 49.81s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 45%|████▌     | 45/100 [37:10<45:42, 49.85s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 46%|████▌     | 46/100 [38:01<45:01, 50.04s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 47%|████▋     | 47/100 [38:51<44:13, 50.07s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 48%|████▊     | 48/100 [39:41<43:24, 50.09s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 49%|████▉     | 49/100 [40:31<42:37, 50.15s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 50%|█████     | 50/100 [41:21<41:46, 50.14s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 51%|█████     | 51/100 [42:11<40:56, 50.13s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 52%|█████▏    | 52/100 [43:02<40:06, 50.14s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 53%|█████▎    | 53/100 [43:51<39:10, 50.02s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 54%|█████▍    | 54/100 [44:41<38:18, 49.97s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 55%|█████▌    | 55/100 [45:31<37:32, 50.06s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 56%|█████▌    | 56/100 [46:22<36:44, 50.09s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 57%|█████▋    | 57/100 [47:12<35:52, 50.07s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 58%|█████▊    | 58/100 [48:02<35:05, 50.13s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 59%|█████▉    | 59/100 [48:52<34:13, 50.08s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 60%|██████    | 60/100 [49:42<33:19, 50.00s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 61%|██████    | 61/100 [50:32<32:28, 49.97s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 62%|██████▏   | 62/100 [51:22<31:38, 49.96s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 63%|██████▎   | 63/100 [52:11<30:46, 49.90s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 64%|██████▍   | 64/100 [53:01<29:55, 49.88s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 65%|██████▌   | 65/100 [53:51<29:00, 49.74s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 66%|██████▌   | 66/100 [54:40<28:06, 49.61s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 67%|██████▋   | 67/100 [55:29<27:10, 49.40s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 68%|██████▊   | 68/100 [56:17<26:14, 49.20s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 69%|██████▉   | 69/100 [57:06<25:21, 49.09s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 70%|███████   | 70/100 [57:55<24:29, 48.99s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 71%|███████   | 71/100 [58:44<23:39, 48.96s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 72%|███████▏  | 72/100 [59:33<22:50, 48.95s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 73%|███████▎  | 73/100 [1:00:22<22:05, 49.11s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 74%|███████▍  | 74/100 [1:01:11<21:16, 49.08s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 75%|███████▌  | 75/100 [1:02:00<20:25, 49.02s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 76%|███████▌  | 76/100 [1:02:50<19:38, 49.09s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 77%|███████▋  | 77/100 [1:03:38<18:47, 49.04s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 78%|███████▊  | 78/100 [1:04:28<18:00, 49.13s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 79%|███████▉  | 79/100 [1:05:17<17:10, 49.08s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 80%|████████  | 80/100 [1:06:06<16:22, 49.12s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 81%|████████  | 81/100 [1:06:55<15:32, 49.06s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 82%|████████▏ | 82/100 [1:07:44<14:43, 49.11s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 83%|████████▎ | 83/100 [1:08:34<13:58, 49.30s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 84%|████████▍ | 84/100 [1:09:23<13:08, 49.27s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 85%|████████▌ | 85/100 [1:10:12<12:19, 49.31s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 86%|████████▌ | 86/100 [1:11:02<11:30, 49.32s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 87%|████████▋ | 87/100 [1:11:51<10:41, 49.32s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 88%|████████▊ | 88/100 [1:12:40<09:51, 49.30s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 89%|████████▉ | 89/100 [1:13:30<09:02, 49.28s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 90%|█████████ | 90/100 [1:14:19<08:11, 49.20s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 91%|█████████ | 91/100 [1:15:08<07:23, 49.25s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 92%|█████████▏| 92/100 [1:15:57<06:33, 49.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 93%|█████████▎| 93/100 [1:16:47<05:45, 49.35s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 94%|█████████▍| 94/100 [1:17:36<04:55, 49.29s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 95%|█████████▌| 95/100 [1:18:25<04:05, 49.19s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 96%|█████████▌| 96/100 [1:19:14<03:16, 49.14s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 97%|█████████▋| 97/100 [1:20:03<02:27, 49.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 98%|█████████▊| 98/100 [1:20:53<01:38, 49.22s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 99%|█████████▉| 99/100 [1:21:42<00:49, 49.37s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 100/100 [1:22:31<00:00, 49.52s/it]


In [15]:
# Report the share of fine-tuned generations assigned to each class label.
# As in the message below, label 0 is reported as Wilde and label 1 as Mistral7B.
n_preds_ft = len(author_preds_ft)
if n_preds_ft == 0:
    # Nothing was classified: avoid a ZeroDivisionError and say so explicitly.
    print("Predictions:\n no predictions available")
else:
    # round() strips binary floating-point noise such as 55.00000000000001.
    wilde_pct_ft = round(author_preds_ft.count(0) / n_preds_ft * 100, 2)
    mistral_pct_ft = round(author_preds_ft.count(1) / n_preds_ft * 100, 2)
    print(f"Predictions:\n Wilde: {wilde_pct_ft} % \n Mistral7B: {mistral_pct_ft} %")

Predictions:
 Wilde: 55.00000000000001 % 
 Mistral7B: 45.0 %


In [16]:
# Save the results: hand the fine-tuned model's generated texts and their
# predicted author labels to the saving helper, tagged with the model name.
save_generated_texts_and_labels_(
    generated_texts_ft,
    author_preds_ft,
    model='Mistral7B_fine_tuned',
    data_path=dir_data,
)

In [17]:
# Completion marker: printed when execution reaches the end of the notebook.
print("Done")

Done
