In [2]:
from transformers import TrainerCallback, TrainerState

class CaptureMetricsCallback(TrainerCallback):
    def on_log(self, args, state: TrainerState, control, model, logs=None, **kwargs):
        if logs is not None:
            with open('training_logs.txt', 'a') as f:
                f.write(f"Epoch: {state.epoch}, Step: {state.global_step}, Training Loss: {logs.get('loss', 'N/A')}, Validation Loss: {logs.get('eval_loss', 'N/A')}\n")

#Training with intention or procedures only
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM , Seq2SeqTrainer, Seq2SeqTrainingArguments
from torch.utils.data import Dataset
import json
from sklearn.model_selection import train_test_split
import os 
import numpy as np
import random

def set_seed(seed_value=42):
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
    np.random.seed(seed_value)
    random.seed(seed_value)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

torch.cuda.empty_cache()

tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-large')

class T5Dataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels['input_ids'][idx])
        return item

    def __len__(self):
        return len(self.encodings.input_ids)

with open('train_data_v2.json', 'r') as f:
    train_data = json.load(f)

with open('val_data_v2.json', 'r') as f:
    val_data = json.load(f)

train_texts = []  
train_labels = [] 
val_texts = []  
val_labels = []  

for item in train_data: 
    train_texts.append(item['raw_text'])  
    train_labels.append(item['intention']) 

for item in val_data:  
    val_texts.append(item['raw_text'])  
    val_labels.append(item['intention'])  
    
#for item in train_data:
#    input_text = f"Determine the intention behind the text: {item['raw_text']}"
#    train_texts.append(input_text)
#    train_labels.append(item['intention'])

#for item in val_data:
#    input_text = f"Determine the intention behind the text: {item['raw_text']}"
#    val_texts.append(input_text)
#    val_labels.append(item['intention'])

train_encodings = tokenizer(train_texts, truncation=True, padding=True)
val_encodings = tokenizer(val_texts, truncation=True, padding=True)
train_labels = tokenizer(train_labels, truncation=True, padding=True)
val_labels = tokenizer(val_labels, truncation=True, padding=True)

train_dataset = T5Dataset(train_encodings, train_labels)
val_dataset = T5Dataset(val_encodings, val_labels)

training_args = Seq2SeqTrainingArguments(
    output_dir='FlanT5LargeIntention (Real World with different prompt)',          
    num_train_epochs=9,              # For Real World Data, Given the small amount of data that i have, most likely will require more epochs
    per_device_train_batch_size=4,  # batch size per device during training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logsfolder',            # directory for storing logs
    load_best_model_at_end=True,     
    logging_steps=10,                # log & save weights each logging_steps
    evaluation_strategy='epoch',     # evaluation strategy to adopt during training
    #save_steps=60,    
    #save_steps=100,
    save_strategy='epoch',
    learning_rate=5e-5,              # learning rate
    report_to=[],                
)

#model = AutoModelForSeq2SeqLM.from_pretrained('t5-large')
#model = AutoModelForSeq2SeqLM.from_pretrained('google/flan-t5-large')
model = AutoModelForSeq2SeqLM.from_pretrained('Models and Results/Seeded Models/FlanT5LargeIntention/checkpoint-444')


torch.cuda.empty_cache()

trainer = Seq2SeqTrainer(
    model=model,                         
    args=training_args,                  
    train_dataset=train_dataset,         
    eval_dataset=val_dataset,
    callbacks=[CaptureMetricsCallback()],
)

#trainer.train(resume_from_checkpoint=True)
trainer.train()

trainer.save_model('./FlanT5LargeIntentionOnly - (Real World with different prompt)')

***** Running training *****
  Num examples = 30
  Num Epochs = 9
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 72


Epoch,Training Loss,Validation Loss
1,No log,0.832006
2,1.421200,0.814713
3,1.278100,0.782847
4,1.362700,0.743801


***** Running Evaluation *****
  Num examples = 8
  Batch size = 16
Saving model checkpoint to FlanT5LargeIntention (Real World with different prompt)/checkpoint-8
Configuration saved in FlanT5LargeIntention (Real World with different prompt)/checkpoint-8/config.json
Model weights saved in FlanT5LargeIntention (Real World with different prompt)/checkpoint-8/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 8
  Batch size = 16
Saving model checkpoint to FlanT5LargeIntention (Real World with different prompt)/checkpoint-16
Configuration saved in FlanT5LargeIntention (Real World with different prompt)/checkpoint-16/config.json
Model weights saved in FlanT5LargeIntention (Real World with different prompt)/checkpoint-16/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 8
  Batch size = 16
Saving model checkpoint to FlanT5LargeIntention (Real World with different prompt)/checkpoint-24
Configuration saved in FlanT5LargeIntention (Real World with different promp

In [2]:
!pip install rouge-score

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24936 sha256=ce857f035cff34495db391885a087e59ff0bf40641fc7ea54594b2006a8d4858
  Stored in directory: /root/.cache/pip/wheels/b0/3f/ac/cc3bc304f50c77ef38d79d8e4e2684313de39af543cb4eb3da
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2
[0m

#######################

In [5]:
#Included BLEU
import json
from rouge_score import rouge_scorer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

model = AutoModelForSeq2SeqLM.from_pretrained('FlanT5LargeIntention (Real World with Insturctions)/checkpoint-296')
tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-large')
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
smoother = SmoothingFunction()

with open('FlanT5LargeIntention (Real World with Insturctions)/test_data.json', 'r') as f:
    data = json.load(f)

#raw_texts = [item['raw_text'] for item in data]
raw_texts = [f"Determine the intention behind the text: {item['raw_text']}" for item in data]
inputs = tokenizer(raw_texts, return_tensors='pt', truncation=True, padding=True)

outputs = model.generate(**inputs, max_new_tokens=128)
predictions = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

data_with_predictions = []
for item, prediction in zip(data, predictions):
    new_item = item.copy()
    new_item['model_output'] = prediction
    correct_output = f"Intention: {item['intention']}"
    
    scores = scorer.score(correct_output, prediction)
    for key, score in scores.items():
        new_item[key] = score.fmeasure
    
    reference = correct_output.split()
    candidate = prediction.split()
    bleu_score = sentence_bleu([reference], candidate, smoothing_function=smoother.method1)
    new_item['bleu_score'] = bleu_score
    
    data_with_predictions.append(new_item)

with open('Data Output FLAN-T5 Large (Intention Only No Training Performed).json', 'w') as f:
    json.dump(data_with_predictions, f)

loading configuration file FlanT5LargeIntention (Real World with Insturctions)/checkpoint-296/config.json
Model config T5Config {
  "_name_or_path": "FlanT5LargeIntention (Real World with Insturctions)/checkpoint-296",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "d_ff": 2816,
  "d_kv": 64,
  "d_model": 1024,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 24,
  "num_heads": 16,
  "num_layers": 24,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.21.3",
  "use_cache": true,
  "vocab_size": 32128
}

loading weights file FlanT5LargeIntent