### Sentiment Model Training Script

In [None]:
# Import necessary libraries
# Training model on GPU
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
)
import torch
import accelerate

# Check GPU status
print("Using GPU: ", torch.cuda.is_available())  # Should print True
print("GPU Name: ", torch.cuda.get_device_name(0))  # Should show NVIDIA GPU

# Load GoEmotions dataset
dataset = load_dataset("go_emotions")

# Ensure labels are integers, not lists
def preprocess_labels(example):
    if isinstance(example["labels"], list) and len(example["labels"]) > 0:
        label = example["labels"][0]  # Get the first label
        label = min(label, 26)  # Ensure label is in range 0-26
        example["labels"] = int(label)  # Convert to integer
    else:
        example["labels"] = 0  # Default label if missing
    return example

# Apply label preprocessing
dataset = dataset.map(preprocess_labels)

train_labels = [ex["labels"] for ex in dataset["train"]]
val_labels = [ex["labels"] for ex in dataset["validation"]]

print("Unique train labels (after fix):", set(train_labels))
print("Unique validation labels (after fix):", set(val_labels))

# Load the tokenizer for RoBERTa-Base
model_checkpoint = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Tokenization function
def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)

# Tokenize dataset
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Define number of labels (27 emotions from GoEmotions)
num_labels = 27

# Load RoBERTa-base with classification head
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, num_labels=num_labels
)

# Training arguments (Optimized for GPU training)
training_args = TrainingArguments(
    output_dir="./my_sentiment_analysis_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=16,  # Increase batch size for GPU
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    fp16=True,  # Enables Mixed Precision for faster training on GPU
)

%env CUDA_LAUNCH_BLOCKING=1
# Trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
)

# Start training
trainer.train()

# Save trained model
trainer.save_model("my_sentiment_analysis_model")
tokenizer.save_pretrained("my_sentiment_analysis_model")


### Sample Test

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Load the trained three-class model from your local directory
model_name = 'jaynwanze/interview_sentiment_model'  # update if necessary
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Define a threshold for uncertainty (e.g., if the score difference between the top two is less than 0.1)
THRESHOLD = 0.1

# Define a simplified mapping for a three-class model:
id2label = {
    0: "negative",
    1: "neutral",
    2: "positive"
}

# Create the sentiment analysis pipeline
sentiment_analyzer = pipeline("text-classification", model=model, tokenizer=tokenizer, return_all_scores=True)

# Sample texts to test
sample_texts = [
    # Positive feedback examples
    "The candidate demonstrated excellent learning agility by quickly mastering necessary skills in graphic design and marketing under a tight deadline. This adaptability directly contributed to the team's success and exceeded typical expectations.",
    "The candidate showed strong flexibility by stepping into the marketing director's role on short notice. Adapting to new responsibilities and learning new skills on the fly indicates a high level of flexibility.",
    "The candidate likely maintained composure given the successful outcome under what would have been a high-pressure situation, indicating good emotional resilience. More specific examples of handling stress would provide clearer insights.",
    # # Neutral/balanced feedback examples
    "The candidate admits to not being the best at learning quickly but shows capability to handle situations that require new knowledge, albeit with external help.",
    "The candidate demonstrated flexibility by adapting to a new challenge and working collaboratively to solve it, though it wasn't an independent effort.",
    "While the candidate does not explicitly describe their emotional state during the situation, their ability to seek help and successfully solve the problem suggests a level of resilience.",
    # Negative feedback examples
    "The candidate admits to a significant struggle with learning new skills and knowledge, relying heavily on others rather than developing personal competence.",
    "Based on the candidate's reliance on others for learning and task completion, it suggests a low level of flexibility in adapting to new roles or responsibilities independently.",
    "There is insufficient information to accurately assess emotional resilience directly; however, the dependency on others might indicate potential challenges in handling stress or pressure independently."
]


# Run sentiment analysis
for text in sample_texts:
    result = sentiment_analyzer(text)
    # Sort results by score in descending order
    sorted_results = sorted(result[0], key=lambda x: x['score'], reverse=True)
    top_result = sorted_results[0]
    second_result = sorted_results[1]



    # Check the score difference between top two predictions
    if top_result['score'] - second_result['score'] < THRESHOLD:
        # If difference is small, flag as 'uncertain' or assign neutral
        predicted_label = "neutral"
        final_score = (top_result['score'] + second_result['score']) / 2  # or any custom logic
    else:
        label_str = top_result['label']
        # Robustly extract the label (if it is numeric or a string)
        if label_str.startswith("LABEL_"):
            predicted_label_id = int(label_str.split('_')[1])
            predicted_label = id2label[predicted_label_id]
        elif label_str.isdigit():
            predicted_label_id = int(label_str)
            predicted_label = id2label[predicted_label_id]
        else:
            # If the label is already final (e.g., "positive")
            predicted_label = label_str
        final_score = top_result['score']

    print(f"Text: {text}")
    print(f"Predicted sentiment: {predicted_label} (score: {final_score:.3f})\n")


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Text: The candidate demonstrated excellent learning agility by quickly mastering necessary skills in graphic design and marketing under a tight deadline. This adaptability directly contributed to the team's success and exceeded typical expectations.
Predicted sentiment: positive (score: 0.998)

Text: The candidate showed strong flexibility by stepping into the marketing director's role on short notice. Adapting to new responsibilities and learning new skills on the fly indicates a high level of flexibility.
Predicted sentiment: positive (score: 0.998)

Text: The candidate likely maintained composure given the successful outcome under what would have been a high-pressure situation, indicating good emotional resilience. More specific examples of handling stress would provide clearer insights.
Predicted sentiment: neutral (score: 0.812)

Text: The candidate admits to not being the best at learning quickly but shows capability to handle situations that require new knowledge, albeit with ex

### Fine Tuning

In [None]:
import pandas as pd
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch

# Check GPU availability
print("Using GPU:", torch.cuda.is_available())

# Load and Prepare the CSV Dataset
csv_file = "interview_fine_tuning_dataset.csv"  # Update with your CSV file path
data_df = pd.read_csv(csv_file)

# Convert the pandas DataFrame into a Hugging Face Dataset
dataset = Dataset.from_pandas(data_df)

# Define label mapping for 3-class sentiment analysis
label2id = {
    "negative": 0,
    "neutral": 1,
    "positive": 2,
}
id2label = {v: k for k, v in label2id.items()}

# Preprocess Labels: Map string labels to integers
def preprocess_labels(example):
    if example["label"] in label2id:
        example["label"] = label2id[example["label"]]
    else:
        example["label"] = -1  # You can filter these out later if needed
    return example

dataset = dataset.map(preprocess_labels)
dataset = dataset.filter(lambda example: example["label"] != -1)

print("Unique labels in dataset:", set(dataset["label"]))  # Expect {0, 1, 2}

# Load Tokenizer and Tokenize Data
model_checkpoint = "./my_sentiment_analysis_model"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Load Pre-trained Model for 3-Class Classification
num_labels = 3  # negative, neutral, positive
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True
)


# Set Training Arguments
training_args = TrainingArguments(
    output_dir="./domain_finetuned_model2",
    evaluation_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=7,              
    learning_rate=5e-6,             
    weight_decay=0.01,
    logging_steps=10,
    fp16=torch.cuda.is_available(),
)

# Create Trainer and Fine-Tune
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,  # If you have a separate eval split, use that
    tokenizer=tokenizer,
)

# Start fine-tuning
trainer.train()

# Save the Fine-Tuned Model and Tokenizer
trainer.save_model("./domain_finetuned_model")
tokenizer.save_pretrained("./domain_finetuned_model")

print("Fine-tuning complete. Model saved to './domain_finetuned_model'.")


Using GPU: False


Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 809/809 [00:00<00:00, 26646.29 examples/s]
Filter: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 809/809 [00:00<00:00, 146504.55 examples/s]


Unique labels in dataset: {0, 1, 2}


Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 809/809 [00:00<00:00, 21809.11 examples/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./my_sentiment_analysis_model and are newly initialized because the shapes did not match:
- classifier.out_proj.bias: found shape torch.Size([27]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.out_proj.weight: found shape torch.Size([27, 768]) in the checkpoint and torch.Size([3, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
                                               
  0%|          | 1/714 [00:34<28:32,  2.40s/it] 

{'loss': 1.2752, 'grad_norm': 11.561749458312988, 'learning_rate': 4.929971988795519e-06, 'epoch': 0.1}


                                               
  0%|          | 1/714 [00:58<28:32,  2.40s/it] 

{'loss': 1.0886, 'grad_norm': 6.76071834564209, 'learning_rate': 4.859943977591037e-06, 'epoch': 0.2}


                                               
  0%|          | 1/714 [01:22<28:32,  2.40s/it] 

{'loss': 0.9179, 'grad_norm': 7.423820972442627, 'learning_rate': 4.7899159663865555e-06, 'epoch': 0.29}


                                               
  0%|          | 1/714 [01:48<28:32,  2.40s/it] 

{'loss': 0.796, 'grad_norm': 6.351580619812012, 'learning_rate': 4.719887955182073e-06, 'epoch': 0.39}


                                               
  0%|          | 1/714 [02:09<28:32,  2.40s/it] 

{'loss': 0.6768, 'grad_norm': 5.218016624450684, 'learning_rate': 4.6498599439775914e-06, 'epoch': 0.49}


                                               
  0%|          | 1/714 [02:31<28:32,  2.40s/it] 

{'loss': 0.5702, 'grad_norm': 7.124213695526123, 'learning_rate': 4.57983193277311e-06, 'epoch': 0.59}


                                               
  0%|          | 1/714 [02:52<28:32,  2.40s/it] 

{'loss': 0.4695, 'grad_norm': 12.873468399047852, 'learning_rate': 4.509803921568628e-06, 'epoch': 0.69}


                                               
  0%|          | 1/714 [03:15<28:32,  2.40s/it] 

{'loss': 0.4279, 'grad_norm': 11.075864791870117, 'learning_rate': 4.439775910364146e-06, 'epoch': 0.78}


                                               
  0%|          | 1/714 [03:40<28:32,  2.40s/it] 

{'loss': 0.3226, 'grad_norm': 3.280181646347046, 'learning_rate': 4.369747899159664e-06, 'epoch': 0.88}


                                               
  0%|          | 1/714 [04:08<28:32,  2.40s/it]  

{'loss': 0.2761, 'grad_norm': 12.315227508544922, 'learning_rate': 4.2997198879551825e-06, 'epoch': 0.98}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                               
[A                                              

  0%|          | 1/714 [05:25<28:32,  2.40s/it]  
[A
[A

{'eval_loss': 0.16685998439788818, 'eval_runtime': 74.0009, 'eval_samples_per_second': 10.932, 'eval_steps_per_second': 1.378, 'epoch': 1.0}


                                               
  0%|          | 1/714 [05:47<28:32,  2.40s/it]  

{'loss': 0.2716, 'grad_norm': 4.469427108764648, 'learning_rate': 4.229691876750701e-06, 'epoch': 1.08}


                                               
  0%|          | 1/714 [06:15<28:32,  2.40s/it]  

{'loss': 0.1899, 'grad_norm': 19.305538177490234, 'learning_rate': 4.159663865546219e-06, 'epoch': 1.18}


                                               
  0%|          | 1/714 [06:46<28:32,  2.40s/it]  

{'loss': 0.1207, 'grad_norm': 1.8711103200912476, 'learning_rate': 4.089635854341737e-06, 'epoch': 1.27}


                                               
  0%|          | 1/714 [07:16<28:32,  2.40s/it]  

{'loss': 0.0774, 'grad_norm': 1.9173388481140137, 'learning_rate': 4.019607843137255e-06, 'epoch': 1.37}


                                               
  0%|          | 1/714 [07:41<28:32,  2.40s/it]  

{'loss': 0.0614, 'grad_norm': 0.8243932127952576, 'learning_rate': 3.9495798319327735e-06, 'epoch': 1.47}


                                               
  0%|          | 1/714 [08:18<28:32,  2.40s/it]  

{'loss': 0.073, 'grad_norm': 1.0969237089157104, 'learning_rate': 3.879551820728292e-06, 'epoch': 1.57}


                                               
  0%|          | 1/714 [08:45<28:32,  2.40s/it]  

{'loss': 0.0664, 'grad_norm': 43.335533142089844, 'learning_rate': 3.80952380952381e-06, 'epoch': 1.67}


                                               
  0%|          | 1/714 [09:09<28:32,  2.40s/it]  

{'loss': 0.054, 'grad_norm': 49.75318145751953, 'learning_rate': 3.739495798319328e-06, 'epoch': 1.76}


                                               
  0%|          | 1/714 [09:33<28:32,  2.40s/it]  

{'loss': 0.0313, 'grad_norm': 1.274950623512268, 'learning_rate': 3.669467787114846e-06, 'epoch': 1.86}


                                               
  0%|          | 1/714 [09:59<28:32,  2.40s/it]  

{'loss': 0.0338, 'grad_norm': 0.4950462281703949, 'learning_rate': 3.5994397759103645e-06, 'epoch': 1.96}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                               
[A                                              

  0%|          | 1/714 [11:16<28:32,  2.40s/it]  
[A
[A

{'eval_loss': 0.010555342771112919, 'eval_runtime': 66.6791, 'eval_samples_per_second': 12.133, 'eval_steps_per_second': 1.53, 'epoch': 2.0}


                                               
  0%|          | 1/714 [11:31<28:32,  2.40s/it]  

{'loss': 0.017, 'grad_norm': 0.26788094639778137, 'learning_rate': 3.529411764705883e-06, 'epoch': 2.06}


                                               
  0%|          | 1/714 [11:58<28:32,  2.40s/it]  

{'loss': 0.0137, 'grad_norm': 0.5735370516777039, 'learning_rate': 3.459383753501401e-06, 'epoch': 2.16}


                                               
  0%|          | 1/714 [12:23<28:32,  2.40s/it]  

{'loss': 0.011, 'grad_norm': 0.1785293072462082, 'learning_rate': 3.3893557422969192e-06, 'epoch': 2.25}


                                               
  0%|          | 1/714 [12:45<28:32,  2.40s/it]  

{'loss': 0.0444, 'grad_norm': 0.15469670295715332, 'learning_rate': 3.319327731092437e-06, 'epoch': 2.35}


                                               
  0%|          | 1/714 [13:08<28:32,  2.40s/it]  

{'loss': 0.0096, 'grad_norm': 0.1888919472694397, 'learning_rate': 3.2492997198879555e-06, 'epoch': 2.45}


                                               
  0%|          | 1/714 [13:33<28:32,  2.40s/it]  

{'loss': 0.0093, 'grad_norm': 4.568709850311279, 'learning_rate': 3.1792717086834735e-06, 'epoch': 2.55}


                                               
  0%|          | 1/714 [14:00<28:32,  2.40s/it]  

{'loss': 0.0291, 'grad_norm': 0.12409527599811554, 'learning_rate': 3.109243697478992e-06, 'epoch': 2.65}


                                               
  0%|          | 1/714 [14:23<28:32,  2.40s/it]  

{'loss': 0.011, 'grad_norm': 0.21006400883197784, 'learning_rate': 3.03921568627451e-06, 'epoch': 2.75}


                                               
  0%|          | 1/714 [14:46<28:32,  2.40s/it]  

{'loss': 0.0069, 'grad_norm': 0.13052129745483398, 'learning_rate': 2.969187675070028e-06, 'epoch': 2.84}


                                               
  0%|          | 1/714 [15:10<28:32,  2.40s/it]  

{'loss': 0.0097, 'grad_norm': 0.11785423010587692, 'learning_rate': 2.8991596638655466e-06, 'epoch': 2.94}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                               
[A                                              

  0%|          | 1/714 [16:23<28:32,  2.40s/it]  
[A
[A

{'eval_loss': 0.0038624631706625223, 'eval_runtime': 60.0514, 'eval_samples_per_second': 13.472, 'eval_steps_per_second': 1.699, 'epoch': 3.0}


                                               
  0%|          | 1/714 [16:32<28:32,  2.40s/it]  

{'loss': 0.0064, 'grad_norm': 0.12010066956281662, 'learning_rate': 2.8291316526610645e-06, 'epoch': 3.04}


                                               
  0%|          | 1/714 [16:55<28:32,  2.40s/it]  

{'loss': 0.0062, 'grad_norm': 0.09620436280965805, 'learning_rate': 2.759103641456583e-06, 'epoch': 3.14}


                                               
  0%|          | 1/714 [17:19<28:32,  2.40s/it]  

{'loss': 0.0057, 'grad_norm': 0.0963892862200737, 'learning_rate': 2.689075630252101e-06, 'epoch': 3.24}


                                               
  0%|          | 1/714 [17:42<28:32,  2.40s/it]  

{'loss': 0.0052, 'grad_norm': 0.08186311274766922, 'learning_rate': 2.6190476190476192e-06, 'epoch': 3.33}


                                               
  0%|          | 1/714 [18:06<28:32,  2.40s/it]  

{'loss': 0.0049, 'grad_norm': 0.07950861006975174, 'learning_rate': 2.549019607843137e-06, 'epoch': 3.43}


                                               
  0%|          | 1/714 [18:32<28:32,  2.40s/it]  

{'loss': 0.005, 'grad_norm': 0.07384249567985535, 'learning_rate': 2.4789915966386555e-06, 'epoch': 3.53}


                                               
  0%|          | 1/714 [18:55<28:32,  2.40s/it]  

{'loss': 0.0048, 'grad_norm': 0.1781349927186966, 'learning_rate': 2.408963585434174e-06, 'epoch': 3.63}


                                               
  0%|          | 1/714 [19:16<28:32,  2.40s/it]  

{'loss': 0.0044, 'grad_norm': 0.07146653532981873, 'learning_rate': 2.338935574229692e-06, 'epoch': 3.73}


                                               
  0%|          | 1/714 [19:37<28:32,  2.40s/it]  

{'loss': 0.0044, 'grad_norm': 0.10152450948953629, 'learning_rate': 2.2689075630252102e-06, 'epoch': 3.82}


                                               
  0%|          | 1/714 [20:00<28:32,  2.40s/it]  

{'loss': 0.0042, 'grad_norm': 1.1126689910888672, 'learning_rate': 2.1988795518207286e-06, 'epoch': 3.92}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                               
[A                                              

  0%|          | 1/714 [21:43<28:32,  2.40s/it]  
[A
[A

{'eval_loss': 0.00242591486312449, 'eval_runtime': 83.8675, 'eval_samples_per_second': 9.646, 'eval_steps_per_second': 1.216, 'epoch': 4.0}


                                               
  0%|          | 1/714 [21:48<28:32,  2.40s/it]    

{'loss': 0.0046, 'grad_norm': 0.05745465308427811, 'learning_rate': 2.1288515406162466e-06, 'epoch': 4.02}


                                               
  0%|          | 1/714 [22:15<28:32,  2.40s/it]  

{'loss': 0.0042, 'grad_norm': 0.0924827829003334, 'learning_rate': 2.058823529411765e-06, 'epoch': 4.12}


                                               
  0%|          | 1/714 [22:39<28:32,  2.40s/it]  

{'loss': 0.0038, 'grad_norm': 0.09697197377681732, 'learning_rate': 1.988795518207283e-06, 'epoch': 4.22}


                                               
  0%|          | 1/714 [23:07<28:32,  2.40s/it]  

{'loss': 0.005, 'grad_norm': 0.061234015971422195, 'learning_rate': 1.9187675070028013e-06, 'epoch': 4.31}


                                               
  0%|          | 1/714 [23:41<28:32,  2.40s/it]  

{'loss': 0.0043, 'grad_norm': 0.07060720026493073, 'learning_rate': 1.8487394957983196e-06, 'epoch': 4.41}


                                               
  0%|          | 1/714 [24:03<28:32,  2.40s/it]  

{'loss': 0.0035, 'grad_norm': 0.05273345112800598, 'learning_rate': 1.7787114845938378e-06, 'epoch': 4.51}


                                               
  0%|          | 1/714 [24:25<28:32,  2.40s/it]  

{'loss': 0.0041, 'grad_norm': 0.054452117532491684, 'learning_rate': 1.708683473389356e-06, 'epoch': 4.61}


                                               
  0%|          | 1/714 [24:53<28:32,  2.40s/it]  

{'loss': 0.0033, 'grad_norm': 0.07443356513977051, 'learning_rate': 1.6386554621848741e-06, 'epoch': 4.71}


                                               
  0%|          | 1/714 [25:19<28:32,  2.40s/it]  

{'loss': 0.0032, 'grad_norm': 0.06537928432226181, 'learning_rate': 1.5686274509803923e-06, 'epoch': 4.8}


                                               
  0%|          | 1/714 [25:44<28:32,  2.40s/it]  

{'loss': 0.0032, 'grad_norm': 0.05515480414032936, 'learning_rate': 1.4985994397759105e-06, 'epoch': 4.9}


                                               
  0%|          | 1/714 [26:06<28:32,  2.40s/it]  

{'loss': 0.003, 'grad_norm': 0.07485446333885193, 'learning_rate': 1.4285714285714286e-06, 'epoch': 5.0}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                               
[A                                              

  0%|          | 1/714 [27:17<28:32,  2.40s/it]  
[A
[A

{'eval_loss': 0.001939198700711131, 'eval_runtime': 70.9507, 'eval_samples_per_second': 11.402, 'eval_steps_per_second': 1.438, 'epoch': 5.0}


                                               
  0%|          | 1/714 [27:41<28:32,  2.40s/it]  

{'loss': 0.0032, 'grad_norm': 0.07802172750234604, 'learning_rate': 1.3585434173669468e-06, 'epoch': 5.1}


                                               
  0%|          | 1/714 [28:11<28:32,  2.40s/it]  

{'loss': 0.0029, 'grad_norm': 0.056034527719020844, 'learning_rate': 1.288515406162465e-06, 'epoch': 5.2}


                                               
  0%|          | 1/714 [28:37<28:32,  2.40s/it]  

{'loss': 0.0031, 'grad_norm': 0.08754955232143402, 'learning_rate': 1.2184873949579833e-06, 'epoch': 5.29}


                                               
  0%|          | 1/714 [29:00<28:32,  2.40s/it]  

{'loss': 0.0029, 'grad_norm': 0.04419339448213577, 'learning_rate': 1.1484593837535015e-06, 'epoch': 5.39}


                                               
  0%|          | 1/714 [29:25<28:32,  2.40s/it]  

{'loss': 0.0028, 'grad_norm': 0.04598620533943176, 'learning_rate': 1.0784313725490197e-06, 'epoch': 5.49}


                                               
  0%|          | 1/714 [29:54<28:32,  2.40s/it]  

{'loss': 0.0029, 'grad_norm': 0.04886883869767189, 'learning_rate': 1.0084033613445378e-06, 'epoch': 5.59}


                                               
  0%|          | 1/714 [30:20<28:32,  2.40s/it]  

{'loss': 0.0029, 'grad_norm': 0.049491122364997864, 'learning_rate': 9.383753501400561e-07, 'epoch': 5.69}


                                               
  0%|          | 1/714 [30:43<28:32,  2.40s/it]  

{'loss': 0.0029, 'grad_norm': 0.052363138645887375, 'learning_rate': 8.683473389355742e-07, 'epoch': 5.78}


                                               
  0%|          | 1/714 [31:11<28:32,  2.40s/it]  

{'loss': 0.0027, 'grad_norm': 0.04519492760300636, 'learning_rate': 7.983193277310924e-07, 'epoch': 5.88}


                                               
  0%|          | 1/714 [31:41<28:32,  2.40s/it]  

{'loss': 0.0028, 'grad_norm': 0.04052465781569481, 'learning_rate': 7.282913165266108e-07, 'epoch': 5.98}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                               
[A                                              

  0%|          | 1/714 [32:54<28:32,  2.40s/it]  
[A
[A

{'eval_loss': 0.0016582279931753874, 'eval_runtime': 69.1742, 'eval_samples_per_second': 11.695, 'eval_steps_per_second': 1.475, 'epoch': 6.0}


                                               
  0%|          | 1/714 [33:17<28:32,  2.40s/it]  

{'loss': 0.0028, 'grad_norm': 0.051314812153577805, 'learning_rate': 6.58263305322129e-07, 'epoch': 6.08}


                                               
  0%|          | 1/714 [33:43<28:32,  2.40s/it]  

{'loss': 0.0026, 'grad_norm': 0.034940071403980255, 'learning_rate': 5.882352941176471e-07, 'epoch': 6.18}


                                               
  0%|          | 1/714 [34:09<28:32,  2.40s/it]  

{'loss': 0.0027, 'grad_norm': 0.040694691240787506, 'learning_rate': 5.182072829131654e-07, 'epoch': 6.27}


                                               
  0%|          | 1/714 [34:38<28:32,  2.40s/it]  

{'loss': 0.0029, 'grad_norm': 0.04908066242933273, 'learning_rate': 4.481792717086835e-07, 'epoch': 6.37}


                                               
  0%|          | 1/714 [35:05<28:32,  2.40s/it]  

{'loss': 0.0027, 'grad_norm': 0.046657949686050415, 'learning_rate': 3.781512605042017e-07, 'epoch': 6.47}


                                               
  0%|          | 1/714 [35:28<28:32,  2.40s/it]  

{'loss': 0.0028, 'grad_norm': 0.04070271924138069, 'learning_rate': 3.081232492997199e-07, 'epoch': 6.57}


                                               
  0%|          | 1/714 [35:56<28:32,  2.40s/it]  

{'loss': 0.0032, 'grad_norm': 0.03689040616154671, 'learning_rate': 2.3809523809523811e-07, 'epoch': 6.67}


                                               
  0%|          | 1/714 [36:25<28:32,  2.40s/it]  

{'loss': 0.0026, 'grad_norm': 0.05785620957612991, 'learning_rate': 1.680672268907563e-07, 'epoch': 6.76}


                                               
  0%|          | 1/714 [36:51<28:32,  2.40s/it]  

{'loss': 0.0026, 'grad_norm': 0.044532354921102524, 'learning_rate': 9.803921568627452e-08, 'epoch': 6.86}


                                               
  0%|          | 1/714 [37:14<28:32,  2.40s/it]  

{'loss': 0.0026, 'grad_norm': 0.0409274585545063, 'learning_rate': 2.8011204481792718e-08, 'epoch': 6.96}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                               
[A                                              

  0%|          | 1/714 [38:44<28:32,  2.40s/it]  
[A
                                               
100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 714/714 [38:37<00:00,  3.25s/it]


{'eval_loss': 0.0015791155165061355, 'eval_runtime': 80.2535, 'eval_samples_per_second': 10.081, 'eval_steps_per_second': 1.271, 'epoch': 7.0}
{'train_runtime': 2317.4296, 'train_samples_per_second': 2.444, 'train_steps_per_second': 0.308, 'train_loss': 0.11362725891647678, 'epoch': 7.0}
Fine-tuning complete. Model saved to './domain_finetuned_model'.
