In [1]:
import os
import pandas as pd
from datasets import Dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch

# Ensure GPU is visible
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Check if CUDA is available
print("CUDA is available" if torch.cuda.is_available() else "CUDA is not available")

# Load your dataset from Excel
data = pd.read_csv(r'C:\Users\admin\Desktop\SQ\try.csv')

# Preprocess function
def preprocess_text(text):
    return text.lower().replace('\n', ' ').replace('\r', '')

# Apply preprocessing
data['text'] = data['text'].apply(preprocess_text)

# Encode labels
label_encoder = LabelEncoder()
data['target'] = label_encoder.fit_transform(data['target'])
num_labels = len(label_encoder.classes_)
print(num_labels)
print(data['target'].value_counts())

  from .autonotebook import tqdm as notebook_tqdm


CUDA is not available
8
target
3    7
1    7
0    4
5    4
4    4
6    4
7    4
2    4
Name: count, dtype: int64


In [2]:
# Split the dataset
train_data, test_data = train_test_split(data, test_size=0.2, stratify=data['target'], random_state=42)

# Convert to Hugging Face Dataset
train_dataset = Dataset.from_pandas(train_data.reset_index(drop=True))
test_dataset = Dataset.from_pandas(test_data.reset_index(drop=True))

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("MoritzLaurer/roberta-large-zeroshot-v2.0-c")

# Tokenize the texts
def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True)

train_dataset = train_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

# Rename the target column to labels
train_dataset = train_dataset.rename_column("target", "labels")
test_dataset = test_dataset.rename_column("target", "labels")

# Set format for PyTorch
train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
print(train_data)
print(train_dataset)

Map: 100%|██████████| 30/30 [00:00<00:00, 5841.11 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2575.37 examples/s]

                                                 text  target
5   xyz ltd declares bankruptcy amidst financial t...       0
3         def inc. secures $30 million loan from bank       1
9                 abc and xyz form strategic alliance       4
30            abc corp reports record earnings for q1       7
34        abc corp issues new shares to raise capital       3
36              def ltd announces debt financing deal       1
25           xyz ltd introduces next-gen tech product       5
14            xyz corp. announces new esg initiatives       2
35     company xyz completes successful funding round       3
13              xyz achieves record quarterly revenue       7
10                major agency downgrades company xyz       6
24           tech company def launches new smartphone       5
16                     new share issuance by def corp       3
20       company xyz signs $40 million loan agreement       1
22             xyz ltd faces bankruptcy due to losses       0
17  comp




In [None]:
# Load the model
model = AutoModelForSequenceClassification.from_pretrained("MoritzLaurer/roberta-large-zeroshot-v2.0-c", num_labels=num_labels, ignore_mismatched_sizes=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./saved_model',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=10,
    weight_decay=0.01,
    report_to=[]
)

# Create a Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Train the model
trainer.train()

# Evaluate the model
results = trainer.evaluate()
print(f"Evaluation results: {results}")

# Save the model
trainer.save_model('./saved_model')

# Optionally, save the tokenizer as well
tokenizer.save_pretrained('./saved_model')

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Load the saved model and tokenizer
model_path = './saved_model'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)


# Function to classify new titles with confidence scores
def classify_titles(titles):
    results = []
    for title in titles:
        # Tokenize the title
        inputs = tokenizer(title, return_tensors="pt", truncation=True, padding=True)
        
        # Get model predictions
        with torch.no_grad():
            outputs = model(**inputs)
        
        # Get the predicted class and confidence score
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1).numpy()[0]
        predicted_class_id = np.argmax(probabilities)
        confidence_score = probabilities[predicted_class_id]
        
        # Get the corresponding category
        predicted_category = label_encoder.inverse_transform([predicted_class_id])[0]
        
        # Debugging: print logits, predicted class ID, and confidence score
        print(f"Title: '{title}'")
        print(f"Predicted class ID: {predicted_class_id}")
        print(f"Confidence score: {confidence_score:.4f}")
        
        results.append((title, predicted_category, confidence_score))
    
    return results

# Example titles to classify
titles_to_classify = ["Volkswagen delays ID.7 EV launch in US, Canada"]
classified_titles = classify_titles(titles_to_classify)

# Print the results
for title, category, confidence in classified_titles:
    print(f"Title: '{title}' is classified as '{category}'" )



In [None]:
import pandas as pd
from datasets import Dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from transformers import EarlyStoppingCallback
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch
import numpy as np
import os

# Ensure GPU is visible
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Load your dataset from Excel
data = pd.read_excel(r'C:\Users\admin\Desktop\traini\newsdata.xlsx')

# Preprocess function
def preprocess_text(text):
    return text.lower().replace('\n', ' ').replace('\r', '')

# Apply preprocessing
data['text'] = data['text'].apply(preprocess_text)

# Remove classes with fewer than 2 samples
min_samples = 2
value_counts = data['target'].value_counts()
data = data[data['target'].isin(value_counts[value_counts >= min_samples].index)]

# Encode labels
label_encoder = LabelEncoder()
data['target'] = label_encoder.fit_transform(data['target'])
num_labels = len(label_encoder.classes_)

print(f"Number of classes after filtering: {num_labels}")
print(data['target'].value_counts())

# Tokenizer
model_name = "MoritzLaurer/roberta-large-zeroshot-v2.0-c"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenize function
def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True)

# Initialize cross-validation
kf = StratifiedKFold(n_splits=5)

# Cross-validation training and evaluation
accuracies = []
precisions = []
recalls = []
f1_scores = []

for fold, (train_index, val_index) in enumerate(kf.split(data, data['target'])):
    print(f"Training fold {fold + 1}")
    
    train_data = data.iloc[train_index].reset_index(drop=True)
    val_data = data.iloc[val_index].reset_index(drop=True)

    # Convert to Hugging Face Dataset
    train_dataset = Dataset.from_pandas(train_data)
    val_dataset = Dataset.from_pandas(val_data)
    
    # Tokenize the datasets
    train_dataset = train_dataset.map(tokenize, batched=True)
    val_dataset = val_dataset.map(tokenize, batched=True)

    # Rename the target column to labels
    train_dataset = train_dataset.rename_column("target", "labels")
    val_dataset = val_dataset.rename_column("target", "labels")

    # Set format for PyTorch
    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    # Load the model
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, ignore_mismatched_sizes=True)

    # Define training arguments
    training_args = TrainingArguments(
        output_dir=f'./results_fold_{fold}',
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=3e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=10,
        weight_decay=0.01,
        use_cpu=not torch.cuda.is_available(),
        report_to=[],
        save_total_limit=2,
        load_best_model_at_end=True,
    )

    # Create a Trainer instance
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
    )

    # Train the model
    trainer.train()

    # Evaluate the model
    results = trainer.evaluate()
    print(f"Evaluation results for fold {fold + 1}: {results}")

    # Save the model and tokenizer
    model.save_pretrained(f'./results_fold_{fold}')
    tokenizer.save_pretrained(f'./results_fold_{fold}')
    
    # Calculate predictions and true labels
    predictions = np.argmax(trainer.predict(val_dataset).predictions, axis=1)
    true_labels = val_dataset['labels'].numpy()
    
    # Calculate metrics
    accuracy = accuracy_score(true_labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='weighted')
    
    # Append metrics for this fold
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)

# Calculate average metrics across all folds
average_accuracy = np.mean(accuracies)
average_precision = np.mean(precisions)
average_recall = np.mean(recalls)
average_f1_score = np.mean(f1_scores)

print(f"Average accuracy: {average_accuracy:.4f}")
print(f"Average precision: {average_precision:.4f}")
print(f"Average recall: {average_recall:.4f}")
print(f"Average F1-score: {average_f1_score:.4f}")

# Determine the best model based on average F1-score (or another metric)
best_fold_index = np.argmax(f1_scores)
print(f"The best model is from fold {best_fold_index + 1}")

# Load the best model for final evaluation on the test set
model_path = f'./results_fold_{best_fold_index}'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Function to classify new titles and calculate accuracy on the test set
def classify_and_evaluate(test_dataset):
    all_predictions = []
    all_labels = []

    for i in range(len(test_dataset)):
        inputs = {
            'input_ids': test_dataset[i]['input_ids'].unsqueeze(0),
            'attention_mask': test_dataset[i]['attention_mask'].unsqueeze(0)
        }
        labels = test_dataset[i]['labels'].unsqueeze(0)

        with torch.no_grad():
            outputs = model(**inputs)
        
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1).numpy()[0]
        predicted_class_id = np.argmax(probabilities)
        confidence_score = probabilities[predicted_class_id]

        # Append predictions and true labels
        all_predictions.append(predicted_class_id)
        all_labels.append(labels.numpy()[0])

        # Debugging: print logits, predicted class ID, and confidence score
        print(f"Sample {i}")
        print(f"Logits: {logits}")
        print(f"Predicted class ID: {predicted_class_id}")
        print(f"Confidence score: {confidence_score:.4f}")
        print(f"True label: {labels.numpy()[0]}")
        print()

    # Calculate accuracy
    accuracy = np.mean(np.array(all_predictions) == np.array(all_labels))
    return accuracy

# Convert test data to Hugging Face Dataset and tokenize
test_data = data.iloc[val_index].reset_index(drop=True)
test_dataset = Dataset.from_pandas(test_data)
test_dataset = test_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.rename_column("target", "labels")
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

# Evaluate the model on the test dataset
accuracy = classify_and_evaluate(test_dataset)
print(f"Accuracy on the test dataset: {accuracy:.4f}")


In [3]:
import optuna
import pandas as pd
from datasets import Dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSequenceClassification, EarlyStoppingCallback
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch
import numpy as np
import os

# Ensure GPU is visible
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Load your dataset from Excel
data = pd.read_excel(r'C:\Users\admin\Desktop\traini\newsdata.xlsx')

# Preprocess function
def preprocess_text(text):
    return text.lower().replace('\n', ' ').replace('\r', '')

# Apply preprocessing
data['text'] = data['text'].apply(preprocess_text)

# Remove classes with fewer than 2 samples
min_samples = 2
value_counts = data['target'].value_counts()
data = data[data['target'].isin(value_counts[value_counts >= min_samples].index)]

# Encode labels
label_encoder = LabelEncoder()
data['target'] = label_encoder.fit_transform(data['target'])
num_labels = len(label_encoder.classes_)

print(f"Number of classes after filtering: {num_labels}")
print(data['target'].value_counts())

# Tokenizer
model_name = "MoritzLaurer/roberta-large-zeroshot-v2.0-c"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenize function
def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True)

# Initialize cross-validation
kf = StratifiedKFold(n_splits=5)

# Objective function for Optuna
def objective(trial):
    accuracies = []
    
    for fold, (train_index, val_index) in enumerate(kf.split(data, data['target'])):
        print(f"Training fold {fold + 1}")
        
        train_data = data.iloc[train_index].reset_index(drop=True)
        val_data = data.iloc[val_index].reset_index(drop=True)

        # Convert to Hugging Face Dataset
        train_dataset = Dataset.from_pandas(train_data)
        val_dataset = Dataset.from_pandas(val_data)
        
        # Tokenize the datasets
        train_dataset = train_dataset.map(tokenize, batched=True)
        val_dataset = val_dataset.map(tokenize, batched=True)

        # Rename the target column to labels
        train_dataset = train_dataset.rename_column("target", "labels")
        val_dataset = val_dataset.rename_column("target", "labels")

        # Set format for PyTorch
        train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
        val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

        # Load the model
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, ignore_mismatched_sizes=True)

        # Define hyperparameters to tune
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 5e-5)
        num_train_epochs = trial.suggest_int('num_train_epochs', 2, 10)
        per_device_train_batch_size = trial.suggest_int('per_device_train_batch_size', 4, 16)

        # Define training arguments
        training_args = TrainingArguments(
            output_dir=f'./results_fold_{fold}',
            evaluation_strategy="epoch",
            save_strategy="epoch",
            learning_rate=learning_rate,
            per_device_train_batch_size=per_device_train_batch_size,
            per_device_eval_batch_size=8,
            num_train_epochs=num_train_epochs,
            weight_decay=0.01,
            use_cpu=not torch.cuda.is_available(),
            report_to=[],
            save_total_limit=2,
            load_best_model_at_end=True,
        )

        # Create a Trainer instance
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
        )

        # Train the model
        trainer.train()

        # Evaluate the model
        predictions = np.argmax(trainer.predict(val_dataset).predictions, axis=1)
        true_labels = val_dataset['labels'].numpy()
        accuracy = accuracy_score(true_labels, predictions)
        accuracies.append(accuracy)

    average_accuracy = np.mean(accuracies)
    return average_accuracy

# Create a study and optimize
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# Print best hyperparameters
print(f"Best hyperparameters: {study.best_params}")

# Train the final model with the best hyperparameters
best_params = study.best_params

# Initialize cross-validation
kf = StratifiedKFold(n_splits=5)

accuracies = []
precisions = []
recalls = []
f1_scores = []

for fold, (train_index, val_index) in enumerate(kf.split(data, data['target'])):
    print(f"Training fold {fold + 1}")
    
    train_data = data.iloc[train_index].reset_index(drop=True)
    val_data = data.iloc[val_index].reset_index(drop=True)

    # Convert to Hugging Face Dataset
    train_dataset = Dataset.from_pandas(train_data)
    val_dataset = Dataset.from_pandas(val_data)
    
    # Tokenize the datasets
    train_dataset = train_dataset.map(tokenize, batched=True)
    val_dataset = val_dataset.map(tokenize, batched=True)

    # Rename the target column to labels
    train_dataset = train_dataset.rename_column("target", "labels")
    val_dataset = val_dataset.rename_column("target", "labels")

    # Set format for PyTorch
    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    # Load the model
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, ignore_mismatched_sizes=True)

    # Define training arguments with the best hyperparameters
    training_args = TrainingArguments(
        output_dir=f'./results_fold_{fold}',
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=best_params['learning_rate'],
        per_device_train_batch_size=best_params['per_device_train_batch_size'],
        per_device_eval_batch_size=8,
        num_train_epochs=best_params['num_train_epochs'],
        weight_decay=0.01,
        use_cpu=not torch.cuda.is_available(),
        report_to=[],
        save_total_limit=2,
        load_best_model_at_end=True,
    )

    # Create a Trainer instance
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
    )

    # Train the model
    trainer.train()

    # Evaluate the model
    results = trainer.evaluate()
    print(f"Evaluation results for fold {fold + 1}: {results}")

    # Save the model and tokenizer
    model.save_pretrained(f'./results_fold_{fold}')
    tokenizer.save_pretrained(f'./results_fold_{fold}')
    
    # Calculate predictions and true labels
    predictions = np.argmax(trainer.predict(val_dataset).predictions, axis=1)
    true_labels = val_dataset['labels'].numpy()
    
    # Calculate metrics
    accuracy = accuracy_score(true_labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='weighted')
    
    # Append metrics for this fold
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)

# Calculate average metrics across all folds
average_accuracy = np.mean(accuracies)
average_precision = np.mean(precisions)
average_recall = np.mean(recalls)
average_f1_score = np.mean(f1_scores)

print(f"Average accuracy: {average_accuracy:.4f}")
print(f"Average precision: {average_precision:.4f}")
print(f"Average recall: {average_recall:.4f}")
print(f"Average F1-score: {average_f1_score:.4f}")

# Determine the best model based on average F1-score (or another metric)
best_fold_index = np.argmax(f1_scores)
print(f"The best model is from fold {best_fold_index + 1}")

# Load the best model for final evaluation on the test set
model_path = f'./results_fold_{best_fold_index}'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Function to classify new titles and calculate accuracy on the test set
def classify_and_evaluate(test_dataset):
    all_predictions = []
    all_labels = []

    for i in range(len(test_dataset)):
        inputs = {
            'input_ids': test_dataset[i]['input_ids'].unsqueeze(0),
            'attention_mask': test_dataset[i]['attention_mask'].unsqueeze(0)
        }
        labels = test_dataset[i]['labels'].unsqueeze(0)

        with torch.no_grad():
            outputs = model(**inputs)
        
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1).numpy()[0]
        predicted_class_id = np.argmax(probabilities)
        confidence_score = probabilities[predicted_class_id]

        # Append predictions and true labels
        all_predictions.append(predicted_class_id)
        all_labels.append(labels.numpy()[0])

        # Debugging: print logits, predicted class ID, and confidence score
        print(f"Sample {i}")
        print(f"Logits: {logits}")
        print(f"Predicted class ID: {predicted_class_id}")
        print(f"Confidence score: {confidence_score:.4f}")
        print(f"True label: {labels.numpy()[0]}")
        print()

    # Calculate accuracy
    accuracy = np.mean(np.array(all_predictions) == np.array(all_labels))
    return accuracy

# Convert test data to Hugging Face Dataset and tokenize
test_data = data.iloc[val_index].reset_index(drop=True)
test_dataset = Dataset.from_pandas(test_data)
test_dataset = test_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.rename_column("target", "labels")
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

# Evaluate the model on the test dataset
accuracy = classify_and_evaluate(test_dataset)
print(f"Accuracy on the test dataset: {accuracy:.4f}")


Number of classes after filtering: 19
target
16    34
17    32
10    18
18    13
5     12
6     11
8      9
1      8
2      7
7      6
3      3
0      3
14     3
13     3
11     3
12     2
9      2
4      2
15     2
Name: count, dtype: int64


[I 2024-06-19 18:29:52,802] A new study created in memory with name: no-name-9f197fe0-6b81-44cc-8af3-4e841768777a


Training fold 1


Map: 100%|██████████| 138/138 [00:00<00:00, 19762.84 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 10973.29 examples/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at MoritzLaurer/roberta-large-zeroshot-v2.0-c and are newly initialized because the shapes did not match:
- classifier.out_proj.bias: found shape torch.Size([2]) in the checkpoint and torch.Size([19]) in the model instantiated
- classifier.out_proj.weight: found shape torch.Size([2, 1024]) in the checkpoint and torch.Size([19, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 5e-5)

[A
[A
[A
[A
[A

[A[A                                       
                                                
 30%|███       | 54/180 [19:09<05:32,  2.64s/it]
[A

{'eval_loss': 2.6152985095977783, 'eval_runtime': 2.9661, 'eval_samples_per_second': 11.8, 'eval_steps_per_second': 1.686, 'epoch': 1.0}



[A
[A
[A
[A
[A
                                                

 30%|███       | 54/180 [20:23<05:32,  2.64s/it]
[A
[A

{'eval_loss': 2.5781986713409424, 'eval_runtime': 2.169, 'eval_samples_per_second': 16.136, 'eval_steps_per_second': 2.305, 'epoch': 2.0}



[A
[A
[A
[A
[A
                                                

 30%|███       | 54/180 [21:37<05:32,  2.64s/it]
[A
[A

{'eval_loss': 2.5664045810699463, 'eval_runtime': 2.2079, 'eval_samples_per_second': 15.852, 'eval_steps_per_second': 2.265, 'epoch': 3.0}



[A
[A
[A
[A
[A
                                                

 30%|███       | 54/180 [22:47<05:32,  2.64s/it]
[A
[A

{'eval_loss': 2.5602946281433105, 'eval_runtime': 2.1889, 'eval_samples_per_second': 15.99, 'eval_steps_per_second': 2.284, 'epoch': 4.0}



[A
[A
[A
[A
[A
                                                

 30%|███       | 54/180 [23:57<05:32,  2.64s/it]
[A
[A

{'eval_loss': 2.5373024940490723, 'eval_runtime': 2.1843, 'eval_samples_per_second': 16.023, 'eval_steps_per_second': 2.289, 'epoch': 5.0}



[A
[A
[A
[A
[A
                                                

 30%|███       | 54/180 [25:12<05:32,  2.64s/it]
[A
[A

{'eval_loss': 2.519718885421753, 'eval_runtime': 2.2058, 'eval_samples_per_second': 15.867, 'eval_steps_per_second': 2.267, 'epoch': 6.0}



[A
[A
[A
[A
[A
                                                

 30%|███       | 54/180 [26:23<05:32,  2.64s/it]
[A
[A

{'eval_loss': 2.4960179328918457, 'eval_runtime': 2.1832, 'eval_samples_per_second': 16.032, 'eval_steps_per_second': 2.29, 'epoch': 7.0}



[A
[A
[A
[A
[A
                                                

 30%|███       | 54/180 [27:34<05:32,  2.64s/it]
[A
[A

{'eval_loss': 2.489193916320801, 'eval_runtime': 2.1819, 'eval_samples_per_second': 16.041, 'eval_steps_per_second': 2.292, 'epoch': 8.0}



100%|██████████| 80/80 [09:44<00:00,  7.31s/it]]


{'train_runtime': 584.4973, 'train_samples_per_second': 1.889, 'train_steps_per_second': 0.137, 'train_loss': 2.489993667602539, 'epoch': 8.0}


100%|██████████| 5/5 [00:01<00:00,  4.41it/s]


Training fold 2


Map: 100%|██████████| 138/138 [00:00<00:00, 24045.11 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 8774.17 examples/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at MoritzLaurer/roberta-large-zeroshot-v2.0-c and are newly initialized because the shapes did not match:
- classifier.out_proj.bias: found shape torch.Size([2]) in the checkpoint and torch.Size([19]) in the model instantiated
- classifier.out_proj.weight: found shape torch.Size([2, 1024]) in the checkpoint and torch.Size([19, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 5e-5)

[A
[A
[A
[A
[A

                                                
[A                                            
 30%|███       | 54/180 [28:59<05:32,  2.64s/it]
[A

{'eval_loss': 2.6826119422912598, 'eval_runtime': 1.9393, 'eval_samples_per_second': 18.048, 'eval_steps_per_second': 2.578, 'epoch': 1.0}



[A
[A
[A
[A
[A

                                                
[A                                            
 30%|███       | 54/180 [30:13<05:32,  2.64s/it]
[A

{'eval_loss': 2.5644454956054688, 'eval_runtime': 1.9375, 'eval_samples_per_second': 18.065, 'eval_steps_per_second': 2.581, 'epoch': 2.0}



[A
[A
[A
[A
[A

                                                
[A                                            
 30%|███       | 54/180 [31:19<05:32,  2.64s/it]
[A

{'eval_loss': 2.521848440170288, 'eval_runtime': 1.9423, 'eval_samples_per_second': 18.019, 'eval_steps_per_second': 2.574, 'epoch': 3.0}



[A
[A
[A
[A
[A

                                                
[A                                            
 30%|███       | 54/180 [32:38<05:32,  2.64s/it]
[A

{'eval_loss': 2.4516537189483643, 'eval_runtime': 1.9373, 'eval_samples_per_second': 18.066, 'eval_steps_per_second': 2.581, 'epoch': 4.0}



[A
[A
[A
[A
[A

                                                
[A                                            
 30%|███       | 54/180 [33:47<05:32,  2.64s/it]
[A

{'eval_loss': 2.398916721343994, 'eval_runtime': 1.9571, 'eval_samples_per_second': 17.884, 'eval_steps_per_second': 2.555, 'epoch': 5.0}



[A
[A
[A
[A
[A

                                                
[A                                            
 30%|███       | 54/180 [34:56<05:32,  2.64s/it]
[A

{'eval_loss': 2.3602490425109863, 'eval_runtime': 1.9453, 'eval_samples_per_second': 17.992, 'eval_steps_per_second': 2.57, 'epoch': 6.0}



[A
[A
[A
[A
[A

                                                
[A                                            
 30%|███       | 54/180 [36:14<05:32,  2.64s/it]
[A

{'eval_loss': 2.3529090881347656, 'eval_runtime': 1.9383, 'eval_samples_per_second': 18.057, 'eval_steps_per_second': 2.58, 'epoch': 7.0}



[A
[A
[A
[A
[A

                                                
[A                                            
 30%|███       | 54/180 [37:29<05:32,  2.64s/it]
[A

{'eval_loss': 2.36598801612854, 'eval_runtime': 1.9548, 'eval_samples_per_second': 17.904, 'eval_steps_per_second': 2.558, 'epoch': 8.0}


                                                
100%|██████████| 80/80 [09:44<00:00,  7.30s/it]]


{'train_runtime': 584.3901, 'train_samples_per_second': 1.889, 'train_steps_per_second': 0.137, 'train_loss': 2.422211456298828, 'epoch': 8.0}


100%|██████████| 5/5 [00:01<00:00,  4.83it/s]


Training fold 3


Map: 100%|██████████| 138/138 [00:00<00:00, 21674.37 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 7787.01 examples/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at MoritzLaurer/roberta-large-zeroshot-v2.0-c and are newly initialized because the shapes did not match:
- classifier.out_proj.bias: found shape torch.Size([2]) in the checkpoint and torch.Size([19]) in the model instantiated
- classifier.out_proj.weight: found shape torch.Size([2, 1024]) in the checkpoint and torch.Size([19, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 5e-5)

[A
[A
[A
[A
[A

                                                
[A                                            
 30%|███       | 54/180 [38:56<05:32,  2.64s/it]
[A

{'eval_loss': 2.631277084350586, 'eval_runtime': 2.4726, 'eval_samples_per_second': 14.155, 'eval_steps_per_second': 2.022, 'epoch': 1.0}



[A
[A
[A
[A
[A

                                                
[A                                            
 30%|███       | 54/180 [40:12<05:32,  2.64s/it]
[A

{'eval_loss': 2.5164554119110107, 'eval_runtime': 2.4695, 'eval_samples_per_second': 14.173, 'eval_steps_per_second': 2.025, 'epoch': 2.0}


[W 2024-06-19 18:52:05,015] Trial 0 failed with parameters: {'learning_rate': 1.0491958148795307e-05, 'num_train_epochs': 8, 'per_device_train_batch_size': 15} because of the following error: RuntimeError('[enforce fail at inline_container.cc:595] . unexpected pos 403767616 vs 403767504').
Traceback (most recent call last):
  File "c:\Users\admin\Desktop\traini\autotrain-env\Lib\site-packages\torch\serialization.py", line 628, in save
    _save(obj, opened_zipfile, pickle_module, pickle_protocol, _disable_byteorder_record)
  File "c:\Users\admin\Desktop\traini\autotrain-env\Lib\site-packages\torch\serialization.py", line 862, in _save
    zip_file.write_record(name, storage, num_bytes)
RuntimeError: [enforce fail at inline_container.cc:769] . PytorchStreamWriter failed writing file data/134: file write failed

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\admin\Desktop\traini\autotrain-env\Lib\site-packages\optu

RuntimeError: [enforce fail at inline_container.cc:595] . unexpected pos 403767616 vs 403767504