In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import os
from tqdm import tqdm
import torch.nn.functional as F

# Pin both RNGs to the same seed so repeated runs are comparable
np.random.seed(42)
torch.manual_seed(42)

# Pick the compute device: CUDA when present, CPU as the fallback
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Load the dataset
def load_dataset(csv_path):
    """Read the CSV at `csv_path` into a DataFrame, printing its shape and column names."""
    frame = pd.read_csv(csv_path)

    # Short summary so the run log records what was loaded
    column_names = frame.columns.tolist()
    print(f"Dataset loaded with shape: {frame.shape}")
    print(f"Columns: {column_names}")

    return frame

# Data preprocessing
def preprocess_data(df, text_columns=None, target_column='PrimaryCategory'):
    """Build the model input text and integer labels for BERT fine-tuning.

    Note: `df` is modified in place (two columns are added) and the same
    object is returned.

    Args:
        df: DataFrame containing the text columns and the target column.
        text_columns: Column name, or iterable of column names, whose non-null
            values are joined with a single space. Defaults to
            ['Title', 'Description'] when None.
        target_column: Column holding the class names to encode.

    Returns:
        Tuple of (df, label_encoder, num_labels) where `df` has the new
        'combined_text' and 'label' columns, `label_encoder` is the fitted
        LabelEncoder, and `num_labels` is the number of distinct classes.

    Raises:
        KeyError: If any requested column is absent from `df`.
    """
    # None stands in for the historical default so no mutable list is shared between calls.
    if text_columns is None:
        text_columns = ['Title', 'Description']
    elif isinstance(text_columns, str):
        text_columns = [text_columns]
    else:
        # A tuple would be read by pandas as a single (multi-level) key, so force a list.
        text_columns = list(text_columns)

    missing = [col for col in [*text_columns, target_column] if col not in df.columns]
    if missing:
        raise KeyError(f"Columns not found in dataframe: {missing}")

    # Combine text columns, skipping nulls so they do not become the string "nan"
    df['combined_text'] = df[text_columns].apply(
        lambda row: ' '.join(row.dropna().astype(str)), axis=1
    )

    # Encode target labels as integer ids
    label_encoder = LabelEncoder()
    df['label'] = label_encoder.fit_transform(df[target_column])

    num_labels = len(label_encoder.classes_)
    print(f"Number of unique labels: {num_labels}")
    print(f"Labels: {label_encoder.classes_}")

    return df, label_encoder, num_labels

# Custom Dataset class
class APIDataset(Dataset):
    """Dataset that tokenizes one text per item and pairs it with its label.

    Args:
        texts: Sequence of input texts (list, NumPy array or pandas Series).
        labels: Sequence of integer class ids, same length as `texts`.
        tokenizer: Callable invoked as `tokenizer(text, ...)` with
            `return_tensors='pt'`; its result must expose 'input_ids' and
            'attention_mask' tensors.
        max_length: Length every sequence is padded/truncated to.

    Raises:
        ValueError: If `texts` and `labels` differ in length.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        # Materialise as plain lists so __getitem__ always indexes by position.
        # A pandas Series indexes by label, which breaks after a shuffled split.
        self.texts = list(texts)
        self.labels = list(labels)
        if len(self.texts) != len(self.labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(self.texts)} and {len(self.labels)}"
            )
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return a dict with 1-D 'input_ids', 'attention_mask' and a scalar 'labels' tensor."""
        text = str(self.texts[idx])
        label = self.labels[idx]

        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # The tokenizer output carries a leading batch axis; flatten drops it
        # so the DataLoader can stack items itself.
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Function to calculate metrics
def calculate_metrics(preds, labels):
    """Compute accuracy plus support-weighted F1, precision and recall.

    Args:
        preds: Predicted class ids, one per sample.
        labels: True class ids, aligned with `preds`.

    Returns:
        Dict with float entries under 'accuracy', 'f1', 'precision', 'recall'.
    """
    # With many classes, some are never predicted (or never occur) in a given
    # split, which makes per-class precision/recall undefined. zero_division=0
    # keeps the 0 that scikit-learn already substitutes in that case, but
    # without emitting an UndefinedMetricWarning on every call.
    weighted = {'average': 'weighted', 'zero_division': 0}

    return {
        'accuracy': accuracy_score(labels, preds),
        'f1': f1_score(labels, preds, **weighted),
        'precision': precision_score(labels, preds, **weighted),
        'recall': recall_score(labels, preds, **weighted)
    }

# Function to plot confusion matrix
def plot_confusion_matrix(cm, class_names, title='Confusion Matrix', save_path='confusion_matrix.png'):
    """Draw a confusion matrix as an annotated heatmap and save it to disk.

    Args:
        cm: 2-D array of integer counts (cells are formatted with 'd').
        class_names: Tick labels used on both axes. They are not checked
            against the shape of `cm`; the caller must pass them in the same
            order and number as the matrix rows/columns.
        title: Figure title.
        save_path: Destination image file; defaults to the historical
            'confusion_matrix.png'.
    """
    fig = plt.figure(figsize=(10, 8))
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.title(title)
        plt.tight_layout()
        plt.savefig(save_path)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

# Function to plot metrics history
def plot_metrics_history(metrics_history):
    """Save a 2x2 grid of train-vs-validation curves, then trigger the combined plot.

    `metrics_history` maps each metric name (e.g. 'train_loss', 'val_f1') to a
    list with one value per epoch.
    """
    n_epochs = len(metrics_history['train_loss'])
    epochs = range(1, n_epochs + 1)

    # One entry per panel, in grid order:
    # (title, y-axis label, [(history key, line format, legend label), ...])
    panels = [
        ('Training and Validation Loss', 'Loss', [
            ('train_loss', 'b-', 'Training Loss'),
            ('val_loss', 'r-', 'Validation Loss'),
        ]),
        ('Training and Validation Accuracy', 'Accuracy', [
            ('train_accuracy', 'b-', 'Training Accuracy'),
            ('val_accuracy', 'r-', 'Validation Accuracy'),
        ]),
        ('Training and Validation F1 Score', 'F1 Score', [
            ('train_f1', 'b-', 'Training F1'),
            ('val_f1', 'r-', 'Validation F1'),
        ]),
        ('Training and Validation Precision/Recall', 'Score', [
            ('train_precision', 'b-', 'Training Precision'),
            ('train_recall', 'b--', 'Training Recall'),
            ('val_precision', 'r-', 'Validation Precision'),
            ('val_recall', 'r--', 'Validation Recall'),
        ]),
    ]

    plt.figure(figsize=(12, 10))

    for position, (panel_title, y_label, series) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        for key, line_format, legend_label in series:
            plt.plot(epochs, metrics_history[key], line_format, label=legend_label)
        plt.title(panel_title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.savefig('training_metrics.png')
    plt.close()

    # The validation-only overview is produced as a separate figure
    plot_combined_metrics(metrics_history)

    print("Metrics plots saved as 'training_metrics.png' and 'combined_metrics.png'")

# New function to plot all validation metrics together in one figure
def plot_combined_metrics(metrics_history):
    """Overlay the four validation metrics on one chart and save it as 'combined_metrics.png'.

    Also triggers the per-metric detailed plot once the combined figure is written.
    """
    n_epochs = len(metrics_history['val_accuracy'])
    epochs = range(1, n_epochs + 1)

    # (history key, line format, legend label) for every curve on the chart
    curves = (
        ('val_accuracy', 'b-', 'Accuracy'),
        ('val_f1', 'r-', 'F1 Score'),
        ('val_precision', 'g-', 'Precision'),
        ('val_recall', 'y-', 'Recall'),
    )

    plt.figure(figsize=(12, 8))

    for key, line_format, legend_label in curves:
        plt.plot(epochs, metrics_history[key], line_format, linewidth=2, label=legend_label)

    # Light dashed grid to make values easier to read off
    plt.grid(True, linestyle='--', alpha=0.7)

    plt.title('Combined Validation Metrics Over Epochs', fontsize=16)
    plt.xlabel('Epochs', fontsize=14)
    plt.ylabel('Score', fontsize=14)

    plt.legend(fontsize=12)

    # One tick per epoch, fixed score range, and a faint guide line at 1.0
    plt.xticks(epochs)
    plt.ylim(0, 1.05)
    plt.axhline(y=1, color='k', linestyle=':', alpha=0.3)

    # Label the last point of each curve with its value
    for key, _, _ in curves:
        last_value = metrics_history[key][-1]
        plt.annotate(f"{last_value:.4f}",
                     xy=(n_epochs, last_value),
                     xytext=(5, 5), textcoords='offset points')

    plt.tight_layout()
    plt.savefig('combined_metrics.png', dpi=300)
    plt.close()

    # Follow up with one panel per metric for a closer look
    plot_detailed_validation_metrics(metrics_history)

# New function to plot individual validation metrics for more detailed analysis
def plot_detailed_validation_metrics(metrics_history):
    """Save a 2x2 figure with one annotated panel per validation metric.

    The figure is written to 'detailed_validation_metrics.png'.
    """
    epochs = range(1, len(metrics_history['val_accuracy']) + 1)

    # (display name, colour code, per-epoch values) for each panel, in grid order
    panels = [
        ('Accuracy', 'b', metrics_history['val_accuracy']),
        ('F1 Score', 'r', metrics_history['val_f1']),
        ('Precision', 'g', metrics_history['val_precision']),
        ('Recall', 'y', metrics_history['val_recall']),
    ]

    plt.figure(figsize=(16, 12))

    for slot, (metric_name, colour, values) in enumerate(panels, start=1):
        plt.subplot(2, 2, slot)

        # Line with a marker on every epoch
        plt.plot(epochs, values, f'{colour}-o', linewidth=2, markersize=6)

        plt.grid(True, linestyle='--', alpha=0.7)

        plt.title(f'Validation {metric_name} Over Epochs', fontsize=14)
        plt.xlabel('Epochs', fontsize=12)
        plt.ylabel(metric_name, fontsize=12)

        plt.xticks(epochs)

        # Zoom the y-axis to the observed range, padded by 0.05 and clamped to [0, 1.05]
        lower = max(0, min(values) - 0.05)
        upper = min(1.05, max(values) + 0.05)
        plt.ylim(lower, upper)

        # Write the value above every data point
        for epoch_no, score in zip(epochs, values):
            plt.annotate(f"{score:.4f}",
                         xy=(epoch_no, score),
                         xytext=(0, 5),
                         textcoords='offset points',
                         ha='center',
                         fontsize=8)

    plt.tight_layout()
    plt.savefig('detailed_validation_metrics.png', dpi=300)
    plt.close()

# Training function with metrics tracking
def _run_epoch(model, dataloader, optimizer=None, desc="Validation"):
    """Run one pass over `dataloader`; trains when `optimizer` is given, otherwise evaluates.

    Returns:
        Tuple of (average loss over batches, list of predicted ids, list of true ids).

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    training = optimizer is not None
    model.train(training)

    total_loss = 0.0
    steps = 0
    all_preds = []
    all_labels = []

    # Gradients are only tracked while training
    with torch.set_grad_enabled(training):
        for batch in tqdm(dataloader, desc=desc):
            # Move batch to device
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            if training:
                optimizer.zero_grad()

            # Passing labels makes the model compute the loss itself
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )
            loss = outputs.loss

            if training:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()

            # Softmax is monotonic, so argmax over raw logits gives the same class
            preds = torch.argmax(outputs.logits.detach(), dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            steps += 1

    if steps == 0:
        raise ValueError(f"{desc} dataloader yielded no batches")

    return total_loss / steps, all_preds, all_labels


def train_model(model, train_dataloader, val_dataloader, optimizer, num_epochs=3, label_encoder=None,
                checkpoint_path="best_bert_model.pt"):
    """Train the model, evaluate after every epoch and track metrics.

    The state dict is written to `checkpoint_path` whenever the weighted
    validation F1 improves. The returned model carries the weights of the
    last epoch, which are not necessarily the best ones.

    Args:
        model: Sequence-classification model whose forward pass accepts
            `input_ids`, `attention_mask` and `labels` and returns an object
            with `.loss` and `.logits`.
        train_dataloader: Batches of dicts with 'input_ids', 'attention_mask', 'labels'.
        val_dataloader: Same batch format, used for evaluation only.
        optimizer: Optimizer already bound to the model parameters.
        num_epochs: Number of passes over the training data.
        label_encoder: Optional fitted encoder; when given, a confusion matrix
            of the final epoch's validation predictions is plotted.
        checkpoint_path: File that receives the best model's state dict.

    Returns:
        Tuple of (model, metrics_history), where `metrics_history` maps each
        metric name to a list with one value per epoch.

    Raises:
        ValueError: If either dataloader yields no batches.
    """
    # Initialize metrics history
    metrics_history = {
        'train_loss': [],
        'val_loss': [],
        'train_accuracy': [],
        'val_accuracy': [],
        'train_f1': [],
        'val_f1': [],
        'train_precision': [],
        'train_recall': [],
        'val_precision': [],
        'val_recall': []
    }

    # Start below any reachable score so the first epoch is always checkpointed,
    # even when its F1 is exactly 0.
    best_val_f1 = float('-inf')

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")

        # Training phase
        avg_train_loss, all_train_preds, all_train_labels = _run_epoch(
            model, train_dataloader, optimizer=optimizer, desc="Training"
        )
        train_metrics = calculate_metrics(all_train_preds, all_train_labels)

        print(f"Average training loss: {avg_train_loss:.4f}")
        print(f"Training metrics: Accuracy={train_metrics['accuracy']:.4f}, F1={train_metrics['f1']:.4f}, "
              f"Precision={train_metrics['precision']:.4f}, Recall={train_metrics['recall']:.4f}")

        # Validation phase
        avg_val_loss, all_val_preds, all_val_labels = _run_epoch(
            model, val_dataloader, desc="Validation"
        )
        val_metrics = calculate_metrics(all_val_preds, all_val_labels)

        print(f"Validation Loss: {avg_val_loss:.4f}")
        print(f"Validation metrics: Accuracy={val_metrics['accuracy']:.4f}, F1={val_metrics['f1']:.4f}, "
              f"Precision={val_metrics['precision']:.4f}, Recall={val_metrics['recall']:.4f}")

        # Update metrics history
        metrics_history['train_loss'].append(avg_train_loss)
        metrics_history['val_loss'].append(avg_val_loss)
        for metric_name in ('accuracy', 'f1', 'precision', 'recall'):
            metrics_history[f'train_{metric_name}'].append(train_metrics[metric_name])
            metrics_history[f'val_{metric_name}'].append(val_metrics[metric_name])

        # Save the best model based on F1 score
        if val_metrics['f1'] > best_val_f1:
            best_val_f1 = val_metrics['f1']
            print(f"New best F1 score: {best_val_f1:.4f}. Saving model...")
            torch.save(model.state_dict(), checkpoint_path)

        # Create the confusion matrix once, after the final epoch
        if label_encoder is not None and epoch == num_epochs - 1:
            # Pin the matrix to every known class so rows/columns line up with
            # the tick labels even when some classes are absent from validation.
            # Assumes labels are the 0..n-1 ids produced by this encoder.
            class_ids = np.arange(len(label_encoder.classes_))
            cm = confusion_matrix(all_val_labels, all_val_preds, labels=class_ids)
            plot_confusion_matrix(cm, label_encoder.classes_, f'Confusion Matrix - Epoch {epoch+1}')

    # Plot metrics history
    plot_metrics_history(metrics_history)

    print(f"Training complete! Best validation F1 score: {best_val_f1:.4f}")
    return model, metrics_history

# Main function
def main(
    csv_path="/kaggle/input/multi-agent-system-project/output_scrapy_cleaned.csv",  # Replace with your CSV file path
    batch_size=16,
    learning_rate=2e-5,
    num_epochs=25,
    bert_model_name="bert-base-uncased",  # You can use other BERT variants
):
    """Run the full pipeline: load data, fine-tune BERT, save artefacts, demo one prediction.

    Args:
        csv_path: CSV file with the text and category columns.
        batch_size: Batch size for both dataloaders.
        learning_rate: AdamW learning rate.
        num_epochs: Number of training epochs.
        bert_model_name: Pretrained checkpoint name for tokenizer and model.
    """
    # Load and preprocess data
    df = load_dataset(csv_path)
    df, label_encoder, num_labels = preprocess_data(df)

    # Split data into train and validation sets
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

    print(f"Training set size: {len(train_df)}")
    print(f"Validation set size: {len(val_df)}")

    # Initialize tokenizer
    tokenizer = BertTokenizer.from_pretrained(bert_model_name)

    # Create datasets; .values drops the shuffled pandas index so items are
    # addressed by position
    train_dataset = APIDataset(
        texts=train_df['combined_text'].values,
        labels=train_df['label'].values,
        tokenizer=tokenizer
    )

    val_dataset = APIDataset(
        texts=val_df['combined_text'].values,
        labels=val_df['label'].values,
        tokenizer=tokenizer
    )

    # Create dataloaders (only the training data is shuffled)
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True
    )

    val_dataloader = DataLoader(
        val_dataset,
        batch_size=batch_size
    )

    # Initialize model
    model = BertForSequenceClassification.from_pretrained(
        bert_model_name,
        num_labels=num_labels
    )

    # Move model to device
    model = model.to(device)

    # Initialize optimizer
    optimizer = AdamW(model.parameters(), lr=learning_rate)

    # Train model; the best checkpoint is written to disk by train_model
    model, metrics_history = train_model(
        model=model,
        train_dataloader=train_dataloader,
        val_dataloader=val_dataloader,
        optimizer=optimizer,
        num_epochs=num_epochs,
        label_encoder=label_encoder
    )

    # Save label encoder for inference
    import pickle
    with open('label_encoder.pkl', 'wb') as f:
        pickle.dump(label_encoder, f)

    print("Model and label encoder saved successfully!")

    # Example of how to use the model for inference with softmax probabilities
    def predict_category_with_probs(text, model, tokenizer, label_encoder):
        """Return (best category, [(category, probability), ...] for the top 3 classes)."""
        model.eval()
        encoding = tokenizer(
            text,
            add_special_tokens=True,
            max_length=512,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        input_ids = encoding['input_ids'].to(device)
        attention_mask = encoding['attention_mask'].to(device)

        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            # Apply softmax to get probabilities for the single input row
            probs = F.softmax(outputs.logits, dim=1)[0]

            # Get prediction
            prediction = torch.argmax(probs).item()
            predicted_category = label_encoder.classes_[prediction]

            # Top 3 predictions; use the values topk already returns and plain
            # ints to index the NumPy class array
            top_probs, top_indices = torch.topk(probs, k=min(3, len(label_encoder.classes_)))
            top_categories = [
                (label_encoder.classes_[class_id], prob)
                for prob, class_id in zip(top_probs.tolist(), top_indices.tolist())
            ]

        return predicted_category, top_categories

    # Test example (uses the in-memory model returned by train_model)
    example_text = "API that provides weather data with JSON responses"
    category, top_categories = predict_category_with_probs(
        example_text, model, tokenizer, label_encoder
    )

    print("\nPrediction:")
    print(f"Text: {example_text}")
    print(f"Predicted category: {category}")
    print("Top predictions with probabilities:")
    for cat, prob in top_categories:
        print(f"  {cat}: {prob:.4f}")

# Run the full pipeline only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

2025-05-10 15:01:19.306237: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746889279.482817      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746889279.536382      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Using device: cuda
Dataset loaded with shape: (11200, 14)
Columns: ['merge_key', 'PrimaryCategory', 'Title', 'Description', 'APIProvider', 'APIEndpoint', 'SupportedResponseFormats', 'SupportedRequestFormats', 'AuthenticationModel', 'ArchitecturalStyle', 'RestrictedAccess', 'SSLSupport', 'IsThisanUnofficialAPI', 'SecondaryCategories']
Number of unique labels: 509
Labels: ['3D' '3D|Authentication' 'API' 'API Design'
 'API Design|Cameras|Cloud|Games|Health|Home Automation|Photos|Security'
 'API Education' 'API Management'
 'API Management|Browsers|Contacts|Database|Internet of Things|Management|Messaging|Networking|Notifications|Statistics|Tasks|Telephony|Time|Tools|Visualizations|Wi-Fi'
 'API|Credit Cards' 'Accessibility' 'Accounting' 'Accounts'
 'Accounts|Activity Streams|Metadata|Social'
 'Accounts|Analytics|Banking|Billing|Demographics|Financial|Payments|Rewards|Sales'
 'Accounts|Analytics|Real Time|eCommerce'
 'Accounts|Campaigns|Contacts|Content|Data|Marketing|Reporting|SEO'
 'Activ

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/25


Training: 100%|██████████| 560/560 [14:05<00:00,  1.51s/it]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 4.8093
Training metrics: Accuracy=0.1890, F1=0.1372, Precision=0.1872, Recall=0.1890


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 3.9253
Validation metrics: Accuracy=0.3384, F1=0.2327, Precision=0.2574, Recall=0.3384
New best F1 score: 0.2327. Saving model...

Epoch 2/25


Training: 100%|██████████| 560/560 [14:30<00:00,  1.56s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 3.4884
Training metrics: Accuracy=0.4061, F1=0.3069, Precision=0.2829, Recall=0.4061


Validation: 100%|██████████| 140/140 [01:17<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 3.2827
Validation metrics: Accuracy=0.4259, F1=0.3281, Precision=0.3012, Recall=0.4259
New best F1 score: 0.3281. Saving model...

Epoch 3/25


Training: 100%|██████████| 560/560 [14:30<00:00,  1.56s/it]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 2.8490
Training metrics: Accuracy=0.4940, F1=0.3906, Precision=0.3632, Recall=0.4940


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.9312
Validation metrics: Accuracy=0.4732, F1=0.3784, Precision=0.3287, Recall=0.4732
New best F1 score: 0.3784. Saving model...

Epoch 4/25


Training: 100%|██████████| 560/560 [14:31<00:00,  1.56s/it]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 2.3934
Training metrics: Accuracy=0.5595, F1=0.4612, Precision=0.4320, Recall=0.5595


Validation: 100%|██████████| 140/140 [01:17<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.7414
Validation metrics: Accuracy=0.4902, F1=0.4097, Precision=0.3749, Recall=0.4902
New best F1 score: 0.4097. Saving model...

Epoch 5/25


Training: 100%|██████████| 560/560 [14:30<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 2.0269
Training metrics: Accuracy=0.6183, F1=0.5311, Precision=0.5077, Recall=0.6183


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.6236
Validation metrics: Accuracy=0.5080, F1=0.4415, Precision=0.4206, Recall=0.5080
New best F1 score: 0.4415. Saving model...

Epoch 6/25


Training: 100%|██████████| 560/560 [14:29<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 1.7106
Training metrics: Accuracy=0.6748, F1=0.5997, Precision=0.5825, Recall=0.6748


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.5596
Validation metrics: Accuracy=0.5210, F1=0.4636, Precision=0.4423, Recall=0.5210
New best F1 score: 0.4636. Saving model...

Epoch 7/25


Training: 100%|██████████| 560/560 [14:29<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 1.4303
Training metrics: Accuracy=0.7273, F1=0.6624, Precision=0.6267, Recall=0.7273


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.5303
Validation metrics: Accuracy=0.5330, F1=0.4872, Precision=0.4734, Recall=0.5330
New best F1 score: 0.4872. Saving model...

Epoch 8/25


Training: 100%|██████████| 560/560 [14:28<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 1.2005
Training metrics: Accuracy=0.7754, F1=0.7206, Precision=0.7015, Recall=0.7754


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.5107
Validation metrics: Accuracy=0.5299, F1=0.4893, Precision=0.4713, Recall=0.5299
New best F1 score: 0.4893. Saving model...

Epoch 9/25


Training: 100%|██████████| 560/560 [14:27<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 1.0099
Training metrics: Accuracy=0.8122, F1=0.7657, Precision=0.7489, Recall=0.8122


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.5288
Validation metrics: Accuracy=0.5353, F1=0.5026, Precision=0.4966, Recall=0.5353
New best F1 score: 0.5026. Saving model...

Epoch 10/25


Training: 100%|██████████| 560/560 [14:27<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.8479
Training metrics: Accuracy=0.8489, F1=0.8096, Precision=0.7966, Recall=0.8489


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.4858
Validation metrics: Accuracy=0.5335, F1=0.5045, Precision=0.4932, Recall=0.5335
New best F1 score: 0.5045. Saving model...

Epoch 11/25


Training: 100%|██████████| 560/560 [14:29<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.7216
Training metrics: Accuracy=0.8748, F1=0.8430, Precision=0.8328, Recall=0.8748


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.5384
Validation metrics: Accuracy=0.5437, F1=0.5219, Precision=0.5247, Recall=0.5437
New best F1 score: 0.5219. Saving model...

Epoch 12/25


Training: 100%|██████████| 560/560 [14:29<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.6056
Training metrics: Accuracy=0.8993, F1=0.8729, Precision=0.8617, Recall=0.8993


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.5342
Validation metrics: Accuracy=0.5442, F1=0.5241, Precision=0.5268, Recall=0.5442
New best F1 score: 0.5241. Saving model...

Epoch 13/25


Training: 100%|██████████| 560/560 [14:28<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.5092
Training metrics: Accuracy=0.9172, F1=0.8951, Precision=0.8852, Recall=0.9172


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.5356
Validation metrics: Accuracy=0.5420, F1=0.5196, Precision=0.5204, Recall=0.5420

Epoch 14/25


Training: 100%|██████████| 560/560 [14:27<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.4355
Training metrics: Accuracy=0.9340, F1=0.9159, Precision=0.9070, Recall=0.9340


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.6069
Validation metrics: Accuracy=0.5442, F1=0.5225, Precision=0.5213, Recall=0.5442

Epoch 15/25


Training: 100%|██████████| 560/560 [14:26<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.3829
Training metrics: Accuracy=0.9420, F1=0.9261, Precision=0.9204, Recall=0.9420


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.6303
Validation metrics: Accuracy=0.5482, F1=0.5218, Precision=0.5191, Recall=0.5482

Epoch 16/25


Training: 100%|██████████| 560/560 [14:27<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.3269
Training metrics: Accuracy=0.9510, F1=0.9366, Precision=0.9285, Recall=0.9510


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.6684
Validation metrics: Accuracy=0.5487, F1=0.5324, Precision=0.5384, Recall=0.5487
New best F1 score: 0.5324. Saving model...

Epoch 17/25


Training: 100%|██████████| 560/560 [14:28<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.2791
Training metrics: Accuracy=0.9614, F1=0.9498, Precision=0.9444, Recall=0.9614


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.6566
Validation metrics: Accuracy=0.5509, F1=0.5337, Precision=0.5337, Recall=0.5509
New best F1 score: 0.5337. Saving model...

Epoch 18/25


Training: 100%|██████████| 560/560 [14:27<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.2379
Training metrics: Accuracy=0.9693, F1=0.9596, Precision=0.9545, Recall=0.9693


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.7091
Validation metrics: Accuracy=0.5446, F1=0.5305, Precision=0.5360, Recall=0.5446

Epoch 19/25


Training: 100%|██████████| 560/560 [14:29<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.2097
Training metrics: Accuracy=0.9730, F1=0.9651, Precision=0.9614, Recall=0.9730


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.7817
Validation metrics: Accuracy=0.5460, F1=0.5313, Precision=0.5397, Recall=0.5460

Epoch 20/25


Training: 100%|██████████| 560/560 [14:28<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.1836
Training metrics: Accuracy=0.9787, F1=0.9722, Precision=0.9686, Recall=0.9787


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.8822
Validation metrics: Accuracy=0.5397, F1=0.5257, Precision=0.5353, Recall=0.5397

Epoch 21/25


Training: 100%|██████████| 560/560 [14:26<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.1943
Training metrics: Accuracy=0.9730, F1=0.9663, Precision=0.9627, Recall=0.9730


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.8390
Validation metrics: Accuracy=0.5487, F1=0.5323, Precision=0.5359, Recall=0.5487

Epoch 22/25


Training: 100%|██████████| 560/560 [14:27<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.1534
Training metrics: Accuracy=0.9824, F1=0.9772, Precision=0.9741, Recall=0.9824


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.8890
Validation metrics: Accuracy=0.5437, F1=0.5326, Precision=0.5427, Recall=0.5437

Epoch 23/25


Training: 100%|██████████| 560/560 [14:28<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.1620
Training metrics: Accuracy=0.9808, F1=0.9766, Precision=0.9740, Recall=0.9808


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.9047
Validation metrics: Accuracy=0.5379, F1=0.5249, Precision=0.5417, Recall=0.5379

Epoch 24/25


Training: 100%|██████████| 560/560 [14:28<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.1251
Training metrics: Accuracy=0.9864, F1=0.9830, Precision=0.9806, Recall=0.9864


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.8930
Validation metrics: Accuracy=0.5478, F1=0.5305, Precision=0.5385, Recall=0.5478

Epoch 25/25


Training: 100%|██████████| 560/560 [14:28<00:00,  1.55s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Average training loss: 0.1040
Training metrics: Accuracy=0.9906, F1=0.9878, Precision=0.9858, Recall=0.9906


Validation: 100%|██████████| 140/140 [01:16<00:00,  1.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Loss: 2.9165
Validation metrics: Accuracy=0.5464, F1=0.5339, Precision=0.5448, Recall=0.5464
New best F1 score: 0.5339. Saving model...


  plt.tight_layout()


Metrics plots saved as 'training_metrics.png' and 'combined_metrics.png'
Training complete! Best validation F1 score: 0.5339
Model and label encoder saved successfully!

Prediction:
Text: API that provides weather data with JSON responses
Predicted category: Weather
Top predictions with probabilities:
  Weather: 0.9990
  Data: 0.0001
  Medical: 0.0000
