# In [None]:
import pandas as pd
import torch
import numpy as np
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Read the raw examples from disk
df = pd.read_csv('/content/dataset (1).csv')

# The CSV is expected to provide an 'Example' column (input text) and an
# 'Idiom' column (class label)
x, y = df['Example'], df['Idiom']

# Map every distinct label to an integer id
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
num_classes = np.unique(y_encoded).size  # how many distinct classes exist

# 60% train / 40% held out, then the held-out part is halved into
# validation and test (20% each of the full data)
X_train, X_temp, Y_train, Y_temp = train_test_split(
    x, y_encoded, test_size=0.4, random_state=42
)
X_val, X_test, Y_val, Y_test = train_test_split(
    X_temp, Y_temp, test_size=0.5, random_state=42
)

# Function to train and evaluate a BERT-based classifier
def train_and_evaluate_bert(X_train, Y_train, X_val, Y_val, X_test, Y_test, dataset_name, num_classes):
    """Fine-tune ``bert-base-uncased`` and report validation/test metrics.

    The model is trained on the training split for a fixed number of epochs,
    then accuracy and macro F1 are printed for the validation and test splits.

    Args:
        X_train, X_val, X_test: Iterables of text strings (e.g. a pandas
            Series) for the respective split.
        Y_train, Y_val, Y_test: Non-negative integer class ids aligned with
            the corresponding ``X_*`` split.
        dataset_name: Label used in the printed report.
        num_classes: Number of output classes; passed to the model as
            ``num_labels``.

    Returns:
        None. All results are printed.
    """
    print(f"Dataset: {dataset_name}")
    # minlength keeps the three histograms the same length so they can be
    # compared column by column even if a split lacks the highest class ids.
    print("Training labels distribution:", np.bincount(Y_train, minlength=num_classes))
    print("Validation labels distribution:", np.bincount(Y_val, minlength=num_classes))
    print("Test labels distribution:", np.bincount(Y_test, minlength=num_classes))

    # Set device to GPU if available, otherwise use CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load a pre-trained BERT model and tokenizer
    model_name = 'bert-base-uncased'
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_classes)
    model.to(device)

    # Define training parameters
    batch_size = 16
    learning_rate = 2e-5
    num_epochs = 3  # Adjusted for demonstration

    def make_loader(texts, labels):
        """Tokenize *texts* and pair them with *labels* in a DataLoader.

        Tensors stay on the CPU; each batch is moved to ``device`` only when
        it is consumed, so the whole dataset never has to fit on the GPU.
        """
        encoded = tokenizer(list(texts), truncation=True, padding=True, return_tensors='pt')
        # CrossEntropy-style losses require int64 class ids.
        label_tensor = torch.as_tensor(np.asarray(labels), dtype=torch.long)
        dataset = torch.utils.data.TensorDataset(
            encoded.input_ids, encoded.attention_mask, label_tensor
        )
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size)

    def evaluate_model(loader):
        """Return ``(y_true, y_pred)`` NumPy arrays for every example in *loader*."""
        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for input_ids, attention_mask, labels in loader:
                outputs = model(input_ids.to(device), attention_mask=attention_mask.to(device))
                y_pred.extend(outputs.logits.argmax(dim=1).tolist())
                y_true.extend(labels.tolist())
        return np.array(y_true), np.array(y_pred)

    train_loader = make_loader(X_train, Y_train)
    val_loader = make_loader(X_val, Y_val)
    test_loader = make_loader(X_test, Y_test)

    # torch.optim.AdamW replaces the deprecated transformers.AdamW;
    # weight_decay=0.0 mirrors the default of the optimizer it replaces.
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.0)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for input_ids, attention_mask, labels in train_loader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            # Passing labels makes the model compute the classification loss itself.
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()
            loss.backward()
            optimizer.step()

        print(f"Epoch {epoch+1}, Average Loss: {total_loss/len(train_loader)}")

    # Evaluate on validation set
    Y_val_true, y_pred_val = evaluate_model(val_loader)
    accuracy_val = accuracy_score(Y_val_true, y_pred_val)
    f1_val = f1_score(Y_val_true, y_pred_val, average='macro')
    print(f"Validation Accuracy for {dataset_name}: {accuracy_val:.4f}")
    print(f"Validation F1 Score for {dataset_name}: {f1_val:.4f}")

    # Evaluate on test set
    Y_test_true, y_pred_test = evaluate_model(test_loader)
    accuracy_test = accuracy_score(Y_test_true, y_pred_test)
    f1_test = f1_score(Y_test_true, y_pred_test, average='macro')
    print(f"Test Accuracy for {dataset_name}: {accuracy_test:.4f}")
    print(f"Test F1 Score for {dataset_name}: {f1_test:.4f}")

# Run the full train/validate/test pipeline on the splits prepared above
train_and_evaluate_bert(
    X_train, Y_train,
    X_val, Y_val,
    X_test, Y_test,
    dataset_name='My Custom Dataset',
    num_classes=num_classes,
)
