In [1]:
import pandas as pd
import torch
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from transformers import XLNetTokenizer, XLNetForSequenceClassification, XLNetConfig
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load your dataset
df = pd.read_csv("/kaggle/input/final-dataset/final_dataset") 

# Split the dataset into train and test using the same random state
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Initialize XLNet tokenizer and model
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=True)
model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=3)

# Move the model to the selected device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

class CustomDataset(Dataset):
    def __init__(self, data, tokenizer, max_length):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

        # Initialize the LabelEncoder
        self.label_encoder = LabelEncoder()
        self.label_encoder.fit(data['Sentiment'])  # Fit the encoder on the sentiment column

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        review = self.data.iloc[idx]['reviewText']
        sentiment = self.data.iloc[idx]['Sentiment']

        encoding = self.tokenizer(review, padding='max_length', truncation=True, max_length=self.max_length, return_tensors='pt')
        # Use the LabelEncoder to convert sentiment labels to numerical labels
        sentiment = self.label_encoder.transform([sentiment])[0]

        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': torch.tensor(sentiment, dtype=torch.long)  # Convert labels to torch.long
        }

# Create test dataset
max_len = 400
batch=20
test_dataset = CustomDataset(test_df, tokenizer, max_len)
test_dataloader = DataLoader(test_dataset, batch_size=batch)

# Lists to store performance metrics for both models
accuracy_values_finetuned = []
precision_values_finetuned = []
recall_values_finetuned = []
f1_values_finetuned = []

accuracy_values_pretrained = []
precision_values_pretrained = []
recall_values_pretrained = []
f1_values_pretrained = []

# Load the model configuration
config = XLNetConfig.from_pretrained("/kaggle/input/config-file/config.json")
fine_tuned_model = XLNetForSequenceClassification(config)
fine_tuned_model.load_state_dict(torch.load("/kaggle/input/xlnet-ftmodel/finetuned_model.pth"))
fine_tuned_model = fine_tuned_model.to(device)

# Ensure test dataset labels are of string data type
test_df['Sentiment'] = test_df['Sentiment'].astype(str)


# Evaluate the fine-tuned model
fine_tuned_model.eval()
predicted_labels_finetuned = []


with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = fine_tuned_model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        predicted_labels = torch.argmax(logits, dim=1).cpu().tolist()
        predicted_labels_finetuned.extend(predicted_labels)

    # Calculate performance metrics for the fine-tuned model
    accuracy = accuracy_score(test_df['Sentiment'], predicted_labels_finetuned)
    precision = precision_score(test_df['Sentiment'], predicted_labels_finetuned, average='weighted')
    recall = recall_score(test_df['Sentiment'], predicted_labels_finetuned, average='weighted')
    f1 = f1_score(test_df['Sentiment'], predicted_labels_finetuned, average='weighted')

    accuracy_values_finetuned.append(accuracy)
    precision_values_finetuned.append(precision)
    recall_values_finetuned.append(recall)
    f1_values_finetuned.append(f1)

# Load the pre-trained model
pretrained_model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=3)
pretrained_model = pretrained_model.to(device)


# Evaluate the pre-trained model
pretrained_model.eval()
predicted_labels_pretrained = []

with tqdm(total=len(test_dataloader), desc="Predicting Labels for Pretrained Model") as pbar:
    with torch.no_grad():
        for batch in test_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = pretrained_model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            predicted_labels = torch.argmax(logits, dim=1).cpu().tolist()
            predicted_labels_pretrained.extend(predicted_labels)
            pbar.update(1)
    pbar.close()

    # Calculate performance metrics for the pre-trained model
    accuracy = accuracy_score(test_df['Sentiment'], predicted_labels_pretrained)
    precision = precision_score(test_df['Sentiment'], predicted_labels_pretrained, average='weighted')
    recall = recall_score(test_df['Sentiment'], predicted_labels_pretrained, average='weighted')
    f1 = f1_score(test_df['Sentiment'], predicted_labels_pretrained, average='weighted')

    accuracy_values_pretrained.append(accuracy)
    precision_values_pretrained.append(precision)
    recall_values_pretrained.append(recall)
    f1_values_pretrained.append(f1)

# Print or store the performance metrics for both models
print("Performance Metrics for Fine-Tuned Model:")
print(f"Accuracy: {accuracy_values_finetuned[0]:.4f}")
print(f"Precision: {precision_values_finetuned[0]:.4f}")
print(f"Recall: {recall_values_finetuned[0]:.4f}")
print(f"F1-Score: {f1_values_finetuned[0]:.4f}")

print("Performance Metrics for Pre-Trained Model:")
print(f"Accuracy: {accuracy_values_pretrained[0]:.4f}")
print(f"Precision: {precision_values_pretrained[0]:.4f}")
print(f"Recall: {recall_values_pretrained[0]:.4f}")
print(f"F1-Score: {f1_values_pretrained[0]:.4f}")


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ValueError: Mix of label input types (string and number)