In [5]:
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import matplotlib.pyplot as plt
import pandas as pd
import json
import random
from lime.lime_text import LimeTextExplainer

# Load the labeled dataset; the 'text' column is the only one read below.
data = pd.read_csv('labeled-dataset.csv')

# Draw a reproducible random sample of texts to explain. The size is capped at
# the number of rows because Series.sample raises ValueError when asked for
# more items than exist (without replacement).
sample_size = min(200, len(data))
text_samples = data['text'].sample(n=sample_size, random_state=42).tolist()

from transformers import RobertaForSequenceClassification, RobertaTokenizer

# Fine-tuned classifier weights are read from the local 'fine_tuned_roberta1' directory.
model = RobertaForSequenceClassification.from_pretrained('fine_tuned_roberta1')
# NOTE(review): the tokenizer comes from the base 'roberta-base' checkpoint, not
# from the fine-tuned directory — confirm the vocabulary was not changed during
# fine-tuning.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Replace this with your predict function for the fine-tuned RoBERTa model
def predict_fn(texts, batch_size=32):
    """Return class probabilities for ``texts`` in the form LIME expects.

    LIME's ``classifier_fn`` contract is: take a list of ``d`` strings and
    return a ``(d, k)`` numpy array of prediction probabilities. Raw logits
    are therefore converted with a softmax and moved to numpy before being
    returned.

    Args:
        texts: Iterable of strings to classify.
        batch_size: Number of texts sent through the model per forward pass.
            LIME calls this function with its whole set of perturbed samples
            at once, so the work is split into mini-batches to bound memory.

    Returns:
        A 2-D numpy array with one row per input text and one column per
        class; each row sums to 1.
    """
    texts = list(texts)
    if not texts:
        # Nothing to classify: keep the (d, k) shape contract with d == 0.
        return torch.empty((0, model.config.num_labels)).numpy()

    batch_probs = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            inputs = tokenizer(batch, padding=True, truncation=True, max_length=128, return_tensors="pt")
            logits = model(**inputs).logits
            # Softmax over the class dimension turns logits into probabilities.
            batch_probs.append(torch.softmax(logits, dim=-1))

    return torch.cat(batch_probs, dim=0).cpu().numpy()

# LIME text explainer used to attribute predict_fn's output to individual words.
explainer = LimeTextExplainer()

# Maximum number of features (words) exported per text.
num_features = 10

# One record per explained text: {"text": ..., "top_features": {...}}.
# NOTE: this rebinds `data`, which held the loaded DataFrame up to this point;
# the name is kept so code that reads `data` after this section still works.
data = []

for text in text_samples:
    # Missing values (e.g. NaN from the CSV) and blank strings cannot be
    # explained; skip them instead of aborting the whole run.
    if not isinstance(text, str) or not text.strip():
        print(f"Skipping sample without usable text: {text!r}")
        continue

    explanation = explainer.explain_instance(text, predict_fn, num_features=num_features)

    # (word, weight) pairs for label 1 (the "Human" class), capped at num_features.
    top_pairs = explanation.as_list(label=1)[:num_features]

    # Keyed "Feature 1", "Feature 2", ... in the order LIME returned them.
    # Iterating the pairs directly also copes with an empty explanation, where
    # unpacking zip(*pairs) into two names would raise ValueError.
    features_data = {
        f"Feature {rank}": {"Name": word, "Importance": float(weight)}
        for rank, (word, weight) in enumerate(top_pairs, start=1)
    }

    data.append({'text': text, 'top_features': features_data})

# Save the records to a JSON file; the same path is reused in the message below
# so the reported filename always matches the file actually written.
output_path = 'text_samples_features1.json'
with open(output_path, 'w', encoding='utf-8') as outfile:
    json.dump(data, outfile, indent=4)

print(f"Data saved to {output_path}")


Data saved to text_samples_features.json
