In [None]:
# Environment Setup and Library Installation
!pip install transformers torch torchvision torchaudio
!pip install datasets
!pip install lime shap
!pip install nltk spacy
!pip install scikit-learn pandas numpy matplotlib seaborn
!pip install wordcloud
!pip install tensorflow

import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('punkt_tab') # Added missing resource download

!python -m spacy download en_core_web_sm

# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

import nltk
import spacy
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.tag import pos_tag
from wordcloud import WordCloud

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Conv1D, MaxPooling1D
from tensorflow.keras.layers import Embedding, Dropout, GlobalMaxPooling1D, Input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from transformers import BertTokenizer, BertForSequenceClassification
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from transformers import TrainingArguments, Trainer
import torch

import lime
from lime.lime_text import LimeTextExplainer
import shap

# Global plotting defaults applied to every figure drawn later in the file.
plt.style.use('seaborn-v0_8')
sns.set_palette('husl')

# Report the framework versions in use and the GPU devices TensorFlow can see
# (an empty list means training will run on CPU).
print("Environment setup complete! ✅")
print(f"TensorFlow version: {tf.__version__}")
print(f"PyTorch version: {torch.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

# Data Acquisition and Processing
def create_sample_data():
    """Build two tiny in-memory demo datasets.

    Returns:
        tuple: ``(fake_news, sentiment)`` DataFrames with five rows each.
        ``fake_news`` has the columns ``title``, ``text`` and ``label``
        (1 = fake, 0 = real); ``sentiment`` has ``text`` and ``sentiment``.
    """
    # One (title, text, label) record per article; label 1 = fake, 0 = real.
    news_rows = [
        (
            'Breaking: Scientists discover cure for all diseases',
            'In a groundbreaking discovery that will change medicine forever, researchers claim to have found a universal cure...',
            1,
        ),
        (
            'Government secretly controlling weather with satellites',
            'Leaked documents reveal government weather control program using advanced satellite technology...',
            1,
        ),
        (
            'Celebrity endorses miracle weight loss pill',
            'Popular celebrity claims this one pill helped them lose 50 pounds in just two weeks...',
            1,
        ),
        (
            'Local mayor caught in corruption scandal',
            'Investigation reveals mayor received kickbacks from construction companies for city contracts...',
            0,
        ),
        (
            'Stock market manipulation exposed by whistleblower',
            'Former trader reveals how major banks manipulate stock prices through coordinated trading...',
            0,
        ),
    ]

    # One (text, sentiment) record per opinion snippet.
    opinion_rows = [
        ('I absolutely love this new technology! It\'s amazing!', 'positive'),
        ('This product is terrible, waste of money.', 'negative'),
        ('The weather is okay today, nothing special.', 'neutral'),
        ('Outstanding performance by the team today!', 'positive'),
        ('Not sure how I feel about this decision.', 'neutral'),
    ]

    news_frame = pd.DataFrame(news_rows, columns=['title', 'text', 'label'])
    opinion_frame = pd.DataFrame(opinion_rows, columns=['text', 'sentiment'])
    return news_frame, opinion_frame

try:
    import os
    if not os.path.exists('WELFake_Dataset.csv'):
        !wget -q https://zenodo.org/record/4561253/files/WELFake_Dataset.csv

    if not os.path.exists('train.csv'):
        !wget -q https://raw.githubusercontent.com/dD2405/Twitter_Sentiment_Analysis/master/train.csv

    fake_news_df = pd.read_csv('WELFake_Dataset.csv')
    sentiment_df = pd.read_csv('train.csv', encoding='latin-1', header=None)
    sentiment_df.columns = ['sentiment', 'id', 'date', 'query', 'user', 'text']
    print(f"Loaded {len(fake_news_df)} fake news samples and {len(sentiment_df)} sentiment samples")
except Exception:
    print("Using sample data for demonstration...")
    fake_news_df, sentiment_df = create_sample_data()
    print(f"Created {len(fake_news_df)} fake news samples and {len(sentiment_df)} sentiment samples")

# Text Preprocessing Pipeline
class TextPreprocessor:
    """Clean, tokenize and normalize raw text.

    Constructing an instance loads the NLTK English stop-word list and the
    spaCy ``en_core_web_sm`` pipeline, so both resources must be installed.
    """

    # Patterns applied by ``clean_text`` (after lower-casing), in this order.
    _URL_PATTERN = re.compile(r'http[s]?://\S+')
    _MENTION_PATTERN = re.compile(r'@[A-Za-z0-9_]+')
    _HASHTAG_PATTERN = re.compile(r'#[A-Za-z0-9_]+')
    _NON_LETTER_PATTERN = re.compile(r'[^a-zA-Z\s]')
    _WHITESPACE_PATTERN = re.compile(r'\s+')

    def __init__(self):
        self.lemmatizer = WordNetLemmatizer()
        self.stemmer = PorterStemmer()
        self.stop_words = set(stopwords.words('english'))
        self.nlp = spacy.load('en_core_web_sm')

    @staticmethod
    def _is_missing(value):
        """Return True for ``None``/NaN-like scalars.

        ``pd.isna`` returns an array for list-likes, which cannot be used in
        a boolean context, so only scalars are ever treated as missing.
        """
        return bool(pd.api.types.is_scalar(value) and pd.isna(value))

    @staticmethod
    def _word_count(text):
        """Count whitespace-separated words; missing values count as 0."""
        if TextPreprocessor._is_missing(text):
            return 0
        return len(str(text).split())

    def clean_text(self, text):
        """Lower-case ``text`` and strip URLs, @mentions, #hashtags and non-letters.

        Returns an empty string for missing values (``None``/NaN).
        """
        if self._is_missing(text):
            return ""
        cleaned = str(text).lower()
        cleaned = self._URL_PATTERN.sub('', cleaned)
        cleaned = self._MENTION_PATTERN.sub('', cleaned)
        cleaned = self._HASHTAG_PATTERN.sub('', cleaned)
        cleaned = self._NON_LETTER_PATTERN.sub('', cleaned)
        return self._WHITESPACE_PATTERN.sub(' ', cleaned).strip()

    def tokenize_text(self, text):
        """Split ``text`` into tokens with NLTK's ``word_tokenize``."""
        return word_tokenize(text)

    def remove_stopwords(self, tokens):
        """Drop stop words and any token of two characters or fewer."""
        return [token for token in tokens if token not in self.stop_words and len(token) > 2]

    def lemmatize_tokens(self, tokens):
        """Lemmatize every token with the WordNet lemmatizer."""
        return [self.lemmatizer.lemmatize(token) for token in tokens]

    def stem_tokens(self, tokens):
        """Stem every token with the Porter stemmer."""
        return [self.stemmer.stem(token) for token in tokens]

    def extract_named_entities(self, text):
        """Return ``(entity_text, entity_label)`` pairs found by spaCy."""
        doc = self.nlp(text)
        return [(ent.text, ent.label_) for ent in doc.ents]

    def preprocess_text(self, text, include_entities=True, use_lemmatization=True):
        """Run the full pipeline on one document.

        Args:
            text: Raw input; missing values (``None``/NaN) are treated as
                empty text instead of raising.
            include_entities: When True, run spaCy NER. Pass False when the
                entities are not needed, since the spaCy pass is the
                expensive step.
            use_lemmatization: Lemmatize when True, otherwise stem.

        Returns:
            dict with ``processed_text`` (space-joined tokens), ``tokens``,
            ``entities``, ``original_length`` (word count of the raw input)
            and ``processed_length`` (number of tokens kept).
        """
        cleaned = self.clean_text(text)
        tokens = self.remove_stopwords(self.tokenize_text(cleaned))
        if use_lemmatization:
            tokens = self.lemmatize_tokens(tokens)
        else:
            tokens = self.stem_tokens(tokens)

        # NOTE(review): entities are extracted from the *cleaned* text, i.e.
        # lower-cased and stripped of punctuation; this may hurt NER recall
        # compared with the raw text -- confirm this is intended.
        entities = self.extract_named_entities(cleaned) if include_entities else []

        return {
            'processed_text': ' '.join(tokens),
            'tokens': tokens,
            'entities': entities,
            # Counted on the raw input; guarded so None/NaN does not raise.
            'original_length': self._word_count(text),
            'processed_length': len(tokens),
        }

preprocessor = TextPreprocessor()

# Apply preprocessing to datasets
print("Applying preprocessing to datasets...")


def _to_processed_text(raw_text):
    """Return the space-joined processed tokens for one cell of a text column.

    Missing cells become an empty string; converting them with ``str()``
    first would turn NaN into the literal token "nan". Named-entity
    extraction is skipped because only ``processed_text`` is kept here.
    """
    if pd.isna(raw_text):
        return ''
    return preprocessor.preprocess_text(str(raw_text), include_entities=False)['processed_text']


if 'text' in fake_news_df.columns:
    fake_news_df['processed_text'] = fake_news_df['text'].apply(_to_processed_text)

sentiment_df['processed_text'] = sentiment_df['text'].apply(_to_processed_text)

# Neural Network Architectures
class NeuralArchitectures:
    """Factory for the Keras text-classification models used in this file.

    Every model takes integer token-id sequences of length ``max_length``
    and starts with a trainable embedding of size ``embedding_dim`` over
    ``vocab_size`` ids. Models are returned uncompiled.

    NOTE(review): the output head has ``num_classes`` units and uses softmax
    only when ``num_classes > 2``; for two classes it is a 2-unit sigmoid
    layer, so the caller must pick a loss that accepts that output width.
    """

    def __init__(self, vocab_size=10000, max_length=100, embedding_dim=128):
        self.vocab_size = vocab_size
        self.max_length = max_length
        self.embedding_dim = embedding_dim

    @staticmethod
    def _classification_head(num_classes):
        """Return the final Dense layer shared by every architecture."""
        activation = 'softmax' if num_classes > 2 else 'sigmoid'
        return Dense(num_classes, activation=activation)

    def create_lstm_model(self, num_classes=3, lstm_units=64):
        """Embedding -> LSTM -> Dense(64) -> classification head."""
        # An explicit Input layer replaces Embedding's deprecated
        # `input_length` argument while still fixing the sequence length.
        return Sequential([
            Input(shape=(self.max_length,)),
            Embedding(self.vocab_size, self.embedding_dim),
            LSTM(lstm_units, dropout=0.3, recurrent_dropout=0.3),
            Dense(64, activation='relu'),
            Dropout(0.5),
            self._classification_head(num_classes),
        ])

    def create_bilstm_model(self, num_classes=3, lstm_units=64):
        """Embedding -> bidirectional LSTM -> Dense(128) -> Dense(64) -> head."""
        return Sequential([
            Input(shape=(self.max_length,)),
            Embedding(self.vocab_size, self.embedding_dim),
            Bidirectional(LSTM(lstm_units, dropout=0.3, recurrent_dropout=0.3)),
            Dense(128, activation='relu'),
            Dropout(0.5),
            Dense(64, activation='relu'),
            Dropout(0.3),
            self._classification_head(num_classes),
        ])

    def create_cnn_model(self, num_classes=3, filters=64, kernel_sizes=(3, 4, 5)):
        """Parallel Conv1D branches (one per kernel size), max-pooled and concatenated.

        ``kernel_sizes`` may be any iterable of ints; the default is a tuple
        so that no mutable object is shared between calls.
        """
        inputs = Input(shape=(self.max_length,))
        embedding = Embedding(self.vocab_size, self.embedding_dim)(inputs)
        conv_blocks = [
            GlobalMaxPooling1D()(Conv1D(filters, kernel_size, activation='relu')(embedding))
            for kernel_size in kernel_sizes
        ]
        concat = tf.keras.layers.concatenate(conv_blocks)
        dense = Dense(128, activation='relu')(concat)
        dropout = Dropout(0.5)(dense)
        outputs = self._classification_head(num_classes)(dropout)
        return Model(inputs=inputs, outputs=outputs)

    def create_hybrid_cnn_bilstm_model(self, num_classes=3, cnn_filters=64, lstm_units=64):
        """Two Conv1D/MaxPooling stages followed by two stacked bidirectional LSTMs."""
        inputs = Input(shape=(self.max_length,))
        embedding = Embedding(self.vocab_size, self.embedding_dim)(inputs)
        conv1 = Conv1D(cnn_filters, 3, activation='relu')(embedding)
        conv1 = MaxPooling1D(2)(conv1)
        conv2 = Conv1D(cnn_filters, 4, activation='relu')(conv1)
        conv2 = MaxPooling1D(2)(conv2)
        # The first recurrent layer keeps the full sequence so that the second
        # (half as wide) can consume it.
        lstm = Bidirectional(LSTM(lstm_units, return_sequences=True, dropout=0.3))(conv2)
        lstm = Bidirectional(LSTM(lstm_units//2, dropout=0.3))(lstm)
        dense = Dense(128, activation='relu')(lstm)
        dropout = Dropout(0.5)(dense)
        dense2 = Dense(64, activation='relu')(dropout)
        dropout2 = Dropout(0.3)(dense2)
        outputs = self._classification_head(num_classes)(dropout2)
        return Model(inputs=inputs, outputs=outputs)


# Data Preparation and Feature Engineering
class DataPreparation:
    """Turn texts and labels into padded id sequences and encoded targets.

    Holds one Keras ``Tokenizer`` (created on first fit) and one
    ``LabelEncoder`` so that later calls can reuse the same vocabulary and
    label mapping.
    """

    def __init__(self, vocab_size=10000, max_length=100):
        self.vocab_size = vocab_size
        self.max_length = max_length
        self.tokenizer = None
        self.label_encoder = LabelEncoder()

    def prepare_text_data(self, texts, labels=None, fit_tokenizer=True):
        """Vectorize ``texts`` and, when given, encode ``labels``.

        Args:
            texts: Iterable of strings.
            labels: Optional labels aligned with ``texts``.
            fit_tokenizer: When True, (re)fit both the tokenizer and the
                label encoder on this data. When False, the existing
                tokenizer is reused and the label encoder is fitted only if
                it has never been fitted.

        Returns:
            ``(X, y)`` where ``X`` is padded/truncated at the end to
            ``max_length`` and ``y`` is the encoded labels, or ``None`` when
            no labels were passed.
        """
        if fit_tokenizer or self.tokenizer is None:
            self.tokenizer = Tokenizer(num_words=self.vocab_size, oov_token='<OOV>')
            self.tokenizer.fit_on_texts(texts)
        sequences = self.tokenizer.texts_to_sequences(texts)
        X = pad_sequences(sequences, maxlen=self.max_length, padding='post', truncating='post')
        y = None
        if labels is not None:
            # Fit LabelEncoder only if it hasn't been fitted yet or if fit_tokenizer is True
            if fit_tokenizer or not hasattr(self.label_encoder, 'classes_'):
                self.label_encoder.fit(labels)
            y = self.label_encoder.transform(labels)
        return X, y

    def create_train_test_split(self, X, y, test_size=0.2, validation_size=0.1, stratify=None):
        """Split into train/validation/test sets with a fixed seed (42).

        Args:
            X, y: Features and targets of equal length.
            test_size: Fraction of all samples held out for testing.
            validation_size: Fraction of all samples held out for validation.
            stratify: Optional array, aligned with ``X``, whose class
                proportions should be preserved in every split.

        Returns:
            ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
        """
        # The validation set is carved out of what remains after the test
        # split, so its fraction is rescaled relative to that remainder.
        val_size_adjusted = validation_size / (1 - test_size)

        if stratify is None:
            X_temp, X_test, y_temp, y_test = train_test_split(
                X, y, test_size=test_size, random_state=42
            )
            stratify_temp = None
        else:
            # Split the stratification array alongside the data: the second
            # split needs the part aligned with X_temp, not the full-length
            # array (which has a different number of samples and raises).
            X_temp, X_test, y_temp, y_test, stratify_temp, _ = train_test_split(
                X, y, stratify, test_size=test_size, random_state=42, stratify=stratify
            )

        X_train, X_val, y_train, y_val = train_test_split(
            X_temp, y_temp, test_size=val_size_adjusted, random_state=42, stratify=stratify_temp
        )
        return X_train, X_val, X_test, y_train, y_val, y_test

# Data Preparation for Sentiment Analysis
data_prep_sentiment = DataPreparation(vocab_size=10000, max_length=150)
sentiment_texts = sentiment_df['processed_text'].fillna('').tolist()
if 'sentiment' in sentiment_df.columns:
    sentiment_labels = sentiment_df['sentiment'].tolist()
else:
    sentiment_labels = ['neutral'] * len(sentiment_texts)
X_sentiment, y_sentiment = data_prep_sentiment.prepare_text_data(sentiment_texts, sentiment_labels, fit_tokenizer=True)

# Stratify only for real datasets: the sample data (5 rows or fewer) is too
# small to keep every class in every split.
sentiment_stratify = y_sentiment if len(sentiment_df) > 5 else None
(X_train_sent, X_val_sent, X_test_sent,
 y_train_sent, y_val_sent, y_test_sent) = data_prep_sentiment.create_train_test_split(
    X_sentiment, y_sentiment, stratify=sentiment_stratify
)


# Data Preparation for Fake News Detection
data_prep_fake = DataPreparation(vocab_size=data_prep_sentiment.vocab_size, max_length=data_prep_sentiment.max_length)
data_prep_fake.tokenizer = data_prep_sentiment.tokenizer  # Use the same tokenizer
if 'processed_text' in fake_news_df.columns:
    fake_texts = fake_news_df['processed_text'].fillna('').tolist()
    fake_labels = fake_news_df['label'].tolist() if 'label' in fake_news_df.columns else [0] * len(fake_texts)

    # fit_tokenizer=False keeps the shared tokenizer; the label encoder is
    # still fitted here because it has not been fitted before.
    X_fake, y_fake = data_prep_fake.prepare_text_data(fake_texts, fake_labels, fit_tokenizer=False)

    # Same small-sample rule as for the sentiment data.
    fake_stratify = y_fake if len(fake_news_df) > 5 else None
    (X_train_fake, X_val_fake, X_test_fake,
     y_train_fake, y_val_fake, y_test_fake) = data_prep_fake.create_train_test_split(
        X_fake, y_fake, stratify=fake_stratify
    )

else:
    print("Fake news data not available, using sample data")
    # Placeholder must match the sequence length the models are built for,
    # and the label encoder must be fitted because `classes_` is read when
    # the fake-news models are created.
    placeholder_X = np.zeros((1, data_prep_fake.max_length), dtype='int32')
    placeholder_y = data_prep_fake.label_encoder.fit_transform([0])
    X_train_fake = X_val_fake = X_test_fake = placeholder_X
    y_train_fake = y_val_fake = y_test_fake = placeholder_y


print(f"Sentiment data - Train: {X_train_sent.shape[0]}, Val: {X_val_sent.shape[0]}, Test: {X_test_sent.shape[0]}")
print(f"Fake news data - Train: {X_train_fake.shape[0]}, Val: {X_val_fake.shape[0]}, Test: {X_test_fake.shape[0]}")
print(f"Vocabulary size: {len(data_prep_sentiment.tokenizer.word_index)}")
print(f"Sequence length: {data_prep_sentiment.max_length}")

# Model Training with Callbacks
class ModelTrainer:
    """Compile and fit Keras models, keeping their histories by name.

    Attributes are plain dicts keyed by the ``model_name`` passed to
    ``train_model``: ``history`` holds the Keras ``History`` objects and
    ``trained_models`` the fitted models.
    """

    def __init__(self):
        self.history = {}
        self.trained_models = {}

    def get_callbacks(self, patience=5, min_delta=0.001):
        """Return early stopping and learning-rate reduction, both watching ``val_loss``."""
        return [
            EarlyStopping(monitor='val_loss', patience=patience, min_delta=min_delta, restore_best_weights=True, verbose=1),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7, verbose=1)
        ]

    @staticmethod
    def _select_loss(model, y_train):
        """Pick a loss that matches the width of the model's output layer.

        Integer labels of shape ``(n,)`` fit ``binary_crossentropy`` only
        when the model emits one value per sample; any wider output needs
        ``sparse_categorical_crossentropy``. If the width cannot be read
        from the last layer, fall back to counting the distinct labels.
        """
        layers = getattr(model, 'layers', None) or []
        output_units = getattr(layers[-1], 'units', None) if layers else None
        if output_units is None:
            is_binary = len(np.unique(y_train)) == 2
        else:
            is_binary = output_units == 1
        return 'binary_crossentropy' if is_binary else 'sparse_categorical_crossentropy'

    def train_model(self, model, X_train, y_train, X_val, y_val, model_name='model', epochs=50, batch_size=32):
        """Compile ``model`` with Adam and fit it with the default callbacks.

        Args:
            model: Uncompiled (or to-be-recompiled) Keras model.
            X_train, y_train: Training features and integer labels.
            X_val, y_val: Validation data monitored by the callbacks.
            model_name: Key under which the history and model are stored.
            epochs: Upper bound on epochs; early stopping may end sooner.
            batch_size: Mini-batch size.

        Returns:
            The Keras ``History`` object of this run.
        """
        print(f"Training {model_name}...")
        loss_fn = self._select_loss(model, y_train)
        model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=self.get_callbacks(),
            verbose=1
        )
        self.history[model_name] = history
        self.trained_models[model_name] = model
        print(f"{model_name} training completed!")
        return history

    def plot_training_history(self, model_names=None):
        """Plot train/validation accuracy and loss curves side by side.

        ``model_names`` defaults to every model trained so far.
        """
        if model_names is None:
            model_names = list(self.history.keys())
        fig, axs = plt.subplots(1, 2, figsize=(15,5))

        for name in model_names:
            h = self.history[name]
            axs[0].plot(h.history['accuracy'], label=f"{name} Train")
            axs[0].plot(h.history['val_accuracy'], label=f"{name} Val")
            axs[1].plot(h.history['loss'], label=f"{name} Train")
            axs[1].plot(h.history['val_loss'], label=f"{name} Val")
        axs[0].set_title("Accuracy")
        axs[0].legend()
        axs[1].set_title("Loss")
        axs[1].legend()
        plt.show()

trainer = ModelTrainer()
nn_architectures_sentiment = NeuralArchitectures(
    vocab_size=data_prep_sentiment.vocab_size,
    max_length=data_prep_sentiment.max_length,
)
nn_architectures_fake = NeuralArchitectures(
    vocab_size=data_prep_fake.vocab_size,
    max_length=data_prep_fake.max_length,
)


# Train sentiment models (Example: LSTM, BiLSTM, Hybrid CNN-BiLSTM)
# Each model is built right before it is trained, in the listed order.
sentiment_runs = (
    ('LSTM_Sentiment', nn_architectures_sentiment.create_lstm_model),
    ('BiLSTM_Sentiment', nn_architectures_sentiment.create_bilstm_model),
    ('Hybrid_CNN_BiLSTM', nn_architectures_sentiment.create_hybrid_cnn_bilstm_model),
)
for run_name, build_model in sentiment_runs:
    trainer.train_model(
        build_model(num_classes=len(data_prep_sentiment.label_encoder.classes_)),
        X_train_sent, y_train_sent, X_val_sent, y_val_sent,
        model_name=run_name, epochs=10, batch_size=32,
    )

# Keep the per-model module-level names available.
lstm_sentiment = trainer.trained_models['LSTM_Sentiment']
bilstm_sentiment = trainer.trained_models['BiLSTM_Sentiment']
hybrid_sentiment = trainer.trained_models['Hybrid_CNN_BiLSTM']

trainer.plot_training_history(model_names=[run_name for run_name, _ in sentiment_runs])

# Train fake news models (Example: LSTM, BiLSTM, Hybrid CNN-BiLSTM)
fake_runs = (
    ('LSTM_Fake', nn_architectures_fake.create_lstm_model),
    ('BiLSTM_Fake', nn_architectures_fake.create_bilstm_model),
    ('Hybrid_CNN_BiLSTM_Fake', nn_architectures_fake.create_hybrid_cnn_bilstm_model),
)
for run_name, build_model in fake_runs:
    trainer.train_model(
        build_model(num_classes=len(data_prep_fake.label_encoder.classes_)),
        X_train_fake, y_train_fake, X_val_fake, y_val_fake,
        model_name=run_name, epochs=10, batch_size=32,
    )

lstm_fake = trainer.trained_models['LSTM_Fake']
bilstm_fake = trainer.trained_models['BiLSTM_Fake']
hybrid_fake = trainer.trained_models['Hybrid_CNN_BiLSTM_Fake']

trainer.plot_training_history(model_names=[run_name for run_name, _ in fake_runs])


# Model Evaluation
class ModelEvaluator:
    """Score trained models on held-out data and plot their confusion matrices.

    Results are stored in ``evaluation_results``, keyed by model name.
    """

    def __init__(self):
        self.evaluation_results = {}

    def evaluate_model(self, model, X_test, y_test, model_name='model'):
        """Predict on ``X_test`` and compute accuracy, precision, recall and F1.

        Outputs with more than one column are decoded with ``argmax`` and
        scored with weighted averaging; anything else is thresholded at 0.5
        and scored with scikit-learn's default (binary) averaging.

        Returns:
            The result dict that was stored under ``model_name``.
        """
        print(f"Evaluating {model_name}...")
        probabilities = model.predict(X_test, verbose=0)

        multi_column = probabilities.ndim > 1 and probabilities.shape[1] > 1
        if multi_column:
            predicted = np.argmax(probabilities, axis=1)
            averaging = {'average': 'weighted'}
        else:
            predicted = (probabilities > 0.5).astype(int).flatten()
            averaging = {}

        accuracy = accuracy_score(y_test, predicted)
        precision = precision_score(y_test, predicted, **averaging)
        recall = recall_score(y_test, predicted, **averaging)
        f1 = f1_score(y_test, predicted, **averaging)

        summary = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'y_true': y_test,
            'y_pred': predicted,
            'y_pred_proba': probabilities,
        }
        self.evaluation_results[model_name] = summary
        print(f"{model_name} evaluation completed!")
        print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")
        return summary

    def plot_confusion_matrices(self, model_names=None):
        """Draw one confusion-matrix heatmap per model in a single row.

        ``model_names`` defaults to every model evaluated so far.
        """
        names = list(self.evaluation_results.keys()) if model_names is None else model_names
        count = len(names)
        _, axs = plt.subplots(1, count, figsize=(5*count, 4))
        # With a single subplot matplotlib returns a bare Axes, not a sequence.
        axes = [axs] if count == 1 else axs
        for axis, name in zip(axes, names):
            stored = self.evaluation_results[name]
            matrix = confusion_matrix(stored['y_true'], stored['y_pred'])
            sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=axis)
            axis.set_title(f"{name} Confusion Matrix")
            axis.set_xlabel("Predicted")
            axis.set_ylabel("Actual")
        plt.show()

evaluator = ModelEvaluator()

# Each task: the trained model names plus the test split they are scored on.
# Sentiment models are evaluated (and plotted) first, then fake news models.
evaluation_tasks = (
    (['LSTM_Sentiment', 'BiLSTM_Sentiment', 'Hybrid_CNN_BiLSTM'], X_test_sent, y_test_sent),
    (['LSTM_Fake', 'BiLSTM_Fake', 'Hybrid_CNN_BiLSTM_Fake'], X_test_fake, y_test_fake),
)
for task_model_names, task_X_test, task_y_test in evaluation_tasks:
    for model_name in task_model_names:
        evaluator.evaluate_model(trainer.trained_models[model_name], task_X_test, task_y_test, model_name)
    evaluator.plot_confusion_matrices(model_names=task_model_names)


# Explainable AI Implementation
class ExplainableAI:
    """LIME-based word-importance explanations for the Keras text models.

    Args:
        tokenizer: Fitted tokenizer providing ``texts_to_sequences``.
        label_encoder: Optional fitted encoder whose ``classes_`` name the
            output classes. Without it the names default to
            ``negative``/``neutral``/``positive``.
    """

    def __init__(self, tokenizer, label_encoder=None):
        self.tokenizer = tokenizer
        self.label_encoder = label_encoder
        self.lime_explainer = LimeTextExplainer(class_names=self._class_names())

    def _class_names(self):
        """Return the class names as plain strings (labels may be ints)."""
        if self.label_encoder is None:
            return ['negative', 'neutral', 'positive']
        return [str(name) for name in self.label_encoder.classes_]

    @staticmethod
    def _to_class_probabilities(preds):
        """Return an ``(n_samples, n_classes)`` probability matrix.

        A single-probability output -- shape ``(n,)`` or ``(n, 1)``, the
        latter being what a one-unit sigmoid head produces -- is expanded to
        two columns ``[1 - p, p]``. Wider outputs are returned unchanged.
        """
        preds = np.asarray(preds)
        if preds.ndim == 1 or preds.shape[-1] == 1:
            positive = preds.reshape(-1)
            return np.column_stack([1 - positive, positive])
        return preds

    def create_prediction_function(self, model, max_length=150):
        """Wrap ``model`` as a function from raw strings to class probabilities.

        ``max_length`` must match the sequence length the model was trained
        with; texts are padded/truncated at the end, as during training.
        """
        def predict_fn(texts):
            sequences = self.tokenizer.texts_to_sequences(texts)
            padded = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
            return self._to_class_probabilities(model.predict(padded, verbose=0))
        return predict_fn

    def explain_prediction_lime(self, model, text, num_features=10, max_length=150):
        """Explain the model's prediction for ``text`` with LIME.

        Returns the LIME explanation object for the ``num_features`` most
        influential words.
        """
        # Re-initialize explainer with correct class names based on the model's task
        self.lime_explainer = LimeTextExplainer(class_names=self._class_names())

        predict_fn = self.create_prediction_function(model, max_length)
        return self.lime_explainer.explain_instance(text, predict_fn, num_features=num_features)

    def visualize_lime_explanation(self, explanation, save_path=None):
        """Draw the explanation as a horizontal bar chart of word weights.

        Negative weights are red, non-negative ones green. When
        ``save_path`` is given the figure is also written there at 300 dpi.
        """
        exp_list = explanation.as_list()
        words = [item[0] for item in exp_list]
        weights = [item[1] for item in exp_list]
        colors = ['red' if w < 0 else 'green' for w in weights]

        plt.figure(figsize=(10,6))
        bars = plt.barh(words, weights, color=colors, alpha=0.7)
        plt.xlabel('Feature Importance')
        plt.title('LIME Explanation - Word Importance')
        plt.axvline(x=0, color='black', linestyle='-', alpha=0.3)

        # Print each weight just beyond the tip of its bar.
        for bar, weight in zip(bars, weights):
            plt.text(weight + (0.01 if weight >= 0 else -0.01), bar.get_y() + bar.get_height()/2,
                     f'{weight:.3f}', ha='left' if weight >= 0 else 'right', va='center')
        plt.tight_layout()
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.show()

# Explainable AI for Sentiment Analysis
explainer_sentiment = ExplainableAI(
    data_prep_sentiment.tokenizer,
    data_prep_sentiment.label_encoder,
)

# Explain one sentence with the hybrid sentiment model. The text is run
# through the same preprocessing as the training data before it reaches LIME.
sample_text_sentiment = "The government has announced a new policy that positively impacts economic growth."
sample_result_sentiment = preprocessor.preprocess_text(sample_text_sentiment)
sample_processed_sentiment = sample_result_sentiment['processed_text']
explanation_sentiment = explainer_sentiment.explain_prediction_lime(
    trainer.trained_models['Hybrid_CNN_BiLSTM'],
    sample_processed_sentiment,
)
explainer_sentiment.visualize_lime_explanation(explanation_sentiment)

# Explainable AI for Fake News Detection
explainer_fake = ExplainableAI(
    data_prep_fake.tokenizer,
    data_prep_fake.label_encoder,
)

# Same procedure for a headline, using the hybrid fake news model.
sample_text_fake = "Breaking: Scientists discover cure for all diseases"
sample_result_fake = preprocessor.preprocess_text(sample_text_fake)
sample_processed_fake = sample_result_fake['processed_text']
explanation_fake = explainer_fake.explain_prediction_lime(
    trainer.trained_models['Hybrid_CNN_BiLSTM_Fake'],
    sample_processed_fake,
)
explainer_fake.visualize_lime_explanation(explanation_fake)


# Continuous Learning Mechanism
class ContinuousLearning:
    """Fine-tune an already trained model on new samples and track its accuracy.

    Args:
        base_model: Compiled Keras model; it is updated in place.
        tokenizer: Fitted tokenizer used to vectorize new texts.
        label_encoder: Fitted encoder used to encode new labels.
        learning_rate: Learning rate set on the optimizer before each update.
        max_length: Sequence length new texts are padded/truncated to; it
            must match the length the model was trained with.
    """

    def __init__(self, base_model, tokenizer, label_encoder, learning_rate=0.0001, max_length=150):
        self.base_model = base_model
        self.tokenizer = tokenizer
        self.label_encoder = label_encoder
        self.learning_rate = learning_rate
        self.max_length = max_length
        self.update_history = []        # one summary dict per incremental update
        self.performance_tracking = []  # one accuracy record per drift check

    @staticmethod
    def _labels_from_probabilities(probabilities):
        """Decode model output into class ids.

        Multi-column output is decoded with ``argmax``; a single probability
        per sample is thresholded at 0.5.
        """
        probabilities = np.asarray(probabilities)
        if probabilities.ndim > 1 and probabilities.shape[1] > 1:
            return np.argmax(probabilities, axis=1)
        return (probabilities > 0.5).astype(int).flatten()

    def incremental_update(self, new_texts, new_labels, epochs=3, batch_size=16, validation_split=0.2):
        """Continue training the base model on ``new_texts``/``new_labels``.

        A summary (timestamp, sample count, final loss and accuracy) is
        appended to ``update_history``.

        Returns:
            The Keras ``History`` object of the update.
        """
        print(f"Performing incremental update with {len(new_texts)} new samples...")
        sequences = self.tokenizer.texts_to_sequences(new_texts)
        X_new = pad_sequences(sequences, maxlen=self.max_length, padding='post', truncating='post')
        y_new = self.label_encoder.transform(new_labels)
        self.base_model.optimizer.learning_rate = self.learning_rate
        history = self.base_model.fit(
            X_new, y_new,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            verbose=1
        )
        update_info = {
            'timestamp': pd.Timestamp.now(),
            'num_samples': len(new_texts),
            'final_loss': history.history['loss'][-1],
            'final_accuracy': history.history['accuracy'][-1],
        }
        self.update_history.append(update_info)
        print("Incremental update completed!")
        print(f"Final Loss: {update_info['final_loss']:.4f}")
        print(f"Final Accuracy: {update_info['final_accuracy']:.4f}")
        return history

    def evaluate_performance_drift(self, X_test, y_test, threshold=0.05):
        """Compare the current accuracy with the previously recorded one.

        Returns:
            ``(drift_detected, drift)`` where ``drift`` is the absolute
            accuracy change since the previous call. The first call only
            records a baseline and returns ``(False, 0.0)``.
        """
        y_pred = self._labels_from_probabilities(self.base_model.predict(X_test, verbose=0))
        current_accuracy = accuracy_score(y_test, y_pred)
        performance_info = {'timestamp': pd.Timestamp.now(), 'accuracy': current_accuracy}
        self.performance_tracking.append(performance_info)
        if len(self.performance_tracking) > 1:
            previous_accuracy = self.performance_tracking[-2]['accuracy']
            drift = abs(current_accuracy - previous_accuracy)
            if drift > threshold:
                print(f"Performance drift detected: {drift:.4f}")
                return True, drift
            else:
                print(f"Performance stable: {drift:.4f}")
                return False, drift
        return False, 0.0

# Continuous Learning for Sentiment Analysis
continuous_learner_sentiment = ContinuousLearning(
    base_model=trainer.trained_models['Hybrid_CNN_BiLSTM'],
    tokenizer=data_prep_sentiment.tokenizer,
    label_encoder=data_prep_sentiment.label_encoder,
)

# Simulated incremental update with three dummy sentiment samples.
new_texts_sentiment = [
    "The economy is improving steadily with new policies.",
    "Many people are skeptical about the government's new plan.",
    "This is a bad decision and hurts citizens greatly."
]
new_labels_sentiment = ['positive', 'neutral', 'negative']
new_processed_sentiment = []
for raw_text in new_texts_sentiment:
    new_processed_sentiment.append(preprocessor.preprocess_text(raw_text)['processed_text'])

continuous_learner_sentiment.incremental_update(
    new_processed_sentiment,
    new_labels_sentiment,
    epochs=2,
)

# Continuous Learning for Fake News Detection
continuous_learner_fake = ContinuousLearning(
    base_model=trainer.trained_models['Hybrid_CNN_BiLSTM_Fake'],
    tokenizer=data_prep_fake.tokenizer,
    label_encoder=data_prep_fake.label_encoder,
)

# Simulated incremental update with three dummy headlines.
new_texts_fake = [
    "New study shows coffee cures cancer",
    "Local hero saves cat from tree",
    "Politician resigns amid scandal"
]
new_labels_fake = [1, 0, 0]  # 1 = fake, 0 = real
new_processed_fake = []
for raw_text in new_texts_fake:
    new_processed_fake.append(preprocessor.preprocess_text(raw_text)['processed_text'])

continuous_learner_fake.incremental_update(
    new_processed_fake,
    new_labels_fake,
    epochs=2,
)


print("Complete NLP system source code ready for execution in Google Colab.")