<a href="https://colab.research.google.com/github/buriihenry/AI-detector-and-Plag-Checker/blob/master/Copy_of_AI_detector_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers torch sentence-transformers nltk flask shap scikit-learn


In [None]:
!pip install gradio

In [None]:
# Mount Google Drive into this Colab runtime at /content/drive; the training
# CSV used later in the notebook is read from a path under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from transformers import (
    RobertaTokenizer,
    RobertaForSequenceClassification,
    pipeline
)
from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve
import logging
from tqdm import tqdm
import nltk
from typing import Dict

# Configure logging
# INFO level so the progress/metric messages emitted through logging.info()
# by the detector class below are shown.
logging.basicConfig(level=logging.INFO)
# Download the NLTK 'punkt' tokenizer data when this cell runs.
# NOTE(review): no visible code in this notebook uses nltk beyond this
# download -- confirm it is still required.
nltk.download('punkt')

class AIDetector:
    """Binary human-vs-AI text classifier built on a fine-tuned RoBERTa model.

    Typical flow: ``load_data()`` -> ``train()`` -> ``evaluate()`` ->
    ``detect()``. ``detect()`` additionally reports the mean cosine
    similarity between the input and a sample of the training texts, using
    a sentence-transformer encoder.
    """

    def __init__(self):
        """Load the tokenizer/models and set the training hyper-parameters."""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.class_weights = None
        self.optimal_threshold = 0.5
        self.text_generator = None  # GPT-2 pipeline, created lazily on first use
        self._ref_embeddings = None  # cache for detect()'s reference embeddings

        # Initialize models with dropout
        # NOTE(review): these dropout rates are far above the roberta-base
        # defaults (0.1) -- confirm they are intentional.
        self.tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
        self.model = RobertaForSequenceClassification.from_pretrained(
            "roberta-base",
            num_labels=2,
            hidden_dropout_prob=0.7,
            attention_probs_dropout_prob=0.5
        ).to(self.device)

        self.sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        # Configuration
        self.max_length = 512
        self.batch_size = 4  # Reduced for better gradient updates
        self.num_epochs = 5
        self.weight_decay = 0.1  # Strong L2 regularization

    def _augment_ai_texts(self, original_texts: list, num_samples: int = 1000) -> list:
        """Generate synthetic AI texts using GPT-2.

        Each original text is used as a prompt and five sampled continuations
        are collected per prompt until ``num_samples`` texts exist.

        Args:
            original_texts: Prompt texts (the AI-labelled rows of the dataset).
            num_samples: Maximum number of texts to return.

        Returns:
            At most ``num_samples`` generated texts. If generation fails for
            any reason, the original texts are repeated cyclically and cut to
            exactly ``num_samples`` items instead.
        """
        # Nothing to generate from / nothing requested: skip loading GPT-2.
        if not original_texts or num_samples <= 0:
            return []

        try:
            if not self.text_generator:
                self.text_generator = pipeline(
                    'text-generation',
                    model='gpt2-medium',
                    device=0 if torch.cuda.is_available() else -1
                )

            augmented = []
            for text in tqdm(original_texts, desc="Augmenting AI texts"):
                generated = self.text_generator(
                    text,
                    max_length=self.max_length,
                    num_return_sequences=5,
                    do_sample=True,
                    temperature=0.9,
                    top_k=50,
                    repetition_penalty=1.2
                )
                augmented.extend(g['generated_text'] for g in generated)

                if len(augmented) >= num_samples:
                    break

            return augmented[:num_samples]

        except Exception as e:
            logging.error(f"Text generation failed: {str(e)}")
            # Fallback replication: honour num_samples instead of a fixed
            # x200 factor, so the caller still gets the class size it asked for.
            repeats = -(-num_samples // len(original_texts))  # ceil division
            return (list(original_texts) * repeats)[:num_samples]

    def load_data(self, file_path: str):
        """Load and balance dataset with synthetic AI texts.

        Reads a CSV with a ``text`` column and a ``generated`` column
        (0 = human, 1 = AI), replaces the AI class with synthetic texts sized
        to the human class, and creates stratified 70/15/15
        train/validation/test splits stored on the instance.

        Args:
            file_path: Path of the CSV file.

        Raises:
            Exception: Any loading/splitting error is logged and re-raised.
        """
        try:
            # Load original data; drop missing texts, which the tokenizer
            # cannot handle, and force everything to str.
            df = pd.read_csv(file_path)
            human_texts = df[df['generated'] == 0]['text'].dropna().astype(str).tolist()
            ai_texts = df[df['generated'] == 1]['text'].dropna().astype(str).tolist()

            # Generate synthetic AI texts
            logging.info(f"Original AI texts: {len(ai_texts)}")
            augmented_ai = self._augment_ai_texts(ai_texts, num_samples=len(human_texts))
            logging.info(f"Generated {len(augmented_ai)} synthetic AI texts")

            # Create balanced dataset
            balanced_texts = human_texts + augmented_ai
            balanced_labels = [0]*len(human_texts) + [1]*len(augmented_ai)

            # Stratified split
            X_train, X_temp, y_train, y_temp = train_test_split(
                balanced_texts, balanced_labels,
                test_size=0.3, stratify=balanced_labels, random_state=42
            )
            X_val, X_test, y_val, y_test = train_test_split(
                X_temp, y_temp,
                test_size=0.5, stratify=y_temp, random_state=42
            )

            # Assign to instance variables
            self.X_train, self.y_train = X_train, y_train
            self.X_val, self.y_val = X_val, y_val
            self.X_test, self.y_test = X_test, y_test
            # New training data invalidates the cached reference embeddings.
            self._ref_embeddings = None

            # Calculate aggressive class weights
            n_positive = sum(y_train)
            pos_weight = len(y_train)/n_positive if n_positive > 0 else 1000
            self.class_weights = torch.tensor([1.0, pos_weight], device=self.device)

            logging.info("\nBalanced Dataset Statistics:")
            logging.info(f"Training: {len(y_train)} (AI: {sum(y_train)})")
            logging.info(f"Validation: {len(y_val)} (AI: {sum(y_val)})")
            logging.info(f"Testing: {len(y_test)} (AI: {sum(y_test)})")

        except Exception as e:
            logging.error(f"Data loading failed: {str(e)}")
            raise

    def _create_dataloader(self, texts: list, labels: list = None):
        """Create tensor dataloaders.

        Args:
            texts: Raw texts to tokenize (truncated/padded to ``max_length``).
            labels: Optional integer labels. When given, they become the
                third tensor of every batch and batches are shuffled; without
                labels the input order is preserved.

        Returns:
            A ``DataLoader`` yielding ``(input_ids, attention_mask[, labels])``.
        """
        encodings = self.tokenizer(
            texts,
            truncation=True,
            padding=True,
            max_length=self.max_length,
            return_tensors="pt"
        )

        tensors = [encodings['input_ids'], encodings['attention_mask']]
        has_labels = labels is not None
        if has_labels:
            tensors.append(torch.tensor(labels))

        return DataLoader(
            TensorDataset(*tensors),
            batch_size=self.batch_size,
            shuffle=has_labels
        )

    def train(self):
        """Training process with aggressive class weighting.

        Fine-tunes the classifier with class-weighted cross-entropy, tunes the
        decision threshold on the validation set after every epoch (max F1),
        and stops early after two epochs without F1 improvement. The weights
        AND the threshold of the best epoch are restored at the end.

        Raises:
            Exception: Any training error is logged and re-raised.
        """
        try:
            train_loader = self._create_dataloader(self.X_train, self.y_train)
            val_loader = self._create_dataloader(self.X_val, self.y_val)

            optimizer = torch.optim.AdamW(
                self.model.parameters(),
                lr=1e-5,
                weight_decay=self.weight_decay
            )
            criterion = nn.CrossEntropyLoss(weight=self.class_weights)

            # Start below any reachable F1 so the first epoch always writes a
            # checkpoint; otherwise an all-zero F1 run would leave nothing
            # (or a stale file) for the torch.load() below.
            best_f1 = -1.0
            best_threshold = self.optimal_threshold
            checkpoint_saved = False
            patience = 0

            for epoch in range(self.num_epochs):
                # Training
                self.model.train()
                total_loss = 0.0
                for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
                    input_ids = batch[0].to(self.device)
                    attention_mask = batch[1].to(self.device)
                    labels = batch[2].to(self.device)

                    optimizer.zero_grad()
                    # Labels are not passed to the model: only the weighted
                    # criterion below is used, so the model's own loss would
                    # be wasted computation.
                    outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                    loss = criterion(outputs.logits, labels)
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                    optimizer.step()
                    total_loss += loss.item()

                # Validation
                val_probs, val_labels = [], []
                self.model.eval()
                with torch.no_grad():
                    for batch in val_loader:
                        outputs = self.model(
                            input_ids=batch[0].to(self.device),
                            attention_mask=batch[1].to(self.device)
                        )
                        val_probs.extend(F.softmax(outputs.logits, dim=1)[:, 1].cpu().numpy())
                        val_labels.extend(batch[2].cpu().numpy())

                # Threshold tuning
                precision, recall, thresholds = precision_recall_curve(val_labels, val_probs)
                # precision/recall carry one trailing point (1, 0) that has no
                # threshold; drop it so indices line up with `thresholds`.
                precision, recall = precision[:-1], recall[:-1]
                f1_scores = 2 * (precision * recall) / (precision + recall + 1e-9)
                best_idx = int(np.argmax(f1_scores))
                epoch_threshold = float(thresholds[best_idx])
                current_f1 = float(f1_scores[best_idx])

                logging.info(f"\nEpoch {epoch+1}:")
                logging.info(f"Train Loss: {total_loss/len(train_loader):.4f}")
                logging.info(f"Val F1: {current_f1:.4f}")
                logging.info(f"Optimal Threshold: {epoch_threshold:.4f}")

                # Early stopping
                if current_f1 > best_f1:
                    best_f1 = current_f1
                    # Keep the threshold that belongs to the saved weights.
                    best_threshold = epoch_threshold
                    patience = 0
                    torch.save(self.model.state_dict(), "best_model.pt")
                    checkpoint_saved = True
                else:
                    patience += 1
                    if patience >= 2:
                        logging.info("Early stopping triggered")
                        break

            # Load best model together with its matching threshold
            if checkpoint_saved:
                self.model.load_state_dict(
                    torch.load("best_model.pt", map_location=self.device)
                )
                self.optimal_threshold = best_threshold
            self.model.eval()

        except Exception as e:
            logging.error(f"Training failed: {str(e)}")
            raise

    def evaluate(self):
        """Comprehensive evaluation with AI detection metrics.

        Scores the held-out test split with the tuned threshold and logs the
        AI detection rate, the human false-positive rate, the classification
        report and the confusion matrix. A rate is reported as NaN when the
        test split contains no sample of the corresponding class.
        """
        # No labels -> loader is not shuffled, so predictions stay aligned
        # with self.y_test.
        test_loader = self._create_dataloader(self.X_test)
        preds = []

        self.model.eval()
        with torch.no_grad():
            for batch in test_loader:
                outputs = self.model(
                    input_ids=batch[0].to(self.device),
                    attention_mask=batch[1].to(self.device)
                )
                ai_probs = F.softmax(outputs.logits, dim=1)[:, 1]
                preds.extend((ai_probs >= self.optimal_threshold).cpu().numpy().astype(int))

        # AI Detection Metrics
        y_true = np.array(self.y_test)
        y_pred = np.array(preds)
        ai_mask = y_true == 1
        human_mask = y_true == 0

        # Guard against an empty class instead of dividing by zero.
        ai_detection_rate = float(y_pred[ai_mask].mean()) if ai_mask.any() else float("nan")
        human_fp_rate = float(y_pred[human_mask].mean()) if human_mask.any() else float("nan")

        logging.info("\nFinal Evaluation:")
        logging.info(f"AI Detection Rate: {ai_detection_rate:.2%}")
        logging.info(f"Human False Positive Rate: {human_fp_rate:.2%}")
        logging.info("\nClassification Report:")
        logging.info(classification_report(self.y_test, preds))
        logging.info("\nConfusion Matrix:")
        logging.info(confusion_matrix(self.y_test, preds))

    def _get_reference_embeddings(self, per_class: int = 500):
        """Return (and cache) sentence embeddings of the reference texts.

        Up to ``per_class`` human and ``per_class`` AI training texts are
        selected by label, so the reference set is balanced even though the
        training split is shuffled. The cache is reset by ``load_data()``.
        """
        cached = getattr(self, "_ref_embeddings", None)
        if cached is not None:
            return cached

        human_refs = [t for t, y in zip(self.X_train, self.y_train) if y == 0][:per_class]
        ai_refs = [t for t, y in zip(self.X_train, self.y_train) if y == 1][:per_class]
        references = human_refs + ai_refs
        if not references:
            raise ValueError("No training texts available for similarity analysis")

        self._ref_embeddings = torch.tensor(self.sentence_model.encode(references))
        return self._ref_embeddings

    def detect(self, text: str) -> Dict:
        """Enhanced detection with similarity checks.

        Args:
            text: The text to classify.

        Returns:
            On success a dict with ``is_ai_generated`` (bool), ``confidence``
            (the model's probability for the AI class), ``similarity_score``
            (mean cosine similarity to the reference training texts) and
            ``threshold``. On failure the same keys with ``None`` values
            (``threshold`` still set) plus an ``error`` message; no exception
            is raised.
        """
        try:
            # Model prediction
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                max_length=self.max_length,
                truncation=True
            ).to(self.device)

            # Make sure dropout is disabled, whatever mode the model was left in.
            self.model.eval()
            with torch.no_grad():
                outputs = self.model(**inputs)
                probs = F.softmax(outputs.logits, dim=1)
                ai_prob = probs[0][1].item()

            # Similarity analysis
            text_embedding = torch.tensor(self.sentence_model.encode([text])[0])
            # Reference embeddings are computed once and reused across calls.
            ref_embeddings = self._get_reference_embeddings()
            similarities = F.cosine_similarity(
                text_embedding.unsqueeze(0),
                ref_embeddings
            )
            similarity_score = similarities.mean().item()

            return {
                'is_ai_generated': bool(ai_prob >= self.optimal_threshold),
                'confidence': ai_prob,
                'similarity_score': similarity_score,
                'threshold': self.optimal_threshold
            }

        except Exception as e:
            logging.error(f"Detection error: {str(e)}")
            return {
                'error': str(e),
                'is_ai_generated': None,
                'confidence': None,
                'similarity_score': None,
                # Same key set as the success result, so callers can always
                # read 'threshold'.
                'threshold': self.optimal_threshold
            }

# Usage Example

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
!ls '/content/drive/My Drive/Colab Notebooks/data_train.csv'

'/content/drive/My Drive/Colab Notebooks/data_train.csv'


In [None]:
# Usage Example
# Runs the full pipeline (load -> train -> evaluate) and classifies one sample
# text. `detector` is left at module level on purpose: later cells reuse it.
if __name__ == "__main__":
    detector = AIDetector()

    try:
        detector.load_data("/content/drive/My Drive/Colab Notebooks/data_train.csv")
        detector.train()
        detector.evaluate()

        test_text = "The rapid development of artificial intelligence has revolutionized numerous industries..."
        results = detector.detect(test_text)

        if results.get('error'):
            # detect() reports failures through an 'error' entry with None
            # metrics; formatting those would raise a TypeError that hides
            # the real cause.
            print(f"Detection error: {results['error']}")
        else:
            print("\nDetection Results:")
            print(f"Prediction: {'AI-Generated' if results['is_ai_generated'] else 'Human'}")
            print(f"Confidence: {results['confidence']:.2%}")
            print(f"Similarity Score: {results['similarity_score']:.2f}")
            print(f"Threshold: {results['threshold']:.4f}")

    except Exception as e:
        # Top-level boundary: keep the short message but also log the
        # traceback so the failing step can be located.
        logging.exception("Pipeline failed")
        print(f"Pipeline error: {str(e)}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0
Augmenting AI texts:   0%|          | 0/3 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examp


Detection Results:
Prediction: AI-Generated
Confidence: 0.45%
Similarity Score: 0.13
Threshold: 0.0000


In [None]:
!pip install gradio

In [None]:

import gradio as gr

def analyze_text(text):
    """Analyze text and return formatted results.

    Args:
        text: Text entered by the user; ``None``, empty or whitespace-only
            input is rejected with a prompt instead of being analyzed.

    Returns:
        A human-readable report string, a prompt when no text was given, or
        ``"Error: ..."`` when the detector reports a failure.
    """
    # `not text` also covers None, which would crash on .strip().
    if not text or not text.strip():
        return "Please enter some text to analyze."

    results = detector.detect(text)

    # Check for failure before touching the metric fields (they are None then).
    if 'error' in results:
        return f"Error: {results['error']}"

    is_ai = results['is_ai_generated']
    # 'confidence' is the model's probability for the AI class.
    confidence = results['confidence']

    # Add interpretation
    if is_ai:
        if confidence > 0.9:
            interpretation = "This text shows strong indicators of being AI-generated."
        else:
            interpretation = "This text shows some indicators of being AI-generated, but with moderate confidence."
    else:
        if confidence < 0.3:
            interpretation = "This text shows strong indicators of being human-written."
        else:
            interpretation = "This text shows some indicators of being human-written, but with moderate confidence."

    # Format results into a nice output
    return "".join([
        "📊 Analysis Results:\n\n",
        f"🤖 AI Generated: {'Yes ✓' if is_ai else 'No ✗'}\n",
        f"🎯 Confidence: {confidence*100:.1f}%\n",
        f"🔄 Similarity Score: {results['similarity_score']:.2f}\n\n",
        "💡 Interpretation:\n",
        interpretation,
    ])

# Build the Gradio UI: one multi-line input box, one multi-line output box,
# and two clickable example inputs, all wired to analyze_text().
iface = gr.Interface(
    title="AI Text Detector",
    description="This tool analyzes text to determine if it was written by AI or a human.",
    theme="default",
    fn=analyze_text,
    inputs=[
        gr.Textbox(
            label="Enter text to analyze",
            placeholder="Paste or type the text you want to analyze here...",
            lines=8
        )
    ],
    outputs=gr.Textbox(label="Analysis Results", lines=10),
    examples=[
        ["The quick brown fox jumps over the lazy dog. This is a sample human-written text that you can use to test the detector."],
        ["In examining the fundamental principles of quantum mechanics, we observe that particles exhibit both wave-like and particle-like properties, a phenomenon known as wave-particle duality."]
    ]
)

# Start the app; share=True additionally requests a public share URL.
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://b6544eb47a50102d12.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


