In [2]:
import torch
import math
import gradio as gr
import spacy
import pandas as pd
import numpy as np
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
from collections import Counter


In [3]:
# --- MODEL SETUP ---
# Use CUDA when PyTorch reports it as available; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"‚è≥ Loading models on {device}...")

try:
    # GPT-2 tokenizer + language-model head, used by calculate_perplexity.
    model_id = "gpt2"
    tokenizer = GPT2TokenizerFast.from_pretrained(model_id)
    model = GPT2LMHeadModel.from_pretrained(model_id).to(device)
    model.eval()  # inference only; the model is never trained in this notebook

    # spaCy English pipeline, used by analyze_pos_features for POS tags.
    nlp = spacy.load("en_core_web_sm")
    print("‚úÖ Models loaded successfully.")
except Exception as e:
    print(f"‚ùå Error loading models: {e}")
    print("Try restarting the session again if this persists.")
    # Re-raise: without `tokenizer`, `model` and `nlp` every later cell would
    # fail with an unrelated NameError, hiding the real cause shown above.
    raise

‚è≥ Loading models on cpu...


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

‚úÖ Models loaded successfully.


In [7]:
# --- FUNCTIONS ---

def calculate_perplexity(text):
    """Return the perplexity of ``text`` under the notebook's GPT-2 model.

    The text is tokenized with the module-level ``tokenizer`` and scored with
    the module-level ``model`` in windows of at most
    ``model.config.n_positions`` tokens, advancing 512 tokens per step. In
    every window after the first, tokens already scored by an earlier window
    are masked with ``-100`` so they act only as context.

    Each window's mean loss is weighted by the number of tokens it actually
    scored, so the result is ``exp`` of the mean negative log-likelihood per
    scored token over the whole text (windows are not averaged equally).

    Args:
        text: Input string (``None`` is tolerated).

    Returns:
        ``0`` when ``text`` is empty, ``None`` or only whitespace;
        ``float('nan')`` when the text yields no scorable token (fewer than
        two tokens, so nothing has a preceding context);
        otherwise the perplexity as a Python float.
    """
    if not text or len(text.strip()) == 0:
        return 0

    encodings = tokenizer(text, return_tensors="pt")
    max_length = model.config.n_positions
    stride = 512
    seq_len = encodings.input_ids.size(1)

    nll_sum = 0.0      # running sum of (window mean loss * tokens scored)
    scored_tokens = 0  # total number of tokens that contributed to the loss
    prev_end_loc = 0

    for begin_loc in range(0, seq_len, stride):
        end_loc = min(begin_loc + max_length, seq_len)
        # Only the tokens past the previous window's end are new targets.
        trg_len = end_loc - prev_end_loc
        input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
        target_ids = input_ids.clone()
        target_ids[:, :-trg_len] = -100

        # Causal-LM heads compare the logits at position i with the label at
        # position i + 1, so the label at position 0 is never scored.
        window_tokens = int((target_ids[:, 1:] != -100).sum().item())
        if window_tokens > 0:
            with torch.no_grad():
                outputs = model(input_ids, labels=target_ids)
            # outputs.loss is the mean over this window's scored tokens;
            # scale it back to a sum so windows of different sizes combine
            # correctly.
            nll_sum = nll_sum + outputs.loss * window_tokens
            scored_tokens += window_tokens

        prev_end_loc = end_loc
        if end_loc == seq_len:
            break

    if scored_tokens == 0:
        # A single-token text has no next-token prediction to evaluate.
        return float("nan")

    ppl = torch.exp(nll_sum / scored_tokens)
    return ppl.item()

def analyze_pos_features(text):
    """Compute part-of-speech ratios and POS-tag entropy for ``text``.

    The text is parsed with the module-level spaCy pipeline ``nlp``; every
    token in the resulting doc counts towards the total.

    Args:
        text: Input string to analyse.

    Returns:
        A ``(ratios, entropy)`` tuple. ``ratios`` maps "Noun Ratio",
        "Verb Ratio", "Adjective Ratio" and "Pronoun Ratio" to the share of
        tokens carrying that tag. ``entropy`` is the Shannon entropy (base 2)
        of the full tag distribution, as a non-negative Python float.
        When the doc has no tokens the result is ``({}, 0)``.
    """
    doc = nlp(text)
    total_tokens = len(doc)
    if total_tokens == 0:
        return {}, 0

    counts = Counter(token.pos_ for token in doc)

    ratios = {
        "Noun Ratio": counts.get("NOUN", 0) / total_tokens,
        "Verb Ratio": counts.get("VERB", 0) / total_tokens,
        "Adjective Ratio": counts.get("ADJ", 0) / total_tokens,
        "Pronoun Ratio": counts.get("PRON", 0) / total_tokens,
    }

    # Every tag in `counts` occurred at least once, so each probability is
    # strictly positive and log2 is always defined.
    probs = np.array([c / total_tokens for c in counts.values()])
    # When all tokens share one tag the sum is 0.0 and negating it gives -0.0,
    # which would be displayed as "-0.0000"; adding 0.0 normalizes it to +0.0.
    entropy = float(-(probs * np.log2(probs)).sum()) + 0.0

    return ratios, entropy

def detect_ai_human(text):
    """Score ``text`` with simple heuristics and return a verdict for the UI.

    Combines the perplexity from ``calculate_perplexity`` with the POS ratios
    and entropy from ``analyze_pos_features`` into two integer scores and
    compares them.

    Args:
        text: Text to analyse (``None`` and blank strings are tolerated).

    Returns:
        A 4-tuple ``(verdict, perplexity, entropy, ratios)`` in the order the
        UI wires its outputs: a verdict string, the perplexity formatted to
        two decimals, the entropy formatted to four decimals, and the POS
        ratio dict. For empty input the tuple is
        ``("Please enter text.", "N/A", "N/A", {})``.
    """
    if not text or not text.strip():
        # Keep the same slot order as the normal return: the two text slots
        # get "N/A" and the ratios slot gets an empty dict.
        return "Please enter text.", "N/A", "N/A", {}

    ppl = calculate_perplexity(text)
    ratios, entropy = analyze_pos_features(text)

    # Heuristic scoring
    ai_score = 0
    human_score = 0

    # Perplexity: low values count towards AI, high values towards human;
    # values from 30 to 35 inclusive add nothing.
    if ppl < 20:
        ai_score += 4
    elif ppl < 30:
        ai_score += 2
    elif ppl > 45:
        human_score += 4
    elif ppl > 35:
        human_score += 2

    # POS-tag entropy: values from 3.32 to 3.36 inclusive add nothing.
    if entropy < 3.32:
        ai_score += 1
    elif entropy > 3.36:
        human_score += 1

    # POS ratios. `.get` tolerates the empty dict analyze_pos_features
    # returns when the parsed doc has no tokens.
    if ratios.get("Noun Ratio", 0) > 0.22:
        ai_score += 1
    if ratios.get("Verb Ratio", 0) > 0.12:
        human_score += 1

    if ai_score > human_score:
        verdict = "Likely AI "
    elif human_score > ai_score:
        verdict = "Likely Human "
    else:
        verdict = "Uncertain "

    return verdict, f"{ppl:.2f}", f"{entropy:.4f}", ratios

In [None]:
# --- GRADIO UI ---
# `gr` comes from the notebook's import cell; `device` and `detect_ai_human`
# come from the model-setup and function cells above, which must run first.

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# üïµÔ∏è AI vs Human Text Analyzer")
    # Show the device that was actually selected at model-load time instead
    # of a hard-coded hardware claim.
    gr.Markdown(f"Server running on device: {device}.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="Input Text", lines=8, placeholder="Type here...")
            analyze_btn = gr.Button("Analyze", variant="primary")

        with gr.Column():
            lbl_verdict = gr.Label(label="Verdict")
            with gr.Row():
                lbl_ppl = gr.Textbox(label="Perplexity")
                lbl_entropy = gr.Textbox(label="Entropy")
            lbl_ratios = gr.JSON(label="POS Ratios")

    # The output order must match detect_ai_human's return tuple:
    # (verdict, perplexity, entropy, ratios).
    analyze_btn.click(
        fn=detect_ai_human,
        inputs=input_text,
        outputs=[lbl_verdict, lbl_ppl, lbl_entropy, lbl_ratios]
    )

# share=True requests a public share link; debug=True keeps this cell running
# so errors are printed in the cell output.
demo.launch(share=True, debug=True)


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://c7d7ece413d3235e0a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
