# Generate AI Texts from Previous AI-Generated Texts Using StealthGPT

In [None]:
import json
import requests
import os
from tqdm import tqdm
from getpass import getpass

# ======== Configure paths and token ========
input_file = '' # JSON FILE FROM AI-GENERATED TEXT
output_file = ''

# Securely prompt for the API token
api_token = getpass("Please enter your StealthGPT API token: ")
if not api_token:
    raise ValueError("A StealthGPT API token is required to run this script.")

API_URL = 'https://stealthgpt.ai/api/stealthify'
MAX_PROMPT_LENGTH = 3000
# requests waits indefinitely unless a timeout is given; one hung connection
# would otherwise stall the whole batch.
REQUEST_TIMEOUT = 120  # seconds


def _rephrase(text):
    """Send *text* (truncated to MAX_PROMPT_LENGTH) to the API and return the rephrased text.

    Raises:
        requests.RequestException: on connection errors, timeouts or HTTP error statuses.
        ValueError: if the response body is not JSON or carries no non-empty "result".
            An empty result is treated as a failure so the caller records ``None``
            and the model is retried on a later run instead of being stored as "".
    """
    payload = {"prompt": text[:MAX_PROMPT_LENGTH], "rephrase": True, "tone": "Standard", "mode": "Medium"}
    headers = {"api-token": api_token, "Content-Type": "application/json"}
    response = requests.post(API_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    body = response.json()
    result = body.get("result") if isinstance(body, dict) else None
    if not result:
        raise ValueError("response did not contain a non-empty 'result' field")
    return result


def _save_progress():
    """Write the accumulated results to output_file as UTF-8 JSON."""
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must be
    # explicit rather than whatever the platform default happens to be.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(final_data, f, ensure_ascii=False, indent=2)


# ======== Load existing output data (if it exists) ========
if os.path.exists(output_file):
    with open(output_file, 'r', encoding='utf-8') as f:
        # Create a dictionary lookup for entries that have already been processed
        processed_lookup = {entry.get('topic'): entry for entry in json.load(f)}
    print(f"Loaded {len(processed_lookup)} entries from existing output file.")
else:
    processed_lookup = {}

# ======== Load the main input data ========
with open(input_file, 'r', encoding='utf-8') as f:
    input_data = json.load(f)

# The final list of data we will save
final_data = list(processed_lookup.values())

# ======== Process entries one by one ========
for entry in tqdm(input_data, desc="Processing and rephrasing entries"):
    topic = entry.get('topic')
    # A missing topic must not crash the progress messages.
    topic_label = str(topic)[:40]

    if topic in processed_lookup:
        # --- UPDATE LOGIC: Entry exists, retry only the models stored as None ---
        existing_entry = processed_lookup[topic]
        rephrased_texts = existing_entry.get("stealthgpt_rephrased", {})
        source_texts = entry.get("ai_generated") or {}
        was_updated = False

        for model_name, rephrased_text in rephrased_texts.items():
            if rephrased_text is not None:
                continue
            original_text = source_texts.get(model_name)
            if not original_text:
                continue

            try:
                # Only the value of an existing key is replaced, so the dict size
                # is unchanged while iterating over it.
                rephrased_texts[model_name] = _rephrase(original_text)
            except (requests.RequestException, ValueError) as e:
                print(f"\n❌ Retry failed for '{model_name}' on topic: {topic_label}...: {e}")
            else:
                was_updated = True
                print(f"\n✅ Retried and rephrased '{model_name}' for topic: {topic_label}...")

        if was_updated:
            # If we made an update, save progress immediately
            _save_progress()
        continue

    # --- NEW ENTRY LOGIC: Process this entry for the first time ---
    ai_texts = entry.get("ai_generated") or {}
    if not ai_texts:
        continue

    entry.setdefault("stealthgpt_rephrased", {})

    for model_name, original_text in ai_texts.items():
        if not original_text:
            continue

        try:
            entry["stealthgpt_rephrased"][model_name] = _rephrase(original_text)
        except (requests.RequestException, ValueError) as e:
            print(f"\n❌ Failed to process '{model_name}' for new topic: {topic_label}...: {e}")
            # None marks the model as failed so a later run retries it.
            entry["stealthgpt_rephrased"][model_name] = None
        else:
            print(f"\n✅ Processed '{model_name}' for new topic: {topic_label}...")

    # Add the newly processed entry to our data and save. Registering it in the
    # lookup keeps a repeated topic later in the input from being rephrased and
    # appended a second time within the same run.
    final_data.append(entry)
    processed_lookup[topic] = entry
    _save_progress()

print(f"\n✅ All done! Final results saved to: {output_file}")

# Use Five AI Detectors (4 APIs and 1 Fine-Tuned Model) to Detect StealthGPT Texts and Provide Scores

In [None]:
# Merge all AI detectors and keep each detector's default probability output
# Multi-detector AI text analysis with default probabilities
import json
import requests
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from tqdm import tqdm

from getpass import getpass

# === Prompt for API keys securely ===
# NOTE: the "pengram" dictionary key is a misspelling of "Pangram". It is kept
# as-is because the Pangram detector reads the key under that exact name; only
# the user-facing prompt text is corrected.
api_keys = {
    "pengram": getpass("Enter Pangram API Key: "),
    "originality": getpass("Enter Originality.AI API Key: "),
    "gptzero": getpass("Enter GPTZero API Key: ")
}

# A blank key does not abort the session, but flag it now instead of letting
# the problem surface only as per-item errors during a long run.
missing_keys = [name for name, key in api_keys.items() if not key.strip()]
if missing_keys:
    print(f"⚠️ No API key entered for: {', '.join(missing_keys)}. Requests to those detectors will likely fail.")

print("✅ API keys loaded for this session.")

# === Pangram detector wrapper ===
def detect_ai_pangram(text, timeout=60):
    """Score *text* with the Pangram API.

    Args:
        text: Text to analyse. ``None``, empty and whitespace-only values are
            skipped without calling the API.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``None`` when there is no text to analyse; otherwise a dict with the
        ``ai_likelihood`` and ``prediction`` fields of the JSON response (each
        ``None`` if absent), or ``{"error": <message>}`` if the request or the
        response parsing fails.
    """
    # `not text` also covers None, which would otherwise crash on .strip().
    if not text or not text.strip():
        return None
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_keys["pengram"]
    }
    payload = {"text": text}
    try:
        # Without a timeout a stalled connection would block the batch forever;
        # a timeout surfaces as an exception and is reported like any other error.
        response = requests.post(
            "https://text.api.pangramlabs.com",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        result = response.json()
        return {
            "ai_likelihood": result.get("ai_likelihood"),
            "prediction": result.get("prediction")
        }
    except Exception as e:
        # Best-effort by design: one failed call must not abort the whole run.
        return {"error": str(e)}

# === Originality.ai wrapper ===
def detect_ai_originality(text, timeout=60):
    """Score *text* with the Originality.ai v3 scan endpoint (AI check only).

    Args:
        text: Text to analyse. ``None``, empty and whitespace-only values are
            skipped without calling the API.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``None`` when there is no text to analyse; otherwise a dict with
        ``classification`` and ``confidence`` sub-dicts, each holding the
        ``AI`` and ``Original`` values found under ``results.ai`` in the
        response (``None`` if absent), or ``{"error": <message>}`` if the
        request or the response parsing fails.
    """
    # `not text` also covers None, which would otherwise crash on .strip().
    if not text or not text.strip():
        return None
    try:
        headers = {
            "X-OAI-API-KEY": api_keys["originality"],
            "Content-Type": "application/json"
        }
        # Only the AI check is enabled; every other scan feature is switched off.
        payload = {
            "check_ai": True,
            "check_plagiarism": False,
            "check_facts": False,
            "check_readability": False,
            "check_grammar": False,
            "check_contentOptimizer": False,
            "storeScan": False,
            "aiModelVersion": "lite",
            "content": text
        }
        # Without a timeout a stalled connection would block the batch forever.
        response = requests.post(
            "https://api.originality.ai/api/v3/scan",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        ai_section = response.json().get("results", {}).get("ai", {})
        classification = ai_section.get("classification", {})
        confidence = ai_section.get("confidence", {})
        return {
            "classification": {
                "AI": classification.get("AI"),
                "Original": classification.get("Original")
            },
            "confidence": {
                "AI": confidence.get("AI"),
                "Original": confidence.get("Original")
            }
        }
    except Exception as e:
        # Best-effort by design: one failed call must not abort the whole run.
        return {"error": str(e)}

# === GPTZero detector wrapper ===
def detect_ai_gptzero(text, timeout=60):
    """Score *text* with the GPTZero v2 text prediction endpoint.

    Args:
        text: Text to analyse. ``None``, empty and whitespace-only values are
            skipped without calling the API.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``None`` when there is no text to analyse; otherwise a dict with the
        ``average_generated_prob`` of the first entry in the response's
        ``documents`` list (``None`` if absent), or ``{"error": <message>}``
        if the request or the response parsing fails.
    """
    # `not text` also covers None, which would otherwise crash on .strip().
    if not text or not text.strip():
        return None
    try:
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "x-api-key": api_keys["gptzero"]
        }
        payload = {
            "document": text,
            "multilingual": False
        }
        # Without a timeout a stalled connection would block the batch forever.
        response = requests.post(
            "https://api.gptzero.me/v2/predict/text",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        # An empty "documents" list raises IndexError here, which is reported
        # through the error dict below.
        first_document = response.json().get("documents", [{}])[0]
        return {
            "average_generated_prob": first_document.get("average_generated_prob")
        }
    except Exception as e:
        # Best-effort by design: one failed call must not abort the whole run.
        return {"error": str(e)}

# === Load RoBERTa-based OpenAI Detector ===
# The tokenizer and the classification model are both loaded from the same
# checkpoint identifier.
MODEL_NAME = "roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
detector = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
# Switch to evaluation mode; the model is only used for inference here.
detector.eval()

# === RoBERTa detector function ===
def detect_ai_roberta(text):
    """Score *text* with the locally loaded RoBERTa sequence classifier.

    Args:
        text: Text to analyse. ``None``, empty and whitespace-only values are
            skipped without running the model.

    Returns:
        ``None`` when there is no text to analyse; otherwise the softmax
        probability of class index 1 as a Python float.
    """
    # `not text` also covers None, which would otherwise crash on .strip().
    if not text or not text.strip():
        return None
    # Inputs longer than 512 tokens are truncated, so only the beginning of a
    # long text is scored.
    inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    with torch.no_grad():
        logits = detector(**inputs).logits
    # NOTE(review): this reports class index 1 as the "AI" probability. Confirm
    # with detector.config.id2label that index 1 is the AI-generated class for
    # this checkpoint; if index 1 is the human/"Real" label the score is inverted.
    prob_ai = torch.softmax(logits, dim=-1)[0, 1].item()
    return prob_ai

# === Load input data ===
input_path = "" ## JSON FILE FROM STEALTH GPT GENERATED TEXT
# Derive the output path once, by swapping only a trailing ".json" suffix.
# str.replace would rewrite every ".json" occurrence in the path and, when the
# suffix is absent, leave output_path equal to input_path (overwriting the input).
_JSON_SUFFIX = ".json"
if input_path.endswith(_JSON_SUFFIX):
    output_path = input_path[:-len(_JSON_SUFFIX)] + "_all_detectors.json"
else:
    output_path = input_path + "_all_detectors.json"
with open(input_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# === Define how often to save the file (e.g., every 50 items) ===
SAVE_INTERVAL = 50

# Run statistics
total_items = len(data)
skipped_items = 0
processed_items = 0


def _save_results():
    """Write the full (partially annotated) dataset to output_path as UTF-8 JSON."""
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)


# === Run all detectors ===
for item in tqdm(data, desc="Running all AI detectors"):
    ai_texts = item.get("stealthgpt_rephrased") or {}

    # Skip items that already carry detection results. Models whose rephrased
    # text is empty/None never receive a verdict in the loop below, so they must
    # not be required here; otherwise such items would be re-run (and re-billed)
    # on every pass.
    expected_models = [model for model, text in ai_texts.items() if text]
    if (
        "human_verdict" in item
        and "ai_verdicts" in item
        and all(model in item["ai_verdicts"] for model in expected_models)
    ):
        skipped_items += 1
        continue

    # `or ""` also covers an explicit null "text" value in the JSON.
    human_text = item.get("text") or ""

    # Initialize verdict structure if needed
    item.setdefault("human_verdict", {})

    # Add results from each detector
    item["human_verdict"]["pengram"] = detect_ai_pangram(human_text)
    item["human_verdict"]["originality"] = detect_ai_originality(human_text)
    item["human_verdict"]["gptzero"] = detect_ai_gptzero(human_text)
    item["human_verdict"]["roberta-base-detector"] = detect_ai_roberta(human_text)

    # Process AI-generated texts
    item.setdefault("ai_verdicts", {})

    for model_name, text in ai_texts.items():
        if not text:
            continue
        verdict = item["ai_verdicts"].setdefault(model_name, {})
        verdict["pengram"] = detect_ai_pangram(text)
        verdict["originality"] = detect_ai_originality(text)
        verdict["gptzero"] = detect_ai_gptzero(text)
        verdict["roberta-base-detector"] = detect_ai_roberta(text)

    processed_items += 1

    # Automatically save progress at the specified interval
    if processed_items % SAVE_INTERVAL == 0:
        _save_results()
        # Using tqdm.write is better than print() here as it doesn't break the progress bar
        tqdm.write(f"💾 Progress saved! ({processed_items} items processed)")


# === Final Save Output ===
# This final save ensures that any remaining items are saved after the loop finishes.
_save_results()

print("✅ Multi-detector analysis completed.")
print(f"Total items: {total_items}")
print(f"Skipped items (already processed): {skipped_items}")
print(f"Processed items: {processed_items}")
print(f"Results saved to: {output_path}")

Enter Pengram API Key:  ········
Enter Originality.AI API Key:  ········
Enter GPTZero API Key:  ········


✅ API keys loaded for this session.


Some weights of the model checkpoint at roberta-base-openai-detector were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Running all AI detectors:   3%|▎         | 50/1992 [05:10<4:38:37,  8.61s/it]

💾 Progress saved! (50 items processed)


Running all AI detectors:   5%|▌         | 100/1992 [11:22<3:40:20,  6.99s/it]

💾 Progress saved! (100 items processed)


Running all AI detectors:   8%|▊         | 150/1992 [16:19<2:24:31,  4.71s/it]

💾 Progress saved! (150 items processed)


Running all AI detectors:  10%|█         | 200/1992 [19:55<2:21:49,  4.75s/it]

💾 Progress saved! (200 items processed)


Running all AI detectors:  13%|█▎        | 250/1992 [23:38<2:35:31,  5.36s/it]

💾 Progress saved! (250 items processed)


Running all AI detectors:  15%|█▌        | 300/1992 [27:12<2:19:00,  4.93s/it]

💾 Progress saved! (300 items processed)


Running all AI detectors:  18%|█▊        | 350/1992 [30:26<1:58:07,  4.32s/it]

💾 Progress saved! (350 items processed)


Running all AI detectors:  20%|██        | 400/1992 [33:52<1:43:11,  3.89s/it]

💾 Progress saved! (400 items processed)


Running all AI detectors:  23%|██▎       | 450/1992 [39:29<3:12:11,  7.48s/it]

💾 Progress saved! (450 items processed)


Running all AI detectors:  25%|██▌       | 500/1992 [45:25<3:04:58,  7.44s/it]

💾 Progress saved! (500 items processed)


Running all AI detectors:  28%|██▊       | 550/1992 [48:35<1:25:09,  3.54s/it]

💾 Progress saved! (550 items processed)


Running all AI detectors:  30%|███       | 600/1992 [52:01<1:41:48,  4.39s/it]

💾 Progress saved! (600 items processed)


Running all AI detectors:  33%|███▎      | 650/1992 [57:55<2:36:47,  7.01s/it]

💾 Progress saved! (650 items processed)


Running all AI detectors:  35%|███▌      | 700/1992 [1:03:15<1:37:37,  4.53s/it]

💾 Progress saved! (700 items processed)


Running all AI detectors:  38%|███▊      | 750/1992 [1:08:09<1:39:28,  4.81s/it]

💾 Progress saved! (750 items processed)


Running all AI detectors:  40%|████      | 800/1992 [1:12:39<2:40:14,  8.07s/it]

💾 Progress saved! (800 items processed)


Running all AI detectors:  43%|████▎     | 850/1992 [1:17:25<1:26:15,  4.53s/it]

💾 Progress saved! (850 items processed)


Running all AI detectors:  45%|████▌     | 900/1992 [1:21:51<1:45:28,  5.79s/it]

💾 Progress saved! (900 items processed)


Running all AI detectors:  48%|████▊     | 950/1992 [1:26:32<2:14:22,  7.74s/it]

💾 Progress saved! (950 items processed)


Running all AI detectors:  50%|█████     | 1000/1992 [1:31:46<1:52:39,  6.81s/it]

💾 Progress saved! (1000 items processed)


Running all AI detectors:  53%|█████▎    | 1050/1992 [1:38:12<1:50:04,  7.01s/it]

💾 Progress saved! (1050 items processed)


Running all AI detectors:  55%|█████▌    | 1100/1992 [1:47:47<3:05:29, 12.48s/it]

💾 Progress saved! (1100 items processed)


Running all AI detectors:  58%|█████▊    | 1150/1992 [1:55:35<1:58:07,  8.42s/it]

💾 Progress saved! (1150 items processed)


Running all AI detectors:  60%|██████    | 1200/1992 [2:03:09<1:45:39,  8.00s/it]

💾 Progress saved! (1200 items processed)


Running all AI detectors:  63%|██████▎   | 1250/1992 [2:10:05<1:38:47,  7.99s/it]

💾 Progress saved! (1250 items processed)


Running all AI detectors:  65%|██████▌   | 1300/1992 [2:18:13<2:11:22, 11.39s/it]

💾 Progress saved! (1300 items processed)


Running all AI detectors:  68%|██████▊   | 1350/1992 [2:30:23<2:44:58, 15.42s/it]

💾 Progress saved! (1350 items processed)


Running all AI detectors:  70%|███████   | 1400/1992 [2:41:04<2:56:00, 17.84s/it]

💾 Progress saved! (1400 items processed)


Running all AI detectors:  73%|███████▎  | 1450/1992 [2:49:59<1:37:19, 10.77s/it]

💾 Progress saved! (1450 items processed)


Running all AI detectors:  75%|███████▌  | 1500/1992 [2:57:26<48:26,  5.91s/it]  

💾 Progress saved! (1500 items processed)


Running all AI detectors:  78%|███████▊  | 1550/1992 [3:04:40<1:12:51,  9.89s/it]

💾 Progress saved! (1550 items processed)


Running all AI detectors:  80%|████████  | 1600/1992 [3:12:19<1:09:27, 10.63s/it]

💾 Progress saved! (1600 items processed)


Running all AI detectors:  83%|████████▎ | 1650/1992 [3:18:56<33:30,  5.88s/it]  

💾 Progress saved! (1650 items processed)


Running all AI detectors:  85%|████████▌ | 1700/1992 [3:23:58<34:41,  7.13s/it]

💾 Progress saved! (1700 items processed)


Running all AI detectors:  88%|████████▊ | 1750/1992 [3:29:01<24:29,  6.07s/it]

💾 Progress saved! (1750 items processed)


Running all AI detectors:  90%|█████████ | 1800/1992 [3:34:02<18:14,  5.70s/it]

💾 Progress saved! (1800 items processed)


Running all AI detectors:  93%|█████████▎| 1850/1992 [3:38:51<13:48,  5.83s/it]

💾 Progress saved! (1850 items processed)


Running all AI detectors:  95%|█████████▌| 1900/1992 [3:43:40<09:05,  5.92s/it]

💾 Progress saved! (1900 items processed)


Running all AI detectors:  98%|█████████▊| 1950/1992 [3:48:45<04:14,  6.07s/it]

💾 Progress saved! (1950 items processed)


Running all AI detectors: 100%|██████████| 1992/1992 [3:52:57<00:00,  7.02s/it]


✅ Multi-detector analysis completed.
Total items: 1992
Skipped items (already processed): 0
Processed items: 1992
Results saved to: generated_output_claude-opus-4-20250514_stealthgpt_all_detectors.json
