In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model_name = "roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

def detect_ai(text: str):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return {"real": float(scores[0][0]), "fake": float(scores[0][1])}

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Some weights of the model checkpoint at roberta-base-openai-detector were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS 

In [4]:
txt = "Un élève suspecté d'avoir frappé à coups de couteau une surveillante d'un collège à Nogent (Haute-Marne), alors que des gendarmes procédaient à un contrôle des sacs aux abords de l'établissement, a été arrêté et placé en garde à vue mardi 10 juin, a appris France Télévisions auprès de la gendarmerie. La préfecture de Haute-Marne a annoncé que l'adolescent a 'blessé grièvement une assistante d'éducation' et précise que la victime âgée de 31 ans est en 'urgence absolue'.  Elle est actuellement prise en charge par le Samu, sur place. Un gendarme a été légèrement blessé par le couteau au moment de l'interpellation du suspect, précisent les gendarmes à France Télévisions. Les 324 élèves de l'établissement ont été confinés, ajoute la préfecture. La ministre de l'Education nationale Elisabeth Borne et la préfète de Haute-Marne ont annoncé se rendre sur place. J'exprime tout mon soutien à la victime et à ses proches, écrit Elisabeth Borne sur X(Nouvelle fenêtre)"

In [5]:
detect_ai(txt)

{'real': 0.00018289859872311354, 'fake': 0.9998170733451843}

In [9]:
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import torch
import math

# 🧠 Utilise un modèle GPT-2 francophone
model_name = "asi/gpt-fr-cased-small"  # version allégée de GPT2 pour français
tokenizer = GPT2TokenizerFast.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

def compute_perplexity(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
        loss = outputs.loss
    perplexity = math.exp(loss.item())
    return perplexity

In [10]:
compute_perplexity(txt)

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


27.519954659293212

In [None]:
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import torch, math, time
import pandas as pd

# Cell 2 – Modèles à tester
model_names = [
    "asi/gpt-fr-cased-small",
    "asi/gpt-fr-cased-base",
    "dbddv01/gpt2-french-small",
    "ClassCat/gpt2-base-french",
    "antoiloui/belgpt2"
]
models = {}

# Cell 3 – Chargement des modèles
for name in model_names:
    print(f"Chargement de {name}…")
    tokenizer = GPT2TokenizerFast.from_pretrained(name)
    model = GPT2LMHeadModel.from_pretrained(name)
    model.eval()
    models[name] = (tokenizer, model)

# Cell 4 – Fonction de perplexité
def compute_perplexity(tokenizer, model, text, max_length=1024):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_length)
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
        loss = outputs.loss.item()
    return math.exp(loss)

# Cell 5 – Textes d’exemple
texte_humain = "Aujourd'hui, les élèves sont allés au musée pour découvrir l'histoire de l'art moderne."
texte_ia = "La planète est un système complexe où les interactions entre les éléments naturels créent des dynamiques évolutives permanentes."

# Cell 6 – Calcul des perplexités
results = []
for name, (tok, mod) in models.items():
    for label, txt in [("humain", texte_humain), ("ia", texte_ia)]:
        start = time.time()
        ppl = compute_perplexity(tok, mod, txt)
        elapsed = (time.time() - start) * 1000
        results.append({
            "modèle": name,
            "texte": label,
            "perplexité": ppl,
            "durée_ms": int(elapsed)
        })

df = pd.DataFrame(results)
pivot = df.pivot(index="modèle", columns="texte", values=["perplexité", "durée_ms"])
pivot


  from .autonotebook import tqdm as notebook_tqdm


Chargement de asi/gpt-fr-cased-small…
Chargement de asi/gpt-fr-cased-base…
Chargement de dbddv01/gpt2-french-small…
Chargement de ClassCat/gpt2-base-french…
Chargement de antoiloui/belgpt2…


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Unnamed: 0_level_0,perplexité,perplexité,durée_ms,durée_ms
texte,humain,ia,humain,ia
modèle,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
ClassCat/gpt2-base-french,6.839248,27.356639,161.0,50.0
antoiloui/belgpt2,112.966284,31.720293,168.0,48.0
asi/gpt-fr-cased-base,8.083413,18.63375,993.0,337.0
asi/gpt-fr-cased-small,9.382521,33.789664,131.0,50.0
dbddv01/gpt2-french-small,16.381933,83.377777,2800.0,50.0
