In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model_name = "roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

def detect_ai(text: str):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return {"real": float(scores[0][0]), "fake": float(scores[0][1])}

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Some weights of the model checkpoint at roberta-base-openai-detector were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS 

In [4]:
txt = "Un √©l√®ve suspect√© d'avoir frapp√© √† coups de couteau une surveillante d'un coll√®ge √† Nogent (Haute-Marne), alors que des gendarmes proc√©daient √† un contr√¥le des sacs aux abords de l'√©tablissement, a √©t√© arr√™t√© et plac√© en garde √† vue mardi 10 juin, a appris France T√©l√©visions aupr√®s de la gendarmerie. La pr√©fecture de Haute-Marne a annonc√© que l'adolescent a 'bless√© gri√®vement une assistante d'√©ducation' et pr√©cise que la victime √¢g√©e de 31 ans est en 'urgence absolue'.  Elle est actuellement prise en charge par le Samu, sur place. Un gendarme a √©t√© l√©g√®rement bless√© par le couteau au moment de l'interpellation du suspect, pr√©cisent les gendarmes √† France T√©l√©visions. Les 324 √©l√®ves de l'√©tablissement ont √©t√© confin√©s, ajoute la pr√©fecture. La ministre de l'Education nationale Elisabeth Borne et la pr√©f√®te de Haute-Marne ont annonc√© se rendre sur place. J'exprime tout mon soutien √† la victime et √† ses proches, √©crit Elisabeth Borne sur X(Nouvelle fen√™tre)"

In [5]:
detect_ai(txt)

{'real': 0.00018289859872311354, 'fake': 0.9998170733451843}

In [9]:
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import torch
import math

# üß† Utilise un mod√®le GPT-2 francophone
model_name = "asi/gpt-fr-cased-small"  # version all√©g√©e de GPT2 pour fran√ßais
tokenizer = GPT2TokenizerFast.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

def compute_perplexity(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
        loss = outputs.loss
    perplexity = math.exp(loss.item())
    return perplexity

In [10]:
compute_perplexity(txt)

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


27.519954659293212

In [None]:
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import torch, math, time
import pandas as pd

# Cell 2 ‚Äì Mod√®les √† tester
model_names = [
    "asi/gpt-fr-cased-small",
    "asi/gpt-fr-cased-base",
    "dbddv01/gpt2-french-small",
    "ClassCat/gpt2-base-french",
    "antoiloui/belgpt2"
]
models = {}

# Cell 3 ‚Äì Chargement des mod√®les
for name in model_names:
    print(f"Chargement de {name}‚Ä¶")
    tokenizer = GPT2TokenizerFast.from_pretrained(name)
    model = GPT2LMHeadModel.from_pretrained(name)
    model.eval()
    models[name] = (tokenizer, model)

# Cell 4 ‚Äì Fonction de perplexit√©
def compute_perplexity(tokenizer, model, text, max_length=1024):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_length)
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
        loss = outputs.loss.item()
    return math.exp(loss)

# Cell 5 ‚Äì Textes d‚Äôexemple
texte_humain = "Aujourd'hui, les √©l√®ves sont all√©s au mus√©e pour d√©couvrir l'histoire de l'art moderne."
texte_ia = "La plan√®te est un syst√®me complexe o√π les interactions entre les √©l√©ments naturels cr√©ent des dynamiques √©volutives permanentes."

# Cell 6 ‚Äì Calcul des perplexit√©s
results = []
for name, (tok, mod) in models.items():
    for label, txt in [("humain", texte_humain), ("ia", texte_ia)]:
        start = time.time()
        ppl = compute_perplexity(tok, mod, txt)
        elapsed = (time.time() - start) * 1000
        results.append({
            "mod√®le": name,
            "texte": label,
            "perplexit√©": ppl,
            "dur√©e_ms": int(elapsed)
        })

df = pd.DataFrame(results)
pivot = df.pivot(index="mod√®le", columns="texte", values=["perplexit√©", "dur√©e_ms"])
pivot


  from .autonotebook import tqdm as notebook_tqdm


Chargement de asi/gpt-fr-cased-small‚Ä¶
Chargement de asi/gpt-fr-cased-base‚Ä¶
Chargement de dbddv01/gpt2-french-small‚Ä¶
Chargement de ClassCat/gpt2-base-french‚Ä¶
Chargement de antoiloui/belgpt2‚Ä¶


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Unnamed: 0_level_0,perplexit√©,perplexit√©,dur√©e_ms,dur√©e_ms
texte,humain,ia,humain,ia
mod√®le,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
ClassCat/gpt2-base-french,6.839248,27.356639,161.0,50.0
antoiloui/belgpt2,112.966284,31.720293,168.0,48.0
asi/gpt-fr-cased-base,8.083413,18.63375,993.0,337.0
asi/gpt-fr-cased-small,9.382521,33.789664,131.0,50.0
dbddv01/gpt2-french-small,16.381933,83.377777,2800.0,50.0
