# Quickstart

In [1]:
import torch
from datasets import load_dataset
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from textdefendr.encoder import TextEncoder

In [2]:
# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


The dataset contains 9,000 samples produced by attacks on Allociné reviews, plus 20,000 original (unattacked) reviews.

The `attack_name` column shows the name of the attack used, or "clean" for original texts.

The `perturbed_text` column contains the text modified by an attack, or the original text for unattacked samples.

In [3]:
# Load all splits of the attack dataset, convert the result to a pandas
# DataFrame, and keep a random subset of 1000 rows (fixed seed, so the same
# rows are drawn on every run).
df = (
    load_dataset("baptiste-pasquier/attack-dataset", split="all")
    .to_pandas()
    .sample(n=1000, random_state=42)
)
df

Using custom data configuration baptiste-pasquier--attack-dataset-c656294678469f2e
Reusing dataset csv (C:\Users\Baptiste\.cache\huggingface\datasets\baptiste-pasquier___csv\baptiste-pasquier--attack-dataset-c656294678469f2e\0.0.0\652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a)


Unnamed: 0,scenario,target_model,target_model_train_dataset,attack_toolchain,attack_name,target_dataset,test_index,original_text,perturbed_text,ground_truth,original_output,perturbed_output,status,num_queries,frac_words_changed
10227,sentiment,distilcamembert,allocine,none,clean,allocine,10227,C'est un très bon film qui n'est pas seulement...,C'est un très bon film qui n'est pas seulement...,1,[4.56757518e-04 9.99543242e-01],[4.56757518e-04 9.99543242e-01],clean,0,0.000000
7563,sentiment,distilcamembert,allocine,none,clean,allocine,7563,"Avec ce Parrain 3, Coppola règle ses comptes, ...","Avec ce Parrain 3, Coppola règle ses comptes, ...",1,[4.26541245e-04 9.99573459e-01],[4.26541245e-04 9.99573459e-01],clean,0,0.000000
23582,sentiment,distilcamembert,allocine,textattack,input_reduction,allocine,755,Pour ceux qui souhaiteraient prolonger le plai...,"ceux souhaiteraient """", id à d'ados aux, donc,...",0,"[0.999473512172699, 0.0005264984210953116]","[0.9982336759567261, 0.0017663395265117288]",success,319,0.355670
22798,sentiment,distilcamembert,allocine,textattack,deepwordbug,allocine,2000,Probablement le film d'espionnage le plus inte...,Probableent le film d'espionnage le plus intel...,1,"[0.0008815607288852334, 0.9991183876991272]","[0.9320080280303955, 0.06799197196960449]",success,105,0.045455
6909,sentiment,distilcamembert,allocine,none,clean,allocine,6909,"Très bon film d'horreur francais, du suspens ....","Très bon film d'horreur francais, du suspens ....",1,[9.58729705e-04 9.99041270e-01],[9.58729705e-04 9.99041270e-01],clean,0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8390,sentiment,distilcamembert,allocine,none,clean,allocine,8390,Mais dans quel guêpier sont allés se fourrer c...,Mais dans quel guêpier sont allés se fourrer c...,0,[9.99811249e-01 1.88751290e-04],[9.99811249e-01 1.88751290e-04],clean,0,0.000000
15354,sentiment,distilcamembert,allocine,none,clean,allocine,15354,"Une comédie culte des années 70, l'age d'or de...","Une comédie culte des années 70, l'age d'or de...",1,[5.38780759e-04 9.99461219e-01],[5.38780759e-04 9.99461219e-01],clean,0,0.000000
843,sentiment,distilcamembert,allocine,none,clean,allocine,843,"Excellent, si j'avais un doute sur l'acteur (q...","Excellent, si j'avais un doute sur l'acteur (q...",1,[6.04682728e-04 9.99395317e-01],[6.04682728e-04 9.99395317e-01],clean,0,0.000000
14907,sentiment,distilcamembert,allocine,none,clean,allocine,14907,"On est très loin du chef-d’œuvre annoncé, cert...","On est très loin du chef-d’œuvre annoncé, cert...",0,[9.99792352e-01 2.07648342e-04],[9.99792352e-01 2.07648342e-04],clean,0,0.000000


To train a binary classifier that detects attacks, we use as the target a binary variable indicating whether a text was produced by an attack (`attack_name` different from "clean").

In [4]:
# Features: the text as seen by the model (attacked or original).
X = df["perturbed_text"]
# Target: True when the row comes from an attack, False for "clean" rows.
y = df["attack_name"].ne("clean")
# Hold out 20% of the samples for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Let's encode the text samples with features computed from several language models (text properties, perplexity, and token probability/rank).

In [5]:
# Enable all three feature groups. Judging by the encoder's progress log, these
# appear to be text properties, LM perplexity, and LM probability/rank -- confirm
# the exact meaning of each flag against the TextEncoder documentation.
encoder = TextEncoder(
    device=device,
    enable_tp=True,
    enable_lm_perplexity=True,
    enable_lm_proba=True,
)
# Fit the encoder on the training texts only and encode them in the same call.
X_train_encoded = encoder.fit_transform(X_train)

  0%|          | 0/3 [00:00<?, ?it/s]

Encoding text properties with sentence-transformers/bert-base-nli-mean-tokens...
Encoding perplexity with gpt2...
Encoding proba and rank with roberta-base...


The encoded features can now be used with any standard classifier, for example a logistic regression.

In [6]:
# Logistic regression with a fixed seed, trained on the encoded training texts.
clf = LogisticRegression(random_state=42)
# Keep `fit` as the last expression so the cell still displays the fitted estimator.
clf.fit(X=X_train_encoded, y=y_train)

In [7]:
# Encode the held-out texts with the already-fitted encoder (transform only, no refit).
X_test_encoded = encoder.transform(X_test)
# For scikit-learn classifiers, `score` reports the mean accuracy on the given data.
clf.score(X=X_test_encoded, y=y_test)

  0%|          | 0/3 [00:00<?, ?it/s]

Encoding text properties with sentence-transformers/bert-base-nli-mean-tokens...
Encoding perplexity with gpt2...
Encoding proba and rank with roberta-base...


0.745