# Feature extraction

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import torch
from IPython.display import HTML, display
from transformers import (
    AutoModel,
    AutoTokenizer,
    CamembertForMaskedLM,
    CamembertTokenizer,
    GPT2LMHeadModel,
    GPT2TokenizerFast,
    RobertaForMaskedLM,
    RobertaTokenizer,
)

from nlp_adversarial_attacks.models.model_loading import load_target_model
from nlp_adversarial_attacks.reactdetect.feature_extraction import FeatureExtractor
from nlp_adversarial_attacks.reactdetect.feature_extraction.extractors.utils import (
    EXTR_FCNS_BY_NAME,
)

# Fail fast when no CUDA device is visible: every model-based cell below moves
# its model to CUDA_DEVICE.
# NOTE(review): the assertion message contains a typo ("defenitely").
assert (
    torch.cuda.is_available()
), "encoding features is quite expensive, defenitely use gpus"
# Single device handle shared by all later cells.
CUDA_DEVICE = torch.device("cuda")

In [3]:
# Display the registry that maps extractor names to their functions; the
# signatures in the output show which keyword arguments each extractor expects.
EXTR_FCNS_BY_NAME

{'lm_proba_and_rank': <function nlp_adversarial_attacks.reactdetect.feature_extraction.extractors.language_model_properties.lm_proba_and_rank(text_list, lm_masked_model, lm_masked_tokenizer, device, logger=None, quantiles=None, regions=None, feature_list=None)>,
 'lm_perplexity': <function nlp_adversarial_attacks.reactdetect.feature_extraction.extractors.language_model_properties.lm_perplexity(text_list, lm_causal_model, lm_causal_tokenizer, device, logger=None, stride=1, regions=None, feature_list=None)>,
 'tm_posterior': <function nlp_adversarial_attacks.reactdetect.feature_extraction.extractors.target_model_properties.tm_posterior(text_list, target_model, device, batch_size=32, logger=None, feature_list=None)>,
 'tm_gradient': <function nlp_adversarial_attacks.reactdetect.feature_extraction.extractors.target_model_properties.tm_gradient(text_list, labels, target_model, device='cpu', logger=None, regions=None, quantiles=None, feature_list=None)>,
 'tm_activation': <function nlp_adver

## Text properties

In [4]:
def test_extraction_function(func_name, text_list=None, **kwargs):
    """Run one feature extractor on sample texts and display its features.

    Args:
        func_name: Name of the extractor to run. It is passed to
            ``FeatureExtractor`` via ``add_specific`` and is also the key used
            to read the result back.
        text_list: Texts to extract features from. When ``None``, two short
            sample sentences (one with a transposition typo, one with a digit
            substitution and a double space) are used.
        **kwargs: Extra keyword arguments forwarded unchanged to the
            ``FeatureExtractor`` call (e.g. models, tokenizers, ``device``,
            ``labels``).

    Returns:
        None. The function prints the input texts and displays a DataFrame
        with one row per feature and one column per input text.
    """
    fe = FeatureExtractor(add_specific=[func_name])

    # get samples
    if text_list is None:
        text_list = ["hello there how are yuo.!", "h3llo there  how are you."]

    # call the feature extractor with the required arguments
    res = fe(return_dict=True, text_list=pd.Series(text_list), **kwargs)

    # Close the bold element with </b>; the previous "<b/>" left it unclosed.
    display(HTML(f"<b>Function: {func_name}</b>"))
    print("Text_list:")
    for text in text_list:
        print("    " + text)

    # The result for an extractor is a (feature names, values) pair.
    features, values = res[func_name]

    # Transpose so that features are rows and texts are columns.
    display(pd.DataFrame(values, columns=features).T)

In [5]:
# No text_list is passed, so the helper's default two-sentence sample is used.
test_extraction_function("tp_num_chars")

Text_list:
    hello there how are yuo.!
    h3llo there  how are you.


Unnamed: 0,0,1
num_chars,25.0,25.0


In [6]:
# Default sample pair; the second text replaces a letter with the digit "3".
test_extraction_function("tp_num_alpha_chars")

Text_list:
    hello there how are yuo.!
    h3llo there  how are you.


Unnamed: 0,0,1
num_alpha_chars,19.0,18.0


In [7]:
# Default sample pair; only the second text ("h3llo ...") contains a digit.
test_extraction_function("tp_num_digits")

Text_list:
    hello there how are yuo.!
    h3llo there  how are you.


Unnamed: 0,0,1
num_digits,0.0,1.0


In [8]:
# Default sample pair; the first text ends with ".!" and the second with ".".
test_extraction_function("tp_num_punctuation")

Text_list:
    hello there how are yuo.!
    h3llo there  how are you.


Unnamed: 0,0,1
num_punctuation,2.0,1.0


In [9]:
# Default sample pair; only the second text contains a double space.
test_extraction_function("tp_num_multi_spaces")

Text_list:
    hello there how are yuo.!
    h3llo there  how are you.


Unnamed: 0,0,1
num_multi_spaces,0.0,1.0


In [10]:
# Default sample pair; both texts are reported with the same word count.
test_extraction_function("tp_num_words")

Text_list:
    hello there how are yuo.!
    h3llo there  how are you.


Unnamed: 0,0,1
num_words,5.0,5.0


In [11]:
# Two long words followed by a run of one-letter words, so the per-region
# word-length statistics differ (see the region0 vs region1 rows in the output).
text_list = ["longlong longlong s s s s s s s s s s s s s s s s s s s s s s s"]
test_extraction_function("tp_avg_word_length", text_list=text_list)

Text_list:
    longlong longlong s s s s s s s s s s s s s s s s s s s s s s s


Unnamed: 0,0
avg_word_length_mean_region0,3.0
avg_word_length_var_region0,10.0
avg_word_length_quant0_region0,1.0
avg_word_length_quant1_region0,1.0
avg_word_length_quant2_region0,4.5
avg_word_length_mean_region1,1.0
avg_word_length_var_region1,0.0
avg_word_length_quant0_region1,1.0
avg_word_length_quant1_region1,1.0
avg_word_length_quant2_region1,1.0


In [12]:
# The emoji appended to the first text is its only non-ASCII character.
text_list = ["hello there how are yuo.! 😍", "h3llo there  how are you."]
test_extraction_function("tp_num_non_ascii", text_list=text_list)

Text_list:
    hello there how are yuo.! 😍
    h3llo there  how are you.


Unnamed: 0,0,1
num_non_ascii,1.0,0.0


In [13]:
# First text mixes capitalised and fully upper-case words; second is all lower-case.
# NOTE(review): the recorded output shows fraction_uppercase_letters = 1.482701e-311
# for the second text (0 uppercase letters) instead of 0.0. That looks like an
# uninitialised value leaking out of the extractor — verify tp_num_cased_letters.
text_list = ["Hello There HOW ARE YOU.!", "h3llo there  how are you."]
test_extraction_function("tp_num_cased_letters", text_list=text_list)

Text_list:
    Hello There HOW ARE YOU.!
    h3llo there  how are you.


Unnamed: 0,0,1
num_uppercase_letters,11.0,0.0
num_lowercase_letters,8.0,18.0
fraction_uppercase_letters,0.578947,1.482701e-311
fraction_lowercase_letters,0.421053,1.0


In [14]:
# The first text starts with a capitalised word, the second with a lower-case one.
text_list = ["Hello There HOW ARE YOU.!", "h3llo there  how are you."]
test_extraction_function("tp_is_first_word_lowercase", text_list=text_list)

Text_list:
    Hello There HOW ARE YOU.!
    h3llo there  how are you.


Unnamed: 0,0,1
is_first_word_lowercase,0.0,1.0


In [15]:
# "ThEre" in the first text has an upper-case letter in the middle of the word.
text_list = ["Hello ThEre how are you.!", "h3llo there  how are you."]
test_extraction_function("tp_num_mixed_case_words", text_list=text_list)

Text_list:
    Hello ThEre how are you.!
    h3llo there  how are you.


Unnamed: 0,0,1
num_mixed_case_words,1.0,0.0


In [16]:
# The second text splits "how" into "h ow", leaving a stand-alone lower-case letter.
text_list = ["hello there how are yuo.!", "h3llo there  h ow are you."]
test_extraction_function("tp_num_single_lowercase_letters", text_list=text_list)

Text_list:
    hello there how are yuo.!
    h3llo there  h ow are you.


Unnamed: 0,0,1
num_single_lowercase_letters,0.0,1.0


In [17]:
# The second text contains ". h": a lower-case letter right after a full stop.
text_list = ["hello there how are yuo.!", "h3llo there . h ow are you."]
test_extraction_function("tp_num_lowercase_after_punctuation", text_list=text_list)

Text_list:
    hello there how are yuo.!
    h3llo there . h ow are you.


Unnamed: 0,0,1
num_lowercase_letters_after_punctuation,0.0,1.0


In [18]:
# The first text goes from capitalised to upper-case to lower-case words.
# NOTE(review): the recorded count for it is 1 — confirm which transitions the
# extractor treats as a "switch".
text_list = ["Hello There HOW ARE you.!", "h3llo there  how are you."]
test_extraction_function("tp_num_cased_word_switches", text_list=text_list)

Text_list:
    Hello There HOW ARE you.!
    h3llo there  how are you.


Unnamed: 0,0,1
num_cased_word_switches,1.0,0.0


In [19]:
# Load the sentence-embedding model and its tokenizer from the same checkpoint
# name; only the model is moved to the GPU.
bert_model_name = "sentence-transformers/bert-base-nli-mean-tokens"
print("--- loading lm")
print(f"AutoModel: {bert_model_name}")
lm_bert_model = AutoModel.from_pretrained(bert_model_name).to(CUDA_DEVICE)
lm_bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
print("--- lm loaded")

# No text_list is passed, so the helper's default sample pair is embedded.
test_extraction_function(
    "tp_bert",
    lm_bert_model=lm_bert_model,
    lm_bert_tokenizer=lm_bert_tokenizer,
    device=CUDA_DEVICE,
)

--- loading lm
AutoModel: sentence-transformers/bert-base-nli-mean-tokens
--- lm loaded


Text_list:
    hello there how are yuo.!
    h3llo there  how are you.


Unnamed: 0,0,1
lm_bert_0,-0.313352,0.003020
lm_bert_1,-0.003227,0.192503
lm_bert_2,1.734016,1.710032
lm_bert_3,0.224571,0.254954
lm_bert_4,0.587513,0.701409
...,...,...
lm_bert_763,-0.540840,-0.058098
lm_bert_764,-0.109474,-0.603243
lm_bert_765,0.014358,-0.186244
lm_bert_766,0.035541,0.226444


## LM perplexity

In [20]:
# Probe sentences shared by every model below: an English sentence, a
# character-perturbed copy of it, and the same pair in French. The list does not
# depend on the model, so it is built once instead of on every loop iteration.
text_list = [
    "Wikipedia is a multilingual free online encyclopedia written and maintained by a community of volunteers",
    "Wik_prd9a is a mult8lingual fr5e o@line encyclapedia written and maintained by a community of volunteers",
    "Wikipédia est une encyclopédie en ligne gratuite multilingue écrite et maintenue par une communauté de bénévoles",
    "Wik_prd9a est t encyc7opédie en l@gne gratuite multilingue écrite et maintenue par une communauté de bénévoles",
]

# Run the perplexity extractor with the base `gpt2` checkpoint and two French
# GPT-2 checkpoints on the same sentences so the outputs can be compared.
for lm_causal_model_gpt_name in [
    "gpt2",
    "ClassCat/gpt2-base-french",
    "benjamin/gpt2-wechsel-french",
]:
    print("--- loading lm")
    print(f"GPT2LMHeadModel: {lm_causal_model_gpt_name}")
    # Model and tokenizer are loaded from the same checkpoint name; only the
    # model is moved to the GPU.
    lm_causal_model_gpt = GPT2LMHeadModel.from_pretrained(lm_causal_model_gpt_name).to(
        CUDA_DEVICE
    )
    lm_causal_tokenizer_gpt = GPT2TokenizerFast.from_pretrained(
        lm_causal_model_gpt_name
    )
    print("--- lm loaded")

    test_extraction_function(
        "lm_perplexity",
        text_list=text_list,
        lm_causal_model=lm_causal_model_gpt,
        lm_causal_tokenizer=lm_causal_tokenizer_gpt,
        device=CUDA_DEVICE,
    )

--- loading lm
GPT2LMHeadModel: gpt2
--- lm loaded


Text_list:
    Wikipedia is a multilingual free online encyclopedia written and maintained by a community of volunteers
    Wik_prd9a is a mult8lingual fr5e o@line encyclapedia written and maintained by a community of volunteers
    Wikipédia est une encyclopédie en ligne gratuite multilingue écrite et maintenue par une communauté de bénévoles
    Wik_prd9a est t encyc7opédie en l@gne gratuite multilingue écrite et maintenue par une communauté de bénévoles


Unnamed: 0,0,1,2,3
lm_perplexity_region0,24.814098,364.295197,134.643021,1074.929688
lm_perplexity_region1,189.984787,1508.723022,365.955444,842.555237
lm_perplexity_region2,288.875427,288.875427,207.24614,207.24614
lm_perplexity_region3,23.64061,700.263,114.287552,359.786011


--- loading lm
GPT2LMHeadModel: ClassCat/gpt2-base-french
--- lm loaded


Text_list:
    Wikipedia is a multilingual free online encyclopedia written and maintained by a community of volunteers
    Wik_prd9a is a mult8lingual fr5e o@line encyclapedia written and maintained by a community of volunteers
    Wikipédia est une encyclopédie en ligne gratuite multilingue écrite et maintenue par une communauté de bénévoles
    Wik_prd9a est t encyc7opédie en l@gne gratuite multilingue écrite et maintenue par une communauté de bénévoles


Unnamed: 0,0,1,2,3
lm_perplexity_region0,140.834991,1179.50061,3.584707,4336.395996
lm_perplexity_region1,189.957336,2996.277588,677.830383,3966.690918
lm_perplexity_region2,412.35907,412.35907,81.623306,81.623306
lm_perplexity_region3,92.158951,594.701538,13.120065,1265.757446


--- loading lm
GPT2LMHeadModel: benjamin/gpt2-wechsel-french
--- lm loaded


Text_list:
    Wikipedia is a multilingual free online encyclopedia written and maintained by a community of volunteers
    Wik_prd9a is a mult8lingual fr5e o@line encyclapedia written and maintained by a community of volunteers
    Wikipédia est une encyclopédie en ligne gratuite multilingue écrite et maintenue par une communauté de bénévoles
    Wik_prd9a est t encyc7opédie en l@gne gratuite multilingue écrite et maintenue par une communauté de bénévoles


Unnamed: 0,0,1,2,3
lm_perplexity_region0,158.683807,1116.182495,27.395039,3533.361084
lm_perplexity_region1,36.547207,798.260559,992.720642,2237.489014
lm_perplexity_region2,436.916443,436.916443,112.160156,112.160156
lm_perplexity_region3,27.84572,327.007568,18.26742,1001.386292


## LM probability and rank

In [21]:
# Masked-LM features with the `roberta-base` checkpoint. Model and tokenizer are
# loaded from the same checkpoint name; only the model is moved to the GPU.
lm_masked_model_roberta_name = "roberta-base"
print("--- loading lm")
print(f"RobertaForMaskedLM: {lm_masked_model_roberta_name}")
lm_masked_model_roberta = RobertaForMaskedLM.from_pretrained(
    lm_masked_model_roberta_name, return_dict=True
).to(CUDA_DEVICE)
lm_masked_tokenizer_roberta = RobertaTokenizer.from_pretrained(
    lm_masked_model_roberta_name
)
print("--- lm loaded")

# Same four probe sentences as the perplexity cell: an English sentence, a
# character-perturbed copy of it, and the same pair in French.
text_list = [
    "Wikipedia is a multilingual free online encyclopedia written and maintained by a community of volunteers",
    "Wik_prd9a is a mult8lingual fr5e o@line encyclapedia written and maintained by a community of volunteers",
    "Wikipédia est une encyclopédie en ligne gratuite multilingue écrite et maintenue par une communauté de bénévoles",
    "Wik_prd9a est t encyc7opédie en l@gne gratuite multilingue écrite et maintenue par une communauté de bénévoles",
]
test_extraction_function(
    "lm_proba_and_rank",
    text_list=text_list,
    lm_masked_model=lm_masked_model_roberta,
    lm_masked_tokenizer=lm_masked_tokenizer_roberta,
    device=CUDA_DEVICE,
)

--- loading lm
RobertaForMaskedLM: roberta-base
--- lm loaded


Text_list:
    Wikipedia is a multilingual free online encyclopedia written and maintained by a community of volunteers
    Wik_prd9a is a mult8lingual fr5e o@line encyclapedia written and maintained by a community of volunteers
    Wikipédia est une encyclopédie en ligne gratuite multilingue écrite et maintenue par une communauté de bénévoles
    Wik_prd9a est t encyc7opédie en l@gne gratuite multilingue écrite et maintenue par une communauté de bénévoles


Unnamed: 0,0,1,2,3
lm_proba_mean_region0,0.9934778,0.906722,0.998453,0.794494
lm_proba_var_region0,4.2137e-05,0.07434,1.6e-05,0.114626
lm_proba_quant0_region0,0.9880946,0.997665,0.999705,0.628281
lm_proba_quant1_region0,0.9959056,0.998837,0.999957,0.996032
lm_proba_quant2_region0,0.9996643,0.999396,0.999975,0.999344
lm_rank_mean_region0,1.0,1.0,1.0,1.411765
lm_rank_var_region0,0.0,0.0,0.0,2.00692
lm_rank_quant0_region0,1.0,1.0,1.0,1.0
lm_rank_quant1_region0,1.0,1.0,1.0,1.0
lm_rank_quant2_region0,1.0,1.0,1.0,1.0


In [22]:
# Same extractor as the previous cell, now with the `camembert-base` checkpoint.
# NOTE(review): in the recorded output lm_rank_var_region0 is exactly 10000000.0
# for all four texts — presumably a cap applied by the extractor; confirm.
lm_masked_model_name = "camembert-base"
print("--- loading lm")
print(f"CamembertForMaskedLM: {lm_masked_model_name}")
lm_masked_model = CamembertForMaskedLM.from_pretrained(
    lm_masked_model_name, return_dict=True
).to(CUDA_DEVICE)
lm_masked_tokenizer = CamembertTokenizer.from_pretrained(lm_masked_model_name)
print("--- lm loaded")

# Same four probe sentences as in the RoBERTa cell.
text_list = [
    "Wikipedia is a multilingual free online encyclopedia written and maintained by a community of volunteers",
    "Wik_prd9a is a mult8lingual fr5e o@line encyclapedia written and maintained by a community of volunteers",
    "Wikipédia est une encyclopédie en ligne gratuite multilingue écrite et maintenue par une communauté de bénévoles",
    "Wik_prd9a est t encyc7opédie en l@gne gratuite multilingue écrite et maintenue par une communauté de bénévoles",
]
test_extraction_function(
    "lm_proba_and_rank",
    text_list=text_list,
    lm_masked_model=lm_masked_model,
    lm_masked_tokenizer=lm_masked_tokenizer,
    device=CUDA_DEVICE,
)

--- loading lm
CamembertForMaskedLM: camembert-base
--- lm loaded


Text_list:
    Wikipedia is a multilingual free online encyclopedia written and maintained by a community of volunteers
    Wik_prd9a is a mult8lingual fr5e o@line encyclapedia written and maintained by a community of volunteers
    Wikipédia est une encyclopédie en ligne gratuite multilingue écrite et maintenue par une communauté de bénévoles
    Wik_prd9a est t encyc7opédie en l@gne gratuite multilingue écrite et maintenue par une communauté de bénévoles


Unnamed: 0,0,1,2,3
lm_proba_mean_region0,0.5989743,0.8179866,0.7141277,0.7481702
lm_proba_var_region0,0.2391819,0.1486895,0.2039915,0.1740475
lm_proba_quant0_region0,3.878813e-10,0.999061,0.4994609,0.8079267
lm_proba_quant1_region0,0.9959031,0.9999179,0.9999765,0.994202
lm_proba_quant2_region0,0.9991779,0.9999875,0.9999982,0.9997178
lm_rank_mean_region0,7028.2,3040.273,5305.714,2403.588
lm_rank_var_region0,10000000.0,10000000.0,10000000.0,10000000.0
lm_rank_quant0_region0,1.0,1.0,1.0,1.0
lm_rank_quant1_region0,1.0,1.0,1.0,1.0
lm_rank_quant2_region0,13659.0,1.0,7415.0,1.0


## Target Model properties

In [23]:
# Load the two-label target classifier used by all tm_* cells below.
target_model_name = "distilcamembert"
pretrained_model_name_or_path = "baptiste-pasquier/distilcamembert-allocine"
num_labels = 2
print("--- loading target model")
target_model = load_target_model(
    model_name=target_model_name,
    pretrained_model_name_or_path=pretrained_model_name_or_path,
    num_labels=num_labels,
    max_seq_len=None,
    device=CUDA_DEVICE,
)
# NOTE(review): `regions` is not passed to any extractor call in the visible
# cells, so they run with each extractor's default `regions` — confirm whether
# this definition is still needed.
regions = [(0.0, 0.25), (0.25, 0.75), (0.75, 1.0), (0.0, 1.0)]
print("--- target model loaded")

--- loading target model
--- target model loaded


In [24]:
# Two French review sentences ("meilleur" vs "pire"), each followed by a
# character-perturbed copy. This list is reused by the tm_* cells that follow.
text_list = [
    "Il relève tout simplement du meilleur du cinéma français.",
    "I_ rel@ve tyu3 s0m!le,ent du meilleur du cinéma français.",
    "Il relève tout simplement du pire du cinéma français.",
    "I_ rel@ve tyu3 s0m!le,ent du pire du cinéma français.",
]
test_extraction_function(
    "tm_posterior", text_list=text_list, target_model=target_model, device=CUDA_DEVICE
)

Text_list:
    Il relève tout simplement du meilleur du cinéma français.
    I_ rel@ve tyu3 s0m!le,ent du meilleur du cinéma français.
    Il relève tout simplement du pire du cinéma français.
    I_ rel@ve tyu3 s0m!le,ent du pire du cinéma français.


Unnamed: 0,0,1,2,3
tm_output_0,0.001268,0.005454,0.999496,0.99368
tm_output_1,0.998732,0.994546,0.000504,0.00632


In [25]:
# One label per entry of `text_list` (defined in the tm_posterior cell): 1 for
# the two "meilleur" sentences, 0 for the two "pire" sentences.
# NOTE(review): the last row of the recorded output is named
# tm_gradient_mean_layer104_region_3, with an underscore before the region
# index unlike every other feature name — check the naming in tm_gradient.
labels = [1, 1, 0, 0]
test_extraction_function(
    "tm_gradient",
    text_list=text_list,
    labels=pd.Series(labels),
    target_model=target_model,
    device=CUDA_DEVICE,
)

Text_list:
    Il relève tout simplement du meilleur du cinéma français.
    I_ rel@ve tyu3 s0m!le,ent du meilleur du cinéma français.
    Il relève tout simplement du pire du cinéma français.
    I_ rel@ve tyu3 s0m!le,ent du pire du cinéma français.


Unnamed: 0,0,1,2,3
tm_gradient_mean_layer0_region0,-9.942255e-14,2.121823e-15,4.849881e-15,-9.457267e-14
tm_gradient_var_layer0_region0,1.633131e-06,3.848839e-07,1.040655e-06,5.418412e-06
tm_gradient_mean_layer1_region0,-6.039705e-12,-7.549632e-14,9.814521e-13,-1.207941e-12
tm_gradient_var_layer1_region0,1.016897e-04,2.396545e-05,6.479814e-05,3.373866e-04
tm_gradient_quant0_layer1_region0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
...,...,...,...,...
tm_gradient_quant0_layer103_region3,-5.040066e-04,-1.720168e-03,-2.175319e-04,-1.810942e-03
tm_gradient_quant1_layer103_region3,-3.126388e-12,3.205969e-11,2.165734e-11,-6.821210e-12
tm_gradient_quant2_layer103_region3,5.040092e-04,1.720152e-03,2.175665e-04,1.810940e-03
tm_gradient_mean_layer104_region_3,6.693881e-09,-5.238689e-08,4.004687e-08,6.053597e-09


In [26]:
# Relies on `text_list` and `target_model` defined in earlier cells; this
# extractor is called without labels.
test_extraction_function(
    "tm_activation", text_list=text_list, target_model=target_model, device=CUDA_DEVICE
)

Text_list:
    Il relève tout simplement du meilleur du cinéma français.
    I_ rel@ve tyu3 s0m!le,ent du meilleur du cinéma français.
    Il relève tout simplement du pire du cinéma français.
    I_ rel@ve tyu3 s0m!le,ent du pire du cinéma français.


Unnamed: 0,0,1,2,3
tm_activation_mean_layer0_region0,-0.008056,0.004521,-0.008056,0.004521
tm_activation_var_layer0_region0,0.030968,0.020824,0.030968,0.020824
tm_activation_quant0_layer0_region0,-0.051361,-0.045224,-0.051361,-0.045224
tm_activation_quant1_layer0_region0,0.002041,0.009156,0.002041,0.009156
tm_activation_quant2_layer0_region0,0.052879,0.062488,0.052879,0.062488
...,...,...,...,...
tm_activation_quant0_layer79_region3,-0.384787,-0.289234,-0.440859,-0.299097
tm_activation_quant1_layer79_region3,0.032642,0.046557,-0.019357,0.011163
tm_activation_quant2_layer79_region3,0.404022,0.331211,0.426327,0.275700
tm_activation_mean_layer80_region3,-0.229810,-0.157200,0.223046,0.140201


In [27]:
# Relies on `text_list` (tm_posterior cell) and `labels` (tm_gradient cell)
# defined earlier, so those cells must be run first.
test_extraction_function(
    "tm_saliency",
    text_list=text_list,
    labels=pd.Series(labels),
    target_model=target_model,
    device=CUDA_DEVICE,
)



Text_list:
    Il relève tout simplement du meilleur du cinéma français.
    I_ rel@ve tyu3 s0m!le,ent du meilleur du cinéma français.
    Il relève tout simplement du pire du cinéma français.
    I_ rel@ve tyu3 s0m!le,ent du pire du cinéma français.


Unnamed: 0,0,1,2,3
tm_simple_gradient_saliency_mean,0.083333,0.041667,0.083333,0.041667
tm_simple_gradient_saliency_var,0.007509,0.002019,0.003834,0.0009
tm_simple_gradient_saliency_quant0,0.042974,0.011701,0.034386,0.021037
tm_simple_gradient_saliency_quant1,0.062291,0.026884,0.079129,0.03745
tm_simple_gradient_saliency_quant2,0.089287,0.049714,0.128773,0.059414
