In [1]:
import pandas as pd
import numpy as np
import torch
from phd import get_phd
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import modeling_utils
# Compatibility shim: make sure `modeling_utils.ALL_PARALLEL_STYLES` exists and
# is not None before a model is loaded.
# NOTE(review): presumably needed for transformers versions that leave this
# attribute unset — confirm against the installed version.
if getattr(modeling_utils, "ALL_PARALLEL_STYLES", None) is None:
    modeling_utils.ALL_PARALLEL_STYLES = ["tp", "none", "colwise", 'rowwise']


# Checkpoint shared by the tokenizer and the causal language model below.
model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# dtype and device placement are both delegated to the library ("auto").
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")



class AddDeleteWordAnalyzer:
    """Measures how removing single tokens from a text changes its PHD value.

    Texts are drawn at random, random tokens are removed one at a time, the
    shortened text is re-scored with ``get_phd`` and the change is recorded
    together with the language-model entropy at the removed token's position.

    Args:
        tokenizer: Hugging Face-style tokenizer (callable on a string, with a
            ``decode`` method). Used for splitting texts and for the LM input.
        number_of_texts: Number of texts drawn (with replacement) per call to
            ``delete_word``.
        number_of_words_per_text: Number of token positions drawn (with
            replacement) from each text.
        n_tries: Forwarded to ``get_phd`` as ``n_tries``.
        model: Causal LM used by ``get_entropy_of_text``. When ``None`` the
            module-level ``model`` is looked up at call time, which is what
            this class always did before the parameter existed.
    """

    def __init__(self, tokenizer, number_of_texts=3, number_of_words_per_text=15, n_tries=10, model=None):
        self.tokenizer = tokenizer
        self.number_of_texts = number_of_texts
        self.number_of_words_per_text = number_of_words_per_text
        self.n_tries = n_tries
        self.model = model

    def _resolve_model(self):
        """Return the injected model, falling back to the notebook-level ``model``."""
        if self.model is not None:
            return self.model
        return model

    def delete_word(self, texts):
        """Score the effect of deleting random tokens from random texts.

        Args:
            texts: Non-empty sequence of strings to sample from.

        Returns:
            pandas.DataFrame with a single ``diff_phd`` column, indexed by the
            id of the removed token and sorted by that column. Each cell is the
            tuple ``(true_phd - new_phd, text, (entropy, token_string))``.
            Every measurement is kept, so the index may contain the same token
            id more than once.
        """
        removed_token_ids = []
        measurements = []
        for _ in tqdm(range(self.number_of_texts)):
            text = texts[np.random.choice(len(texts))]

            # Use the tokenizer's own special-token mask instead of a fixed
            # [1:-1] slice: the slice drops real tokens whenever the tokenizer
            # does not add exactly one leading and one trailing special token.
            encoding = self.tokenizer(text, return_special_tokens_mask=True)
            input_ids = list(encoding['input_ids'])
            content_positions = [
                pos
                for pos, is_special in enumerate(encoding['special_tokens_mask'])
                if not is_special
            ]
            if not content_positions:
                # Nothing to delete from a text without content tokens.
                continue
            content_ids = [input_ids[pos] for pos in content_positions]

            df_example = pd.DataFrame({'text': [text]})
            # Baseline and perturbed scores use the same n_tries so that their
            # difference is not dominated by different estimator settings.
            true_phd = get_phd(df_example, n_tries=self.n_tries)[0][0]
            # One entry per position of the default tokenization, i.e. aligned
            # with `input_ids` (special tokens included).
            entropies = self.get_entropy_of_text(text)

            for i in np.random.choice(len(content_ids), size=self.number_of_words_per_text):
                new_text = self.tokenizer.decode(content_ids[:i] + content_ids[i + 1:])
                df_new = pd.DataFrame({'text': [new_text]})
                new_phd = get_phd(df_new, n_tries=self.n_tries)[0][0]
                removed_token_ids.append(content_ids[i])
                # content_positions maps the content index back to the position
                # in the full tokenization that `entropies` is indexed by.
                measurements.append((true_phd - new_phd, text, entropies[content_positions[i]]))

        # Collected as parallel lists rather than a dict keyed by token id, so
        # repeated tokens no longer overwrite earlier measurements.
        df_stats2 = (
            pd.Series(measurements, index=removed_token_ids, dtype=object)
            .to_frame(name='diff_phd')
            .sort_values(by='diff_phd')
        )
        return df_stats2

    def get_entropy_of_text(self, text):
        """Compute the entropy of the LM output distribution at every position.

        Args:
            text: String to run through the language model.

        Returns:
            List of ``(entropy, token_string)`` tuples, one per input token, in
            input order. The entropy is taken over the vocabulary axis of the
            logits produced at that token's position.
        """
        lm = self._resolve_model()
        inputs = self.tokenizer(text, return_tensors="pt")

        # Put the inputs on the model's device when the model exposes one
        # (needed when device_map="auto" places the weights on an accelerator).
        device = getattr(lm, "device", None)
        if device is not None:
            inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            logits = lm(**inputs).logits.float()

        # Normalise over the vocabulary (last) axis, not the sequence axis.
        probs = torch.softmax(logits, dim=-1)
        # entr(p) = -p * ln(p) and is defined as 0 at p == 0, so probabilities
        # that underflow to zero do not turn the sum into NaN.
        raw_entropy = torch.special.entr(probs).sum(dim=-1)

        token_strings = [
            self.tokenizer.decode([token_id])
            for token_id in inputs['input_ids'].reshape(-1).tolist()
        ]
        return list(zip(raw_entropy.reshape(-1).tolist(), token_strings))

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
# Load one training shard and split its generations by author type.
df = pd.read_parquet("data/train-00000-of-00009.parquet")
analyzer = AddDeleteWordAnalyzer(tokenizer)

# Rows whose `model` column equals 'human' are human-written; all others are LLM output.
human_texts = df.loc[df["model"] == "human", "generation"].tolist()
llm_texts = df.loc[df["model"] != "human", "generation"].tolist()

In [None]:
## Is this really i.i.d. after all?
## Or maybe it is because of the spread (variance)?

In [3]:
# PHD for one randomly drawn human-written row, scored on its `generation` text.
human_phd_new = get_phd(
    df[df["model"] == "human"].sample(n=1),
    'generation',
    n_tries=10,
)

  0%|                                                     | 0/1 [00:00<?, ?it/s]

[[1.         0.99968134]
 [0.99968134 1.        ]]
[[1.         0.99992245]
 [0.99992245 1.        ]]
[[1.         0.99972662]
 [0.99972662 1.        ]]
[[1.         0.99991894]
 [0.99991894 1.        ]]
[[1.         0.99993806]
 [0.99993806 1.        ]]
[[1.         0.99988433]
 [0.99988433 1.        ]]
[[1.         0.99992866]
 [0.99992866 1.        ]]
[[1.         0.99989312]
 [0.99989312 1.        ]]
[[1.         0.99990121]
 [0.99990121 1.        ]]
[[1.         0.99968145]
 [0.99968145 1.        ]]
[[1.         0.99992345]
 [0.99992345 1.        ]]
[[1.         0.99981269]
 [0.99981269 1.        ]]
[[1.         0.99979095]
 [0.99979095 1.        ]]
[[1.         0.99976765]
 [0.99976765 1.        ]]
[[1.        0.9999192]
 [0.9999192 1.       ]]
[[1.         0.99966476]
 [0.99966476 1.        ]]
[[1.         0.99993707]
 [0.99993707 1.        ]]
[[1.         0.99974848]
 [0.99974848 1.        ]]
[[1.         0.99983883]
 [0.99983883 1.        ]]
[[1.         0.99995947]
 [0.999959

100%|█████████████████████████████████████████████| 1/1 [00:03<00:00,  3.80s/it]

[[1.         0.99984869]
 [0.99984869 1.        ]]
[[1.        0.9999183]
 [0.9999183 1.       ]]
[[1.         0.99993842]
 [0.99993842 1.        ]]





In [6]:
import matplotlib.pyplot as plt

In [4]:
# PHD for one randomly drawn LLM-generated row, scored on its `generation` text.
# NOTE(review): the result is stored under `human_phd_new` although the row is
# not human-written, and it overwrites the human result computed earlier.
human_phd_new = get_phd(
    df[df["model"] != "human"].sample(n=1),
    'generation',
    n_tries=10,
)

  0%|                                                     | 0/1 [00:00<?, ?it/s]

[[1.         0.99979196]
 [0.99979196 1.        ]]
[[1.         0.99965782]
 [0.99965782 1.        ]]
[[1.        0.9995707]
 [0.9995707 1.       ]]
[[1.         0.99974756]
 [0.99974756 1.        ]]
[[1.        0.9999169]
 [0.9999169 1.       ]]
[[1.         0.99963992]
 [0.99963992 1.        ]]
[[1.         0.99972743]
 [0.99972743 1.        ]]
[[1.         0.99920829]
 [0.99920829 1.        ]]
[[1.        0.9996041]
 [0.9996041 1.       ]]
[[1.         0.99970391]
 [0.99970391 1.        ]]
[[1.         0.99957414]
 [0.99957414 1.        ]]
[[1.         0.99986512]
 [0.99986512 1.        ]]
[[1.         0.99995573]
 [0.99995573 1.        ]]
[[1.        0.9999483]
 [0.9999483 1.       ]]
[[1.         0.99987877]
 [0.99987877 1.        ]]
[[1.         0.99986356]
 [0.99986356 1.        ]]
[[1.         0.99934319]
 [0.99934319 1.        ]]
[[1.         0.99982869]
 [0.99982869 1.        ]]
[[1.         0.99984115]
 [0.99984115 1.        ]]
[[1.         0.99996333]
 [0.99996333 1.       

100%|█████████████████████████████████████████████| 1/1 [00:03<00:00,  3.41s/it]

[[1.         0.99999359]
 [0.99999359 1.        ]]
[[1.         0.99960058]
 [0.99960058 1.        ]]
[[1.         0.99993394]
 [0.99993394 1.        ]]





In [5]:
# Show the loaded frame as the cell's rich output.
df

Unnamed: 0,id,adv_source_id,source_id,model,decoding,repetition_penalty,attack,domain,title,prompt,generation
0,e5e058ce-be2b-459d-af36-32532aaba5ff,e5e058ce-be2b-459d-af36-32532aaba5ff,e5e058ce-be2b-459d-af36-32532aaba5ff,human,,,none,abstracts,FUTURE-AI: Guiding Principles and Consensus Re...,,The recent advancements in artificial intellig...
1,f95b107b-d176-4af5-90f7-4d0bb20caf93,f95b107b-d176-4af5-90f7-4d0bb20caf93,f95b107b-d176-4af5-90f7-4d0bb20caf93,human,,,none,abstracts,EdgeFlow: Achieving Practical Interactive Segm...,,High-quality training data play a key role in ...
2,856d8972-9e3d-4544-babc-0fe16f21e04d,856d8972-9e3d-4544-babc-0fe16f21e04d,856d8972-9e3d-4544-babc-0fe16f21e04d,human,,,none,abstracts,Semi-supervised Contrastive Learning for Label...,,The success of deep learning methods in medica...
3,fbc8a5ea-90fa-47b8-8fa7-73dd954f1524,fbc8a5ea-90fa-47b8-8fa7-73dd954f1524,fbc8a5ea-90fa-47b8-8fa7-73dd954f1524,human,,,none,abstracts,Combo Loss: Handling Input and Output Imbalanc...,,Simultaneous segmentation of multiple organs f...
4,72c41b8d-0069-4886-b734-a4000ffca286,72c41b8d-0069-4886-b734-a4000ffca286,72c41b8d-0069-4886-b734-a4000ffca286,human,,,none,abstracts,Attention-Based 3D Seismic Fault Segmentation ...,,Detection faults in seismic data is a crucial ...
...,...,...,...,...,...,...,...,...,...,...,...
434688,ff6fc5a1-cf57-4a2d-aabd-1c05ac18efd1,94ddbf2c-2ef9-46d1-9b95-65f3ee29d1a5,7d6a4f88-0682-4dd6-a4d5-931495e6636c,llama-chat,greedy,no,upper_lower,books,The Hydrogen Sonata,Write the body of a plot summary for a novel t...,"In a Distant future, humanity has colonized th..."
434689,91d3f099-8aec-4911-8f4b-02d025a48db7,1b337f85-6862-4ba7-a409-c01ed4328e23,7d6a4f88-0682-4dd6-a4d5-931495e6636c,llama-chat,sampling,yes,upper_lower,books,The Hydrogen Sonata,Write the body of a plot summary for a novel t...,"In a distant future, humanity has colonized th..."
434690,8f7193e2-e087-415b-9348-36fb6e1060b8,d628b6ca-3659-49bb-a1a7-73bedacd6abb,4964cb21-cf76-49ca-94d3-f2e653e3e552,llama-chat,greedy,no,upper_lower,books,Vampirates:Black Heart,Write the body of a plot summary for a novel t...,"In the dark waters of the Caribbean, a legenda..."
434691,620feb1d-9fa2-4102-bfaa-01be48914dba,b9f45522-f1d9-434e-bf77-dde344e59897,4964cb21-cf76-49ca-94d3-f2e653e3e552,llama-chat,sampling,yes,upper_lower,books,Vampirates:Black Heart,Write the body of a plot summary for a novel t...,In a world where vampires and pirates roam The...


In [9]:
## Concatenate each LLM text with a different, randomly chosen LLM text:
n_samples = 10
# Take an explicit copy so that the column added below is written to an
# independent frame rather than to a slice of the filtered data (this is what
# triggers pandas' SettingWithCopyWarning).
# NOTE(review): this rebinds `df` to the LLM-only subset; cells that need the
# full dataset (e.g. human rows) must run before this one or reload the data.
df = df.query("model != 'human'").iloc[:n_samples, :].copy()

# The subset can hold fewer than `n_samples` rows, so size the loop by what is
# actually there; pairing also needs at least two rows to pick a partner from.
n_rows = len(df)
if n_rows < 2:
    raise ValueError(f"need at least 2 LLM texts to build pairs, got {n_rows}")

generations = df['generation'].tolist()
new_llm_list = []
for i in range(n_rows):
    # Pick the partner uniformly among the other n_rows - 1 rows: draw an
    # offset and step over position i, so no retry loop is required.
    idx = np.random.choice(n_rows - 1)
    if idx >= i:
        idx += 1
    # NOTE(review): the two texts are joined without any separator, so the last
    # word of the first runs into the first word of the second — confirm this
    # is intended.
    new_llm = generations[i] + generations[idx]
    new_llm_list.append(new_llm)

df['union_llm_completion'] = new_llm_list

In [13]:
# PHD for one randomly drawn row, scored on the concatenated LLM text.
# NOTE(review): stored under `human_phd_new` although the text is LLM-generated.
human_phd_new = get_phd(
    df.sample(n=1),
    'union_llm_completion',
    n_tries=10,
)

  0%|                                                     | 0/1 [00:00<?, ?it/s]

[[1.         0.99956794]
 [0.99956794 1.        ]]
[[1.         0.99991912]
 [0.99991912 1.        ]]
[[1.         0.99962587]
 [0.99962587 1.        ]]
[[1.         0.99984098]
 [0.99984098 1.        ]]
[[1.         0.99978774]
 [0.99978774 1.        ]]
[[1.         0.99995878]
 [0.99995878 1.        ]]
[[1.         0.99985104]
 [0.99985104 1.        ]]
[[1.         0.99985967]
 [0.99985967 1.        ]]
[[1.         0.99983834]
 [0.99983834 1.        ]]
[[1.         0.99992696]
 [0.99992696 1.        ]]
[[1.         0.99994163]
 [0.99994163 1.        ]]
[[1.         0.99992045]
 [0.99992045 1.        ]]
[[1.         0.99994037]
 [0.99994037 1.        ]]
[[1.         0.99993104]
 [0.99993104 1.        ]]
[[1.         0.99979825]
 [0.99979825 1.        ]]
[[1.         0.99984985]
 [0.99984985 1.        ]]
[[1.         0.99987037]
 [0.99987037 1.        ]]
[[1.         0.99991537]
 [0.99991537 1.        ]]
[[1.         0.99965354]
 [0.99965354 1.        ]]
[[1.         0.99988325]
 [0.99

100%|█████████████████████████████████████████████| 1/1 [00:05<00:00,  5.90s/it]

[[1.         0.99981415]
 [0.99981415 1.        ]]
[[1.         0.99991071]
 [0.99991071 1.        ]]
[[1.         0.99979113]
 [0.99979113 1.        ]]





In [12]:
# Show the LLM-only subset together with its `union_llm_completion` column.
df

Unnamed: 0,id,adv_source_id,source_id,model,decoding,repetition_penalty,attack,domain,title,prompt,generation,union_llm_completion
367,2bd98bd7-3356-43bf-8c5d-69ef336d0536,2bd98bd7-3356-43bf-8c5d-69ef336d0536,e5e058ce-be2b-459d-af36-32532aaba5ff,llama-chat,greedy,no,none,abstracts,FUTURE-AI: Guiding Principles and Consensus Re...,Write the abstract for the academic paper titl...,"In the paper ""FUTURE-AI: Guiding Principles an...","In the paper ""FUTURE-AI: Guiding Principles an..."
368,e8bdc461-3ff2-4d68-8c7b-cdbc086f62b3,e8bdc461-3ff2-4d68-8c7b-cdbc086f62b3,e5e058ce-be2b-459d-af36-32532aaba5ff,llama-chat,sampling,yes,none,abstracts,FUTURE-AI: Guiding Principles and Consensus Re...,Write the abstract for the academic paper titl...,"In the paper ""Future-AI: Guiding Principles an...","In the paper ""Future-AI: Guiding Principles an..."
369,ee968d29-ce73-4c5d-804d-0a0efec4bea4,ee968d29-ce73-4c5d-804d-0a0efec4bea4,f95b107b-d176-4af5-90f7-4d0bb20caf93,llama-chat,greedy,no,none,abstracts,EdgeFlow: Achieving Practical Interactive Segm...,Write the abstract for the academic paper titl...,"In this paper, we present EdgeFlow, a novel ap...","In this paper, we present EdgeFlow, a novel ap..."
370,3d24eb90-f540-490f-81c8-e4a24fd49ad7,3d24eb90-f540-490f-81c8-e4a24fd49ad7,f95b107b-d176-4af5-90f7-4d0bb20caf93,llama-chat,sampling,yes,none,abstracts,EdgeFlow: Achieving Practical Interactive Segm...,Write the abstract for the academic paper titl...,"In this paper, we present a novel approach to ...","In this paper, we present a novel approach to ..."
371,7389e65b-2e27-4b90-999a-53e28b773315,7389e65b-2e27-4b90-999a-53e28b773315,856d8972-9e3d-4544-babc-0fe16f21e04d,llama-chat,sampling,yes,none,abstracts,Semi-supervised Contrastive Learning for Label...,Write the abstract for the academic paper titl...,"In this paper, we propose a novel approach to ...","In this paper, we propose a novel approach to ..."
372,8b79a378-67db-48e8-8950-4d3215cfef16,8b79a378-67db-48e8-8950-4d3215cfef16,fbc8a5ea-90fa-47b8-8fa7-73dd954f1524,llama-chat,greedy,no,none,abstracts,Combo Loss: Handling Input and Output Imbalanc...,Write the abstract for the academic paper titl...,"In the field of medical image segmentation, im...","In the field of medical image segmentation, im..."
373,16742a95-7417-4bca-96c8-cee548681a9b,16742a95-7417-4bca-96c8-cee548681a9b,fbc8a5ea-90fa-47b8-8fa7-73dd954f1524,llama-chat,sampling,yes,none,abstracts,Combo Loss: Handling Input and Output Imbalanc...,Write the abstract for the academic paper titl...,"In medical image segmentation, imbalanced inpu...","In medical image segmentation, imbalanced inpu..."
374,8c2ca078-bc3f-46fa-9bf4-6fe1d72226fe,8c2ca078-bc3f-46fa-9bf4-6fe1d72226fe,72c41b8d-0069-4886-b734-a4000ffca286,llama-chat,greedy,no,none,abstracts,Attention-Based 3D Seismic Fault Segmentation ...,Write the abstract for the academic paper titl...,"In this paper, we propose a novel approach for...","In this paper, we propose a novel approach for..."
375,0a0495a4-3c8e-4d29-9cfa-cf8644f2d895,0a0495a4-3c8e-4d29-9cfa-cf8644f2d895,72c41b8d-0069-4886-b734-a4000ffca286,llama-chat,sampling,yes,none,abstracts,Attention-Based 3D Seismic Fault Segmentation ...,Write the abstract for the academic paper titl...,"In this paper, we present a novel approach for...","In this paper, we present a novel approach for..."
376,260ac39c-fa06-4e46-a110-fcbc5acff930,260ac39c-fa06-4e46-a110-fcbc5acff930,72fe360b-cce6-4daf-b66a-1d778f5964f8,llama-chat,greedy,no,none,abstracts,Segmenter: Transformer for Semantic Segmentation,Write the abstract for the academic paper titl...,"In this paper, we present Segmenter, a novel a...","In this paper, we present Segmenter, a novel a..."
