In [4]:
import pickle
import os

from neural_nlp.benchmarks.neural import *
from neural_nlp.models import *
import neural_nlp
from neural_nlp.models.implementations import *
from neural_nlp.stimuli import StimulusSet
import xarray as xr

# Registry of benchmarks keyed by identifier. Each value is a LazyLoad wrapper
# around a zero-argument factory (presumably so the benchmark is only built on
# first use -- confirm against LazyLoad's implementation).
# The lambda's default arguments freeze the current `name`/`build` pair for each
# entry; without them every factory would see the last pair of the loop.
benchmark_pool = {
    name: LazyLoad(lambda name=name, build=build: build(identifier=name))
    for name, build in (
        # primary benchmarks
        ('Pereira2018-encoding', PereiraEncoding),
        ('Fedorenko2016v3-encoding', Fedorenko2016V3Encoding),
        ('Blank2014fROI-encoding', Blank2014fROIEncoding),
        # secondary benchmarks
        ('Pereira2018-rdm', PereiraRDM),
        ('Fedorenko2016v3-rdm', Fedorenko2016V3RDM),
        ('Fedorenko2016v3nonlang-encoding', Fedorenko2016V3NonLangEncoding),
        ('Blank2014fROI-rdm', Blank2014fROIRDM),
    )
}

# Fetch the stimulus set of the Pereira2018 encoding benchmark: it is stored
# under the 'stimulus_set' key of the target assembly's attrs.
# NOTE: `_target_assembly` is a private attribute (leading underscore), so this
# lookup may break if the benchmark class changes its internals.
benchmark_test = benchmark_pool['Pereira2018-encoding']
target_assembly = benchmark_test._target_assembly
stimuli_df = target_assembly.attrs['stimulus_set']
# Last expression of the cell -> rendered as the cell output.
stimuli_df

Unnamed: 0,sentence,sentence_num,stimulus_id,experiment,story,passage_index,passage_label,passage_category
0,Beekeeping encourages the conservation of loca...,0,243sentences.0,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
1,It is in every beekeeper's interest to conserv...,1,243sentences.1,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
2,"As a passive form of agriculture, it does not ...",2,243sentences.2,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
3,Beekeepers also discourage the use of pesticid...,3,243sentences.3,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
4,Artisanal beekeepers go to extremes for their ...,4,243sentences.4,243sentences,243sentences.beekeeping,2,beekeeping,beekeeping
...,...,...,...,...,...,...,...,...
622,Some windows have multiple panes to increase i...,379,384sentences.379,384sentences,384sentences.building_part,95,Window,building_part
623,A woman is a female human adult.,380,384sentences.380,384sentences,384sentences.human,96,Woman,human
624,A woman is stereotypically seen as a caregiver.,381,384sentences.381,384sentences,384sentences.human,96,Woman,human
625,A woman can become pregnant and bear children.,382,384sentences.382,384sentences,384sentences.human,96,Woman,human


In [5]:
# Load the perturbed (within-sentence scrambled, low-PMI condition) stimuli.
# The path is relative, so the pickle must sit in the notebook's working
# directory. NOTE: unpickling executes arbitrary code -- only load trusted files.
with open("stimuli_within_sent_random_lowPMI.pkl", "rb") as fin:
    random_lowPMI_df = pickle.load(file=fin)
# Last expression of the cell -> rendered as the cell output.
random_lowPMI_df

Unnamed: 0,stimulus_id,condition,stimulus
0,243sentences.0,within_sent_random_lowPMI,habitats conservation encourages the beekeepin...
12,243sentences.1,within_sent_random_lowPMI,to produce interest it plants every pollen is ...
27,243sentences.2,within_sent_random_lowPMI,that crops agriculture a of for form not way t...
30,243sentences.3,within_sent_random_lowPMI,kill also beekeepers the discourage of use cro...
47,243sentences.4,within_sent_random_lowPMI,product is go for artisanal but the to craft b...
...,...,...,...
6221,384sentences.379,within_sent_random_lowPMI,multiple to inside windows some have insulatio...
6234,384sentences.380,within_sent_random_lowPMI,female a a is human adult woman
6246,384sentences.381,within_sent_random_lowPMI,stereotypically caregiver as is woman a seen a
6253,384sentences.382,within_sent_random_lowPMI,a pregnant and children can become bear woman


In [6]:
# Pull the scrambled sentences out of the 'stimulus' column as a plain Python
# list, preserving the row order of random_lowPMI_df.
random_lowPMI_stimuli = random_lowPMI_df['stimulus'].tolist()
# Last expression of the cell -> rendered as the cell output.
random_lowPMI_stimuli

['habitats conservation encourages the beekeeping of local',
 "to produce interest it plants every pollen is local conserve in that beekeeper's",
 'that crops agriculture a of for form not way to vegetation native cleared make does as require be passive it',
 'kill also beekeepers the discourage of use crops the on they pesticides could honeybees because',
 'product is go for artisanal but the to craft beekeepers extremes effort their their worth',
 'artisanal and and over honey-making quality emphasizes quantity character consistency',
 'micromanagers become produce of honey to finest the their honeybees beekeepers',
 'select the best extract and flows to when scout ways know honey nectar fields they the',
 'hive fills as of the 40000 the beekeeper opens air bees deep the the hum',
 'pollen honey nursery supplies beekeeper bee and checks the stores the',
 "and across they're arms sting hands they but because bees don't bare his crawl gentle",
 "i'm no about college have longer dream t

In [7]:
# Build the perturbed stimulus set: a copy of the original stimuli frame in which
# every 'sentence' is replaced by its scrambled counterpart, then pickle it.
#
# The replacement below is purely positional (row i of the scrambled list goes
# to row i of the copy). Guard against silent misalignment by checking that the
# scrambled frame lists exactly the same stimulus_ids, in the same order, as the
# original stimuli, and that the sentence list has one entry per stimulus.
original_ids = list(stimuli_df['stimulus_id'])
scrambled_ids = list(random_lowPMI_df['stimulus_id'])
if scrambled_ids != original_ids:
    raise ValueError(
        "stimulus_id order/content of random_lowPMI_df does not match stimuli_df "
        f"({len(scrambled_ids)} vs. {len(original_ids)} rows); "
        "refusing to overwrite sentences positionally"
    )
if len(random_lowPMI_stimuli) != len(original_ids):
    raise ValueError(
        f"expected {len(original_ids)} scrambled sentences, "
        f"got {len(random_lowPMI_stimuli)}"
    )

# Work on a copy so the original stimuli_df stays untouched.
perturbed_df = stimuli_df.copy()
perturbed_df["sentence"] = random_lowPMI_stimuli

# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
savedir = '/om/user/ckauf/neural-nlp/ressources/scrambled-stimuli-dfs'
fname = 'stimuli_random_lowPMI.pkl'
# Create the output directory if needed so open() does not fail with
# FileNotFoundError on a fresh checkout.
os.makedirs(savedir, exist_ok=True)
with open(os.path.join(savedir, fname), 'wb') as fout:
    pickle.dump(perturbed_df, fout)
# Last expression of the cell -> rendered as the cell output.
perturbed_df

Unnamed: 0,sentence,sentence_num,stimulus_id,experiment,story,passage_index,passage_label,passage_category
0,habitats conservation encourages the beekeepin...,0,243sentences.0,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
1,to produce interest it plants every pollen is ...,1,243sentences.1,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
2,that crops agriculture a of for form not way t...,2,243sentences.2,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
3,kill also beekeepers the discourage of use cro...,3,243sentences.3,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
4,product is go for artisanal but the to craft b...,4,243sentences.4,243sentences,243sentences.beekeeping,2,beekeeping,beekeeping
...,...,...,...,...,...,...,...,...
622,multiple to inside windows some have insulatio...,379,384sentences.379,384sentences,384sentences.building_part,95,Window,building_part
623,female a a is human adult woman,380,384sentences.380,384sentences,384sentences.human,96,Woman,human
624,stereotypically caregiver as is woman a seen a,381,384sentences.381,384sentences,384sentences.human,96,Woman,human
625,a pregnant and children can become bear woman,382,384sentences.382,384sentences,384sentences.human,96,Woman,human
