# Script for creating word-order manipulation datasets from the Pereira2018 fMRI stimuli

In [1]:
import re
from pathlib import Path
from os.path import abspath
import os
import numpy as np
import random
import pickle
import csv
import subprocess

In [2]:
# Show the directory the kernel starts in; the relative '../..' hop in the
# next cell is resolved against it.
print(Path.cwd())

/rdma/vast-rdma/vast/cpl/ckauf/perturbed-neural-nlp/ressources/stimuli_creation


In [3]:
# Resolve the directory two levels above the current one and make it the
# working directory (presumably the repository root, so that `neural_nlp`
# can be imported -- verify when starting the kernel from another location).
importpath = abspath(os.path.join(os.pardir, os.pardir))
os.chdir(importpath)
print(os.getcwd())

/rdma/vast-rdma/vast/cpl/ckauf/perturbed-neural-nlp


In [4]:
# Fix the seeds of both global random number generators (the stdlib `random`
# module and NumPy's legacy global generator) for reproducibility.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Load base stimulus dataframe (Pereira 2018)

In [5]:
from neural_nlp.benchmarks.neural import *
import neural_nlp
from neural_nlp.stimuli import StimulusSet
import xarray as xr

# Benchmarks to expose, as (identifier, constructor) pairs.
benchmark_specs = [
    # primary benchmarks
    ('Pereira2018-encoding', PereiraEncoding),
]


def _lazy_benchmark(identifier, ctr):
    """Wrap the call `ctr(identifier=identifier)` in a `LazyLoad`.

    Presumably `LazyLoad` defers running the wrapped callable until the
    benchmark is first accessed -- confirm against its implementation.
    """
    return LazyLoad(lambda: ctr(identifier=identifier))


# Map each identifier to its lazily constructed benchmark.
benchmark_pool = {name: _lazy_benchmark(name, ctr) for name, ctr in benchmark_specs}

# fetch stimulus set
benchmark = benchmark_pool['Pereira2018-encoding']
stimuli_df = benchmark._target_assembly.attrs['stimulus_set']
stimuli_df

Loading lookup from /om2/user/ckauf/anaconda/envs/perturbedenv/lib/python3.6/site-packages/brainio_collection/lookup.csv
/rdma/vast-rdma/vast/cpl/ckauf/perturbed-neural-nlp/neural_nlp/../ressources/stimuli


 We're running in the NEW version of the implementations.py script.




  xr_data.set_index(append=True, inplace=True, **coords_d)


Unnamed: 0,sentence,sentence_num,stimulus_id,experiment,story,passage_index,passage_label,passage_category
0,Beekeeping encourages the conservation of loca...,0,243sentences.0,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
1,It is in every beekeeper's interest to conserv...,1,243sentences.1,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
2,"As a passive form of agriculture, it does not ...",2,243sentences.2,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
3,Beekeepers also discourage the use of pesticid...,3,243sentences.3,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
4,Artisanal beekeepers go to extremes for their ...,4,243sentences.4,243sentences,243sentences.beekeeping,2,beekeeping,beekeeping
...,...,...,...,...,...,...,...,...
622,Some windows have multiple panes to increase i...,379,384sentences.379,384sentences,384sentences.building_part,95,Window,building_part
623,A woman is a female human adult.,380,384sentences.380,384sentences,384sentences.human,96,Woman,human
624,A woman is stereotypically seen as a caregiver.,381,384sentences.381,384sentences,384sentences.human,96,Woman,human
625,A woman can become pregnant and bear children.,382,384sentences.382,384sentences,384sentences.human,96,Woman,human


In [6]:
# benchmark content
benchmark._target_assembly.values.shape
benchmark._target_assembly.coords

Coordinates:
  * presentation      (presentation) MultiIndex
  - stimulus_num      (presentation) int64 0 0 1 1 2 2 3 ... 12 12 13 13 14 14
  - passage_index     (presentation) int64 1 1 1 1 1 1 1 1 2 ... 3 4 4 4 4 4 4 4
  - passage_label     (presentation) object 'Accordion' ... 'dreams'
  - passage_category  (presentation) object 'music' 'beekeeping' ... 'dreams'
  - stimulus_id       (presentation) object '384sentences.0' ... '243sentences.14'
  - story             (presentation) object '384sentences.music' ... '243sentences.dreams'
  - experiment        (presentation) object '384sentences' ... '243sentences'
  * neuroid           (neuroid) MultiIndex
  - subject           (neuroid) object '018' '018' '018' ... '018' '018' '018'
  - voxel_num         (neuroid) int64 28 29 31 32 38 42 ... 152 153 154 159 160
  - atlas             (neuroid) object 'language' 'language' ... 'language'
  - filter_strategy   (neuroid) object '' '' '' '' '' '' ... '' '' '' '' '' ''
  - atlas_selection   (

# Create different perturbed versions of the benchmark

In [7]:
# Build the path from `importpath` (the directory the earlier setup cell
# changed into) rather than from the current working directory. This cell
# itself changes the working directory, so deriving the path from os.getcwd()
# would make a second run look for 'ressources/stimuli_creation' inside
# itself and fail in os.chdir.
stimuli_path = os.path.join(importpath, 'ressources/stimuli_creation')
os.chdir(stimuli_path)
print(os.getcwd())

# Output folder for the scrambled stimulus dataframes: a sibling directory of
# the (now current) stimuli_creation folder. Created if it does not exist yet.
savedir = abspath('../scrambled_stimuli_dfs')
print(savedir)
os.makedirs(savedir, exist_ok=True)

/rdma/vast-rdma/vast/cpl/ckauf/perturbed-neural-nlp/ressources/stimuli_creation
/rdma/vast-rdma/vast/cpl/ckauf/perturbed-neural-nlp/ressources/scrambled_stimuli_dfs


# Scrambling | Mollica et al. (2020)

## 1. Create stable PMI scrambled dataset versions (Mollica et al. 2020)

**Note:** The cell below calls a Python script (`permute_sentences.py`) whose input stimulus files are hardcoded in the script.

In [8]:
# Run permute_sentences.py through the shell; its printed output (numbered
# permutations of each sentence) is shown as this cell's output.
# The script runs in a separate process, so the seeds set in this notebook do
# not apply to it -- TODO confirm the script seeds its own RNG.
# NOTE(review): `python` is resolved via the shell's PATH, which may not be the
# kernel's interpreter, and a non-zero exit status does not stop the notebook
# -- confirm this is acceptable.
! python permute_sentences.py

stim_243sentences
0	beekeeping encourages the conservation of local habitats
1	beekeeping encourages the conservation of habitats local
2	encourages beekeeping the conservation of habitats local
3	beekeeping conservation encourages the of habitats local
4	conservation beekeeping encourages the of habitats local
5	conservation beekeeping encourages the habitats of local
6	conservation encourages beekeeping of the habitats local
7	beekeeping conservation of encourages habitats local the
0	it is in every beekeeper's interest to conserve local plants that produce pollen
1	it is in every beekeeper's interest to conserve local plants produce that pollen
2	it in is every beekeeper's interest to conserve local plants produce that pollen
3	in it is every beekeeper's interest to conserve local plants produce that pollen
4	in it is every beekeeper's interest to conserve local plants produce pollen that
5	in it is every interest beekeeper's to conserve local plants produce pollen that
6	in it is i

2	even i've never done any of the reading assignments
3	even i've never done of any the reading assignments
4	even i've never of done any the reading assignments
5	even done i've never of any the reading assignments
6	even done i've never of any reading the assignments
7	even done i've of never any reading the assignments
0	i know many other people who have the same nightmare
1	i many know other people who have the same nightmare
2	many i know other people who have the same nightmare
3	many i know other who people have the same nightmare
4	many i know who other people have the same nightmare
5	many other i know who people have the same nightmare
6	many other i know who people have the nightmare same
7	many other i know who people the have nightmare same
0	each morning participants in the study had to write down their dream experience from the previous night
1	each morning participants the in study had to write down their dream experience from the previous night
2	morning each participa

7	on the good data and social economic effects of legalized gambling hard are to by come
0	some studies indicate that having a casino nearby makes gambling problems more likely
1	some studies indicate that having a casino nearby makes gambling more problems likely
2	some indicate studies that having a casino nearby makes gambling more problems likely
3	indicate some studies that having a casino nearby makes gambling more problems likely
4	indicate some studies that having a casino nearby makes gambling more likely problems
5	indicate some studies that a having casino nearby makes gambling more likely problems
6	indicate some studies a that having casino nearby makes gambling more likely problems
7	indicate that some studies a having casino nearby gambling makes problems likely more
0	gambling may also be associated with personal bankruptcies and marriage problems
1	gambling also may be associated with personal bankruptcies and marriage problems
2	also gambling may be associated with pe

4	storms the strongest deposited layers of windblown beach sand at bottoms of the coastal lakes
5	storms the strongest deposited of layers windblown beach sand at bottoms of the coastal lakes
6	storms the strongest of deposited layers windblown beach sand at bottoms of the coastal lakes
7	storms deposited the strongest of layers windblown beach at sand the of bottoms coastal lakes
0	trees drenched by passing hurricanes preserve a record of each storm in their growth rings
1	trees drenched by passing hurricanes preserve a record of each in storm their growth rings
2	trees by drenched passing hurricanes preserve a record of each in storm their growth rings
3	by trees drenched passing hurricanes preserve a record of each in storm their growth rings
4	by trees drenched passing hurricanes preserve a record of each in their storm growth rings
5	by trees drenched passing preserve hurricanes a record of each in their storm growth rings
6	by trees drenched preserve passing hurricanes a record o

7	blades underneath the sharp lawn the mower precisely each sever blade grass of
0	mowing the lawn is a long task and it has taught me patience
1	mowing the lawn is a long task and it has me taught patience
2	mowing lawn the is a long task and it has me taught patience
3	lawn mowing the is a long task and it has me taught patience
4	lawn mowing the is a long task and it has me patience taught
5	lawn mowing the is long a task and it has me patience taught
6	lawn mowing the long is a task and it has me patience taught
7	lawn is mowing the long a task and has it taught patience me
0	a lawn mower is a machine with revolving blades that cut grass to an even height
1	a lawn mower is a machine with revolving blades that grass cut to an even height
2	a mower lawn is a machine with revolving blades that grass cut to an even height
3	mower a lawn is a machine with revolving blades that grass cut to an even height
4	mower a lawn is a machine with revolving blades that grass to cut an even height


2	the of team astronauts floated out together to the exterior the of space shuttle
3	of the team astronauts floated out together to the exterior the of space shuttle
4	of the team astronauts floated out together to the exterior the space of shuttle
5	of the team astronauts out floated together to the exterior the space of shuttle
6	of the team out astronauts floated together to the exterior the space of shuttle
7	of astronauts the team out floated together to exterior the of space the shuttle
0	they carried tools needed to repair the broken part on the huge telescope
1	they carried tools needed to repair the broken part on huge the telescope
2	they tools carried needed to repair the broken part on huge the telescope
3	tools they carried needed to repair the broken part on huge the telescope
4	tools they carried needed to repair the broken part on huge telescope the
5	tools they carried needed repair to the broken part on huge telescope the
6	tools they carried repair needed to the brok

5	field as the of graphics computer broadens we must ask graphics actually if help students
6	field as the graphics of computer broadens we must ask graphics actually if help students
7	field of as the graphics computer broadens we ask must if actually graphics help students
0	research does suggest that vivid and engaging digital imagery can be effective in the classroom
1	research does suggest that vivid and engaging digital imagery can effective be in the classroom
2	research suggest does that vivid and engaging digital imagery can effective be in the classroom
3	suggest research does that vivid and engaging digital imagery can effective be in the classroom
4	suggest research does that vivid and engaging digital imagery can effective in be the classroom
5	suggest research does that and vivid engaging digital imagery can effective in be the classroom
6	suggest research does and that vivid engaging digital imagery can effective in be the classroom
7	suggest that research does and vivid

5	often a pharmacists are source good of information about public health
6	often a pharmacists are source good of information public about health
7	often pharmacists a are source good information of public health about
0	doctors consult with pharmacists because they know a lot about drug interactions and side effects
1	doctors consult with pharmacists because they know a lot about interactions drug and side effects
2	doctors with consult pharmacists because they know a lot about interactions drug and side effects
3	with doctors consult pharmacists because they know a lot about interactions drug and side effects
4	with doctors consult pharmacists because they know a lot about interactions and drug side effects
5	with doctors consult pharmacists they because know a lot about interactions and drug side effects
6	with doctors consult they pharmacists because know a lot about interactions and drug side effects
7	with pharmacists doctors consult they because know a about lot drug and interac

7	it's your remember that to reaction stress that determines if it be will harmful beneficial or
0	when my company was downsizing i felt incredibly stressed and worried
1	when company my was downsizing i felt incredibly stressed and worried
2	company when my was downsizing i felt incredibly stressed and worried
3	company when my was i downsizing felt incredibly stressed and worried
4	company when my i was downsizing felt incredibly stressed and worried
5	company was when my i downsizing felt incredibly stressed and worried
6	company was when my i downsizing felt incredibly and stressed worried
7	company when was my i downsizing incredibly felt and worried stressed
0	my heart would race for no reason and i even felt dizzy at times
1	my heart would race for no reason and i even dizzy felt at times
2	my would heart race for no reason and i even dizzy felt at times
3	would my heart race for no reason and i even dizzy felt at times
4	would my heart race for no reason and i even dizzy at fel

7	was caused her blindness a by genetic condition slowly that damaged eyes the
0	she told interviewers that being blind was an advantage to a pianist
1	she told interviewers that being blind was an advantage to pianist a
2	she interviewers told that being blind was an advantage to pianist a
3	interviewers she told that being blind was an advantage to pianist a
4	interviewers she told that blind being was an advantage to pianist a
5	interviewers she told blind that being was an advantage to pianist a
6	interviewers that she told blind being was an advantage to pianist a
7	interviewers she that told blind being an was to a advantage pianist
0	she had no visual distractions so she could concentrate more fully on her music
1	she had no visual distractions so she could concentrate more on fully her music
2	she no had visual distractions so she could concentrate more on fully her music
3	no she had visual distractions so she could concentrate more on fully her music
4	no she had visual distr

7	is that on a bud taste the tip the of tongue would respond only if you were something eating sweet
0	scientists now think that a single taste bud can be sensitive to all of the basic tastes
1	scientists now think a that single taste bud can be sensitive to all of the basic tastes
2	now scientists think a that single taste bud can be sensitive to all of the basic tastes
3	now scientists think a that single taste bud be can sensitive to all of the basic tastes
4	now scientists think a that single taste be bud can sensitive to all of the basic tastes
5	now scientists think single a that taste bud be can sensitive to all of the basic tastes
6	now scientists single think a that taste bud be can sensitive to all of the basic tastes
7	now scientists single think a that taste bud be can sensitive to all the of basic tastes
0	my sense of taste isn't very good so i tend to eat foods with very strong flavors
1	my sense of isn't taste very good so i tend to eat foods with very strong flavors
2	s

7	open in the or fractures compound broken bone punctured has the skin and an produced open wound
0	new ear piercings can become infected if they're not carefully cleaned
1	new piercings ear can become infected if they're not carefully cleaned
2	piercings new ear can become infected if they're not carefully cleaned
3	piercings new ear can infected become if they're not carefully cleaned
4	piercings new ear infected can become if they're not carefully cleaned
5	piercings can new ear infected become if they're not carefully cleaned
6	piercings can new ear infected become if they're carefully not cleaned
7	piercings new can ear infected become they're if carefully cleaned not
0	signs of infection include tenderness swelling or redness around the earlobe
1	signs infection of include tenderness swelling or redness around the earlobe
2	infection signs of include tenderness swelling or redness around the earlobe
3	infection signs of include swelling tenderness or redness around the earlobe
4	

1	opera is an form art in which singers and musicians perform a play that combines words and music
2	is opera an form art in which singers and musicians perform a play that combines words and music
3	is opera an form art in which singers musicians and perform a play that combines words and music
4	is opera an form art in which musicians singers and perform a play that combines words and music
5	is opera an in form art which singers musicians and perform a play that combines words and music
6	is opera in an form art which singers musicians and perform a play that combines words and music
7	is opera in an form art which singers musicians and perform a play combines that words and music
0	it incorporates many of the elements of spoken theatre such as acting scenery and costumes
1	it incorporates many of the elements of spoken theatre such acting as scenery and costumes
2	it many incorporates of the elements of spoken theatre such acting as scenery and costumes
3	many it incorporates of th

4	signed they their decided work on its imagery subject and and established a personal relationship with their patrons
5	signed they their on decided work its subject imagery and and established a personal relationship with their patrons
6	signed they on their decided work its subject imagery and and established a personal relationship with their patrons
7	signed they on their decided work its subject imagery and and established a relationship personal with their patrons
0	as a painter i learned to focus less on the actual scene and more on the painting itself
1	as a painter learned i to focus less on the actual scene and more on the painting itself
2	a as painter learned i to focus less on the actual scene and more on the painting itself
3	a as painter learned i to focus less the on actual scene and more on the painting itself
4	a as painter learned i to focus the less on actual scene and more on the painting itself
5	a as painter to learned i focus less the on actual scene and more o

3	worse it's even when you are submerged in the frigid directly waters beneath the huge animal
4	worse it's even when you are submerged in the frigid directly beneath waters the huge animal
5	worse it's even when are you submerged in the frigid directly beneath waters the huge animal
6	worse it's even are when you submerged in the frigid directly beneath waters the huge animal
7	worse when it's even are you submerged in the frigid waters beneath directly the animal huge
0	overhead giant paws set down on the ice as the polar bear pursued its prey
1	overhead giant paws set down on the ice as the bear polar pursued its prey
2	overhead paws giant set down on the ice as the bear polar pursued its prey
3	paws overhead giant set down on the ice as the bear polar pursued its prey
4	paws overhead giant set down on the ice as the bear pursued polar its prey
5	paws overhead giant set on down the ice as the bear pursued polar its prey
6	paws overhead giant on set down the ice as the bear pursued p

6	in spectacular castles provide dramatic locations a record of the history of stormy many regions
7	in dramatic spectacular castles provide locations a record the of stormy of history many regions
0	today they stand atop rocky cliffs overlooking beautiful landscapes and quaint villages
1	today they stand atop rocky cliffs overlooking beautiful landscapes and villages quaint
2	today stand they atop rocky cliffs overlooking beautiful landscapes and villages quaint
3	stand today they atop rocky cliffs overlooking beautiful landscapes and villages quaint
4	stand today they atop cliffs rocky overlooking beautiful landscapes and villages quaint
5	stand today they cliffs atop rocky overlooking beautiful landscapes and villages quaint
6	stand atop today they cliffs rocky overlooking beautiful landscapes and villages quaint
7	stand today atop they cliffs rocky beautiful overlooking and quaint landscapes villages
0	in the past they were home to battles plots and intrigue
1	in past the they were

1	others speculate that ramps zigzagged up the exterior walls of pyramid the
2	others that speculate ramps zigzagged up the exterior walls of pyramid the
3	that others speculate ramps zigzagged up the exterior walls of pyramid the
4	that others speculate ramps up zigzagged the exterior walls of pyramid the
5	that others speculate up ramps zigzagged the exterior walls of pyramid the
6	that ramps others speculate up zigzagged the exterior walls of pyramid the
7	that others ramps speculate up zigzagged exterior the of the walls pyramid
0	even beginner rock climbing takes courage and strength
1	even beginner rock climbing takes and courage strength
2	beginner even rock climbing takes and courage strength
3	even climbing beginner rock takes and courage strength
4	climbing even beginner rock takes and courage strength
5	climbing even beginner rock and takes courage strength
6	climbing beginner even takes rock and courage strength
7	even climbing takes beginner and courage rock strength
0	cli

4	of proper use ski poles improves their balance and adds to their flair skiing
5	of proper use ski improves poles their balance and adds to their flair skiing
6	of proper use improves ski poles their balance and adds to their flair skiing
7	of ski proper use improves poles their balance adds and flair their to skiing
0	it minimizes the need for upper body movements to regain lost balance while skiing
1	it minimizes the need for upper body movements to regain balance lost while skiing
2	it the minimizes need for upper body movements to regain balance lost while skiing
3	the it minimizes need for upper body movements to regain balance lost while skiing
4	the it minimizes need for upper body movements to regain balance while lost skiing
5	the it minimizes need upper for body movements to regain balance while lost skiing
6	the it minimizes upper need for body movements to regain balance while lost skiing
7	the need it minimizes upper for body movements regain to lost while balance skiing


7	tart apples some used very are make to cider
0	apples can be eaten raw roasted or baked in pies
1	apples be can eaten raw roasted or baked in pies
2	be apples can eaten raw roasted or baked in pies
3	be apples can eaten roasted raw or baked in pies
4	be apples can roasted eaten raw or baked in pies
5	be eaten apples can roasted raw or baked in pies
6	be eaten apples can roasted raw or baked pies in
7	be eaten apples can roasted raw baked or pies in
0	arson is the criminal act of burning a building or wildland
1	arson the is criminal act of burning a building or wildland
2	the arson is criminal act of burning a building or wildland
3	the arson is criminal of act burning a building or wildland
4	the arson is of criminal act burning a building or wildland
5	the criminal arson is of act burning a building or wildland
6	the criminal arson is of act burning a or building wildland
7	the arson criminal is of act a burning or wildland building
0	a fire investigation determines if a fire was c

4	bed a is piece a of furniture a used as place to sleep or have sex in
5	bed a is of piece a furniture used a as place to sleep or have sex in
6	bed a of is piece a furniture used a as place to sleep or have sex in
7	bed a of is piece a furniture used a as place to sleep have or sex in
0	a bed is made of a mattress and a box spring plus sheets pillows and covers
1	a bed is made of a mattress and a box plus spring sheets pillows and covers
2	a is bed made of a mattress and a box plus spring sheets pillows and covers
3	is a bed made of a mattress and a box plus spring sheets pillows and covers
4	is a bed made of a mattress and a box plus sheets spring pillows and covers
5	is a bed made a of mattress and a box plus sheets spring pillows and covers
6	is a bed a made of mattress and a box plus sheets spring pillows and covers
7	is made a bed a of mattress and a box spring sheets plus pillows covers and
0	in waterbeds the mattress is filled with water and in airbeds it is filled with air
1	

4	objects blacksmiths make horse like weapons shoes or iron furniture
5	objects like blacksmiths make horse weapons shoes or iron furniture
6	objects like blacksmiths make horse weapons shoes or furniture iron
7	objects like blacksmiths make horse weapons or shoes furniture iron
0	a blender is a kitchen appliance used to mix ingredients or puree food
1	a blender is a kitchen appliance used to mix ingredients puree or food
2	a is blender a kitchen appliance used to mix ingredients puree or food
3	is a blender a kitchen appliance used to mix ingredients puree or food
4	is a blender a kitchen appliance used to mix ingredients puree food or
5	is a blender a appliance kitchen used to mix ingredients puree food or
6	is a blender appliance a kitchen used to mix ingredients puree food or
7	is a a blender appliance kitchen used to ingredients mix or food puree
0	blenders have a glass or plastic container with a rotating blade in the bottom
1	blenders have a glass or plastic container with a rot

7	collect butterflies some of people because colorful their wings
0	a carpenter is an artist or manual laborer who works with wood
1	a carpenter is an artist or manual laborer who works wood with
2	a is carpenter an artist or manual laborer who works wood with
3	is a carpenter an artist or manual laborer who works wood with
4	is a carpenter an or artist manual laborer who works wood with
5	is a carpenter or an artist manual laborer who works wood with
6	is an a carpenter or artist manual laborer who works wood with
7	is a an carpenter or artist laborer manual works with who wood
0	carpenters build and repair houses furniture boats and other objects
1	carpenters and build repair houses furniture boats and other objects
2	and carpenters build repair houses furniture boats and other objects
3	and carpenters build repair furniture houses boats and other objects
4	and carpenters build furniture repair houses boats and other objects
5	and repair carpenters build furniture houses boats and ot

6	is a cod that a fish lives in the colder of the areas atlantic
7	is a a cod that fish lives in colder the areas the of atlantic
0	cod flesh is white and mild-tasting and flakes easily when cooked
1	cod is flesh white and mild-tasting and flakes easily when cooked
2	is cod flesh white and mild-tasting and flakes easily when cooked
3	is cod flesh white mild-tasting and and flakes easily when cooked
4	is cod flesh mild-tasting white and and flakes easily when cooked
5	is white cod flesh mild-tasting and and flakes easily when cooked
6	is white cod flesh mild-tasting and and flakes when easily cooked
7	is cod white flesh mild-tasting and flakes and when cooked easily
0	fresh cod is used for fish and chips salted cod for elaborate dishes
1	fresh cod is used for fish and chips salted cod elaborate for dishes
2	fresh is cod used for fish and chips salted cod elaborate for dishes
3	is fresh cod used for fish and chips salted cod elaborate for dishes
4	is fresh cod used for fish and chips sal

3	is a door a movable barrier to entrance or exit an from enclosed area
4	is a door a movable barrier to entrance or exit an enclosed from area
5	is a door a barrier movable to entrance or exit an enclosed from area
6	is a door barrier a movable to entrance or exit an enclosed from area
7	is a a door barrier movable to entrance exit or from enclosed an area
0	most doors have locks to control who gets to pass through them
1	most doors have locks to control who gets to pass them through
2	most have doors locks to control who gets to pass them through
3	have most doors locks to control who gets to pass them through
4	have most doors locks control to who gets to pass them through
5	have most doors control locks to who gets to pass them through
6	have locks most doors control to who gets to pass them through
7	have most locks doors control to gets who pass through to them
0	most doors are opened by pushing or pulling a handle after they are unlocked
1	most doors are opened by pushing or pul

1	eagles have hooked beaks for tearing flesh strong legs and talons sharp
2	eagles hooked have beaks for tearing flesh strong legs and talons sharp
3	hooked eagles have beaks for tearing flesh strong legs and talons sharp
4	hooked eagles have beaks tearing for flesh strong legs and talons sharp
5	hooked eagles have tearing beaks for flesh strong legs and talons sharp
6	hooked beaks eagles have tearing for flesh strong legs and talons sharp
7	hooked eagles beaks have tearing for strong flesh and sharp legs talons
0	eagles live in eyries and some species eat fish and others small mammals
1	eagles live in eyries and some species eat fish and small others mammals
2	eagles in live eyries and some species eat fish and small others mammals
3	in eagles live eyries and some species eat fish and small others mammals
4	in eagles live eyries and some species eat fish and small mammals others
5	in eagles live eyries some and species eat fish and small mammals others
6	in eagles live some eyries and

6	is the fovea of the center the human eye and most sensitive the part of it
7	is the the fovea of center the human eye and the sensitive most part it of
0	insects have compound eyes made up of eye units facing in many directions
1	insects have compound eyes made up of eye units facing many in directions
2	insects compound have eyes made up of eye units facing many in directions
3	compound insects have eyes made up of eye units facing many in directions
4	compound insects have eyes made up of eye units facing many directions in
5	compound insects have eyes up made of eye units facing many directions in
6	compound insects have up eyes made of eye units facing many directions in
7	compound eyes insects have up made of eye facing units in directions many
0	a farm is land used to grow plants or raise animals for food
1	a farm is land used to grow plants or raise for animals food
2	a is farm land used to grow plants or raise for animals food
3	is a farm land used to grow plants or raise for

6	be made floors may bare of concrete tile linoleum or floorboards wooden
7	be floors made may bare of tile concrete or wooden linoleum floorboards
0	people often put carpets on a floor for comfort and to dampen noise
1	people often put carpets on a floor for comfort and dampen to noise
2	people put often carpets on a floor for comfort and dampen to noise
3	put people often carpets on a floor for comfort and dampen to noise
4	put people often carpets on a floor for comfort and dampen noise to
5	put people often carpets a on floor for comfort and dampen noise to
6	put people often a carpets on floor for comfort and dampen noise to
7	put carpets people often a on floor for and comfort to noise dampen
0	in buildings the floor often has pipes and electrical wiring under it
1	in buildings the floor often has pipes and electrical wiring it under
2	in the buildings floor often has pipes and electrical wiring it under
3	the in buildings floor often has pipes and electrical wiring it under
4	th

6	pan a frying small is a pan with sloping sides a handle and
7	pan is a frying small a pan with sides sloping and handle a
0	a frying pan is used to fry sear and brown food
1	a pan frying is used to fry sear and brown food
2	pan a frying is used to fry sear and brown food
3	pan a frying is to used fry sear and brown food
4	pan a frying to is used fry sear and brown food
5	pan is a frying to used fry sear and brown food
6	pan is a frying to used fry sear brown and food
7	pan a is frying to used sear fry brown food and
0	the food is cooked on oil covering the bottom of the pan
1	the food is cooked on oil covering the bottom of pan the
2	the is food cooked on oil covering the bottom of pan the
3	is the food cooked on oil covering the bottom of pan the
4	is the food cooked oil on covering the bottom of pan the
5	is the food oil cooked on covering the bottom of pan the
6	is cooked the food oil on covering the bottom of pan the
7	is the cooked food oil on the covering of the bottom pan
0	fr

5	are hand grenades timed thrown and to explode a bit that after
6	are thrown hand grenades timed and to explode a bit that after
7	are hand thrown grenades timed and explode to bit after a that
0	grenades can also be fired from rifles or shoulder launchers
1	grenades also can be fired from rifles or shoulder launchers
2	also grenades can be fired from rifles or shoulder launchers
3	also grenades can be from fired rifles or shoulder launchers
4	also grenades can from be fired rifles or shoulder launchers
5	also be grenades can from fired rifles or shoulder launchers
6	also be grenades can from fired rifles or launchers shoulder
7	also be grenades can from fired or rifles launchers shoulder
0	tear gas grenades are thrown into the middle of a mob to disperse it
1	tear gas grenades are thrown into the middle of a to mob disperse it
2	tear grenades gas are thrown into the middle of a to mob disperse it
3	grenades tear gas are thrown into the middle of a to mob disperse it
4	grenades tear g

3	is an igloo a type of shelter made from blocks snow of by inuit
4	is an igloo a type of shelter made from blocks snow by of inuit
5	is an igloo a of type shelter made from blocks snow by of inuit
6	is an igloo of a type shelter made from blocks snow by of inuit
7	is a an igloo of type shelter made blocks from of by snow inuit
0	igloos are built in places where snow covers the land most of the time
1	igloos are built in places where snow covers the land of most the time
2	igloos built are in places where snow covers the land of most the time
3	built igloos are in places where snow covers the land of most the time
4	built igloos are in places where snow covers the land of the most time
5	built igloos are in where places snow covers the land of the most time
6	built igloos are where in places snow covers the land of the most time
7	built in igloos are where places snow covers land the most the of time
0	igloos are often built in the shape of a dome with an entrance tunnel
1	igloos are o

7	is a a library where place a collection books of documents media and is kept
0	librarians work at libraries and are experts at finding and organizing information
1	librarians work at libraries and are experts at finding and information organizing
2	librarians at work libraries and are experts at finding and information organizing
3	at librarians work libraries and are experts at finding and information organizing
4	at librarians work libraries are and experts at finding and information organizing
5	at librarians work are libraries and experts at finding and information organizing
6	at libraries librarians work are and experts at finding and information organizing
7	at librarians libraries work are and at experts and organizing finding information
0	people can go to public libraries and borrow books to take home for several weeks
1	people can go to public libraries and borrow books to home take for several weeks
2	people go can to public libraries and borrow books to home take for sev

5	mouthparts for mosquitoes have the piercing skin of plants and animals
6	mouthparts for mosquitoes have the piercing skin of and plants animals
7	mouthparts mosquitoes for have the piercing of skin and animals plants
0	female mosquitoes bite people and animals and suck their blood for protein
1	female mosquitoes bite people and animals and suck their blood protein for
2	female bite mosquitoes people and animals and suck their blood protein for
3	bite female mosquitoes people and animals and suck their blood protein for
4	bite female mosquitoes people animals and and suck their blood protein for
5	bite female mosquitoes animals people and and suck their blood protein for
6	bite people female mosquitoes animals and and suck their blood protein for
7	bite female people mosquitoes animals and suck and blood for their protein
0	mosquito bites can spread yellow fever malaria and other parasite diseases
1	mosquito can bites spread yellow fever malaria and other parasite diseases
2	can mosqu

7	serious murder crime a it because the deprives of their victim existence
0	killing in self-defense accidentally or in war is not considered murder
1	killing self-defense in accidentally or in war is not considered murder
2	self-defense killing in accidentally or in war is not considered murder
3	self-defense killing in accidentally in or war is not considered murder
4	self-defense killing in in accidentally or war is not considered murder
5	self-defense accidentally killing in in or war is not considered murder
6	self-defense accidentally killing in in or war is considered not murder
7	self-defense killing accidentally in in or is war considered murder not
0	a convicted murderer is often given a life sentence or the death penalty
1	a convicted murderer is often given a life sentence or death the penalty
2	a murderer convicted is often given a life sentence or death the penalty
3	murderer a convicted is often given a life sentence or death the penalty
4	murderer a convicted is often g

0	the piano has an enormous note range and pedals to change the sound quality
1	the piano has an enormous note range and pedals to the change sound quality
2	the has piano an enormous note range and pedals to the change sound quality
3	has the piano an enormous note range and pedals to the change sound quality
4	has the piano an enormous note range and pedals to the sound change quality
5	has the piano an note enormous range and pedals to the sound change quality
6	has the piano note an enormous range and pedals to the sound change quality
7	has an the piano note enormous range and to pedals change sound the quality
0	the piano repertoire is large and famous pianists can give solo concerts
1	the piano repertoire is large and famous pianists can give concerts solo
2	the repertoire piano is large and famous pianists can give concerts solo
3	repertoire the piano is large and famous pianists can give concerts solo
4	repertoire the piano is and large famous pianists can give concerts solo
5

7	can have some refrigerators small a at compartment temperature freezing
0	a refrigerator can have shelves and small compartments for different types of food
1	a refrigerator can have shelves and small compartments for different of types food
2	a can refrigerator have shelves and small compartments for different of types food
3	can a refrigerator have shelves and small compartments for different of types food
4	can a refrigerator have shelves and small compartments for different of food types
5	can a refrigerator have and shelves small compartments for different of food types
6	can a refrigerator and have shelves small compartments for different of food types
7	can have a refrigerator and shelves small compartments different for types food of
0	a refrigerator is generally white and people often put magnets on its surface
1	a refrigerator is generally white and people often put magnets its on surface
2	a is refrigerator generally white and people often put magnets its on surface
3	is a

7	be worked saws can hand by or powered an by electric or motor gas engine
0	saws are used extensively in forestry construction carpentry and medicine
1	saws used are extensively in forestry construction carpentry and medicine
2	used saws are extensively in forestry construction carpentry and medicine
3	used saws are extensively forestry in construction carpentry and medicine
4	used saws are forestry extensively in construction carpentry and medicine
5	used extensively saws are forestry in construction carpentry and medicine
6	used extensively saws are forestry in construction carpentry medicine and
7	used extensively saws are forestry in carpentry construction medicine and
0	a scientist studies and produces new knowledge about the physical world
1	a studies scientist and produces new knowledge about the physical world
2	studies a scientist and produces new knowledge about the physical world
3	studies a scientist and new produces knowledge about the physical world
4	studies a scientist

6	very keen sharks have and senses can detect fields electrical
7	very keen sharks have and senses detect can fields electrical
0	a ship is a large vehicle that floats and travels on water
1	a ship is a large vehicle that floats and travels water on
2	a is ship a large vehicle that floats and travels water on
3	is a ship a large vehicle that floats and travels water on
4	is a ship a vehicle large that floats and travels water on
5	is a ship vehicle a large that floats and travels water on
6	is a a ship vehicle large that floats and travels water on
7	is a a ship vehicle large floats that travels on and water
0	a ship can be powered in the ocean by sails steam or diesel engines
1	a ship can be powered in the ocean by sails or steam diesel engines
2	a can ship be powered in the ocean by sails or steam diesel engines
3	can a ship be powered in the ocean by sails or steam diesel engines
4	can a ship be powered in the ocean by sails or diesel steam engines
5	can a ship be in powered the oce

4	insects spiders are legs with eight that make silk
5	insects with spiders are legs eight that make silk
6	insects with spiders are legs eight make that silk
7	insects with spiders legs are eight make that silk
0	spiders build webs with silk and use them to catch insects
1	spiders webs build with silk and use them to catch insects
2	webs spiders build with silk and use them to catch insects
3	webs spiders build with and silk use them to catch insects
4	webs spiders build and with silk use them to catch insects
5	webs with spiders build and silk use them to catch insects
6	webs with spiders build and silk use them catch to insects
7	webs spiders with build and silk them use catch insects to
0	spiders use venomous fangs to paralyse their stuck prey
1	spiders venomous use fangs to paralyse their stuck prey
2	venomous spiders use fangs to paralyse their stuck prey
3	venomous spiders use fangs paralyse to their stuck prey
4	venomous spiders use paralyse fangs to their stuck prey
5	venomous

1	they are used to play videogames when connected to a console game
2	they used are to play videogames when connected to a console game
3	used they are to play videogames when connected to a console game
4	used they are to videogames play when connected to a console game
5	used they are videogames to play when connected to a console game
6	used to they are videogames play when connected to a console game
7	used they to are videogames play connected when a game to console
0	television content can be broadcast or received via cable or internet
1	television can content be broadcast or received via cable or internet
2	can television content be broadcast or received via cable or internet
3	can television content be or broadcast received via cable or internet
4	can television content or be broadcast received via cable or internet
5	can be television content or broadcast received via cable or internet
6	can be television content or broadcast received via or cable internet
7	can television be 

7	is a a train series connected of cars moves that between on stations a track
0	the train is pulled by a locomotive powered by diesel or electricity
1	the train is pulled by a locomotive powered by diesel electricity or
2	the is train pulled by a locomotive powered by diesel electricity or
3	is the train pulled by a locomotive powered by diesel electricity or
4	is the train pulled a by locomotive powered by diesel electricity or
5	is the train a pulled by locomotive powered by diesel electricity or
6	is pulled the train a by locomotive powered by diesel electricity or
7	is the pulled train a by powered locomotive diesel or by electricity
0	a freight train transports containers of goods or fuel over long distances
1	a freight train transports containers of goods or fuel over distances long
2	a train freight transports containers of goods or fuel over distances long
3	train a freight transports containers of goods or fuel over distances long
4	train a freight transports of containers go

1	vultures are scavenging birds feeding mostly on the carcasses of animals dead
2	vultures scavenging are birds feeding mostly on the carcasses of animals dead
3	scavenging vultures are birds feeding mostly on the carcasses of animals dead
4	scavenging vultures are birds mostly feeding on the carcasses of animals dead
5	scavenging vultures are mostly birds feeding on the carcasses of animals dead
6	scavenging birds vultures are mostly feeding on the carcasses of animals dead
7	scavenging vultures birds are mostly feeding the on of dead carcasses animals
0	many vultures have a bald head without feathers for losing heat
1	many have vultures a bald head without feathers for losing heat
2	have many vultures a bald head without feathers for losing heat
3	have many vultures a head bald without feathers for losing heat
4	have many vultures head a bald without feathers for losing heat
5	have a many vultures head bald without feathers for losing heat
6	have a many vultures head bald without fea

0	whales have been hunted for meat whale oil and ambergris
1	whales been have hunted for meat whale oil and ambergris
2	been whales have hunted for meat whale oil and ambergris
3	been whales have hunted meat for whale oil and ambergris
4	been whales have meat hunted for whale oil and ambergris
5	been hunted whales have meat for whale oil and ambergris
6	been hunted whales have meat for whale oil ambergris and
7	been hunted whales have meat for oil whale ambergris and
0	a window is a transparent opening in a wall to let light through
1	a window is a transparent opening in a wall to light let through
2	a is window a transparent opening in a wall to light let through
3	is a window a transparent opening in a wall to light let through
4	is a window a transparent opening in a wall to light through let
5	is a window a opening transparent in a wall to light through let
6	is a window opening a transparent in a wall to light through let
7	is a a window opening transparent in a to wall let throug

`Filter out the scrambling conditions used in Mollica et al. (2020): Original (0), Scr1, Scr3, Scr5 and Scr7.`

In [9]:
def create_scr_lists(filename, conditions=(0, 1, 3, 5, 7)):
    """
    Read a tab-separated scrambled-stimuli file and group its sentences by condition.

    Every non-empty row must hold an integer condition code in its first column
    and the sentence in its second column. A final period is appended to each
    sentence that is kept. Blank lines in the file are skipped.

    Input:
        filename: name of the file, resolved relative to `stimuli_path`
        conditions: condition codes to keep, in the order in which their lists
            are returned; the default (0, 1, 3, 5, 7) corresponds to
            Original, Scr1, Scr3, Scr5, Scr7
    Output: tuple with one list of sentences per requested condition, each list
        in file order
    Raises: ValueError if a first column is not an integer; IndexError if a row
        of a requested condition has no sentence column
    """
    kept = {condition: [] for condition in conditions}

    # newline="" is what the csv module asks for when it is handed a file object
    with open(os.path.join(stimuli_path, filename), "r", newline="") as f:
        for row in csv.reader(f, delimiter="\t"):
            if not row:  # csv.reader yields [] for blank lines; nothing to classify
                continue
            condition = int(row[0])
            if condition in kept:
                kept[condition].append(row[1] + '.')

    return tuple(kept[condition] for condition in conditions)

In [10]:
scr_lists_243 = create_scr_lists("stim_243sentences_scrambled.txt")
scr_lists_384 = create_scr_lists("stim_384sentences_scrambled.txt")

# unpack per experiment so the individual lists stay available by name
Original_243, Scr1_243, Scr3_243, Scr5_243, Scr7_243 = scr_lists_243
Original_384, Scr1_384, Scr3_384, Scr5_384, Scr7_384 = scr_lists_384

# pool both experiments per condition: 243-sentence items first, then 384-sentence items
Original, Scr1, Scr3, Scr5, Scr7 = (
    list_243 + list_384 for list_243, list_384 in zip(scr_lists_243, scr_lists_384)
)

In [11]:
# map each condition label to its pooled sentence list (labels and lists are paired by position)
condition_names = ["Original", "Scr1", "Scr3", "Scr5", "Scr7"]
condition_lists = [Original, Scr1, Scr3, Scr5, Scr7]
condition_dict = {
    name: sentences for name, sentences in zip(condition_names, condition_lists)
}
condition_dict

{'Original': ['beekeeping encourages the conservation of local habitats.',
  "it is in every beekeeper's interest to conserve local plants that produce pollen.",
  'as a passive form of agriculture it does not require that native vegetation be cleared to make way for crops.',
  'beekeepers also discourage the use of pesticides on crops because they could kill the honeybees.',
  'artisanal beekeepers go to extremes for their craft but their product is worth the effort.',
  'artisanal honey-making emphasizes quality and character over quantity and consistency.',
  'to produce the finest honey beekeepers become micromanagers of their honeybees.',
  'they scout the fields know when nectar flows and select the best ways to extract honey.',
  'as the beekeeper opens the hive the deep hum of 40000 bees fills the air.',
  'the beekeeper checks honey stores pollen supplies and the bee nursery.',
  "bees crawl across his bare arms and hands but they don't sting because they're gentle.",
  "i hav

`Save the scrambled stimulus sets: one pickle file per condition (same xarray, only the stimulus set's sentences differ).`

In [12]:
def make_datasets(scrambled_sentence_list, cond):
    """
    Pickle a copy of the base stimulus set with its sentences replaced.

    Input:
        scrambled_sentence_list: sentences of one condition; they are assigned
            to the "sentence" column of a copy of `stimuli_df`
        cond: condition name, used in the name of the output file
    Output: nothing is returned; saves {savedir}/stimuli_{cond}.pkl
    (only the stimulus set is written here, and `stimuli_df` itself is left unmodified)
    """
    out_path = f"{savedir}/stimuli_{cond}.pkl"

    stimuli_scrambled = stimuli_df.copy()
    stimuli_scrambled["sentence"] = scrambled_sentence_list

    with open(out_path, 'wb') as pkl_file:
        pickle.dump(stimuli_scrambled, pkl_file)

# write one pickle per scrambling condition
for cond, scrambled_sentence_list in condition_dict.items():
    make_datasets(scrambled_sentence_list=scrambled_sentence_list, cond=cond)
print("Done!")

Done!


## 2. Create low PMI scrambled dataset version (Mollica et al. 2020)

In [13]:
import nltk
# Fetch the NLTK resources for this section.
# The tagger model is presumably what nltk.pos_tag (used in the low-PMI cell) loads -- TODO confirm.
nltk.download('averaged_perceptron_tagger')
# NOTE(review): the low-PMI cell splits sentences on whitespace instead of using the
# NLTK word tokenizer, so 'punkt' may not be needed -- confirm before removing.
nltk.download('punkt')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/ckauf/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to /home/ckauf/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [14]:
stimuli_files = ["stim_243sentences_nopunct.txt", "stim_384sentences_nopunct.txt"]

def create_low_pmi(stimuli_files):
    """
    Build the low-PMI version of every sentence and pickle the resulting stimulus set.

    Each sentence is lower-cased, split on whitespace and POS-tagged. Its words are
    divided into content words (tag starts with JJ, NN, RB or VB) and function words
    (everything else), and then re-joined in the order:
        even-indexed content words, even-indexed function words,
        odd-indexed function words, odd-indexed content words
    (indices count positions within the content / function list) followed by a period.

    Input:
        stimuli_files: file names (relative to `stimuli_path`) with one sentence per line
    Output: list of reordered sentences, in file order; additionally saves a copy of
        `stimuli_df` with these sentences to {savedir}/stimuli_lowPMI.pkl
    """
    # tags counted as content words:
    # JJ = adjectives, NN = nouns, RB = adverbs, VB = verbs
    content_regex = re.compile('JJ.*|NN.*|RB.*|VB.*')

    lowPMI = []

    for filename in stimuli_files:
        with open(os.path.join(stimuli_path, filename), "r") as f:
            sentences = [line.strip().lower() for line in f.readlines()]

        for sent in sentences:
            # plain whitespace split: the NLTK word tokenizer would need a
            # work-around for sentences containing 's
            tagged = nltk.pos_tag(re.split(r'\s+', sent))

            # one pass over the tagged words, keeping sentence order within each group
            content, function = [], []
            for word, tag in tagged:
                if content_regex.match(tag):
                    content.append(word)
                else:
                    function.append(word)

            # [0::2] = even-indexed entries, [1::2] = odd-indexed entries
            reordered = content[0::2] + function[0::2] + function[1::2] + content[1::2]
            lowPMI.append(' '.join(reordered) + ".")

    stimuli_lowPMI = stimuli_df.copy()
    stimuli_lowPMI["sentence"] = lowPMI

    with open(f"{savedir}/stimuli_lowPMI.pkl", 'wb') as fout:
        pickle.dump(stimuli_lowPMI, fout)

    return lowPMI

lowPMI = create_low_pmi(stimuli_files)
print("Done!")

Done!


## 3. Create random word-list dataset version (Mollica et al. 2020)

In [15]:
# full paths of the two punctuation-free stimulus files
stim_243, stim_384 = (
    os.path.join(stimuli_path, stim_name)
    for stim_name in ('stim_243sentences_nopunct.txt', 'stim_384sentences_nopunct.txt')
)

In [16]:
# Build the random word-list condition: every sentence keeps its original length
# (in whitespace-separated words) but is filled with words drawn without
# replacement from the pooled vocabulary of both stimulus files.
all_words = []
lengths = []

# Both files are read the same way; `with` closes each handle once it has been read.
for stim_file in (stim_243, stim_384):
    with open(stim_file, 'r') as f:
        for line in f:
            words = re.split(r'\s+', line.strip().lower())
            lengths.append(len(words))
            all_words += words

print(len(all_words))
print(len(lengths))
# NOTE: the draw depends on the state of the global `random` generator when this
# cell runs, so run the notebook top to bottom to reproduce the same sentences.
random.shuffle(all_words)

random_sentences = []
for n in lengths:
    # draw n of the still unused words
    random_sample = random.sample(all_words, n)
    new_sentence = ' '.join(random_sample) + '.'
    random_sentences.append(new_sentence)

    # remove selected words from list; list.remove drops one (the first) equal
    # entry per call, so repeated words are used up one copy at a time
    for elm in random_sample:
        all_words.remove(elm)

assert len(all_words) == 0, f"Not all words from the dataset have been used. Length of word list is {len(all_words)}!"

print(random_sentences[:10])

stimuli_random = stimuli_df.copy()
stimuli_random["sentence"] = random_sentences

fname = f"{savedir}/stimuli_random.pkl"
with open(fname, 'wb') as fout:
    pickle.dump(stimuli_random, fout)

7730
627
['three climb around the the spears vapor.', 'of shears metallic is in individual machine for fracture a singer can have.', "insulation body used i've control a fever shark a side crop humble pliers treated and places beekeeper large characteristic own.", 'stop stone the act are flood export and genetic cultures edge varieties reptiles used people.', 'polar almost a a she crime furniture many have the as taste if a of.', 'the sauce an hammers of log driving to and let.', 'was musical of the at that most apply ice swamp of.', 'cabin to meant were the drop glass objects the night the heads for hand same orchestra.', 'or in a locks of to can a the to four instantly replacement are things.', 'patients a plant is the regions small evidence eye bone solid.']


## 4. Create backward dataset version (control for Mollica et al. 2020)

In [17]:
# Build the backward condition: each sentence is lower-cased, split on whitespace,
# its word order reversed, and a final period appended.
backward_stim = []

# Both files are processed the same way; `with` closes each handle once it has been read.
for stim_file in (stim_243, stim_384):
    with open(stim_file, 'r') as f:
        for line in f:
            words = re.split(r'\s+', line.strip().lower())
            new_words = words[::-1]
            new_sent = ' '.join(new_words) + '.'
            backward_stim.append(new_sent)

print(len(backward_stim))
# sanity check: at this point these hold the last sentence of the second file
print(words)
print(new_words)
print(new_sent)

stimuli_backward = stimuli_df.copy()
stimuli_backward["sentence"] = backward_stim

fname = f"{savedir}/stimuli_backward.pkl"
with open(fname, 'wb') as fout:
    pickle.dump(stimuli_backward, fout)
stimuli_backward

627
['a', 'woman', 'has', 'different', 'reproductive', 'organs', 'than', 'a', 'man']
['man', 'a', 'than', 'organs', 'reproductive', 'different', 'has', 'woman', 'a']
man a than organs reproductive different has woman a.


Unnamed: 0,sentence,sentence_num,stimulus_id,experiment,story,passage_index,passage_label,passage_category
0,habitats local of conservation the encourages ...,0,243sentences.0,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
1,pollen produce that plants local conserve to i...,1,243sentences.1,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
2,crops for way make to cleared be vegetation na...,2,243sentences.2,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
3,honeybees the kill could they because crops on...,3,243sentences.3,243sentences,243sentences.beekeeping,1,beekeeping,beekeeping
4,effort the worth is product their but craft th...,4,243sentences.4,243sentences,243sentences.beekeeping,2,beekeeping,beekeeping
...,...,...,...,...,...,...,...,...
622,inside insulation increase to panes multiple h...,379,384sentences.379,384sentences,384sentences.building_part,95,Window,building_part
623,adult human female a is woman a.,380,384sentences.380,384sentences,384sentences.human,96,Woman,human
624,caregiver a as seen stereotypically is woman a.,381,384sentences.381,384sentences,384sentences.human,96,Woman,human
625,children bear and pregnant become can woman a.,382,384sentences.382,384sentences,384sentences.human,96,Woman,human


In [18]:
# preview the first five reversed sentences
backward_stim[:5]

['habitats local of conservation the encourages beekeeping.',
 "pollen produce that plants local conserve to interest beekeeper's every in is it.",
 'crops for way make to cleared be vegetation native that require not does it agriculture of form passive a as.',
 'honeybees the kill could they because crops on pesticides of use the discourage also beekeepers.',
 'effort the worth is product their but craft their for extremes to go beekeepers artisanal.']

## 5. Create random LowPMI dataset version (control for Mollica et al. 2020)

### requires OM

`Go to the folder "add_random_lowPMI_condition" and follow the instructions there.`