# Import and get ready

In [1]:
!pip install autocorrect          
from autocorrect import Speller   # things we need for spell checking
check = Speller(lang='en')

import csv                        # csv is for importing and working with csv files

import datetime
date = datetime.date.today()

import nltk                       # get nltk 
from nltk import word_tokenize    # and some of its key functions
from nltk import sent_tokenize    
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))
from nltk.corpus import wordnet                    # Finally, things we need for lemmatising!
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer() 
nltk.download('averaged_perceptron_tagger')        # Like a POS-tagger...
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('webtext')

import numpy as np
import os                         # os is a module for navigating your machine (e.g., file directories).
import pandas as pd
import statistics
import re                         # things we need for RegEx corrections

English_punctuation = "-!\"#$%&()'*+,./:;<=>?@[\]^_`{|}~''“”"      # Things for removing punctuation, stopwords and empty strings
table_punctuation = str.maketrans('','', English_punctuation)  

print("Succesfully imported necessary modules")    # The print statement is just a bit of encouragement!





[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\mzyssjkc\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\mzyssjkc\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mzyssjkc\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\mzyssjkc\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package webtext to
[nltk_data]     C:\Users\mzyssjkc\AppData\Roaming\nltk_data...
[nltk_data]   Package webtext is already up-to-date!


Succesfully imported necessary modules


In [2]:
print(os.listdir("..\\for_analysis\\ESHG")) # This is how to see the contents of any folders shown in the last contents check
print(os.listdir("..\\counts\\ESHG")) # This is how to see the contents of any folders shown in the last contents check


['2001abstractICHG.txt', '2002Abstracts.txt', '2003Abstracts.txt', '2004Abstracts.txt', '2005Abstracts.txt', '2006Abstracts.txt', '2007Abstracts.txt', '2008Abstracts.txt', '2009Abstracts.txt', '2010Abstracts.txt', '2011Abstracts.txt', '2012Abstracts.txt', '2013Abstracts.txt', '2014Abstracts.txt', '2015Abstracts.txt', '2016Abstracts.txt', '2017 electronic posters.txt', '2017 oral presentations.txt', '2017 posters.txt', '2018 electronic posters.txt', '2018 EMPAG.txt', '2018 oral presentation.txt', '2018 posters.txt', '2019 oral presentation.txt', '2019 posters.txt', '2019 posters2.txt', '2020 eposters.txt', '2020 interactive eposter.txt', '2020 oral presentation.txt', '2021 eposters.txt', '2021 oral presentations.txt']
['select.csv', 'totals.csv']


# Read in and check useful things

In [3]:
totals_df = pd.read_csv('..\\counts\\ESHG\\totals.csv', header=None)
totals_df.columns = ["filename", "abstracts"]
totals_df

Unnamed: 0,filename,abstracts
0,2001abstractICHG,7805
1,2002Abstracts,5517
2,2003Abstracts,4539
3,2004Abstracts,13
4,2005Abstracts,7289
5,2006Abstracts,6026
6,2007Abstracts,7283
7,2008Abstracts,9794
8,2009Abstracts,7981
9,2010Abstracts,7795


In [142]:
raw_select_abstracts = []

with open('..\\counts\\ESHG\\select.csv', newline='', encoding = "ISO-8859-1") as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        raw_select_abstracts.append(row)
        
select_abstracts = (list(filter(lambda x: x, raw_select_abstracts)))
    

In [5]:
select_df = pd.DataFrame.from_records(select_abstracts)
total_select = select_df.iloc[:,0].value_counts()


In [6]:
autism_count = select_df.iloc[:,0].value_counts().rename_axis('unique_values').to_frame('counts')

autism_count = autism_count.reset_index()
autism_count.columns = ['filename', 'autism_abstracts']

autism_count

Unnamed: 0,filename,autism_abstracts
0,2013Abstracts,137
1,2016Abstracts,132
2,2015Abstracts,129
3,2014Abstracts,116
4,2012Abstracts,108
5,2011Abstracts,104
6,2018 posters,98
7,2019 posters,92
8,2020 interactive eposter,85
9,2010Abstracts,74


In [7]:
merged_counts = pd.merge(totals_df, autism_count, on = 'filename')


In [8]:
merged_counts


Unnamed: 0,filename,abstracts,autism_abstracts
0,2001abstractICHG,7805,21
1,2002Abstracts,5517,33
2,2003Abstracts,4539,19
3,2005Abstracts,7289,32
4,2006Abstracts,6026,35
5,2007Abstracts,7283,41
6,2008Abstracts,9794,72
7,2009Abstracts,7981,42
8,2010Abstracts,7795,74
9,2011Abstracts,9133,104


In [12]:
merged_counts['year'] = merged_counts['filename'].str.extract(r'(\d{4})')
merged_counts


Unnamed: 0,filename,abstracts,autism_abstracts,year
0,2001abstractICHG,7805,21,2001
1,2002Abstracts,5517,33,2002
2,2003Abstracts,4539,19,2003
3,2005Abstracts,7289,32,2005
4,2006Abstracts,6026,35,2006
5,2007Abstracts,7283,41,2007
6,2008Abstracts,9794,72,2008
7,2009Abstracts,7981,42,2009
8,2010Abstracts,7795,74,2010
9,2011Abstracts,9133,104,2011


In [15]:
print(merged_counts['abstracts'].sum())
print(merged_counts['autism_abstracts'].sum())

167336
1694


In [17]:
print(merged_counts[['year', 'abstracts']].groupby('year').sum('abstracts'))
#print(merged_counts['autism_abstracts'].sum())

      abstracts
year           
2001       7805
2002       5517
2003       4539
2005       7289
2006       6026
2007       7283
2008       9794
2009       7981
2010       7795
2011       9133
2012       8777
2013      10626
2014       9041
2015       7978
2016      10734
2017      10405
2018      10429
2019       8250
2020       9590
2021       8344


# Working with the contents of the selected abstracts

##  Dump the content of the selected abtsracts into one big string

In [28]:
bag_of_abstracts = ""

for abstract in (select_abstracts):
    bag_of_abstracts += abstract[3]
        

## tokenize that big string, and transform the tokens into lowercase

In [19]:
abstract_token_word = word_tokenize(bag_of_abstracts)
abstract_token_word_lower = [word.lower() for word in abstract_token_word]

print(abstract_token_word_lower[:10]) 

['p08', '.10', 'rs4307059', 'and', 'rs4141463', 'study', 'in', 'an', 'italian', 'cohort']


## Remove the punctuation from the lowercased tokens

In [20]:
abstract_t_w_l_np = [w.translate(table_punctuation) for w in abstract_token_word_lower]  
                                                               # Iterate over corpus_words, turning punctuation to nothing.
print(abstract_t_w_l_np[:10])       

['p08', '10', 'rs4307059', 'and', 'rs4141463', 'study', 'in', 'an', 'italian', 'cohort']


In [21]:
abstract_t_w_l_np = (list(filter(lambda x: x, abstract_t_w_l_np)))
print(abstract_t_w_l_np[:10])

['p08', '10', 'rs4307059', 'and', 'rs4141463', 'study', 'in', 'an', 'italian', 'cohort']


## Remove stopwords

In [None]:
print(sorted(stop_words))        # just an option to check what counts as a stopword if you want to see

In [22]:
abstract_t_w_l_np_nsw = []

for word in abstract_t_w_l_np:
    if word not in stop_words:
        abstract_t_w_l_np_nsw.append(word)
        
        
print(abstract_t_w_l_np_nsw[:10])

['p08', '10', 'rs4307059', 'rs4141463', 'study', 'italian', 'cohort', 'neck', 'surgery', 'university']


## Spell check the lowercased "real word" tokens

In [None]:
len(abstract_t_w_l_np_nsw)

In [None]:
# runs forever without finishing. Find alternative or skip it?

abstract_t_w_l_np_cs = []

for word in abstract_t_w_l_np_nsw:
    abstract_t_w_l_np_cs.append(check(word))    

print(abstract_t_w_l_np_cs[:10])

In [23]:
from nltk.stem.porter import PorterStemmer

porter = PorterStemmer()
abstract_t_w_l_np_nsw_stem = [porter.stem(word) for word in abstract_t_w_l_np_nsw]
print(abstract_t_w_l_np_nsw_stem[:10])

['p08', '10', 'rs4307059', 'rs4141463', 'studi', 'italian', 'cohort', 'neck', 'surgeri', 'univers']


In [24]:
from collections import Counter
counts = Counter(abstract_t_w_l_np_nsw_stem)
print(type(counts))

<class 'collections.Counter'>


In [25]:
print(counts.most_common(100))

[('gene', 3535), ('none', 3535), ('patient', 3516), ('genet', 3106), ('de', 2423), ('univers', 2210), ('disord', 2201), ('autism', 2057), ('mutat', 1878), ('asd', 1736), ('clinic', 1705), ('studi', 1582), ('syndrom', 1560), ('delet', 1557), ('associ', 1535), ('variant', 1439), ('case', 1427), ('chromosom', 1398), ('hospit', 1252), ('use', 1240), ('medic', 1236), ('phenotyp', 1228), ('analysi', 1171), ('result', 1163), ('report', 1160), ('unit', 1126), ('c', 1115), ('famili', 1099), ('j', 1091), ('e', 1033), ('genom', 1025), ('two', 983), ('franc', 978), ('spectrum', 973), ('p', 958), ('includ', 958), ('region', 953), ('present', 947), ('sequenc', 934), ('diseas', 929), ('featur', 921), ('cnv', 906), ('identifi', 881), ('caus', 872), ('sever', 854), ('show', 804), ('delay', 794), ('disabl', 784), ('human', 770), ('itali', 762), ('institut', 756), ('one', 752), ('l', 731), ('medicin', 725), ('2', 720), ('intellectu', 713), ('r', 712), ('molecular', 707), ('duplic', 694), ('cell', 687), (

## Person-first and identity first

In [40]:
!pip install spacy
import re 
import string 
import nltk 
import spacy 
import pandas as pd 
import numpy as np 
import math 
from tqdm import tqdm 

from spacy.matcher import Matcher 
from spacy.tokens import Span 
from spacy import displacy 

pd.set_option('display.max_colwidth', 200)

ERROR: Could not install packages due to an OSError: [WinError 2] The system cannot find the file specified: 'c:\\python39\\Scripts\\spacy.exe' -> 'c:\\python39\\Scripts\\spacy.exe.deleteme'



Collecting spacy
  Using cached spacy-3.4.1-cp39-cp39-win_amd64.whl (11.8 MB)
Collecting langcodes<4.0.0,>=3.2.0
  Using cached langcodes-3.3.0-py3-none-any.whl (181 kB)
Installing collected packages: langcodes, spacy


In [45]:
!pip install spacy -q
import spacy
!python -m spacy download en_core_web_lg -q
from nltk.corpus import webtext

nlp = spacy.load('en_core_web_lg')



[+] Download and installation successful
You can now load the package via spacy.load('en_core_web_lg')


In [41]:
person_identity = nltk.sent_tokenize(bag_of_abstracts)


In [42]:
type(person_identity)

list

In [111]:
person =""

for sentence in person_identity:
    if any(s in sentence for s in ['autisic', 'Autisic', 'autism', 'Autism']):
        person += sentence


In [112]:
len(person)

631947

In [113]:
for tok in person_doc: 
  print(tok.text, "-->",tok.dep_,"-->", tok.pos_)

Patients --> nsubj --> NOUN
4 --> nummod --> NUM
and --> cc --> CCONJ
5 --> conj --> NUM
presented --> ROOT --> VERB
with --> prep --> ADP
autistic --> amod --> ADJ
features --> pobj --> NOUN
. --> punct --> PUNCT
Primary --> amod --> ADJ
microcephalies --> ROOT --> NOUN
( --> punct --> PUNCT
MicroCephaly --> compound --> PROPN
Primary --> compound --> PROPN
Hereditary --> appos --> PROPN
, --> punct --> PUNCT
MCPH --> appos --> PROPN
) --> punct --> PUNCT
We --> nsubj --> PRON
report --> ROOT --> VERB
two --> nummod --> NUM
children --> dobj --> NOUN
with --> prep --> ADP
autistic --> amod --> ADJ
behavior --> pobj --> NOUN
, --> punct --> PUNCT
mental --> amod --> ADJ
retardation --> conj --> NOUN
and --> cc --> CCONJ
have --> aux --> AUX
often --> advmod --> ADV
been --> auxpass --> AUX
presented --> conj --> VERB
as --> prep --> ADP
a --> det --> DET
developmental --> amod --> ADJ
disorders --> pobj --> NOUN
resulting --> acl --> VERB
in --> prep --> ADP
dysmorphic --> amod --> ADJ

children --> pobj --> NOUN
in --> prep --> ADP
western --> amod --> ADJ
saudi --> compound --> PROPN
arabia --> compound --> PROPN
autism --> compound --> NOUN
spectrum --> compound --> NOUN
disorders --> pobj --> NOUN
( --> punct --> PUNCT
ASD).To --> nsubj --> PROPN
identify --> parataxis --> VERB
these --> det --> DET
likely --> amod --> ADJ
functional --> amod --> ADJ
M. --> compound --> PROPN
M. --> compound --> PROPN
Alwasiyah1 --> dobj --> PROPN
, --> punct --> PUNCT
C. --> compound --> PROPN
Trujillo2 --> appos --> NOUN
; --> punct --> PUNCT
elements --> appos --> NOUN
, --> punct --> PUNCT
we --> nsubj --> PRON
combined --> parataxis --> VERB
three --> dobj --> NUM
previously --> advmod --> ADV
published --> acl --> VERB
large --> amod --> ADJ
cohorts --> dobj --> NOUN
1Aziziah --> compound --> PROPN
Maternity --> appos --> PROPN
& --> cc --> CCONJ
Children --> compound --> PROPN
Hospital --> conj --> PROPN
, --> punct --> PUNCT
Jeddah --> conj --> PROPN
, --> punct --> PUNCT


Although --> mark --> SCONJ
the --> det --> DET
molecular --> amod --> ADJ
basis --> nsubj --> NOUN
of --> prep --> ADP
this --> det --> DET
disorder --> pobj --> NOUN
has --> aux --> AUX
not --> neg --> PART
been --> advcl --> AUX
totally --> advmod --> ADV
dchromosomal --> acomp --> ADJ
abberations.(Oxford --> compound --> PROPN
Gene --> compound --> PROPN
Technology --> dobj --> PROPN
, --> punct --> PUNCT
Oxford --> nmod --> PROPN
, --> punct --> PUNCT
UK --> nmod --> PROPN
) --> punct --> PUNCT
microchips --> appos --> NOUN
and --> cc --> CCONJ
scanned --> conj --> VERB
with --> prep --> ADP
Agilent --> compound --> PROPN
Microarray --> compound --> PROPN
Scanner --> pobj --> NOUN
( --> punct --> PUNCT
Agilent --> compound --> PROPN
Technologies --> nsubjpass --> PROPN
, --> punct --> PUNCT
Palo --> compound --> PROPN
Alto --> npadvmod --> PROPN
, --> punct --> PUNCT
CAP1 --> compound --> PROPN
  --> dep --> SPACE
autism --> compound --> NOUN
spectrum --> compound --> NOUN
disorde

p>0.05 --> appos --> PROPN
) --> punct --> PUNCT
, --> punct --> PUNCT
there --> expl --> PRON
was --> ccomp --> VERB
a --> det --> DET
between --> prep --> ADP
serum --> compound --> NOUN
visfatin --> compound --> NOUN
levels --> pobj --> NOUN
and --> cc --> CCONJ
the --> det --> DET
serum --> compound --> NOUN
insulin --> conj --> NOUN
, --> punct --> PUNCT
TNF --> conj --> PROPN
- --> punct --> PUNCT
Î± --> dep --> SPACE
, --> punct --> PUNCT
BMI --> conj --> PROPN
, --> punct --> PUNCT
significant --> amod --> ADJ
activity --> compound --> NOUN
increase --> appos --> NOUN
in --> prep --> ADP
the --> det --> DET
autistic --> amod --> ADJ
groups --> pobj --> NOUN
with --> prep --> ADP
R --> compound --> PROPN
allele --> compound --> NOUN
number --> pobj --> NOUN
HbA1c --> attr --> PROPN
, --> punct --> PUNCT
insulin --> compound --> NOUN
resistance --> conj --> NOUN
, --> punct --> PUNCT
proteinuria --> conj --> NOUN
and --> cc --> CCONJ
creatinine --> compound --> NOUN
clearence -->

BarreÃ±a --> appos --> PROPN
; --> punct --> PUNCT
United --> compound --> PROPN
States --> appos --> PROPN
. --> punct --> PUNCT
Several --> amod --> ADJ
from --> prep --> ADP
a --> det --> DET
promoter --> pobj --> NOUN
located --> acl --> VERB
upstream --> advmod --> NOUN
of --> prep --> ADP
exon --> pobj --> NOUN
1 --> nummod --> NUM
, --> punct --> PUNCT
while --> mark --> SCONJ
the --> det --> DET
Î² --> compound --> PROPN
neurexins --> nsubj --> NOUN
are --> advcl --> AUX
tran- --> dep --> VERB
recent --> amod --> ADJ
studies --> nsubj --> NOUN
have --> aux --> AUX
implicated --> ROOT --> VERB
NL --> dobj --> PROPN
in --> prep --> ADP
autistic --> amod --> ADJ
disorders --> pobj --> NOUN
and --> cc --> CCONJ
mental --> amod --> ADJ
retar- --> conj --> NOUN
scribed --> acl --> VERB
from --> prep --> ADP
an --> det --> DET
intragenic --> amod --> ADJ
downstream --> amod --> ADJ
promoter --> pobj --> NOUN
. --> punct --> PUNCT
He --> nsubj --> PRON
had --> ROOT --> VERB
some --> de

and --> cc --> CCONJ
some --> det --> DET
autistic --> amod --> ADJ
fea- --> dep --> CCONJ
healthy --> amod --> ADJ
parents --> conj --> NOUN
( --> punct --> PUNCT
mother --> appos --> NOUN
20 --> nummod --> NUM
years --> appos --> NOUN
, --> punct --> PUNCT
father --> appos --> NOUN
33 --> nummod --> NUM
years --> appos --> NOUN
) --> punct --> PUNCT
.Specifi- --> punct --> PUNCT
Autism --> nsubj --> NOUN
is --> ccomp --> AUX
a --> det --> DET
severe --> amod --> ADJ
neurodevelopmental --> amod --> ADJ
disorder --> attr --> NOUN
, --> punct --> PUNCT
characterized --> acl --> VERB
by --> agent --> ADP
impaired --> pobj --> ADJ
cally --> advmod --> ADV
, --> punct --> PUNCT
duplications --> nsubjpass --> NOUN
were --> auxpass --> AUX
found --> ROOT --> VERB
in --> prep --> ADP
the --> det --> DET
Zappella --> compound --> PROPN
variant --> pobj --> NOUN
, --> punct --> PUNCT
the --> det --> DET
Rett --> compound --> PROPN
variant --> appos --> NOUN
with --> prep --> ADP
verbal --> amod

. --> punct --> PUNCT
The --> det --> DET
syndrome --> nsubjpass --> NOUN
is --> auxpass --> AUX
caused --> ROOT --> VERB
by --> agent --> ADP
No --> det --> DET
dysmorphic --> amod --> ADJ
syndrome --> nsubj --> NOUN
was --> ccomp --> AUX
identifiable --> acomp --> ADJ
; --> punct --> PUNCT
therefore --> advmod --> ADV
she --> nsubj --> PRON
could --> aux --> AUX
be --> ccomp --> AUX
an --> det --> DET
mutations --> attr --> NOUN
in --> prep --> ADP
the --> det --> DET
ESCO2 --> compound --> PROPN
gene --> pobj --> NOUN
, --> punct --> PUNCT
which --> nsubjpass --> PRON
is --> auxpass --> AUX
located --> relcl --> VERB
at --> prep --> ADP
locus --> compound --> NOUN
8p21.1 --> pobj --> NUM
, --> punct --> PUNCT
and --> cc --> CCONJ
example --> conj --> NOUN
of --> prep --> ADP
an --> det --> DET
unusual --> amod --> ADJ
clinical --> amod --> ADJ
picture --> pobj --> NOUN
with --> prep --> ADP
positive --> amod --> ADJ
autistic --> amod --> ADJ
, --> punct --> PUNCT
psychotic --> amod 

) --> punct --> PUNCT
is --> conj --> AUX
an --> det --> DET
X --> nmod --> NOUN
linked --> acl --> VERB
dominant --> amod --> ADJ
inherited --> amod --> VERB
neuronal --> amod --> ADJ
role --> attr --> NOUN
of --> prep --> ADP
anti --> amod --> ADJ
- --> amod --> ADJ
neuronal --> amod --> ADJ
antibodies --> pobj --> NOUN
in --> prep --> ADP
autistic --> amod --> ADJ
syndromes --> compound --> NOUN
migration --> compound --> NOUN
disorder --> pobj --> NOUN
with --> prep --> ADP
high --> amod --> ADJ
perinatal --> compound --> ADJ
lethality --> pobj --> NOUN
of --> prep --> ADP
hemizygous --> amod --> ADJ
males --> pobj --> NOUN
. --> punct --> PUNCT
Further --> amod --> ADV
wide --> amod --> ADJ
- --> punct --> PUNCT
scale --> compound --> NOUN
studies --> nsubj --> NOUN
are --> ROOT --> AUX
necessary --> acomp --> ADJ
to --> aux --> PART
shed --> xcomp --> VERB
light --> dobj --> NOUN
on --> prep --> ADP
the --> det --> DET
rotopia --> pobj --> NOUN
( --> punct --> PUNCT
PVNH --> appo

severe --> amod --> ADJ
ID --> nmod --> NOUN
, --> punct --> PUNCT
developmental --> conj --> ADJ
and --> cc --> CCONJ
speech --> conj --> NOUN
delay --> dobj --> NOUN
with --> prep --> ADP
acquisition --> compound --> NOUN
Autism --> pobj --> PROPN
of --> prep --> ADP
a --> quantmod --> DET
few --> amod --> ADJ
words --> pobj --> NOUN
and --> cc --> CCONJ
secondary --> amod --> ADJ
regression --> conj --> NOUN
with --> prep --> ADP
absence --> pobj --> NOUN
of --> prep --> ADP
speech --> pobj --> NOUN
, --> punct --> PUNCT
attention --> compound --> NOUN
F. --> compound --> PROPN
La --> compound --> PROPN
Carpia1 --> appos --> PROPN
, --> punct --> PUNCT
M. --> compound --> PROPN
Gentile2 --> conj --> PROPN
, --> punct --> PUNCT
C. --> compound --> PROPN
Schwartz3 --> conj --> PROPN
, --> punct --> PUNCT
G. --> compound --> PROPN
Neri1 --> conj --> PROPN
, --> punct --> PUNCT
F. --> compound --> PROPN
Gurrieri1 --> appos --> PROPN
, --> punct --> PUNCT
E. --> compound --> PROPN
Sangio

out --> prt --> ADP
genotype --> compound --> NOUN
distribution --> nsubj --> NOUN
and --> cc --> CCONJ
allele --> compound --> NOUN
frequency --> conj --> NOUN
of --> prep --> ADP
T1128C --> amod --> ADJ
variant --> pobj --> NOUN
between --> prep --> ADP
mice --> pobj --> NOUN
show --> ROOT --> VERB
an --> det --> DET
autistic --> amod --> ADJ
- --> punct --> PUNCT
like --> amod --> ADJ
phenotype --> dobj --> NOUN
and --> cc --> CCONJ
some --> det --> DET
ASD --> conj --> PROPN
( --> punct --> PUNCT
Autism --> compound --> NOUN
Spectrum --> compound --> NOUN
cannabis --> compound --> NOUN
users --> appos --> NOUN
and --> cc --> CCONJ
controls --> conj --> NOUN
( --> punct --> PUNCT
p>0.05).We --> compound --> NOUN
genes --> appos --> NOUN
, --> punct --> PUNCT
which --> nsubjpass --> PRON
have --> aux --> AUX
been --> auxpass --> AUX
suggested --> relcl --> VERB
to --> prep --> ADP
playZ --> pobj --> PROPN
aN --> det --> DET
Fro2l7e7 --> nsubj --> PROPN
in --> prep --> ADP
autistic --

, --> punct --> PUNCT
Villejuif --> conj --> PROPN
, --> punct --> PUNCT
France --> conj --> PROPN
, --> punct --> PUNCT
GTG --> conj --> PROPN
; --> punct --> PUNCT
FISH --> nsubj --> VERB
with --> prep --> ADP
DNA --> compound --> NOUN
samples --> pobj --> NOUN
WC --> compound --> PROPN
P1.20 --> compound --> PROPN
D. --> compound --> PROPN
Guadagnolo1 --> appos --> PROPN
, --> punct --> PUNCT
E. --> compound --> PROPN
Marchionni1 --> conj --> PROPN
, --> punct --> PUNCT
F. --> compound --> PROPN
Di --> compound --> PROPN
Palma1 --> conj --> PROPN
, --> punct --> PUNCT
An --> det --> DET
interesting --> amod --> ADJ
case --> appos --> NOUN
: --> punct --> PUNCT
30 --> nummod --> NUM
- --> punct --> PUNCT
year --> npadvmod --> NOUN
- --> punct --> PUNCT
old --> amod --> ADJ
male --> amod --> ADJ
patient --> appos --> NOUN
with --> prep --> ADP
myotonic --> amod --> ADJ
dystrophy --> compound --> NOUN
typeI --> pobj --> PROPN
has --> aux --> AUX
got --> ROOT --> VERB
normal --> amod --

delay --> dobj --> NOUN
with --> prep --> ADP
autistic --> amod --> ADJ
traits --> pobj --> NOUN
. --> punct --> PUNCT
Patologic --> amod --> ADJ
result --> nsubjpass --> NOUN
was --> auxpass --> AUX
obtained --> ROOT --> VERB
in --> prep --> ADP
13,1 --> nummod --> NUM
% --> pobj --> NOUN
of --> prep --> ADP
patients --> pobj --> NOUN
, --> punct --> PUNCT
without --> prep --> ADP
stadistical --> pobj --> NOUN
di- --> dep --> X
The --> det --> DET
aCGH --> nmod --> NOUN
( --> punct --> PUNCT
Nimblegen --> nsubj --> PROPN
, --> punct --> PUNCT
720 --> nummod --> NUM
K --> nmod --> PROPN
) --> punct --> PUNCT
performR --> compound --> PROPN
ed --> appos --> PROPN
defined --> ROOT --> VERB
a --> det --> DET
60Kb --> nummod --> NUM
deletion --> dobj --> NOUN
on --> prep --> ADP
ferences --> pobj --> NOUN
between --> prep --> ADP
sexs --> pobj --> PROPN
. --> punct --> PUNCT
The --> det --> DET
genetics --> nmod --> NOUN
diagnostic --> amod --> ADJ
yield --> nsubj --> NOUN
obtained --> acl

, --> punct --> PUNCT
hypertelorism --> conj --> NOUN
, --> punct --> PUNCT
fullcheeks --> conj --> PROPN
, --> punct --> PUNCT
enlargedears --> appos --> NOUN
. --> punct --> PUNCT
Thechild --> compound --> PROPN
Institute --> appos --> PROPN
, --> punct --> PUNCT
Departments --> appos --> PROPN
of --> prep --> ADP
Neurology --> pobj --> PROPN
and --> cc --> CCONJ
Pediatrics --> conj --> PROPN
, --> punct --> PUNCT
Johns --> compound --> PROPN
Hopkins --> compound --> PROPN
exhibitsautisticbehavior --> appos --> NOUN
. --> punct --> PUNCT
Genetictestingincludeddetermination --> compound --> PROPN
SchoolofMedicine --> nmod --> PROPN
, --> punct --> PUNCT
Baltimore --> npadvmod --> PROPN
, --> punct --> PUNCT
MD --> conj --> PROPN
, --> punct --> PUNCT
USA,26GCSSeqOIA --> conj --> PROPN
, --> punct --> PUNCT
Paris --> conj --> PROPN
, --> punct --> PUNCT
France --> conj --> PROPN
, --> punct --> PUNCT
ofthekaryotypeoftheprobandandparentsbyseveralmethods --> appos --> NOUN
: --> punct --

, --> punct --> PUNCT
karyotype --> compound --> PROPN
M. --> compound --> PROPN
Seyfarth1 --> npadvmod --> PROPN
, --> punct --> PUNCT
A. --> compound --> PROPN
Roth --> compound --> PROPN
- --> punct --> PUNCT
Isigkeit2 --> appos --> PROPN
, --> punct --> PUNCT
L. --> compound --> PROPN
Hasselbach3 --> appos --> PROPN
, --> punct --> PUNCT
P. --> nsubj --> PROPN
Schmucker2 --> relcl --> VERB
in --> prep --> ADP
folic --> nmod --> NOUN
acid --> nmod --> ADJ
deficient --> amod --> ADJ
medium --> pobj --> NOUN
, --> punct --> PUNCT
and --> cc --> CCONJ
molecular --> amod --> ADJ
test --> conj --> NOUN
for --> prep --> ADP
the --> det --> DET
FRAXAmutation --> pobj --> NOUN
. --> punct --> PUNCT
Although --> mark --> SCONJ
the --> det --> DET
diagnosis --> nsubj --> NOUN
urements --> nsubj --> NOUN
during --> prep --> ADP
cardiac --> amod --> ADJ
surgery --> pobj --> NOUN
with --> prep --> ADP
CPB --> pobj --> PROPN
showed --> ROOT --> VERB
a --> det --> DET
high --> amod --> ADJ
inter -

same --> amod --> ADJ
girl --> nsubj --> NOUN
was --> ROOT --> AUX
re- --> advmod --> ADV
rangements --> acomp --> NOUN
that --> nsubj --> SCONJ
change --> compound --> NOUN
FNa --> dep --> PROPN
, --> punct --> PUNCT
such --> amod --> ADJ
as --> prep --> ADP
pericentric --> amod --> ADJ
inversions --> pobj --> NOUN
, --> punct --> PUNCT
tandem --> advcl --> VERB
ferred --> dobj --> ADJ
to --> prep --> ADP
our --> poss --> PRON
department --> pobj --> NOUN
because --> prep --> SCONJ
of --> pcomp --> ADP
mental --> amod --> ADJ
retardation --> pobj --> NOUN
and --> cc --> CCONJ
autistiform --> conj --> ADJ
fusions --> conj --> NOUN
and --> cc --> CCONJ
centromere --> amod --> ADJ
shifts --> conj --> NOUN
. --> punct --> PUNCT
Many --> amod --> ADJ
factors --> nsubjpass --> NOUN
like --> prep --> ADP
in --> prep --> ADP
LMNA --> compound --> PROPN
gene --> compound --> NOUN
mtDNA --> compound --> NOUN
background --> pobj --> NOUN
, --> punct --> PUNCT
heteroplasmy --> nmod --> PROPN
of -

I. --> compound --> PROPN
Huerta1 --> appos --> PROPN
, --> punct --> PUNCT
J. --> compound --> PROPN
Warsaw --> conj --> PROPN
, --> punct --> PUNCT
Poland --> appos --> PROPN
, --> punct --> PUNCT
2MRC --> compound --> NUM
Molecular --> compound --> PROPN
Haematology --> compound --> PROPN
Unit --> conj --> PROPN
, --> punct --> PUNCT
Weatherall --> compound --> PROPN
Institute --> appos --> PROPN
of --> prep --> ADP
RamÃ­rez1 --> pobj --> PROPN
, --> punct --> PUNCT
B. --> compound --> PROPN
Criado3 --> conj --> PROPN
, --> punct --> PUNCT
P. --> compound --> PROPN
Flores4 --> appos --> PROPN
, --> punct --> PUNCT
A. --> compound --> PROPN
GonzÃ¡lez5 --> conj --> PROPN
; --> punct --> PUNCT
Molecular --> compound --> PROPN
Medicine --> conj --> PROPN
, --> punct --> PUNCT
John --> compound --> PROPN
Radcliffe --> compound --> PROPN
Hospital --> conj --> PROPN
, --> punct --> PUNCT
Oxford --> conj --> PROPN
, --> punct --> PUNCT
United --> compound --> PROPN
Kingdom --> conj --> PROP

) --> punct --> PUNCT
T --> compound --> NOUN
cells --> conj --> NOUN
( --> punct --> PUNCT
59%).Since --> nummod --> NUM
no --> det --> DET
genetic --> amod --> ADJ
studies --> conj --> NOUN
have --> aux --> AUX
addressed --> ccomp --> VERB
this --> det --> DET
hypothesis --> dobj --> NOUN
, --> punct --> PUNCT
We --> nsubj --> PRON
further --> advmod --> ADV
evaluated --> ROOT --> VERB
the --> det --> DET
presence --> dobj --> NOUN
of --> prep --> ADP
autistic --> amod --> ADJ
traits --> pobj --> NOUN
in --> prep --> ADP
parents --> pobj --> NOUN
, --> punct --> PUNCT
using --> advcl --> VERB
appro- --> dobj --> PROPN
the --> det --> DET
current --> amod --> ADJ
study --> nsubj --> NOUN
aimed --> acl --> VERB
to --> aux --> PART
investigate --> xcomp --> VERB
whether --> mark --> SCONJ
polymorphisms --> nsubj --> NOUN
in --> prep --> ADP
GABRG2 --> compound --> PROPN
priate --> compound --> NOUN
questionnaires --> pobj --> NOUN
, --> punct --> PUNCT
and --> cc --> CCONJ
the --> det -

© --> punct --> PROPN
nÃ --> dep --> PROPN
© --> compound --> PROPN
tique --> compound --> NOUN
- --> punct --> PUNCT
Histologie --> appos --> PROPN
â --> prep --> ADP
BDR --> pobj --> PROPN
and --> cc --> CCONJ
CECOS --> conj --> PROPN
, --> punct --> PUNCT
CHU --> appos --> PROPN
, --> punct --> PUNCT
Besancon --> appos --> PROPN
, --> punct --> PUNCT
seen --> acl --> VERB
in --> prep --> ADP
ASD --> pobj --> PROPN
using --> acl --> VERB
animal --> compound --> NOUN
models --> dobj --> NOUN
, --> punct --> PUNCT
several --> amod --> ADJ
different --> amod --> ADJ
alterations --> nsubj --> NOUN
in --> prep --> ADP
pro- --> pobj --> PROPN
France --> pobj --> PROPN
, --> punct --> PUNCT
3Service --> compound --> PROPN
de --> nmod --> X
GÃ --> compound --> PROPN
© --> punct --> PROPN
nÃ --> dep --> PROPN
© --> compound --> PROPN
tique --> conj --> NOUN
MÃ --> nmod --> PROPN
© --> nmod --> PROPN
dicale --> amod --> ADJ
CHUV --> dobj --> PROPN
, --> punct --> PUNCT
Lausanne --> appos -->

has --> aux --> AUX
been --> auxpass --> AUX
implicated --> conj --> VERB
in --> prep --> ADP
the --> det --> DET
pathogenesis --> pobj --> NOUN
of --> prep --> ADP
se- --> pobj --> NOUN
involved --> acl --> VERB
in --> prep --> ADP
synaptic --> amod --> ADJ
plasticity --> pobj --> NOUN
, --> punct --> PUNCT
are --> auxpass --> AUX
mutated --> ccomp --> VERB
in --> prep --> ADP
a --> det --> DET
few --> amod --> ADJ
number --> pobj --> NOUN
of --> prep --> ADP
autistic --> amod --> ADJ
pa- --> nmod --> PROPN
veral --> amod --> ADJ
neurodegenerative --> amod --> ADJ
disorders --> pobj --> NOUN
, --> punct --> PUNCT
such --> amod --> ADJ
as --> prep --> ADP
Machado --> compound --> PROPN
- --> punct --> PUNCT
Joseph --> compound --> PROPN
disease --> pobj --> NOUN
( --> punct --> PUNCT
MJD --> appos --> PROPN
) --> punct --> PUNCT
, --> punct --> PUNCT
tients --> conj --> NOUN
, --> punct --> PUNCT
and --> cc --> CCONJ
are --> conj --> AUX
hemizygous --> acomp --> ADJ
in --> prep --> ADP

a --> det --> DET
likely --> amod --> ADJ
candidate --> attr --> NOUN
in --> prep --> ADP
autistic --> amod --> ADJ
dis- --> amod --> NOUN
ing --> compound --> NOUN
microarray --> pobj --> NOUN
comprehending --> acl --> VERB
9 --> nummod --> NUM
BBS --> compound --> PROPN
genes --> dobj --> NOUN
( --> punct --> PUNCT
BB --> compound --> PROPN
S1C1 --> compound --> PROPN
  --> dep --> SPACE
activating --> nsubjpass --> VERB
mutations --> dobj --> NOUN
have --> aux --> AUX
been --> auxpass --> AUX
associated --> ROOT --> VERB
with --> prep --> ADP
whole --> amod --> ADJ
genomes --> pobj --> NOUN
of --> prep --> ADP
a --> det --> DET
further --> amod --> ADJ
336 --> nummod --> NUM
individuals --> pobj --> NOUN
( --> punct --> PUNCT
including --> prep --> VERB
115 --> nummod --> NUM
megalencephaly --> pobj --> PROPN
and --> cc --> CCONJ
RHOA --> compound --> PROPN
mosaic --> nmod --> ADJ
postzygotic --> amod --> ADJ
inactivat- --> compound --> NOUN
patients --> conj --> NOUN
) --> punct --

macrocephaly/ --> dep --> NUM
autistism --> compound --> NOUN
association --> nsubjpass --> NOUN
has --> aux --> AUX
been --> auxpass --> AUX
recognised --> ROOT --> VERB
in --> prep --> ADP
recent --> amod --> ADJ
years --> pobj --> NOUN
, --> punct --> PUNCT
striction --> advcl --> ADJ
on --> prep --> ADP
the --> det --> DET
first --> amod --> ADJ
phalanx --> pobj --> NOUN
of --> prep --> ADP
the --> det --> DET
second --> amod --> ADJ
left --> amod --> ADJ
finger --> pobj --> NOUN
. --> punct --> PUNCT
Thus --> advmod --> ADV
, --> punct --> PUNCT
any --> det --> DET
mole- --> dep --> NOUN
and --> cc --> CCONJ
expansion --> conj --> NOUN
of --> prep --> ADP
a --> det --> DET
novel --> amod --> ADJ
ATTTC --> compound --> NOUN
pentamer --> pobj --> NOUN
within --> prep --> ADP
the --> det --> DET
cular --> amod --> ADJ
manipulation --> pobj --> NOUN
eliciting --> csubj --> VERB
an --> det --> DET
increase --> dobj --> NOUN
in --> prep --> ADP
CHD2 --> nmod --> PROPN
/ --> punct --> SY

de --> punct --> PROPN
gÃ --> dep --> SPACE
© --> punct --> PROPN
nÃ --> nmod --> PROPN
© --> punct --> PROPN
tique --> nmod --> PROPN
et --> nmod --> PROPN
Centre --> nmod --> PROPN
de --> nmod --> X
RÃ --> pobj --> PROPN
© --> npadvmod --> PROPN
fÃ --> punct --> PROPN
© --> oprd --> PROPN
rence --> nmod --> NOUN
Anomalies --> nmod --> NOUN
du --> nmod --> X
dÃ --> nmod --> PROPN
© --> nmod --> NUM
veloppement --> nmod --> NOUN
et --> prep --> X
Syndromes --> compound --> PROPN
spectrum --> compound --> NOUN
disorder --> pobj --> NOUN
without --> prep --> ADP
associated --> amod --> ADJ
dysmorphism --> pobj --> NOUN
or --> cc --> CCONJ
malformation.involves --> nummod --> NOUN
two --> nummod --> NUM
brothers --> conj --> NOUN
, --> punct --> PUNCT
one --> appos --> NUM
presenting --> acl --> VERB
autistic --> amod --> ADJ
spectrum --> dobj --> NOUN
Soysal --> oprd --> PROPN
: --> punct --> PUNCT
None.disorder --> dep --> PRON
and --> cc --> CCONJ
intellectual --> amod --> ADJ
disabili

( --> punct --> PUNCT
high --> amod --> ADJ
forehead --> appos --> NOUN
, --> punct --> PUNCT
sparse --> amod --> ADJ
eyebrows --> conj --> NOUN
, --> punct --> PUNCT
short --> amod --> ADJ
palpebral --> amod --> ADJ
fissures --> conj --> NOUN
, --> punct --> PUNCT
hy- --> amod --> ADJ
psychiatric --> amod --> ADJ
diseases --> conj --> NOUN
observed --> acl --> VERB
in --> prep --> ADP
the --> det --> DET
family --> pobj --> NOUN
. --> punct --> PUNCT
PDPR --> nsubj --> NOUN
has --> ccomp --> VERB
Altogether --> advmod --> ADV
, --> punct --> PUNCT
our --> poss --> PRON
results --> nsubj --> NOUN
indicate --> ROOT --> VERB
that --> mark --> SCONJ
rare --> amod --> ADJ
coding --> amod --> VERB
variants --> nsubjpass --> NOUN
in --> prep --> ADP
not --> neg --> PART
been --> auxpass --> AUX
associated --> ccomp --> VERB
with --> prep --> ADP
any --> det --> DET
disease;It --> pobj --> NOUN
is --> auxpass --> AUX
ought --> ccomp --> AUX
to --> prep --> ADP
beAbstractsfromthe51stEuropeanSo

, --> punct --> PUNCT
P. --> compound --> PROPN
Amouyel4,5,6 --> appos --> PROPN
, --> punct --> PUNCT
H. --> compound --> PROPN
Munter9 --> conj --> PROPN
, --> punct --> PUNCT
G. --> compound --> PROPN
Bourque9 --> appos --> PROPN
, --> punct --> PUNCT
M. --> compound --> PROPN
Lathrop9 --> conj --> PROPN
, --> punct --> PUNCT
T. --> compound --> PROPN
FrÃ --> appos --> PROPN
© --> punct --> PROPN
bourg10 --> appos --> NOUN
, --> punct --> PUNCT
R. --> compound --> PROPN
Redon11,12 --> appos --> PROPN
, --> punct --> PUNCT
1Baylor --> compound --> PROPN
College --> conj --> PROPN
of --> prep --> ADP
Medicine --> pobj --> PROPN
/ --> punct --> SYM
Jan --> conj --> PROPN
and --> cc --> CCONJ
Dan --> compound --> PROPN
Neurological --> compound --> PROPN
L. --> conj --> PROPN
Letenneur13,14 --> dobj --> PROPN
, --> punct --> PUNCT
J. --> compound --> PROPN
Dartigues13,14 --> dobj --> PROPN
, --> punct --> PUNCT
O. --> compound --> PROPN
Martinaud15 --> conj --> PROPN
, --> punct --> PUN

. --> punct --> PUNCT
To --> aux --> PART
investigate --> csubj --> VERB
stress --> compound --> NOUN
effects --> dobj --> NOUN
on --> prep --> ADP
phenotype --> pobj --> NOUN
included --> ccomp --> VERB
intellectual --> amod --> ADJ
disability --> dobj --> NOUN
, --> punct --> PUNCT
facial --> amod --> ADJ
dys- --> dep --> ADJ
autistic --> amod --> ADJ
phenotype --> compound --> NOUN
development --> appos --> NOUN
, --> punct --> PUNCT
we --> nsubj --> PRON
used --> ROOT --> VERB
a --> det --> DET
Tsc2 --> nummod --> NUM
hetero- --> dep --> NOUN
morphisms --> dobj --> NOUN
, --> punct --> PUNCT
microcephaly --> conj --> NOUN
and --> cc --> CCONJ
anomalies --> conj --> NOUN
of --> prep --> ADP
the --> det --> DET
hands --> pobj --> NOUN
as --> prep --> ADP
zygous --> amod --> ADJ
KO --> compound --> PROPN
mouse --> compound --> NOUN
model --> pobj --> NOUN
. --> punct --> PUNCT
To --> aux --> PART
investigate --> csubj --> VERB
stress --> compound --> NOUN
effects --> dobj --> NOUN
on 

and --> cc --> CCONJ
molecular --> amod --> ADJ
genetic --> amod --> ADJ
study --> appos --> NOUN
of --> prep --> ADP
the --> det --> DET
family --> pobj --> NOUN
with --> prep --> ADP
a --> det --> DET
male --> amod --> ADJ
child --> pobj --> NOUN
who --> nsubj --> PRON
had --> relcl --> VERB
mental --> amod --> ADJ
Introduction --> dobj --> NOUN
: --> punct --> PUNCT
The --> det --> DET
clinical- --> nmod --> ADJ
and --> cc --> CCONJ
genetic --> conj --> ADJ
heterogeneity --> conj --> NOUN
retardation --> appos --> NOUN
and --> cc --> CCONJ
autistic --> amod --> ADJ
features --> conj --> NOUN
revealed --> ROOT --> VERB
an --> det --> DET
abnormal --> nsubj --> ADJ
of --> prep --> ADP
neurocognitive --> amod --> ADJ
disorders --> pobj --> NOUN
( --> punct --> PUNCT
NCD --> appos --> PROPN
) --> punct --> PUNCT
poses --> ccomp --> VERB
a --> det --> DET
signiï¬cant --> compound --> NOUN
chromosome --> dobj --> NOUN
13 --> nummod --> NUM
bearing --> advcl --> VERB
an --> det --> DET
en

or --> cc --> CCONJ
mothers --> conj --> NOUN
and --> cc --> CCONJ
fathers.and --> cc --> CCONJ
assume --> conj --> VERB
the --> det --> DET
parental --> amod --> ADJ
gametes --> nsubj --> NOUN
present --> ccomp --> ADJ
at --> prep --> ADP
ascertainment --> pobj --> NOUN
are --> auxpass --> AUX
It --> nsubj --> PRON
seems --> ccomp --> VERB
to --> aux --> PART
exist --> xcomp --> VERB
a --> det --> DET
preferential --> amod --> ADJ
12/12 --> nummod --> NUM
genotype --> dobj --> NOUN
in --> prep --> ADP
autistic --> amod --> ADJ
individuals --> pobj --> NOUN
representative --> amod --> ADJ
of --> prep --> ADP
those --> pobj --> PRON
present --> amod --> ADJ
before --> prep --> ADP
fertilisation --> pobj --> NOUN
, --> punct --> PUNCT
each --> det --> DET
allele --> npadvmod --> NOUN
having --> advcl --> VERB
with --> prep --> ADP
mothers --> pobj --> NOUN
carrying --> acl --> VERB
the --> det --> DET
same --> amod --> ADJ
genotype --> dobj --> NOUN
. --> punct --> PUNCT
The --> det --> 

quite --> advmod --> ADV
common --> acomp --> ADJ
, --> punct --> PUNCT
but --> cc --> CCONJ
the --> det --> DET
additional --> amod --> ADJ
autistic --> amod --> ADJ
disorder --> nsubj --> NOUN
is --> conj --> AUX
4p --> attr --> NUM
and --> cc --> CCONJ
10q --> conj --> NOUN
. --> punct --> PUNCT
However --> advmod --> ADV
, --> punct --> PUNCT
different --> amod --> ADJ
chromosomal --> amod --> ADJ
disorders --> nsubj --> NOUN
a --> det --> DET
balanced --> amod --> ADJ
t(4;10)(p15.1 --> appos --> NOUN
; --> punct --> PUNCT
q --> det --> X
26.3 --> appos --> NUM
) --> punct --> PUNCT
and --> cc --> CCONJ
three --> nummod --> NUM
family --> compound --> NOUN
members --> nsubj --> NOUN
( --> punct --> PUNCT
two --> nummod --> NUM
account --> appos --> NOUN
for --> prep --> ADP
less --> amod --> ADJ
than --> quantmod --> ADP
5 --> nummod --> NUM
% --> pobj --> NOUN
of --> prep --> ADP
all --> det --> DET
cases --> pobj --> NOUN
of --> prep --> ADP
autism --> pobj --> NOUN
, --> punct -

Rassu2 --> appos --> PROPN
, --> punct --> PUNCT
L. --> compound --> PROPN
Boccone3 --> conj --> PROPN
, --> punct --> PUNCT
D. --> compound --> PROPN
Gasperini3 --> appos --> PROPN
, --> punct --> PUNCT
A. --> compound --> PROPN
Cao1 --> appos --> PROPN
, --> punct --> PUNCT
L. --> compound --> PROPN
Crisponi1 --> conj --> PROPN
; --> punct --> PUNCT
and --> cc --> CCONJ
post --> amod --> ADJ
- --> amod --> ADJ
meiotic --> amod --> ADJ
cells --> conj --> NOUN
from --> prep --> ADP
cryo --> compound --> NOUN
- --> punct --> PUNCT
sections --> pobj --> NOUN
of --> prep --> ADP
Rhesus --> compound --> PROPN
macaque --> pobj --> NOUN
testis --> npadvmod --> PROPN
1Istituto --> npadvmod --> PROPN
di --> ROOT --> PROPN
Neurogenetica --> compound --> PROPN
e --> compound --> PROPN
Neurofarmacologia --> compound --> PROPN
INN --> compound --> PROPN
- --> punct --> PUNCT
CNR --> nsubj --> PROPN
, --> punct --> PUNCT
Monserrato --> appos --> PROPN
( --> punct --> PUNCT
CA --> appos --> PROPN
) 

syndrome --> pobj --> NOUN
, --> punct --> PUNCT
Heller --> compound --> PROPN
syn- --> compound --> NOUN
anomaly --> appos --> NOUN
. --> punct --> PUNCT
Whole --> amod --> ADJ
exome --> compound --> NOUN
sequencing --> ROOT --> NOUN
of --> prep --> ADP
ABSTRACTS --> compound --> NOUN
POSTERS --> pobj --> NOUN
Back --> advmod --> ADV
to --> prep --> ADP
index --> compound --> NOUN
gDNA --> pobj --> NOUN
from --> prep --> ADP
the --> det --> DET
boy --> pobj --> NOUN
and --> cc --> CCONJ
his --> poss --> PRON
parents --> nsubj --> NOUN
identified --> conj --> VERB
a --> det --> DET
de --> nmod --> X
novo --> nmod --> PROPN
, --> punct --> PUNCT
novel --> amod --> ADJ
variant --> dobj --> NOUN
in --> prep --> ADP
250 --> nummod --> NUM
K --> compound --> PROPN
Nsp --> compound --> PROPN
chips --> pobj --> NOUN
, --> punct --> PUNCT
exome --> compound --> NOUN
capture --> dobj --> NOUN
using --> acl --> VERB
NimbleGen --> dobj --> PROPN
SeqCap --> compound --> PROPN
EZ --> compound --> P

895 --> appos --> NUM
from --> prep --> ADP
Germany --> pobj --> PROPN
, --> punct --> PUNCT
419 --> appos --> NUM
from --> prep --> ADP
Norway --> pobj --> PROPN
) --> punct --> PUNCT
and --> cc --> CCONJ
2700 --> nummod --> NUM
controls --> nsubj --> VERB
nical --> amod --> ADJ
assessment --> dobj --> NOUN
of --> prep --> ADP
all --> det --> DET
autistic --> amod --> ADJ
and --> cc --> CCONJ
cognitively --> advmod --> ADV
impaired --> conj --> ADJ
children --> pobj --> NOUN
, --> punct --> PUNCT
as --> advmod --> ADV
well --> amod --> ADV
( --> punct --> PUNCT
2366 --> nummod --> NUM
/ --> punct --> SYM
339).She --> appos --> NOUN
subsequently --> advmod --> ADV
showed --> conj --> VERB
nose --> dobj --> NOUN
, --> punct --> PUNCT
small --> amod --> ADJ
mouth --> appos --> NOUN
with --> prep --> ADP
high --> amod --> ADJ
arched --> amod --> ADJ
palate --> amod --> NOUN
, --> punct --> PUNCT
crowded --> amod --> ADJ
teeth --> pobj --> NOUN
, --> punct --> PUNCT
bilateral --> amod --> 

CACNA1A --> compound --> PROPN
gene --> nmod --> NOUN
tedsynaptic --> amod --> ADJ
dysfunction --> pobj --> NOUN
, --> punct --> PUNCT
as --> prep --> ADP
a --> det --> DET
key --> amod --> ADJ
mechanism --> pobj --> NOUN
in --> prep --> ADP
the --> det --> DET
patho- --> compound --> NOUN
NM_001127222.1 --> pobj --> NOUN
: --> punct --> PUNCT
c.2259_2260insCG;(p --> dep --> X
. --> punct --> PUNCT
A754Rfs*6)and --> cc --> CCONJ
physiology --> nsubj --> NOUN
of --> prep --> ADP
ASD.There --> pobj --> ADV
are --> ROOT --> AUX
an --> det --> DET
autistic --> amod --> ADJ
- --> punct --> PUNCT
like --> amod --> ADJ
Hospitalario --> compound --> PROPN
Universitario --> compound --> PROPN
de --> compound --> PROPN
Albacete --> attr --> PROPN
. --> punct --> PUNCT
, --> punct --> PUNCT
Albacete --> npadvmod --> PROPN
, --> punct --> PUNCT
Spain --> conj --> PROPN
, --> punct --> PUNCT
2NeurologÃ­a --> nummod --> NUM
manner --> conj --> NOUN
and --> cc --> CCONJ
phases --> conj --> NOUN
of --

feeding --> amod --> VERB
difï¬culties --> conj --> NOUN
, --> punct --> PUNCT
autistic --> amod --> ADJ
behavior --> conj --> NOUN
, --> punct --> PUNCT
hearing --> acl --> VERB
oraspartofasyndrome --> dobj --> PROPN
. --> punct --> PUNCT
FGFR2,FGFR3,FGFR1,TWIST1 --> ROOT --> NUM
impairment --> nmod --> NOUN
, --> punct --> PUNCT
short --> amod --> ADJ
stature --> nsubj --> NOUN
, --> punct --> PUNCT
microcephaly --> nmod --> NOUN
, --> punct --> PUNCT
and --> cc --> CCONJ
facial --> amod --> ADJ
dys- --> nmod --> NOUN
and --> cc --> CCONJ
EFNB --> conj --> PROPN
genes --> appos --> NOUN
are --> ROOT --> AUX
major --> amod --> ADJ
causative --> amod --> ADJ
genes --> attr --> NOUN
of --> prep --> ADP
genetic --> amod --> ADJ
morphisms --> pobj --> NOUN
[ --> punct --> PUNCT
Menke --> nmod --> PROPN
et --> nmod --> X
al --> dep --> PROPN
. --> appos --> PROPN
, --> punct --> PUNCT
2016 --> appos --> NUM
; --> punct --> PUNCT
Menke --> compound --> PROPN
et --> compound --> X
al --> np

In [131]:
pattern_1 = [{"TEXT": {"REGEX": "^[Aa]utistic$"}},
             {'DEP':'amod', 'OP':"?"},
             {'DEP':'amod', 'OP':"?"},
             {'DEP':'amod', 'OP':"?"},
             {"POS": "NOUN"}]

In [132]:
# Matcher class object 
matcher = Matcher(nlp.vocab) 
matcher.add("matching_1", [pattern_1]) 

autistic =[]

matches = matcher(person_doc) 
for match_id, start, end in matches:
    string_id = nlp.vocab.strings[match_id]  # Get string representation
    span = person_doc[start:end]  # The matched span
    autistic.append(span.text)

In [159]:
autistic_no_dups = list(set(autistic))
with open('..\\counts\\ESHG\\autism.csv', "w", encoding='ISO-8859-1') as outfile:
        write = csv.writer(outfile)
        for item in autistic_no_dups:
            write.writerow([item])



In [157]:
for item in autistic_no_dups:
    print(item)

autistic features
autistic behaviour.individuals
autistic boy
autistic mole-
autistic Basque sample
autistic traits
autistic behaviuor
autistic behaviour.delayandbehavioraldisorder/ASDtraits,associatedwith
autistic dis- ing
autistic regression
autistic patientâs
autistic phenotype
autistic variants
autistic Results
autistic trait
autistic features,358 aggressiveness
autistic behaviour
autistic behaviour.8,9
autistic subjects
autistic variome
autistic image
autistic disorders
autistic population
autistic gene
autistic spectrum
autistic syndrome
autistic ethnic populations
autistic girl
autistic traits.nal features
autistic behavior
autistic populations
autistic indi- variability
autistic patientsâ
autistic group
autistic microcephaly
autistic brain
autistic patients
autistic child-
autistic cases
autistic symptoms
autistic patient
autistic syndromes
autistic beha- disorder
autistic groups
autistic behaviour.1University
autistic unrelated patients
autistic features.feeding
autistic v

In [138]:
pattern_2 = [{"POS": "NOUN"},
             {'LOWER': 'with'},
             {'DEP':'amod', 'OP':"?"},
             {'DEP':'amod', 'OP':"?"},
             {'DEP':'amod', 'OP':"?"},
             {"TEXT": {"REGEX": "^[Aa]utism$"}}]

In [139]:
# Matcher class object 
matcher = Matcher(nlp.vocab) 
matcher.add("matching_1", [pattern_2]) 

person_with =[]
matches = matcher(person_doc) 
for match_id, start, end in matches:
    string_id = nlp.vocab.strings[match_id]  # Get string representation
    span = person_doc[start:end]  # The matched span
    person_with.append(span.text)

In [160]:
person_with_no_dups = list(set(person_with))
with open('..\\counts\\ESHG\\person_with.csv', "w", encoding='ISO-8859-1') as outfile:
        write = csv.writer(outfile)
        for item in person_with_no_dups:
            write.writerow([item])

