In [10]:
from stanfordnlp.server import CoreNLPClient

# Example text that the following cells annotate.
text = "Chris Manning is a nice person. Chris wrote a simple sentence. He also gives oranges to people."

# Echo the input under a small banner (separator, title, blank line).
print('---', 'input text', '', sep='\n')
print(text)

# Announce the client setup step.
print('---', 'starting up Java Stanford CoreNLP Server...', sep='\n')

# Build the client against a fixed remote endpoint and request the annotators
# whose results are read later (parse tree, dependencies, NER, coref, ...).
# NOTE(review): start_server=False presumably means the client attaches to an
# already-running server instead of launching one locally - confirm.
client = CoreNLPClient(
    endpoint='http://140.109.19.190:9000',
    start_server=False,
    annotators=[
        'tokenize',
        'ssplit',
        'pos',
        'lemma',
        'ner',
        'parse',
        'depparse',
        'coref',
    ],
)
    

---
input text

Chris Manning is a nice person. Chris wrote a simple sentence. He also gives oranges to people.
---
starting up Java Stanford CoreNLP Server...


In [11]:
# Submit the request to the server: annotate `text` using the client created
# in the setup cell.
ann = client.annotate(text)

# Keep the first sentence of the annotation; later cells read its parse tree,
# dependencies, tokens and mentions from this object.
sentence = ann.sentence[0]

In [12]:
# Constituency parse of the first sentence: print a banner, keep the tree in
# `constituency_parse` for the following cells, then show it.
print('---', 'constituency parse of first sentence', sep='\n')
constituency_parse = sentence.parseTree
print(constituency_parse)

---
constituency parse of first sentence
child {
  child {
    child {
      child {
        value: "Chris"
      }
      value: "NNP"
      score: -9.281864166259766
    }
    child {
      child {
        value: "Manning"
      }
      value: "NNP"
      score: -11.83283805847168
    }
    value: "NP"
    score: -23.60647964477539
  }
  child {
    child {
      child {
        value: "is"
      }
      value: "VBZ"
      score: -0.14657023549079895
    }
    child {
      child {
        child {
          value: "a"
        }
        value: "DT"
        score: -1.4235451221466064
      }
      child {
        child {
          value: "nice"
        }
        value: "JJ"
        score: -8.306026458740234
      }
      child {
        child {
          value: "person"
        }
        value: "NN"
        score: -7.468829154968262
      }
      value: "NP"
      score: -19.973678588867188
    }
    value: "VP"
    score: -25.309616088867188
  }
  child {
    child {
      value: "."
 

In [13]:
# Show the first child of the parse tree stored in `constituency_parse`.
print('---', 'first subtree of constituency parse', sep='\n')
print(constituency_parse.child[0])

---
first subtree of constituency parse
child {
  child {
    child {
      value: "Chris"
    }
    value: "NNP"
    score: -9.281864166259766
  }
  child {
    child {
      value: "Manning"
    }
    value: "NNP"
    score: -11.83283805847168
  }
  value: "NP"
  score: -23.60647964477539
}
child {
  child {
    child {
      value: "is"
    }
    value: "VBZ"
    score: -0.14657023549079895
  }
  child {
    child {
      child {
        value: "a"
      }
      value: "DT"
      score: -1.4235451221466064
    }
    child {
      child {
        value: "nice"
      }
      value: "JJ"
      score: -8.306026458740234
    }
    child {
      child {
        value: "person"
      }
      value: "NN"
      score: -7.468829154968262
    }
    value: "NP"
    score: -19.973678588867188
  }
  value: "VP"
  score: -25.309616088867188
}
child {
  child {
    value: "."
  }
  value: "."
  score: -0.003481106134131551
}
value: "S"
score: -50.052059173583984



In [14]:
# Show only the label (`value`) of the first child of the parse tree.
print('---', 'value of first subtree of constituency parse', sep='\n')
print(constituency_parse.child[0].value)

---
value of first subtree of constituency parse
S


In [15]:
# Dependency parse of the first sentence: print a banner, keep the
# basic-dependencies graph in `dependency_parse`, then show it.
print('---', 'dependency parse of first sentence', sep='\n')
dependency_parse = sentence.basicDependencies
print(dependency_parse)

---
dependency parse of first sentence
node {
  sentenceIndex: 0
  index: 1
}
node {
  sentenceIndex: 0
  index: 2
}
node {
  sentenceIndex: 0
  index: 3
}
node {
  sentenceIndex: 0
  index: 4
}
node {
  sentenceIndex: 0
  index: 5
}
node {
  sentenceIndex: 0
  index: 6
}
node {
  sentenceIndex: 0
  index: 7
}
edge {
  source: 2
  target: 1
  dep: "compound"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: UniversalEnglish
}
edge {
  source: 6
  target: 2
  dep: "nsubj"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: UniversalEnglish
}
edge {
  source: 6
  target: 3
  dep: "cop"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: UniversalEnglish
}
edge {
  source: 6
  target: 4
  dep: "det"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: UniversalEnglish
}
edge {
  source: 6
  target: 5
  dep: "amod"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: UniversalEnglish
}
edge {
  source: 6
  target: 7
  dep: "punct"
  isExtra: fal

In [17]:
# First token of the first sentence: print a banner, keep the token in
# `token` for the cells that read its tags, then show it.
print('---', 'first token of first sentence', sep='\n')
token = sentence.token[0]
print(token)

---
first token of first sentence
word: "Chris"
pos: "NNP"
value: "Chris"
before: ""
after: " "
originalText: "Chris"
ner: "PERSON"
lemma: "Chris"
beginChar: 0
endChar: 5
utterance: 0
speaker: "PER0"
beginIndex: 0
endIndex: 1
tokenBeginIndex: 0
tokenEndIndex: 1
hasXmlContext: false
polarity {
  projectEquivalence: EQUIVALENCE
  projectForwardEntailment: FORWARD_ENTAILMENT
  projectReverseEntailment: REVERSE_ENTAILMENT
  projectNegation: NEGATION
  projectAlternation: ALTERNATION
  projectCover: COVER
  projectIndependence: INDEPENDENCE
}
isNewline: false
polarity_dir: "up"
coarseNER: "PERSON"
fineGrainedNER: "PERSON"
corefMentionIndex: 0
entityMentionIndex: 0



In [16]:
# Show the part-of-speech tag of `token` (the first token of the first
# sentence, assigned in the previous cell).
print('---')
print('part of speech tag of token')
print(token.pos)

---
part of speech tag of token
NNP


In [9]:
# Show the named-entity tag of `token`.
# NOTE(review): `token` is assigned in the "first token" cell, so that cell
# has to be run before this one.
print('---', 'named entity tag of token', sep='\n')
print(token.ner)

---
named entity tag of token


NameError: name 'token' is not defined

In [4]:
# Show the first entity mention recorded on the first sentence.
print('---', 'first entity mention in sentence', sep='\n')
print(sentence.mentions[0])

# Show the coreference chains attached to the whole annotation.
print('---', 'coref chains for the example', sep='\n')
print(ann.corefChain)

---
constituency parse of first sentence
child {
  child {
    child {
      child {
        value: "Chris"
      }
      value: "NNP"
      score: -9.281864166259766
    }
    child {
      child {
        value: "Manning"
      }
      value: "NNP"
      score: -11.83283805847168
    }
    value: "NP"
    score: -23.60647964477539
  }
  child {
    child {
      child {
        value: "is"
      }
      value: "VBZ"
      score: -0.14657023549079895
    }
    child {
      child {
        child {
          value: "a"
        }
        value: "DT"
        score: -1.4235451221466064
      }
      child {
        child {
          value: "nice"
        }
        value: "JJ"
        score: -8.306026458740234
      }
      child {
        child {
          value: "person"
        }
        value: "NN"
        score: -7.468829154968262
      }
      value: "NP"
      score: -19.973678588867188
    }
    value: "VP"
    score: -25.309616088867188
  }
  child {
    child {
      value: "."
 

## tokensregex
```
matches = client.tokensregex(text, pattern)

```

- matches format

```python
{'sentences': [{'length': 0},
               {'0': {'1': {'begin': 0, 'end': 1, 'text': 'Chris'},
                      'begin': 0,
                      'end': 5,
                      'text': 'Chris wrote a simple sentence'},
                'length': 1},
               {'length': 0}]}
 ```
 
 - total number of sentences in the text (one entry per sentence):
 `len(matches["sentences"]) == 3`
 - `length` tells you whether or not there are any matches in this sentence:
 `matches["sentences"][1]["length"] == 1`

In [5]:
# Pretty-print the whole tokensregex response.
# NOTE(review): `pprint` is imported and `matches` is assigned only in the
# In [8] cell further down, so on a fresh kernel this cell fails unless that
# cell has been run first.
pprint(matches)

{'sentences': [{'length': 0},
               {'0': {'1': {'begin': 0, 'end': 1, 'text': 'Chris'},
                      'begin': 0,
                      'end': 5,
                      'text': 'Chris wrote a simple sentence'},
                'length': 1},
               {'length': 0}]}


In [6]:
# Pretty-print only the per-sentence list of the tokensregex response.
# NOTE(review): `pprint` is imported and `matches` is assigned only in the
# In [8] cell further down, so on a fresh kernel this cell fails unless that
# cell has been run first.
pprint(matches['sentences'])

[{'length': 0},
 {'0': {'1': {'begin': 0, 'end': 1, 'text': 'Chris'},
        'begin': 0,
        'end': 5,
        'text': 'Chris wrote a simple sentence'},
  'length': 1},
 {'length': 0}]


In [8]:
from pprint import pprint

# 3 sentences
text = "Chris Manning is a nice person. Chris wrote a simple sentence. He also gives oranges to people."

# Use tokensregex patterns to find who wrote a sentence.
pattern = '([ner: PERSON]+) /wrote/ /an?/ []{0,3} /sentence|article/'
matches = client.tokensregex(text, pattern)
pprint(matches["sentences"])

# "sentences" contains a list with one entry per input sentence.
assert len(matches["sentences"]) == 3
# "length" tells you whether or not there are any matches in that sentence.
assert matches["sentences"][1]["length"] == 1
# You can access matches like most regex groups. These checks are real
# assertions (not bare comparisons), so a wrong value fails the cell instead
# of being silently discarded.
# ["0"] stands for the 0th match (starting from 0)
assert matches["sentences"][1]["0"]["text"] == "Chris wrote a simple sentence"
# ["0"]["1"] stands for the first matching group in the 0th match (starting from 1)
assert matches["sentences"][1]["0"]["1"]["text"] == "Chris"

[{'length': 0},
 {'0': {'1': {'begin': 0, 'end': 1, 'text': 'Chris'},
        'begin': 0,
        'end': 5,
        'text': 'Chris wrote a simple sentence'},
  'length': 1},
 {'length': 0}]


True

## semgrex

In [6]:
# Use semgrex patterns to directly find who wrote what.
# Relies on `client` and `text` from the earlier cells.
pattern = '{word:wrote} >nsubj {}=subject >dobj {}=object'
matches = client.semgrex(text, pattern)

# "sentences" contains a list with one entry per input sentence.
assert len(matches["sentences"]) == 3
# "length" tells you whether or not there are any matches in that sentence.
assert matches["sentences"][1]["length"] == 1
# You can access matches like most regex groups; named nodes from the pattern
# are looked up under "$<name>". These checks are real assertions (not bare
# comparisons), so a wrong value fails the cell instead of being silently
# discarded.
assert matches["sentences"][1]["0"]["text"] == "wrote"
assert matches["sentences"][1]["0"]["$subject"]["text"] == "Chris"
assert matches["sentences"][1]["0"]["$object"]["text"] == "sentence"

[{'length': 0},
 {'0': {'1': {'begin': 0, 'end': 1, 'text': 'Chris'},
        'begin': 0,
        'end': 5,
        'text': 'Chris wrote a simple sentence'},
  'length': 1},
 {'length': 0}]


True

In [None]:
# Shut the client down once the notebook is finished with it.
# NOTE(review): the stanfordnlp CoreNLPClient's shutdown method is `stop()`
# (the same call its context manager makes on exit); no `close()` method is
# known - confirm against the installed version.
client.stop()