-
Notifications
You must be signed in to change notification settings - Fork 98
/
server.py
185 lines (145 loc) · 5.68 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/env python
import json
import logging
import os
from pathlib import Path

import falcon
import spacy
import spacy.about
import spacy.util
from spacy.symbols import ENT_TYPE, TAG, DEP

from .parse import Parse, Entities, Sentences
# Model names reported by ModelsResource: the whitespace-separated value of
# the "languages" environment variable (an empty list when it is unset).
MODELS = os.environ.get("languages", "").split()
# Cache of loaded spaCy pipelines keyed by model name; filled by get_model().
_models = dict()
def get_model(model_name):
    """Return the spaCy pipeline for ``model_name``, loading it on first use.

    Loaded pipelines are kept in the module-level ``_models`` dict, so
    ``spacy.load`` runs at most once per distinct name. Any exception raised
    by ``spacy.load`` propagates to the caller and nothing is cached.
    """
    # NOTE(review): the name is handed to spacy.load unchanged and the request
    # handlers pass client-supplied values here; consider validating against
    # MODELS -- confirm whether that restriction is intended.
    if model_name in _models:
        return _models[model_name]
    pipeline = spacy.load(model_name)
    _models[model_name] = pipeline
    return pipeline
def get_dep_types(model):
    """List the dependency labels available in the model.

    Iterates the ids in ``model.parser.moves.freqs[DEP]`` and returns the
    string that ``model.vocab.strings`` holds for each id, in iteration
    order.
    """
    return [
        model.vocab.strings[dep_id]
        for dep_id in model.parser.moves.freqs[DEP]
    ]
def get_ent_types(model):
    """List the entity types available in the model.

    Iterates the ids in ``model.entity.moves.freqs[ENT_TYPE]`` and returns
    the string that ``model.vocab.strings`` holds for each id, in iteration
    order.
    """
    return [
        model.vocab.strings[ent_id]
        for ent_id in model.entity.moves.freqs[ENT_TYPE]
    ]
def get_pos_types(model):
    """List the part-of-speech tags available in the model.

    Iterates the ids in ``model.tagger.moves.freqs[TAG]`` and returns the
    string that ``model.vocab.strings`` holds for each id, in iteration
    order.
    """
    # NOTE(review): this assumes the tagger exposes ``moves.freqs`` like the
    # parser does -- confirm against the spaCy version in use.
    return [
        model.vocab.strings[tag_id]
        for tag_id in model.tagger.moves.freqs[TAG]
    ]
class ModelsResource(object):
    """Report the configured model names.

    test with: curl -s localhost:8000/models
    """

    def on_get(self, req, resp):
        """Write the entries of ``MODELS`` to the response as a JSON array.

        On success the status is 200 and a wildcard CORS header is added.
        Any exception raised while building the response is swallowed and
        reported as a bare HTTP 500.
        """
        try:
            resp.body = json.dumps(list(MODELS), indent=2, sort_keys=True)
            # NOTE(review): the body is JSON but the declared media type is
            # 'text/string'; kept as-is because clients may rely on it.
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception:
            resp.status = falcon.HTTP_500
class VersionResource(object):
    """Return the spaCy version in use.

    test with: curl -s localhost:8000/version
    """

    def on_get(self, req, resp):
        """Write ``{"spacy": <version>}`` to the response as JSON.

        The version is read from ``spacy.about.__version__``. On success the
        status is 200 and a wildcard CORS header is added. Any exception is
        swallowed and reported as a bare HTTP 500.
        """
        try:
            versions = {"spacy": spacy.about.__version__}
            resp.body = json.dumps(versions, indent=2, sort_keys=True)
            # NOTE(review): the body is JSON but the declared media type is
            # 'text/string'; kept as-is because clients may rely on it.
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception:
            resp.status = falcon.HTTP_500
class SchemaResource(object):
    """Describe the annotation scheme of a model.

    This does not appear to work with later spacy
    versions.
    """

    def on_get(self, req, resp, model_name):
        """Write the label inventory of ``model_name`` to the response.

        The JSON object has the keys ``dep_types``, ``ent_types`` and
        ``pos_types``, filled by the matching ``get_*_types`` helpers.

        Raises:
            falcon.HTTPBadRequest: if loading the model, collecting the
                labels or writing the response raises; the original
                exception text becomes the error description.
        """
        try:
            nlp = get_model(model_name)
            schema = dict(
                dep_types=get_dep_types(nlp),
                ent_types=get_ent_types(nlp),
                pos_types=get_pos_types(nlp),
            )
            resp.body = json.dumps(schema, indent=2, sort_keys=True)
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception as err:
            raise falcon.HTTPBadRequest(
                'Schema construction failed',
                '{}'.format(err))
class DepResource(object):
    """Parse text and return displacy's expected JSON output.

    test with: curl -s localhost:8000/dep -d '{"text":"Pastafarians are smarter than people with Coca Cola bottles."}'
    """

    def on_post(self, req, resp):
        """Run a dependency parse on the posted text and write it as JSON.

        The request body must be a UTF-8 encoded JSON object. Keys read:
        ``text``; ``model`` (default ``'en'``); ``collapse_punctuation``
        (default ``True``); ``collapse_phrases`` (default ``True``).

        Raises:
            falcon.HTTPBadRequest: if the body is not a UTF-8 JSON object, or
                if loading the model, parsing or serializing the result
                raises; the original exception text becomes the description.
        """
        # Decode the body in its own guarded step: a malformed payload is a
        # client error and must not escape as an unhandled exception.
        try:
            json_data = json.loads(req.bounded_stream.read().decode('utf8'))
            text = json_data.get('text')
            model_name = json_data.get('model', 'en')
            collapse_punctuation = json_data.get('collapse_punctuation', True)
            collapse_phrases = json_data.get('collapse_phrases', True)
        except (ValueError, AttributeError) as e:
            # ValueError covers invalid JSON and invalid UTF-8; AttributeError
            # covers valid JSON that is not an object (no ``.get``).
            raise falcon.HTTPBadRequest(
                'Invalid request body',
                '{}'.format(e))

        try:
            model = get_model(model_name)
            parse = Parse(model, text, collapse_punctuation, collapse_phrases)
            resp.body = json.dumps(parse.to_json(), sort_keys=True, indent=2)
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception as e:
            raise falcon.HTTPBadRequest(
                'Dependency parsing failed',
                '{}'.format(e))
class EntResource(object):
    """Parse text and return displaCy ent's expected output."""

    def on_post(self, req, resp):
        """Extract entities from the posted text and write them as JSON.

        The request body must be a UTF-8 encoded JSON object. Keys read:
        ``text`` and ``model`` (default ``'en'``).

        If loading the model, building the entities or serializing them
        raises, the exception is logged and the response status is set to
        500 with no body.

        Raises:
            falcon.HTTPBadRequest: if the body is not a UTF-8 JSON object.
        """
        # Decode the body in its own guarded step: a malformed payload is a
        # client error and must not escape as an unhandled exception.
        try:
            json_data = json.loads(req.bounded_stream.read().decode('utf8'))
            text = json_data.get('text')
            model_name = json_data.get('model', 'en')
        except (ValueError, AttributeError) as e:
            # ValueError covers invalid JSON and invalid UTF-8; AttributeError
            # covers valid JSON that is not an object (no ``.get``).
            raise falcon.HTTPBadRequest(
                'Invalid request body',
                '{}'.format(e))

        try:
            model = get_model(model_name)
            entities = Entities(model, text)
            resp.body = json.dumps(entities.to_json(), sort_keys=True,
                                   indent=2)
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception:
            # Keep the 500 status, but record the traceback so the failure
            # is not lost.
            logging.getLogger(__name__).exception('Entity extraction failed')
            resp.status = falcon.HTTP_500
class SentsResources(object):
    """Returns sentences"""

    def on_post(self, req, resp):
        """Split the posted text into sentences and write them as JSON.

        The request body must be a UTF-8 encoded JSON object. Keys read:
        ``text`` and ``model`` (default ``'en'``).

        If loading the model, building the sentences or serializing them
        raises, the exception is logged and the response status is set to
        500 with no body.

        Raises:
            falcon.HTTPBadRequest: if the body is not a UTF-8 JSON object.
        """
        # Decode the body in its own guarded step: a malformed payload is a
        # client error and must not escape as an unhandled exception.
        try:
            json_data = json.loads(req.bounded_stream.read().decode('utf8'))
            text = json_data.get('text')
            model_name = json_data.get('model', 'en')
        except (ValueError, AttributeError) as e:
            # ValueError covers invalid JSON and invalid UTF-8; AttributeError
            # covers valid JSON that is not an object (no ``.get``).
            raise falcon.HTTPBadRequest(
                'Invalid request body',
                '{}'.format(e))

        try:
            model = get_model(model_name)
            sentences = Sentences(model, text)
            resp.body = json.dumps(sentences.to_json(), sort_keys=True,
                                   indent=2)
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception:
            # Keep the 500 status, but record the traceback so the failure
            # is not lost.
            logging.getLogger(__name__).exception('Sentence splitting failed')
            resp.status = falcon.HTTP_500
# WSGI application object; every endpoint of the service is registered below.
# NOTE(review): falcon.API is the pre-3.0 spelling of falcon.App -- confirm
# the pinned Falcon version before upgrading.
APP = falcon.API()
for _uri_template, _resource in (
    ('/dep', DepResource()),
    ('/ent', EntResource()),
    ('/sents', SentsResources()),
    ('/{model_name}/schema', SchemaResource()),
    ('/models', ModelsResource()),
    ('/version', VersionResource()),
):
    APP.add_route(_uri_template, _resource)
# Drop the loop temporaries so the module namespace is unchanged.
del _uri_template, _resource