-
Notifications
You must be signed in to change notification settings - Fork 800
/
parent_child.py
202 lines (164 loc) · 6.21 KB
/
parent_child.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
"""
Complex data model example modeling stackoverflow-like data.
It is used to showcase several key features of elasticsearch-dsl:
* Object and Nested fields: see User and Comment classes and fields they
are used in
* method add_comment is used to add comments
* Parent/Child relationship
* See the Join field on Post creating the relationship between Question
and Answer
* the _matches classmethod allows the hits from the same index to be wrapped
in proper classes
* to see how child objects are created see Question.add_answer
* Question.search_answers shows how to query for children of a
particular parent
"""
from datetime import datetime
from elasticsearch_dsl import Document, Date, Text, Keyword, Boolean, InnerDoc, \
Nested, Object, Join, Long, connections
class User(InnerDoc):
    """
    Denormalized copy of a user, embedded in the documents that reference it
    (see the ``author`` fields declared as ``Object(User)``).
    """
    id = Long(required=True)
    signed_up = Date()
    # each text field below also declares a 'keyword' sub-field
    username = Text(required=True, fields={"keyword": Keyword()})
    email = Text(fields={"keyword": Keyword()})
    location = Text(fields={"keyword": Keyword()})
class Comment(InnerDoc):
    """
    A single comment; instances live in the Nested ``comments`` field of Post.
    """
    # all three fields are mandatory
    author = Object(User, required=True)
    created = Date(required=True)
    content = Text(required=True)
class Post(Document):
    """
    Abstract base shared by Question and Answer.

    Declares the fields both document types have in common, the join field
    relating answers to questions, and the index they are stored in.
    """
    author = Object(User, required=True)
    created = Date(required=True)
    body = Text(required=True)
    comments = Nested(Comment)
    # 'question' is the parent relation, 'answer' is its child
    question_answer = Join(relations={"question": "answer"})

    @classmethod
    def _matches(cls, hit):
        # Post is abstract: always decline so it is never used to
        # deserialize a hit
        return False

    class Index:
        name = "test-qa-site"
        settings = {"number_of_shards": 1, "number_of_replicas": 0}

    def add_comment(self, user, content, created=None, commit=True):
        """
        Append a new Comment written by ``user`` to this post and return it.

        ``created`` falls back to the current time when it is falsy. The post
        is saved immediately unless ``commit`` is false.
        """
        if not created:
            created = datetime.now()
        comment = Comment(author=user, content=content, created=created)
        self.comments.append(comment)
        if commit:
            self.save()
        return comment

    def save(self, **kwargs):
        """
        Save the post, stamping ``created`` with the current time when it has
        not been set. Keyword arguments are passed through to the parent
        ``save``.
        """
        if self.created is None:
            self.created = datetime.now()
        return super(Post, self).save(**kwargs)
class Question(Post):
    """
    Parent side of the question/answer join relation declared on Post.
    """
    # use multi True so that .tags will return empty list if not present
    tags = Keyword(multi=True)
    title = Text(fields={'keyword': Keyword()})

    @classmethod
    def _matches(cls, hit):
        " Use Question class for parent documents "
        return hit['_source']['question_answer'] == 'question'

    @classmethod
    def search(cls, **kwargs):
        """
        Return a search over the index restricted to question documents.

        Keyword arguments are forwarded to the index's ``search``.
        """
        return cls._index.search(**kwargs).filter('term', question_answer='question')

    def add_answer(self, user, body, created=None, accepted=False, commit=True):
        """
        Create an Answer that is a child of this question and return it.

        :arg user: author of the answer
        :arg body: text of the answer
        :arg created: creation date; when left as ``None`` it is filled in by
            ``Post.save``
        :arg accepted: stored in the answer's ``is_accepted`` field
        :arg commit: when true the answer is saved before being returned
        """
        answer = Answer(
            # required to make sure the answer is stored in the same shard
            _routing=self.meta.id,
            # since we don't have explicit index, ensure same index as self
            _index=self.meta.index,
            # set up the parent/child mapping
            question_answer={'name': 'answer', 'parent': self.meta.id},
            # pass in the field values
            author=user,
            created=created,
            body=body,
            # the field declared on Answer is ``is_accepted``; passing it as
            # ``accepted`` would leave the declared field unset
            is_accepted=accepted
        )
        if commit:
            answer.save()
        return answer

    def search_answers(self):
        """
        Return a search for the answers whose parent is this question.
        """
        # search only our index
        s = Answer.search()
        # filter for answers belonging to us
        s = s.filter('parent_id', type="answer", id=self.meta.id)
        # add routing to only go to specific shard
        s = s.params(routing=self.meta.id)
        return s

    def get_answers(self):
        """
        Get answers either from inner_hits already present or by searching
        elasticsearch.
        """
        if 'inner_hits' in self.meta and 'answer' in self.meta.inner_hits:
            return self.meta.inner_hits.answer.hits
        return list(self.search_answers())

    def save(self, **kwargs):
        """
        Mark the document as the 'question' side of the join, then save it
        through ``Post.save``.
        """
        self.question_answer = 'question'
        return super(Question, self).save(**kwargs)
class Answer(Post):
    """
    Child side of the question/answer join relation declared on Post.
    """
    is_accepted = Boolean()

    @classmethod
    def _matches(cls, hit):
        " Use Answer class for child documents with child name 'answer' "
        join_value = hit["_source"]["question_answer"]
        if not isinstance(join_value, dict):
            return False
        return join_value.get("name") == "answer"

    @classmethod
    def search(cls, **kwargs):
        """
        Return a search over the index that excludes question documents.
        """
        base_search = cls._index.search(**kwargs)
        return base_search.exclude("term", question_answer="question")

    @property
    def question(self):
        """
        The parent Question, fetched on first access and then cached.
        """
        # the cache lives on self.meta because any attribute set on self
        # would be interpreted as a field
        if "question" in self.meta:
            return self.meta.question
        self.meta.question = Question.get(
            id=self.question_answer.parent, index=self.meta.index)
        return self.meta.question

    def save(self, **kwargs):
        """
        Save the answer, routing it by the id of its parent question.
        """
        # set routing to the parent's id automatically
        self.meta.routing = self.question_answer.parent
        return super(Answer, self).save(**kwargs)
def setup():
    """
    Build the 'base' index template from Post's index and save it into
    elasticsearch.
    """
    Post._index.as_template("base").save()
if __name__ == '__main__':
    # open the default elasticsearch connection
    connections.create_connection()

    # save the index template before any document is written
    setup()

    # two users that will author the example documents
    nick = User(
        id=47,
        signed_up=datetime(2017, 4, 3),
        username='fxdgear',
        email='nick.lang@elastic.co',
        location='Colorado',
    )
    honza = User(
        id=42,
        signed_up=datetime(2013, 4, 3),
        username='honzakral',
        email='honza@elastic.co',
        location='Prague',
    )

    # the parent document: a question asked by nick
    question = Question(
        _id=1,
        author=nick,
        tags=['elasticsearch', 'python'],
        title='How do I use elasticsearch from Python?',
        body='''
I want to use elasticsearch, how do I do it from Python?
''',
    )
    question.save()

    # the child document: honza's answer to that question
    answer = question.add_answer(honza, "Just use `elasticsearch-py`!")