-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
__init__.py
1850 lines (1731 loc) · 80.7 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from __future__ import unicode_literals
import logging
from ..transport import Transport
from ..exceptions import TransportError
from ..compat import string_types, urlparse, unquote
from .indices import IndicesClient
from .ingest import IngestClient
from .cluster import ClusterClient
from .cat import CatClient
from .nodes import NodesClient
from .remote import RemoteClient
from .snapshot import SnapshotClient
from .tasks import TasksClient
from .xpack import XPackClient
from .utils import query_params, _make_path, SKIP_IN_PATH
logger = logging.getLogger("elasticsearch")
def _normalize_hosts(hosts):
"""
Helper function to transform hosts argument to
:class:`~elasticsearch.Elasticsearch` to a list of dicts.
"""
# if hosts are empty, just defer to defaults down the line
if hosts is None:
return [{}]
# passed in just one string
if isinstance(hosts, string_types):
hosts = [hosts]
out = []
# normalize hosts to dicts
for host in hosts:
if isinstance(host, string_types):
if "://" not in host:
host = "//%s" % host
parsed_url = urlparse(host)
h = {"host": parsed_url.hostname}
if parsed_url.port:
h["port"] = parsed_url.port
if parsed_url.scheme == "https":
h["port"] = parsed_url.port or 443
h["use_ssl"] = True
if parsed_url.username or parsed_url.password:
h["http_auth"] = "%s:%s" % (
unquote(parsed_url.username),
unquote(parsed_url.password),
)
if parsed_url.path and parsed_url.path != "/":
h["url_prefix"] = parsed_url.path
out.append(h)
else:
out.append(host)
return out
class Elasticsearch(object):
"""
Elasticsearch low-level client. Provides a straightforward mapping from
Python to ES REST endpoints.
The instance has attributes ``cat``, ``cluster``, ``indices``, ``ingest``,
``nodes``, ``snapshot`` and ``tasks`` that provide access to instances of
:class:`~elasticsearch.client.CatClient`,
:class:`~elasticsearch.client.ClusterClient`,
:class:`~elasticsearch.client.IndicesClient`,
:class:`~elasticsearch.client.IngestClient`,
:class:`~elasticsearch.client.NodesClient`,
:class:`~elasticsearch.client.SnapshotClient` and
:class:`~elasticsearch.client.TasksClient` respectively. This is the
preferred (and only supported) way to get access to those classes and their
methods.
You can specify your own connection class which should be used by providing
the ``connection_class`` parameter::
# create connection to localhost using the ThriftConnection
es = Elasticsearch(connection_class=ThriftConnection)
If you want to turn on :ref:`sniffing` you have several options (described
in :class:`~elasticsearch.Transport`)::
# create connection that will automatically inspect the cluster to get
# the list of active nodes. Start with nodes running on 'esnode1' and
# 'esnode2'
es = Elasticsearch(
['esnode1', 'esnode2'],
# sniff before doing anything
sniff_on_start=True,
# refresh nodes after a node fails to respond
sniff_on_connection_fail=True,
# and also every 60 seconds
sniffer_timeout=60
)
Different hosts can have different parameters, use a dictionary per node to
specify those::
# connect to localhost directly and another node using SSL on port 443
# and an url_prefix. Note that ``port`` needs to be an int.
es = Elasticsearch([
{'host': 'localhost'},
{'host': 'othernode', 'port': 443, 'url_prefix': 'es', 'use_ssl': True},
])
If using SSL, there are several parameters that control how we deal with
certificates (see :class:`~elasticsearch.Urllib3HttpConnection` for
detailed description of the options)::
es = Elasticsearch(
['localhost:443', 'other_host:443'],
# turn on SSL
use_ssl=True,
# make sure we verify SSL certificates
verify_certs=True,
# provide a path to CA certs on disk
ca_certs='/path/to/CA_certs'
)
If using SSL, but don't verify the certs, a warning message is showed
optionally (see :class:`~elasticsearch.Urllib3HttpConnection` for
detailed description of the options)::
es = Elasticsearch(
['localhost:443', 'other_host:443'],
# turn on SSL
use_ssl=True,
# no verify SSL certificates
verify_certs=False,
# don't show warnings about ssl certs verification
ssl_show_warn=False
)
SSL client authentication is supported
(see :class:`~elasticsearch.Urllib3HttpConnection` for
detailed description of the options)::
es = Elasticsearch(
['localhost:443', 'other_host:443'],
# turn on SSL
use_ssl=True,
# make sure we verify SSL certificates
verify_certs=True,
# provide a path to CA certs on disk
ca_certs='/path/to/CA_certs',
# PEM formatted SSL client certificate
client_cert='/path/to/clientcert.pem',
# PEM formatted SSL client key
client_key='/path/to/clientkey.pem'
)
Alternatively you can use RFC-1738 formatted URLs, as long as they are not
in conflict with other options::
es = Elasticsearch(
[
'http://user:secret@localhost:9200/',
'https://user:secret@other_host:443/production'
],
verify_certs=True
)
By default, `JSONSerializer
<https://github.com/elastic/elasticsearch-py/blob/master/elasticsearch/serializer.py#L24>`_
is used to encode all outgoing requests.
However, you can implement your own custom serializer::
from elasticsearch.serializer import JSONSerializer
class SetEncoder(JSONSerializer):
def default(self, obj):
if isinstance(obj, set):
return list(obj)
if isinstance(obj, Something):
return 'CustomSomethingRepresentation'
return JSONSerializer.default(self, obj)
es = Elasticsearch(serializer=SetEncoder())
"""
def __init__(self, hosts=None, transport_class=Transport, **kwargs):
"""
:arg hosts: list of nodes we should connect to. Node should be a
dictionary ({"host": "localhost", "port": 9200}), the entire dictionary
will be passed to the :class:`~elasticsearch.Connection` class as
kwargs, or a string in the format of ``host[:port]`` which will be
translated to a dictionary automatically. If no value is given the
:class:`~elasticsearch.Urllib3HttpConnection` class defaults will be used.
:arg transport_class: :class:`~elasticsearch.Transport` subclass to use.
:arg kwargs: any additional arguments will be passed on to the
:class:`~elasticsearch.Transport` class and, subsequently, to the
:class:`~elasticsearch.Connection` instances.
"""
self.transport = transport_class(_normalize_hosts(hosts), **kwargs)
# namespaced clients for compatibility with API names
self.indices = IndicesClient(self)
self.ingest = IngestClient(self)
self.cluster = ClusterClient(self)
self.cat = CatClient(self)
self.nodes = NodesClient(self)
self.remote = RemoteClient(self)
self.snapshot = SnapshotClient(self)
self.tasks = TasksClient(self)
self.xpack = XPackClient(self)
def __repr__(self):
try:
# get a list of all connections
cons = self.transport.hosts
# truncate to 5 if there are too many
if len(cons) > 5:
cons = cons[:5] + ["..."]
return "<{cls}({cons})>".format(cls=self.__class__.__name__, cons=cons)
except:
# probably operating on custom transport and connection_pool, ignore
return super(Elasticsearch, self).__repr__()
def _bulk_body(self, body):
# if not passed in a string, serialize items and join by newline
if not isinstance(body, string_types):
body = "\n".join(map(self.transport.serializer.dumps, body))
# bulk body must end with a newline
if not body.endswith("\n"):
body += "\n"
return body
@query_params()
def ping(self, params=None):
"""
Returns True if the cluster is up, False otherwise.
`<http://www.elastic.co/guide/>`_
"""
try:
return self.transport.perform_request("HEAD", "/", params=params)
except TransportError:
return False
@query_params()
def info(self, params=None):
"""
Get the basic info from the current cluster.
`<http://www.elastic.co/guide/>`_
"""
return self.transport.perform_request("GET", "/", params=params)
@query_params(
"parent",
"pipeline",
"refresh",
"routing",
"timeout",
"timestamp",
"ttl",
"version",
"version_type",
"wait_for_active_shards",
)
def create(self, index, id, body, doc_type="_doc", params=None):
"""
Adds a typed JSON document in a specific index, making it searchable.
Behind the scenes this method calls index(..., op_type='create')
`<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html>`_
:arg index: The name of the index
:arg id: Document ID
:arg body: The document
:arg parent: ID of the parent document
:arg pipeline: The pipeline id to preprocess incoming documents with
:arg refresh: If `true` then refresh the affected shards to make this
operation visible to search, if `wait_for` then wait for a refresh
to make this operation visible to search, if `false` (the default)
then do nothing with refreshes., valid choices are: 'true', 'false',
'wait_for'
:arg routing: Specific routing value
:arg timeout: Explicit operation timeout
:arg timestamp: Explicit timestamp for the document
:arg ttl: Expiration time for the document
:arg version: Explicit version number for concurrency control
:arg version_type: Specific version type, valid choices are: 'internal',
'external', 'external_gte', 'force'
:arg wait_for_active_shards: Sets the number of shard copies that must
be active before proceeding with the index operation. Defaults to 1,
meaning the primary shard only. Set to `all` for all shard copies,
otherwise set to any non-negative value less than or equal to the
total number of copies for the shard (number of replicas + 1)
"""
for param in (index, id, body):
if param in SKIP_IN_PATH:
raise ValueError("Empty value passed for a required argument.")
return self.transport.perform_request(
"PUT", _make_path(index, doc_type, id, "_create"), params=params, body=body
)
@query_params(
"if_seq_no",
"if_primary_term",
"op_type",
"parent",
"pipeline",
"refresh",
"routing",
"timeout",
"timestamp",
"ttl",
"version",
"version_type",
"wait_for_active_shards",
)
def index(self, index, body, doc_type="_doc", id=None, params=None):
"""
Adds or updates a typed JSON document in a specific index, making it searchable.
`<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html>`_
:arg index: The name of the index
:arg body: The document
:arg id: Document ID
:arg if_primary_term: only perform the index operation if the last
operation that has changed the document has the specified primary
term
:arg if_seq_no: only perform the index operation if the last operation
that has changed the document has the specified sequence number
:arg doc_type: Document type, defaults to `_doc`. Not used on ES 7 clusters.
:arg op_type: Explicit operation type, default 'index', valid choices
are: 'index', 'create'
:arg parent: ID of the parent document
:arg pipeline: The pipeline id to preprocess incoming documents with
:arg refresh: If `true` then refresh the affected shards to make this
operation visible to search, if `wait_for` then wait for a refresh
to make this operation visible to search, if `false` (the default)
then do nothing with refreshes., valid choices are: 'true', 'false',
'wait_for'
:arg routing: Specific routing value
:arg timeout: Explicit operation timeout
:arg timestamp: Explicit timestamp for the document
:arg ttl: Expiration time for the document
:arg version: Explicit version number for concurrency control
:arg version_type: Specific version type, valid choices are: 'internal',
'external', 'external_gte', 'force'
:arg wait_for_active_shards: Sets the number of shard copies that must
be active before proceeding with the index operation. Defaults to 1,
meaning the primary shard only. Set to `all` for all shard copies,
otherwise set to any non-negative value less than or equal to the
total number of copies for the shard (number of replicas + 1)
"""
for param in (index, body):
if param in SKIP_IN_PATH:
raise ValueError("Empty value passed for a required argument.")
return self.transport.perform_request(
"POST", _make_path(index, doc_type, id), params=params, body=body
)
@query_params(
"_source",
"_source_exclude",
"_source_excludes",
"_source_include",
"_source_includes",
"parent",
"preference",
"realtime",
"refresh",
"routing",
"stored_fields",
"version",
"version_type",
)
def exists(self, index, id, doc_type="_doc", params=None):
"""
Returns a boolean indicating whether or not given document exists in Elasticsearch.
`<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html>`_
:arg index: The name of the index
:arg id: The document ID
:arg _source: True or false to return the _source field or not, or a
list of fields to return
:arg _source_exclude: A list of fields to exclude from the returned
_source field
:arg _source_include: A list of fields to extract and return from the
_source field
:arg parent: The ID of the parent document
:arg preference: Specify the node or shard the operation should be
performed on (default: random)
:arg realtime: Specify whether to perform the operation in realtime or
search mode
:arg refresh: Refresh the shard containing the document before
performing the operation
:arg routing: Specific routing value
:arg stored_fields: A comma-separated list of stored fields to return in
the response
:arg version: Explicit version number for concurrency control
:arg version_type: Specific version type, valid choices are: 'internal',
'external', 'external_gte', 'force'
"""
for param in (index, id):
if param in SKIP_IN_PATH:
raise ValueError("Empty value passed for a required argument.")
return self.transport.perform_request(
"HEAD", _make_path(index, doc_type, id), params=params
)
@query_params(
"_source",
"_source_exclude",
"_source_excludes",
"_source_include",
"_source_includes",
"parent",
"preference",
"realtime",
"refresh",
"routing",
"version",
"version_type",
)
def exists_source(self, index, id, doc_type=None, params=None):
"""
`<http://www.elastic.co/guide/en/elasticsearch/reference/master/docs-get.html>`_
:arg index: The name of the index
:arg id: The document ID
:arg _source: True or false to return the _source field or not, or a
list of fields to return
:arg _source_exclude: A list of fields to exclude from the returned
_source field
:arg _source_include: A list of fields to extract and return from the
_source field
:arg parent: The ID of the parent document
:arg preference: Specify the node or shard the operation should be
performed on (default: random)
:arg realtime: Specify whether to perform the operation in realtime or
search mode
:arg refresh: Refresh the shard containing the document before
performing the operation
:arg routing: Specific routing value
:arg version: Explicit version number for concurrency control
:arg version_type: Specific version type, valid choices are: 'internal',
'external', 'external_gte', 'force'
"""
for param in (index, id):
if param in SKIP_IN_PATH:
raise ValueError("Empty value passed for a required argument.")
return self.transport.perform_request(
"HEAD", _make_path(index, doc_type, id, "_source"), params=params
)
@query_params(
"_source",
"_source_exclude",
"_source_excludes",
"_source_include",
"_source_includes",
"parent",
"preference",
"realtime",
"refresh",
"routing",
"stored_fields",
"version",
"version_type",
)
def get(self, index, id, doc_type="_doc", params=None):
"""
Get a typed JSON document from the index based on its id.
`<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html>`_
:arg index: The name of the index
:arg id: The document ID
:arg _source: True or false to return the _source field or not, or a
list of fields to return
:arg _source_exclude: A list of fields to exclude from the returned
_source field
:arg _source_include: A list of fields to extract and return from the
_source field
:arg parent: The ID of the parent document
:arg preference: Specify the node or shard the operation should be
performed on (default: random)
:arg realtime: Specify whether to perform the operation in realtime or
search mode
:arg refresh: Refresh the shard containing the document before
performing the operation
:arg routing: Specific routing value
:arg stored_fields: A comma-separated list of stored fields to return in
the response
:arg version: Explicit version number for concurrency control
:arg version_type: Specific version type, valid choices are: 'internal',
'external', 'external_gte', 'force'
"""
for param in (index, id):
if param in SKIP_IN_PATH:
raise ValueError("Empty value passed for a required argument.")
return self.transport.perform_request(
"GET", _make_path(index, doc_type, id), params=params
)
@query_params(
"_source",
"_source_exclude",
"_source_excludes",
"_source_include",
"_source_includes",
"parent",
"preference",
"realtime",
"refresh",
"routing",
"version",
"version_type",
)
def get_source(self, index, id, doc_type="_doc", params=None):
"""
Get the source of a document by it's index, type and id.
`<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html>`_
:arg index: The name of the index
:arg id: The document ID
:arg _source: True or false to return the _source field or not, or a
list of fields to return
:arg _source_exclude: A list of fields to exclude from the returned
_source field
:arg _source_include: A list of fields to extract and return from the
_source field
:arg parent: The ID of the parent document
:arg preference: Specify the node or shard the operation should be
performed on (default: random)
:arg realtime: Specify whether to perform the operation in realtime or
search mode
:arg refresh: Refresh the shard containing the document before
performing the operation
:arg routing: Specific routing value
:arg version: Explicit version number for concurrency control
:arg version_type: Specific version type, valid choices are: 'internal',
'external', 'external_gte', 'force'
"""
for param in (index, id):
if param in SKIP_IN_PATH:
raise ValueError("Empty value passed for a required argument.")
return self.transport.perform_request(
"GET", _make_path(index, doc_type, id, "_source"), params=params
)
@query_params(
"_source",
"_source_exclude",
"_source_excludes",
"_source_include",
"_source_includes",
"preference",
"realtime",
"refresh",
"routing",
"stored_fields",
)
def mget(self, body, doc_type=None, index=None, params=None):
"""
Get multiple documents based on an index, type (optional) and ids.
`<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html>`_
:arg body: Document identifiers; can be either `docs` (containing full
document information) or `ids` (when index and type is provided in
the URL.
:arg index: The name of the index
:arg _source: True or false to return the _source field or not, or a
list of fields to return
:arg _source_exclude: A list of fields to exclude from the returned
_source field
:arg _source_include: A list of fields to extract and return from the
_source field
:arg preference: Specify the node or shard the operation should be
performed on (default: random)
:arg realtime: Specify whether to perform the operation in realtime or
search mode
:arg refresh: Refresh the shard containing the document before
performing the operation
:arg routing: Specific routing value
:arg stored_fields: A comma-separated list of stored fields to return in
the response
"""
if body in SKIP_IN_PATH:
raise ValueError("Empty value passed for a required argument 'body'.")
return self.transport.perform_request(
"GET", _make_path(index, doc_type, "_mget"), params=params, body=body
)
@query_params(
"_source",
"_source_exclude",
"_source_excludes",
"_source_include",
"_source_includes",
"fields",
"if_seq_no",
"if_primary_term",
"lang",
"parent",
"refresh",
"retry_on_conflict",
"routing",
"timeout",
"timestamp",
"ttl",
"version",
"version_type",
"wait_for_active_shards",
)
def update(self, index, id, doc_type="_doc", body=None, params=None):
"""
Update a document based on a script or partial data provided.
`<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update.html>`_
:arg index: The name of the index
:arg id: Document ID
:arg body: The request definition using either `script` or partial `doc`
:arg _source: True or false to return the _source field or not, or a
list of fields to return
:arg _source_exclude: A list of fields to exclude from the returned
_source field
:arg _source_include: A list of fields to extract and return from the
_source field
:arg fields: A comma-separated list of fields to return in the response
:arg if_seq_no:
:arg if_primary_term:
:arg lang: The script language (default: painless)
:arg parent: ID of the parent document. Is is only used for routing and
when for the upsert request
:arg refresh: If `true` then refresh the effected shards to make this
operation visible to search, if `wait_for` then wait for a refresh
to make this operation visible to search, if `false` (the default)
then do nothing with refreshes., valid choices are: 'true', 'false',
'wait_for'
:arg retry_on_conflict: Specify how many times should the operation be
retried when a conflict occurs (default: 0)
:arg routing: Specific routing value
:arg timeout: Explicit operation timeout
:arg timestamp: Explicit timestamp for the document
:arg ttl: Expiration time for the document
:arg version: Explicit version number for concurrency control
:arg version_type: Specific version type, valid choices are: 'internal',
'force'
:arg wait_for_active_shards: Sets the number of shard copies that must
be active before proceeding with the update operation. Defaults to
1, meaning the primary shard only. Set to `all` for all shard
copies, otherwise set to any non-negative value less than or equal
to the total number of copies for the shard (number of replicas + 1)
"""
for param in (index, id):
if param in SKIP_IN_PATH:
raise ValueError("Empty value passed for a required argument.")
return self.transport.perform_request(
"POST", _make_path(index, doc_type, id, "_update"), params=params, body=body
)
@query_params(
"_source",
"_source_exclude",
"_source_excludes",
"_source_include",
"_source_includes",
"allow_no_indices",
"allow_partial_search_results",
"analyze_wildcard",
"analyzer",
"batched_reduce_size",
"ccs_minimize_roundtrips",
"default_operator",
"df",
"docvalue_fields",
"expand_wildcards",
"explain",
"from_",
"ignore_throttled",
"ignore_unavailable",
"lenient",
"max_concurrent_shard_requests",
"pre_filter_shard_size",
"preference",
"q",
"rest_total_hits_as_int",
"request_cache",
"routing",
"scroll",
"search_type",
"seq_no_primary_term",
"size",
"sort",
"stats",
"stored_fields",
"suggest_field",
"suggest_mode",
"suggest_size",
"suggest_text",
"terminate_after",
"timeout",
"track_scores",
"track_total_hits",
"typed_keys",
"version",
)
def search(self, index=None, body=None, params=None):
"""
Execute a search query and get back search hits that match the query.
`<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html>`_
:arg index: A list of index names to search, or a string containing a
comma-separated list of index names to search; use `_all`
or empty string to perform the operation on all indices
:arg body: The search definition using the Query DSL
:arg _source: True or false to return the _source field or not, or a
list of fields to return
:arg _source_exclude: A list of fields to exclude from the returned
_source field
:arg _source_include: A list of fields to extract and return from the
_source field
:arg allow_no_indices: Whether to ignore if a wildcard indices
expression resolves into no concrete indices. (This includes `_all`
string or when no indices have been specified)
:arg allow_partial_search_results: Set to false to return an overall
failure if the request would produce partial results. Defaults to
True, which will allow partial results in the case of timeouts or
partial failures
:arg analyze_wildcard: Specify whether wildcard and prefix queries
should be analyzed (default: false)
:arg analyzer: The analyzer to use for the query string
:arg batched_reduce_size: The number of shard results that should be
reduced at once on the coordinating node. This value should be used
as a protection mechanism to reduce the memory overhead per search
request if the potential number of shards in the request can be
large., default 512
:arg ccs_minimize_roundtrips: Indicates whether network round-trips
should be minimized as part of cross-cluster search requests
execution, default 'true'
:arg default_operator: The default operator for query string query (AND
or OR), default 'OR', valid choices are: 'AND', 'OR'
:arg df: The field to use as default where no field prefix is given in
the query string
:arg docvalue_fields: A comma-separated list of fields to return as the
docvalue representation of a field for each hit
:arg expand_wildcards: Whether to expand wildcard expression to concrete
indices that are open, closed or both., default 'open', valid
choices are: 'open', 'closed', 'none', 'all'
:arg explain: Specify whether to return detailed information about score
computation as part of a hit
:arg from\\_: Starting offset (default: 0)
:arg ignore_unavailable: Whether specified concrete indices should be
ignored when unavailable (missing or closed)
:arg lenient: Specify whether format-based query failures (such as
providing text to a numeric field) should be ignored
:arg max_concurrent_shard_requests: The number of concurrent shard
requests this search executes concurrently. This value should be
used to limit the impact of the search on the cluster in order to
limit the number of concurrent shard requests, default 'The default
grows with the number of nodes in the cluster but is at most 256.'
:arg pre_filter_shard_size: A threshold that enforces a pre-filter
roundtrip to prefilter search shards based on query rewriting if
the number of shards the search request expands to exceeds the
threshold. This filter roundtrip can limit the number of shards
significantly if for instance a shard can not match any documents
based on it's rewrite method ie. if date filters are mandatory to
match but the shard bounds and the query are disjoint., default 128
:arg preference: Specify the node or shard the operation should be
performed on (default: random)
:arg q: Query in the Lucene query string syntax
:arg rest_total_hits_as_int: This parameter is used to restore the total hits as a number
in the response. This param is added version 6.x to handle mixed cluster queries where nodes
are in multiple versions (7.0 and 6.latest)
:arg request_cache: Specify if request cache should be used for this
request or not, defaults to index level setting
:arg routing: A comma-separated list of specific routing values
:arg scroll: Specify how long a consistent view of the index should be
maintained for scrolled search
:arg search_type: Search operation type, valid choices are:
'query_then_fetch', 'dfs_query_then_fetch'
:arg size: Number of hits to return (default: 10)
:arg sort: A comma-separated list of <field>:<direction> pairs
:arg stats: Specific 'tag' of the request for logging and statistical
purposes
:arg stored_fields: A comma-separated list of stored fields to return as
part of a hit
:arg suggest_field: Specify which field to use for suggestions
:arg suggest_mode: Specify suggest mode, default 'missing', valid
choices are: 'missing', 'popular', 'always'
:arg suggest_size: How many suggestions to return in response
:arg suggest_text: The source text for which the suggestions should be
returned
:arg terminate_after: The maximum number of documents to collect for
each shard, upon reaching which the query execution will terminate
early.
:arg timeout: Explicit operation timeout
:arg track_scores: Whether to calculate and return scores even if they
are not used for sorting
:arg track_total_hits: Indicate if the number of documents that match
the query should be tracked
:arg typed_keys: Specify whether aggregation and suggester names should
be prefixed by their respective types in the response
:arg version: Specify whether to return document version as part of a
hit
"""
# from is a reserved word so it cannot be used, use from_ instead
if "from_" in params:
params["from"] = params.pop("from_")
if not index:
index = "_all"
return self.transport.perform_request(
"GET", _make_path(index, "_search"), params=params, body=body
)
@query_params(
"_source",
"_source_exclude",
"_source_excludes",
"_source_include",
"_source_includes",
"allow_no_indices",
"analyze_wildcard",
"analyzer",
"conflicts",
"default_operator",
"df",
"expand_wildcards",
"from_",
"ignore_unavailable",
"lenient",
"pipeline",
"preference",
"q",
"refresh",
"request_cache",
"requests_per_second",
"routing",
"scroll",
"scroll_size",
"search_timeout",
"search_type",
"size",
"slices",
"sort",
"stats",
"terminate_after",
"timeout",
"version",
"version_type",
"wait_for_active_shards",
"wait_for_completion",
)
def update_by_query(self, index, body=None, params=None):
"""
Perform an update on all documents matching a query.
`<https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html>`_
:arg index: A list of index names to search, or a string containing a
comma-separated list of index names to search; use `_all` or the
empty string to perform the operation on all indices
:arg body: The search definition using the Query DSL
:arg _source: True or false to return the _source field or not, or a
list of fields to return
:arg _source_exclude: A list of fields to exclude from the returned
_source field
:arg _source_include: A list of fields to extract and return from the
_source field
:arg allow_no_indices: Whether to ignore if a wildcard indices
expression resolves into no concrete indices. (This includes `_all`
string or when no indices have been specified)
:arg analyze_wildcard: Specify whether wildcard and prefix queries
should be analyzed (default: false)
:arg analyzer: The analyzer to use for the query string
:arg conflicts: What to do when the update by query hits version
conflicts?, default 'abort', valid choices are: 'abort', 'proceed'
:arg default_operator: The default operator for query string query (AND
or OR), default 'OR', valid choices are: 'AND', 'OR'
:arg df: The field to use as default where no field prefix is given in
the query string
:arg expand_wildcards: Whether to expand wildcard expression to concrete
indices that are open, closed or both., default 'open', valid
choices are: 'open', 'closed', 'none', 'all'
:arg from\\_: Starting offset (default: 0)
:arg ignore_unavailable: Whether specified concrete indices should be
ignored when unavailable (missing or closed)
:arg lenient: Specify whether format-based query failures (such as
providing text to a numeric field) should be ignored
:arg pipeline: Ingest pipeline to set on index requests made by this
action. (default: none)
:arg preference: Specify the node or shard the operation should be
performed on (default: random)
:arg q: Query in the Lucene query string syntax
:arg refresh: Should the effected indexes be refreshed?
:arg request_cache: Specify if request cache should be used for this
request or not, defaults to index level setting
:arg requests_per_second: The throttle to set on this request in sub-
requests per second. -1 means no throttle., default 0
:arg routing: A comma-separated list of specific routing values
:arg scroll: Specify how long a consistent view of the index should be
maintained for scrolled search
:arg scroll_size: Size on the scroll request powering the
update_by_query
:arg search_timeout: Explicit timeout for each search request. Defaults
to no timeout.
:arg search_type: Search operation type, valid choices are:
'query_then_fetch', 'dfs_query_then_fetch'
:arg size: Number of hits to return (default: 10)
:arg slices: The number of slices this task should be divided into.
Defaults to 1 meaning the task isn't sliced into subtasks., default
1
:arg sort: A comma-separated list of <field>:<direction> pairs
:arg stats: Specific 'tag' of the request for logging and statistical
purposes
:arg terminate_after: The maximum number of documents to collect for
each shard, upon reaching which the query execution will terminate
early.
:arg timeout: Time each individual bulk request should wait for shards
that are unavailable., default '1m'
:arg version: Specify whether to return document version as part of a
hit
:arg version_type: Should the document increment the version number
(internal) on hit or not (reindex)
:arg wait_for_active_shards: Sets the number of shard copies that must
be active before proceeding with the update by query operation.
Defaults to 1, meaning the primary shard only. Set to `all` for all
shard copies, otherwise set to any non-negative value less than or
equal to the total number of copies for the shard (number of
replicas + 1)
:arg wait_for_completion: Should the request should block until the
update by query operation is complete., default True
"""
if index in SKIP_IN_PATH:
raise ValueError("Empty value passed for a required argument 'index'.")
return self.transport.perform_request(
"POST", _make_path(index, "_update_by_query"), params=params, body=body
)
@query_params("requests_per_second")
def update_by_query_rethrottle(self, task_id, params=None):
"""
`<https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html>`_
:arg task_id: The task id to rethrottle
:arg requests_per_second: The throttle to set on this request in
floating sub-requests per second. -1 means set no throttle.
"""
if task_id in SKIP_IN_PATH:
raise ValueError("Empty value passed for a required argument 'task_id'.")
return self.transport.perform_request(
"POST",
_make_path("_update_by_query", task_id, "_rethrottle"),
params=params,
)
@query_params(
"refresh",
"requests_per_second",
"slices",
"scroll",
"timeout",
"wait_for_active_shards",
"wait_for_completion",
)
def reindex(self, body, params=None):
"""
Reindex all documents from one index to another.
`<https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html>`_
:arg body: The search definition using the Query DSL and the prototype
for the index request.
:arg refresh: Should the effected indexes be refreshed?
:arg requests_per_second: The throttle to set on this request in sub-
requests per second. -1 means no throttle., default 0
:arg slices: The number of slices this task should be divided into.
Defaults to 1 meaning the task isn't sliced into subtasks., default
1
:arg scroll: Control how long to keep the search context alive, default
'5m'
:arg timeout: Time each individual bulk request should wait for shards
that are unavailable., default '1m'
:arg wait_for_active_shards: Sets the number of shard copies that must
be active before proceeding with the reindex operation. Defaults to
1, meaning the primary shard only. Set to `all` for all shard
copies, otherwise set to any non-negative value less than or equal
to the total number of copies for the shard (number of replicas + 1)
:arg wait_for_completion: Should the request should block until the
reindex is complete., default True
"""
if body in SKIP_IN_PATH:
raise ValueError("Empty value passed for a required argument 'body'.")
return self.transport.perform_request(
"POST", "/_reindex", params=params, body=body
)