Permalink
Browse files

bumped version to 0.5.4 in setup.py, added bin/esdump with argparse support
  • Loading branch information...
eriky committed Jan 30, 2013
1 parent e8236dd commit ba7a5053e080aa72727734bfbb1f1e3250e35a51
Showing with 55 additions and 11 deletions.
  1. +0 −1 .gitignore
  2. +43 −0 bin/esdump
  3. +10 −9 esclient.py
  4. +2 −1 setup.py
View
@@ -12,7 +12,6 @@ dist
build
eggs
parts
-bin
var
sdist
develop-eggs
View
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+import esclient
+import json
+import argparse
+
+parser = argparse.ArgumentParser(description="Dump one or more ElasticSearch" +
+" indexes to stdout. This tool will dump all the _source fields. If you chose"+
+" not to store the _source field, you can not make backups of your index(es)"+
+" with this tool.")
+
+parser.add_argument('--url', '-u', required=True, help="The full URL to the ElasticSearch server, including port")
+parser.add_argument('--file', '-f', required=True, type=argparse.FileType('wb', 8192), default="-", help="The output file to dump to. By default esdump will dump to stdout.")
+parser.add_argument('--indexes', '-i', nargs='+', help="One or more index names to dump, may also be aliases. If none specified, ALL indexes are dumped.")
+arguments = parser.parse_args()
+
+if not arguments.indexes:
+ print "You did not specify an index with the -i/--indexes option. Dumping ALL indexes."
+ indexes = ['_all']
+else:
+ indexes = arguments.indexes
+
+es = esclient.ESClient(arguments.url)
+print type(arguments.indexes)
+query_body = { "query": { "match_all": {} } }
+
+scroll_id = es.scan(query_body = query_body, indexes = indexes)
+
+while True:
+ scrollres = es.scroll(scroll_id)
+ # get next scroll_id
+ scroll_id = scrollres["_scroll_id"]
+
+ hits = scrollres["hits"]["hits"]
+
+ num_results = 0
+ for hit in scrollres["hits"]["hits"]:
+ print json.dumps(hit["_source"])
+ num_results += 1
+
+ # See if we reached the end of the data
+ if num_results == 0:
+ break
View
@@ -124,20 +124,18 @@ def send_request(self, method, path, body=None, query_string_args={},encode_json
def _search_operation(self, request_type, query_body=None,
operation_type="_search", query_string_args=None,
indexes=["_all"], doctypes=[]):
- """Perform a search operation. This method can be used for search,
- delete by search and count.
+ """Perform a search operation. This method can be used for search and
+ counting by using the operation types:
+ _search, _count
+
+ Note that you can also count with more options by using ElasticSearch's
+ search_type=count, which is not yet implemented in ESClient
Searching in ElasticSearch can be done in two ways:
1) with a query string, by providing query_args
- 2) using a full query body (JSON) by providing
- the query_body.
- You can choose one, but not both at the same time.
+ 2) using a full query body (JSON) by providing the query_body
"""
- #if query_body and query_string_args:
- # raise ESClientException("Found both a query body and query" +
- # "arguments")
-
indexes = ','.join(indexes)
doctypes = ','.join(doctypes)
@@ -195,6 +193,9 @@ def search(self, query_body=None, query_string_args=None,
You can choose one, but not both at the same time.
"""
+ if query_body and query_string_args:
+ raise ESClientException("Both query_body and query_string_args" +
+ "provided, please use only on at a time")
return self._search_operation('GET', query_body=query_body,
query_string_args=query_string_args, indexes=indexes,
doctypes=doctypes)
View
@@ -6,7 +6,7 @@
long_description = file.read()
setup(name='ESClient',
- version="0.5.3",
+ version="0.5.4",
description='A lightweight Python client for ElasticSearch',
author='Erik-Jan van Baaren',
author_email='erikjan@gmail.com',
@@ -15,6 +15,7 @@
license='New BSD license',
keywords = ["elasticsearch"],
install_requires = ['requests >= 0.10.0'],
+ scripts = ['bin/esdump'],
classifiers=[
'Development Status :: 4 - Beta',
'Environment :: Web Environment',

0 comments on commit ba7a505

Please sign in to comment.