Skip to content

Commit

Permalink
Two new parameters: ELASTICSEARCH_INDEX_DATE_KEY (time-based indices …
Browse files Browse the repository at this point in the history
…from scraped string data) and ELASTICSEARCH_INDEX_DATE_KEY_FORMAT (time-based indices from scraped datetime data). Parameter ELASTICSEARCH_SERVERS is no longer required (default: localhost:9200).
  • Loading branch information
Dataninja committed Oct 27, 2016
1 parent 46f7c7a commit e834082
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions scrapyelasticsearch/scrapyelasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def validate_setting(setting_key):
if settings[setting_key] is None:
raise InvalidSettingsException('%s is not defined in settings.py' % setting_key)

required_settings = {'ELASTICSEARCH_SERVERS', 'ELASTICSEARCH_INDEX', 'ELASTICSEARCH_TYPE'}
required_settings = {'ELASTICSEARCH_INDEX', 'ELASTICSEARCH_TYPE'}

for required_setting in required_settings:
validate_setting(required_setting)
Expand All @@ -53,7 +53,7 @@ def from_crawler(cls, crawler):

cls.validate_settings(ext.settings)

es_servers = ext.settings['ELASTICSEARCH_SERVERS']
es_servers = ext.settings.get('ELASTICSEARCH_SERVERS', 'localhost:9200')
es_servers = es_servers if isinstance(es_servers, list) else [es_servers]

authType = ext.settings['ELASTICSEARCH_AUTH']
Expand All @@ -79,9 +79,17 @@ def index_item(self, item):

index_name = self.settings['ELASTICSEARCH_INDEX']
index_suffix_format = self.settings.get('ELASTICSEARCH_INDEX_DATE_FORMAT', None)
index_suffix_key = self.settings.get('ELASTICSEARCH_INDEX_DATE_KEY', None)
index_suffix_key_format = self.settings.get('ELASTICSEARCH_INDEX_DATE_KEY_FORMAT', None)

if index_suffix_format:
index_name += "-" + datetime.strftime(datetime.now(),index_suffix_format)
if index_suffix_key and index_suffix_key_format:
dt = datetime.strptime(item[index_suffix_key], index_suffix_key_format)
else:
dt = datetime.now()
index_name += "-" + datetime.strftime(dt,index_suffix_format)
elif index_suffix_key:
index_name += "-" + index_suffix_key

index_action = {
'_index': index_name,
Expand Down

0 comments on commit e834082

Please sign in to comment.