Skip to content

Commit

Permalink
Adjust to new ES schema ~ parallel commit to ahmia-index #a091eb0
Browse files Browse the repository at this point in the history
* Use the two separate indices, each for tor crawls and i2p crawls
* A later upgrade to Elasticsearch 7.x (which would also support 8.x & 9.x) can be performed easily by changing `doc` to `_doc` and removing the ELASTICSEARCH_TYPE attribute from all the projects
  • Loading branch information
chamalis committed Jun 10, 2018
1 parent dcf5e85 commit 576b94f
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 15 deletions.
4 changes: 2 additions & 2 deletions README.md
Expand Up @@ -10,8 +10,8 @@ in Finland. This repository contains ahmia.fi source code.

# Compatibility

The newest version of Ahmia is built with Python 3.6, Django 1.11 and
Elasticsearch 5. Python 2.7+ should be ok too, but preferably try Python 3 instead.
The newest version of Ahmia is built with Python 3.6, Django 1.11 and Elasticsearch 6.2 (5.6 is also compatible).
Python 2.7+ should be ok too, but preferably try Python 3 instead.
You will need to know these technologies to create a working Ahmia installation.
Ahmia crawls using [OnionBot](https://github.com/ahmia/ahmia-crawler).

Expand Down
7 changes: 5 additions & 2 deletions ahmia/ahmia/settings/base.py
Expand Up @@ -26,8 +26,11 @@ def my_path(*x):
default="8C:DC:67:EA:C3:B3:97:94:92:30:81:35:8C:C6:D9:2A:E2:E6:8E:3E")
# 'https://ahmia.fi/esconnection/'
ELASTICSEARCH_SERVERS = config('ELASTICSEARCH_SERVERS', default='http://localhost:9200')
ELASTICSEARCH_INDEX = config('ELASTICSEARCH_INDEX', default='latest-crawl')
ELASTICSEARCH_TYPE = config('ELASTICSEARCH_TYPE', default='tor')
# BOTH-INDEX exists in case we want to search both onion and i2p addresses ~ currently unused
# ELASTICSEARCH_BOTH_INDEX = config('ELASTICSEARCH_BOTH_INDEX', default='latest-crawl')
ELASTICSEARCH_TOR_INDEX = config('ELASTICSEARCH_TOR_INDEX', default='latest-tor')
ELASTICSEARCH_I2P_INDEX = config('ELASTICSEARCH_I2P_INDEX', default='latest-i2p')
ELASTICSEARCH_TYPE = config('ELASTICSEARCH_TYPE', default='doc') # todo change/rm when ES 7.x

# Email settings
EMAIL_USE_TLS = config('EMAIL_USE_TLS', cast=bool, default=True)
Expand Down
7 changes: 5 additions & 2 deletions ahmia/ahmia/settings/example.env
Expand Up @@ -12,8 +12,11 @@ DB_PASS=well_I_am_not_pushing_it_to_git
# ELASTICSEARCH STUFF
ELASTICSEARCH_TLS_FPRINT=8C:DC:67:EA:C3:B3:97:94:92:30:81:35:8C:C6:D9:2A:E2:E6:8E:3E
ELASTICSEARCH_SERVERS=http://localhost:9200
ELASTICSEARCH_INDEX=latest-crawl
ELASTICSEARCH_TYPE=tor
# BOTH-INDEX serves cases where we want to search both onion and i2p addresses
ELASTICSEARCH_BOTH_INDEX=latest-crawl
ELASTICSEARCH_TOR_INDEX=latest-tor
ELASTICSEARCH_I2P_INDEX=latest-i2p
ELASTICSEARCH_TYPE=doc

# Email settings
EMAIL_USE_TLS=True
Expand Down
14 changes: 12 additions & 2 deletions ahmia/ahmia/utils.py
Expand Up @@ -23,9 +23,19 @@ def get_elasticsearch_object():
return es_obj


def get_elasticsearch_index():
def get_elasticsearch_both_index():
""" Getting the name of the index """
return settings.ELASTICSEARCH_INDEX
return settings.ELASTICSEARCH_BOTH_INDEX


def get_elasticsearch_tor_index():
""" Getting the name of the index """
return settings.ELASTICSEARCH_TOR_INDEX


def get_elasticsearch_i2p_index():
""" Getting the name of the index """
return settings.ELASTICSEARCH_I2P_INDEX


def get_elasticsearch_type():
Expand Down
4 changes: 2 additions & 2 deletions ahmia/ahmia/validators.py
Expand Up @@ -13,8 +13,8 @@ def validate_status(value):
"""Test if an onion domain is not banned."""

res = get_elasticsearch_object().count(
index=settings.ELASTICSEARCH_INDEX,
doc_type=settings.ELASTICSEARCH_INDEX,
index=settings.ELASTICSEARCH_TOR_INDEX,
doc_type=settings.ELASTICSEARCH_TYPE,
body={
"query": {
"constant_score": {
Expand Down
6 changes: 3 additions & 3 deletions ahmia/ahmia/views.py
Expand Up @@ -127,7 +127,7 @@ class BlacklistView(FormView):

def get_es_context(self, **kwargs):
return {
"index": utils.get_elasticsearch_index(),
"index": utils.get_elasticsearch_tor_index(),
"doc_type": utils.get_elasticsearch_type(),
"size": 0,
"body": {
Expand Down Expand Up @@ -215,7 +215,7 @@ def get_context_data(self, **kwargs):

def get_es_context(self, **kwargs):
return {
"index": utils.get_elasticsearch_index(),
"index": utils.get_elasticsearch_tor_index(),
"doc_type": utils.get_elasticsearch_type(),
"size": 0,
"body": {
Expand Down Expand Up @@ -252,7 +252,7 @@ def format_hits(self, hits):

def get_es_context(self, **kwargs):
return {
"index": utils.get_elasticsearch_index(),
"index": utils.get_elasticsearch_tor_index(),
"doc_type": utils.get_elasticsearch_type(),
"size": 0,
"body": {
Expand Down
5 changes: 3 additions & 2 deletions ahmia/search/views.py
Expand Up @@ -12,6 +12,7 @@

from ahmia import utils
from ahmia.models import SearchResultsClicks
from ahmia.utils import get_elasticsearch_i2p_index
from ahmia.views import ElasticsearchBaseListView

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -58,7 +59,7 @@ class TorResultsView(ElasticsearchBaseListView):
def get_es_context(self, **kwargs):
query = kwargs['q']
return {
"index": utils.get_elasticsearch_index(),
"index": utils.get_elasticsearch_tor_index(),
"doc_type": utils.get_elasticsearch_type(),
"body": {
"query": {
Expand Down Expand Up @@ -205,5 +206,5 @@ class IipResultsView(TorResultsView):

def get_es_context(self, **kwargs):
context = super(IipResultsView, self).get_es_context(**kwargs)
context['doc_type'] = "i2p"
context['index'] = get_elasticsearch_i2p_index()
return context

0 comments on commit 576b94f

Please sign in to comment.