In [42]:
import requests
import json
import pandas as pd
from numpy import unique

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
#!pip install python-greeklish --user
from greeklish.converter import Converter

In [138]:
INDEX_NAME = "nefnef_es"

In [188]:
### Helpter functions

# Convert greek to gleeklish
def query_to_gleeklish(query):
    conv = Converter(max_expansions=1)
    query = conv.convert(query.lower())[0]
    return query

# connect to elasticsearch
def connect_elasticsearch():
    _es = None
    _es = Elasticsearch()
    if _es.ping():
        print('- Connected')
    else:
        print('- Cannot connect to ElasticSearch')
    return _es


In [2]:
jsonUrl = "https://nef-nef.gr/feeds/products-json.ashx"
r = requests.get(jsonUrl)
res = json.loads(r.text)

In [3]:
attributes = [el.pop('attributes') for el in res]

In [4]:
df = pd.DataFrame(res)

df['category'] = df['category_path'].str.split(' > ').apply(lambda x: x[0])
df['category'] = df['category'].replace({'SPECIAL OFFERS':'Προσφορές'})

In [5]:
Categories = df['category'].unique()
Categories = unique([x.split(' ')[0] for x in Categories])

In [6]:
conv = Converter(max_expansions=1)
df['meta_keywords_eng'] = [conv.convert(el)[0] for el in df['meta_keywords'].str.lower()]
df['name_eng'] = [conv.convert(el)[0] for el in df['name'].str.lower()]
df['description_small_eng'] = [conv.convert(el)[0] for el in df['description_small'].str.lower()]

In [11]:
products = df[['code','manufacturer', 'meta_keywords', 'meta_keywords_eng', 
                        'name', 'name_eng', 'description_small', 'description_small_eng',
                        'category', 'site_url', ]].to_dict('records')

In [15]:
%%capture
# Create an Elasticsearch object
es = connect_elasticsearch()

In [20]:
## delete the index
if INDEX_NAME in es.indices.get_alias().keys():
    es.indices.delete(index=INDEX_NAME, ignore=[400, 404])

In [17]:
# the index creation in ES is similar to the DB building in RDBMS
# a database named spitishop_testing is created 
# http://localhost:9200/spitishop_testing
def create_index(es_object, index_name=None):
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-synonym-tokenfilter.html
    settings = {
                "settings": {
                    "index" : {
                        "analysis" : {
                            "analyzer" : {
                                "my_analyzer" : {
                                    "tokenizer" : "standard",
                                    "filter" : ["my_stop", "synonym"]
                                }
                            },
                            "filter" : {
                                "my_stop": {
                                    "type" : "stop",
                                    "stopwords": ["to", "kai", "thn", "einai", "an", "sas", 
                                                  "olo", "apo", "sth", "sto", "me"]
                                },
                                "synonym" : {
                                    "type" : "synonym",
                                    "lenient": True,
                                    "synonyms" : ["ypnodomatio, bedroom => krebatokamara"]
                                }
                            }
                        }
                    }
                },
            }
    
    try:
        #if not es_object.indices.exists(index_name):
        # Ignore 400 means to ignore "Index Already Exist" error.
        es_object.indices.create(index=index_name, ignore=400, **settings)
        print('Created Index')
    except Exception as ex:
        print('Error: ', str(ex))

In [18]:
create_index(es, INDEX_NAME)

Created Index


In [19]:
%%time

# add products to the index
for i, product in enumerate(products[0:]):
    es.index(index=INDEX_NAME,
             id=i,
             document=product)

CPU times: user 1.09 s, sys: 47 ms, total: 1.14 s
Wall time: 5.36 s


## Search

In [145]:
SEARCH_TERM = "ΠΑΠΛΩΜΑ"

In [259]:
def es_query(es_object, index_name, query_term, fz = "AUTO:3,5"):
    
    query_term = query_to_gleeklish(query_term)
    
    s = (Search(using=es_object, index=index_name)
     
     .query("bool",
        should=[Q("match",name_eng={"query": query_term, 
                                    "fuzziness": fz, "analyzer":"my_analyzer", "boost":3}), 
                Q("match",meta_keywords_eng={"query": query_term, 
                                    "fuzziness": fz, "analyzer":"my_analyzer", "boost":2}),
                Q("match",name_eng={"query": query_term, 
                                    "fuzziness": fz, "analyzer":"my_analyzer", "boost":1})
               ]
       )
     
     .extra(size=50)
         
     .source(includes=["name", "code", "meta_keywords"])
         
    )
    
    return s

In [260]:
s = es_query(es_object = es, index_name = INDEX_NAME, query_term = SEARCH_TERM)

In [261]:
s.to_dict()

{'query': {'bool': {'should': [{'match': {'name_eng': {'query': 'paplwma',
       'fuzziness': 'AUTO:3,5',
       'analyzer': 'my_analyzer',
       'boost': 3}}},
    {'match': {'meta_keywords_eng': {'query': 'paplwma',
       'fuzziness': 'AUTO:3,5',
       'analyzer': 'my_analyzer',
       'boost': 2}}},
    {'match': {'name_eng': {'query': 'paplwma',
       'fuzziness': 'AUTO:3,5',
       'analyzer': 'my_analyzer',
       'boost': 1}}}]}},
 'size': 50,
 '_source': {'includes': ['name', 'code', 'meta_keywords']}}

In [262]:
res = s.execute()

In [263]:
res.hits.total.value

37

In [264]:
s = es_query(es_object = es, index_name = INDEX_NAME, query_term = SEARCH_TERM, fz=10)

In [265]:
res = s.execute()

In [266]:
res.hits.total.value

37

In [269]:
res.hits[0]

<Hit(nefnef_es/0): {'code': '003507', 'name': 'ΠΑΠΛΩΜΑ ΜΟΝΟ MICROFIBER', 'meta_...}>

In [None]:
res[0].meta.score
res[0].name_eng
res[0].name

In [45]:
def run_search(query, category = None, returned_results=10):
    
    conv = Converter(max_expansions=1)
    query = conv.convert(query.lower())[0]
    
    print(query)
    
    if category:
        pass
    else:
        results = es.search(index=INDEX_NAME,size=returned_results, **{

                  "query": {
                    "bool": {
                        
                        
                      "should": [
                          
                        { "match": { 
                            "name_eng":  {
                              "query": query,
                              "fuzziness": "AUTO:3,5",
                              "analyzer": "my_analyzer",  
                              "boost": 3
                        }}},
                        { "match": { 
                            "meta_keywords_eng":  {
                              "query": query,
                              "fuzziness": "AUTO:3,5",
                              "analyzer": "my_analyzer",                             
                              "boost": 5
                        }}},
                        { "match": { 
                            "description_small_eng":  {
                              "query": query,
                              "fuzziness": "AUTO:3,5",
                              "analyzer": "my_analyzer",                            
                              "boost": 1
                        }}},
                          
                      ]
                    
                    
                    }
                  }                                                                   
                },                  
                                   )
        return results

In [169]:
run_search('Παιδική')['hits']['hits']

paidikh


[{'_index': 'nefnef_es',
  '_type': '_doc',
  '_id': '863',
  '_score': 37.44996,
  '_source': {'code': '029380',
   'manufacturer': 'NEF NEF HOMEWARE',
   'meta_keywords': 'ΠΑΙΔΙΚΗ, ΚΟΥΒΕΡΤΑ, ΜΟΝΗ, FOOT',
   'meta_keywords_eng': 'paidikh, kouberta, monh, foot',
   'name': 'ΠΑΙΔΙΚΗ ΚΟΥΒΕΡΤΑ ΜΟΝΗ FOOT ',
   'name_eng': 'paidikh kouberta monh foot ',
   'description_small': 'Παιδική Κουβέρτα μονή, διάστασης 160x220cm.<br />Είναι ιδιαίτερα απαλή, κατασκευασμένη από flannel πολυεστέρα βάρους 285gr/m<sup>2</sup>.<br />',
   'description_small_eng': 'paidikh kouberta monh, diastashs 160x220cm.<br />einai idiaitera apalh, kataskeuasmenh apo flannel polyestera barous 285gr/m<sup>2</sup>.<br />',
   'category': 'Παιδικά',
   'site_url': 'https://nef-nef.gr/gr/prdid/7ab3de7415484b13bafc9f091503c037/productinfo.aspx'}},
 {'_index': 'nefnef_es',
  '_type': '_doc',
  '_id': '557',
  '_score': 36.715103,
  '_ignored': ['description_small.keyword', 'description_small_eng.keyword'],
  '_source': {'cod

In [None]:
 results = es.search(index='nefnef_es',size=returned_results, **{

                  "query": {
                    "bool": {
                        
                        
                      "should": [
                          
                        { "match": { 
                            "name_eng":  {
                              "query": query,
                              "fuzziness": "AUTO:3,5",
                              "analyzer": "my_analyzer",  
                              "boost": 3
                        }}},
                        { "match": { 
                            "meta_keywords_eng":  {
                              "query": query,
                              "fuzziness": "AUTO:3,5",
                              "analyzer": "my_analyzer",                             
                              "boost": 5
                        }}},
                        { "match": { 
                            "description_small_eng":  {
                              "query": query,
                              "fuzziness": "AUTO:3,5",
                              "analyzer": "my_analyzer",                            
                              "boost": 1
                        }}},
                          
                      ]
                    
                    
                    }
                  }                                                                   
                },                  
                                   )
        return results

In [121]:
s.query("bool", 
        should=[Q("match",name_eng=query("ΠΑΠΛΩΜΑ"),fuzziness= "AUTO:3,5", analyzer = "my_analyzer", boost=3), 
                Q("match",meta_keywords_eng=query("ΠΑΠΛΩΜΑ"),fuzziness= "AUTO:3,5", analyzer = "my_analyzer", boost=2),
                Q("match",description_small_eng=query("ΠΑΠΛΩΜΑ"),fuzziness= "AUTO:3,5", analyzer = "my_analyzer", boost=1)
               ]
       ).to_dict()

{'query': {'bool': {'should': [{'match': {'name_eng': 'paplwma',
      'fuzziness': 'AUTO:3,5',
      'analyzer': 'my_analyzer',
      'boost': 3}},
    {'match': {'meta_keywords_eng': 'paplwma',
      'fuzziness': 'AUTO:3,5',
      'analyzer': 'my_analyzer',
      'boost': 2}},
    {'match': {'description_small_eng': 'paplwma',
      'fuzziness': 'AUTO:3,5',
      'analyzer': 'my_analyzer',
      'boost': 1}}]}}}

In [110]:
s.query(
  Q(
    "match",
    name_eng=query("ΠΑΠΛΩΜΑ"),
    boost=3
  ) | Q(
    'match',
    meta_keywords_eng=query("ΠΑΠΛΩΜΑ"), 
    boost=2
  ) | Q(
    'match',
    description_small_eng=query("ΠΑΠΛΩΜΑ"), 
    boost=1
  )
).to_dict()

{'query': {'bool': {'should': [{'match': {'fuzziness': 'AUTO:3,5',
      'name_eng': 'paplwma',
      'boost': 3}},
    {'match': {'meta_keywords_eng': 'paplwma', 'boost': 2}},
    {'match': {'description_small_eng': 'paplwma', 'boost': 1}}]}}}

In [92]:
s = Search(using=es, index="nefnef_es") \
    .query("match", name_eng=query("ΠΑΠΛΩΜΑ")) \
    .extra(size=50)

res = s.execute()

In [93]:
res[0].meta.score
res[0].name_eng
res[0].name

'ΠΑΠΛΩΜΑ ΜΟΝΟ MICROFIBER'

In [94]:
a = res[0]

In [95]:
a.site_url

'https://nef-nef.gr/gr/prdid/cd83a5b3c23540649e1e87f4e7ea49ac/productinfo.aspx'

In [96]:
len(res)

37

In [None]:
s.extra()

In [97]:
from elasticsearch_dsl import Q

In [98]:
s = s.query(
  Q(
    "match",
    query="",
    fields=['product_name', "short_name"],
    boost=2
  ) | Q(
    'match',
    product_pinyin={'query': '...', 'boost': 1}
  )
)

In [101]:
s.to_dict()

{'query': {'bool': {'should': [{'match': {'query': '',
      'fields': ['product_name', 'short_name'],
      'boost': 2}},
    {'match': {'product_pinyin': {'query': '...', 'boost': 1}}}],
   'must': [{'match': {'name_eng': 'paplwma'}}],
   'minimum_should_match': 1}},
 'size': 50}

In [None]:
s = s.query(
  Q(
    "match",
    query="",
    fields=['product_name', "short_name"],
    boost=2
  ) | Q(
    'match',
    product_pinyin={'query': '...', 'boost': 1}
  )
)

In [None]:
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "%s",
"fields": [
"product_name",
"short_name"
],
"boost": 2
}
},
{
"match": {
"product_pinyin": {
"query": "%s",
"boost": 1
}
}
}
]
}
}