# Elasticsearch tutorial

In [18]:
#import required modules
import requests
import json
import uuid

## Helper functions

In [19]:
def pprint(data):
    """Print ``data`` as human-readable JSON.

    Keys are sorted and nested levels are indented by four spaces, so two
    responses with the same content always print identically.
    """
    formatted = json.dumps(
        data,
        indent=4,
        sort_keys=True,
        separators=(',', ': '),
    )
    print(formatted)

def get_new_id():
    """Return a new 32-character hexadecimal id.

    The id is a version-1 UUID (``uuid.uuid1``) written without dashes.
    """
    return uuid.uuid1().hex
    
def get(url,data):
    """Send an HTTP GET to ``url`` and return the response decoded from JSON.

    ``data`` is ignored; it exists only because ``curl`` calls every handler
    as ``handler(url, payload)``.
    """
    response = requests.get(url)
    return response.json()

def delete(url,data):
    """Send an HTTP DELETE to ``url`` and return the response decoded from JSON.

    ``data`` is ignored; it exists only because ``curl`` calls every handler
    as ``handler(url, payload)``.
    """
    response = requests.delete(url)
    return response.json()

def put(url, payload):
    """Send ``payload`` as a JSON body in an HTTP PUT to ``url``.

    Args:
        url: Full URL of the resource to create or replace.
        payload: JSON-serialisable object; encoded with ``json.dumps``.

    Returns:
        The response body decoded from JSON.
    """
    # requests sets no Content-Type for a pre-encoded string body, and
    # Elasticsearch 6.0+ rejects request bodies that do not declare one.
    headers = {'Content-Type': 'application/json'}
    response = requests.put(url, data=json.dumps(payload), headers=headers)
    return response.json()

def post(url, payload):
    """Send ``payload`` as a JSON body in an HTTP POST to ``url``.

    Args:
        url: Full URL of the endpoint to post to.
        payload: JSON-serialisable object; encoded with ``json.dumps``.

    Returns:
        The response body decoded from JSON.
    """
    # requests sets no Content-Type for a pre-encoded string body, and
    # Elasticsearch 6.0+ rejects request bodies that do not declare one.
    headers = {'Content-Type': 'application/json'}
    response = requests.post(url, data=json.dumps(payload), headers=headers)
    return response.json()

# Dispatch table: HTTP verb name -> helper function that performs that request.
services = dict(
    GET=get,
    PUT=put,
    POST=post,
    DELETE=delete,
)

def curl(url, method, payload=None):
    """Perform an HTTP request through the handler registered for ``method``.

    Args:
        url: Full URL to call.
        method: Key into ``services`` ('GET', 'PUT', 'POST' or 'DELETE').
        payload: Object passed to the handler as the request payload.
            Defaults to a new empty dict on every call.

    Returns:
        Whatever the selected handler returns.

    Raises:
        KeyError: If ``method`` is not a key of ``services``.
    """
    # A literal ``{}`` default would be one dict shared by every call, so a
    # handler that mutated it would leak state into later requests.
    if payload is None:
        payload = {}
    handler = services[method]
    return handler(url, payload)

## Check Elastic Server

In [20]:
# Ask the root endpoint of the local node for its cluster and version details.
url = 'http://127.0.0.1:9200'
res = curl(url, method='GET')
pprint(res)

{
    "cluster_name": "elasticsearch",
    "cluster_uuid": "qSX_8o7OQ2KTv6RI2FDkOA",
    "name": "DJ",
    "tagline": "You Know, for Search",
    "version": {
        "build_hash": "d38a34e7b75af4e17ead16f156feffa432b22be3",
        "build_snapshot": false,
        "build_timestamp": "2016-12-07T16:28:56Z",
        "lucene_version": "5.5.2",
        "number": "2.4.3"
    }
}


## Let's Define index name and doc type

In [21]:
# Index and document type used by the requests in the following cells.
index_name, doc_type = 'bigdatairan', 'tweet'

## Create new document

In [22]:
# Sample document that the following cells index, fetch and search for.
tweet = dict(
    author='Fartash Haghani',
    tweet_text="150tr gigabytes. That's the amount of information in a human body",
    coordinate=[3.09, 10.67],
    user_lang='Fa',
    tweet_lang='En',
    user_followers_count=203,
    gender='male',
    race='white',
)

## Create new Index and index document

In [23]:
# PUT the sample tweet at /<index>/<type>/<id> using a freshly generated id.
# The id is kept in ``documet_id`` because later cells fetch and delete by it.
documet_id = get_new_id()
url = 'http://127.0.0.1:9200/{index}/{kind}/{doc}'.format(
    index=index_name,
    kind=doc_type,
    doc=documet_id,
)
res = curl(url, 'PUT', tweet)
pprint(res)

{
    "_id": "070cc3b8c79811e6a28e88532e3b047f",
    "_index": "bigdatairan",
    "_shards": {
        "failed": 0,
        "successful": 1,
        "total": 2
    },
    "_type": "tweet",
    "_version": 1,
    "created": true
}


## Get the document back

In [24]:
# Fetch the stored document by id; the original tweet comes back under "_source".
url = 'http://127.0.0.1:9200/{index}/{kind}/{doc}'.format(
    index=index_name,
    kind=doc_type,
    doc=documet_id,
)
res = curl(url, method='GET')
pprint(res)

{
    "_id": "070cc3b8c79811e6a28e88532e3b047f",
    "_index": "bigdatairan",
    "_source": {
        "author": "Fartash Haghani",
        "coordinate": [
            3.09,
            10.67
        ],
        "gender": "male",
        "race": "white",
        "tweet_lang": "En",
        "tweet_text": "150tr gigabytes. That's the amount of information in a human body",
        "user_followers_count": 203,
        "user_lang": "Fa"
    },
    "_type": "tweet",
    "_version": 1,
    "found": true
}


## Where is the search engine? Searching with a query string

In [25]:
# URI search: the query goes in the ``q`` URL parameter as ``field:term``.
query = 'author:Haghani'
url = 'http://127.0.0.1:9200/{index}/{kind}/_search?q={q}'.format(
    index=index_name,
    kind=doc_type,
    q=query,
)
res = curl(url, method='GET')
pprint(res)

{
    "_shards": {
        "failed": 0,
        "successful": 5,
        "total": 5
    },
    "hits": {
        "hits": [
            {
                "_id": "070cc3b8c79811e6a28e88532e3b047f",
                "_index": "bigdatairan",
                "_score": 0.19178301,
                "_source": {
                    "author": "Fartash Haghani",
                    "coordinate": [
                        3.09,
                        10.67
                    ],
                    "gender": "male",
                    "race": "white",
                    "tweet_lang": "En",
                    "tweet_text": "150tr gigabytes. That's the amount of information in a human body",
                    "user_followers_count": 203,
                    "user_lang": "Fa"
                },
                "_type": "tweet"
            }
        ],
        "max_score": 0.19178301,
        "total": 1
    },
    "timed_out": false,
    "took": 2
}


## Complex Search queries
[query-dsl](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html)

In [26]:
# Query DSL search: a bool query whose "must" clauses both have to match.
query = {
    'query': {
        'bool': {
            'must': [
                {'match': {'author': 'Fartash Haghani'}},
                {'match': {'tweet_lang': 'En'}},
            ],
        },
    },
}
# The query is sent as the JSON body of a POST to the _search endpoint.
url = 'http://127.0.0.1:9200/{index}/{kind}/_search'.format(
    index=index_name,
    kind=doc_type,
)
res = curl(url, 'POST', query)
pprint(res)

{
    "_shards": {
        "failed": 0,
        "successful": 5,
        "total": 5
    },
    "hits": {
        "hits": [
            {
                "_id": "070cc3b8c79811e6a28e88532e3b047f",
                "_index": "bigdatairan",
                "_score": 0.3986135,
                "_source": {
                    "author": "Fartash Haghani",
                    "coordinate": [
                        3.09,
                        10.67
                    ],
                    "gender": "male",
                    "race": "white",
                    "tweet_lang": "En",
                    "tweet_text": "150tr gigabytes. That's the amount of information in a human body",
                    "user_followers_count": 203,
                    "user_lang": "Fa"
                },
                "_type": "tweet"
            }
        ],
        "max_score": 0.3986135,
        "total": 1
    },
    "timed_out": false,
    "took": 1
}


## Let's check current schema

In [27]:
# Fetch the mapping (schema) of the index.
# The URL is built from ``index_name`` rather than a hard-coded index, so this
# cell keeps pointing at the same index as the other cells if the name changes.
url = 'http://127.0.0.1:9200/{}/_mapping'.format(index_name)
res = curl(url,'GET')
pprint(res)

{
    "bigdatairan": {
        "mappings": {
            "tweet": {
                "properties": {
                    "author": {
                        "type": "string"
                    },
                    "coordinate": {
                        "type": "double"
                    },
                    "gender": {
                        "type": "string"
                    },
                    "race": {
                        "type": "string"
                    },
                    "tweet_lang": {
                        "type": "string"
                    },
                    "tweet_text": {
                        "type": "string"
                    },
                    "user_followers_count": {
                        "type": "long"
                    },
                    "user_lang": {
                        "type": "string"
                    }
                }
            }
        }
    }
}


## Delete Document

In [28]:
# Remove the single document indexed earlier, addressed by its generated id.
url = 'http://127.0.0.1:9200/{index}/{kind}/{doc}'.format(
    index=index_name,
    kind=doc_type,
    doc=documet_id,
)
res = curl(url, method='DELETE')
pprint(res)

{
    "_id": "070cc3b8c79811e6a28e88532e3b047f",
    "_index": "bigdatairan",
    "_shards": {
        "failed": 0,
        "successful": 1,
        "total": 2
    },
    "_type": "tweet",
    "_version": 2,
    "found": true
}


## Delete Index

In [30]:
# Drop the whole index. If the index no longer exists (for example when this
# cell is run a second time), the server answers with a 404
# index_not_found_exception instead.
url = 'http://127.0.0.1:9200/{index}'.format(index=index_name)
res = curl(url, method='DELETE')
pprint(res)

{
    "error": {
        "index": "bigdatairan",
        "reason": "no such index",
        "resource.id": "bigdatairan",
        "resource.type": "index_or_alias",
        "root_cause": [
            {
                "index": "bigdatairan",
                "reason": "no such index",
                "resource.id": "bigdatairan",
                "resource.type": "index_or_alias",
                "type": "index_not_found_exception"
            }
        ],
        "type": "index_not_found_exception"
    },
    "status": 404
}
