# Elasticsearch tutorial

In [10]:
#import required modules
import requests
import json
import uuid
from datetime import datetime

## Helper functions

In [11]:
def pprint(data):
    """Print ``data`` as JSON text with sorted keys and a 4-space indent."""
    rendered = json.dumps(
        data,
        indent=4,
        sort_keys=True,
        separators=(',', ': '),
    )
    print(rendered)

def get_new_id():
    """Return a new version-1 UUID as 32 hexadecimal characters (no hyphens)."""
    return uuid.uuid1().hex
    
def get(url,data):
    """Issue a GET request to ``url`` and return the JSON-decoded response body.

    ``data`` is not sent; the parameter exists so this helper can be called
    with the same ``(url, payload)`` arguments as the other HTTP helpers.
    """
    response = requests.get(url)
    return response.json()

def delete(url,data):
    """Issue a DELETE request to ``url`` and return the JSON-decoded response body.

    ``data`` is not sent; the parameter exists so this helper can be called
    with the same ``(url, payload)`` arguments as the other HTTP helpers.
    """
    response = requests.delete(url)
    return response.json()

def put(url, payload):
    """Send ``payload`` as a JSON body in a PUT request to ``url``.

    Args:
        url: Address to call.
        payload: JSON-serialisable object used as the request body.

    Returns:
        The response body decoded from JSON.
    """
    # Declare the body type explicitly: Elasticsearch 6.0 and later reject
    # requests that carry a body without a Content-Type header.
    response = requests.put(
        url,
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json'},
    )
    return response.json()

def post(url, payload):
    """Send ``payload`` as a JSON body in a POST request to ``url``.

    Args:
        url: Address to call.
        payload: JSON-serialisable object used as the request body.

    Returns:
        The response body decoded from JSON.
    """
    # Declare the body type explicitly: Elasticsearch 6.0 and later reject
    # requests that carry a body without a Content-Type header.
    response = requests.post(
        url,
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json'},
    )
    return response.json()

# Dispatch table: HTTP verb -> helper function taking (url, payload).
services = dict(GET=get, PUT=put, POST=post, DELETE=delete)

def curl(url, method, payload=None):
    """Dispatch an HTTP request through the ``services`` table.

    Args:
        url: Address to call.
        method: HTTP verb; matched against the keys of ``services``
            case-insensitively (``'get'`` and ``'GET'`` are equivalent).
        payload: Object handed to the selected helper as the request body.
            Defaults to a fresh empty dict on every call.

    Returns:
        Whatever the selected helper returns.

    Raises:
        KeyError: if ``method`` does not name an entry of ``services``.
    """
    # A ``{}`` default would be one dict shared by every call; build a new one instead.
    if payload is None:
        payload = {}
    return services[method.upper()](url, payload)

## Check the Elasticsearch server

In [12]:
# GET the root endpoint of the local node and pretty-print the JSON it returns.
url = 'http://127.0.0.1:9200'
res = curl(url,'GET')
pprint(res)

{
    "cluster_name": "elasticsearch",
    "cluster_uuid": "qSX_8o7OQ2KTv6RI2FDkOA",
    "name": "DJ",
    "tagline": "You Know, for Search",
    "version": {
        "build_hash": "d38a34e7b75af4e17ead16f156feffa432b22be3",
        "build_snapshot": false,
        "build_timestamp": "2016-12-07T16:28:56Z",
        "lucene_version": "5.5.2",
        "number": "2.4.3"
    }
}


## Let's define the index name and doc type

In [13]:
# Index and document-type names; later cells read these to build request URLs.
index_name = "bigdatairan"
doc_type = 'tweet'

## Create a new index with a schema (mapping)

In [14]:
# Request body used when creating the index: declares "coordinate" as a geo_point.
# The mapping is keyed by doc_type (rather than a repeated literal) so it always
# matches the type name used in the document URLs.
_mappings = {
    "mappings": {
        doc_type: {
            "properties": {
                "coordinate": {"type": "geo_point"}
            }
        }
    }
}

In [15]:
# Create the index by sending the mapping to /<index_name>.
# PUT is the documented verb for index creation; later Elasticsearch
# releases do not accept POST on this endpoint.
url = 'http://127.0.0.1:9200/{}'.format(index_name)
res = curl(url,'PUT',_mappings)
pprint (res)

{
    "acknowledged": true
}


## Create new document

In [17]:
# Sample document that the next cell indexes.
tweet={
    'author':'Fartash Haghani',
    'tweet_text':"150tr gigabytes. That's the amount of information in a human body",
    # NOTE(review): both values are identical; Elasticsearch reads a geo_point
    # given as an array as [lon, lat] — confirm the intended location.
    'coordinate' : [35.6683226, 35.6683226],
    'user_lang':'Fa',
    'tweet_lang':'En',
    'user_followers_count':203,
    'gender':'male',
    # NOTE(review): datetime.now() is naive local time, so the ISO string has
    # no UTC offset — confirm that is what should be stored.
    'created_at':datetime.now().isoformat()
}

## Index document

In [18]:
# Store the tweet under a freshly generated id: PUT /<index>/<type>/<id>.
documet_id = get_new_id()
url = 'http://127.0.0.1:9200/%s/%s/%s' % (index_name, doc_type, documet_id)
res = curl(url, 'PUT', tweet)
pprint(res)

{
    "_id": "af761afec79811e6a28e88532e3b047f",
    "_index": "bigdatairan",
    "_shards": {
        "failed": 0,
        "successful": 1,
        "total": 2
    },
    "_type": "tweet",
    "_version": 1,
    "created": true
}


## Let's check current schema

In [19]:
# Fetch the mapping of the index created above. The URL is built from
# index_name, like the other cells, instead of repeating the literal name.
url = 'http://127.0.0.1:9200/{}/_mapping'.format(index_name)
res = curl(url,'GET')
pprint(res)

{
    "bigdatairan": {
        "mappings": {
            "tweet": {
                "properties": {
                    "author": {
                        "type": "string"
                    },
                    "coordinate": {
                        "type": "geo_point"
                    },
                    "created_at": {
                        "format": "strict_date_optional_time||epoch_millis",
                        "type": "date"
                    },
                    "gender": {
                        "type": "string"
                    },
                    "tweet_lang": {
                        "type": "string"
                    },
                    "tweet_text": {
                        "type": "string"
                    },
                    "user_followers_count": {
                        "type": "long"
                    },
                    "user_lang": {
                        "type": "string"
                    }
                }
      

## Delete Index

In [69]:
# Send DELETE /<index_name> and show the server's reply.
url = 'http://127.0.0.1:9200/%s' % index_name
res = curl(url, 'DELETE')
pprint(res)

{
    "acknowledged": true
}
