---
### Carrega pacotes necessários

In [None]:
#import pip
#pip.main(['install', 'elasticsearch'])
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd
import numpy as np
from time import time
import os
import sys
from dataiku import pandasutils as pdu
from elasticsearch import Elasticsearch, helpers
import json

---
### Conexão ao ES

In [None]:
# The connection URL embeds the credentials, so it is read from the ELASTICSEARCH_URL
# environment variable to keep real secrets out of the notebook. When the variable is
# not set, the original placeholder URL is used, exactly as before.
es = Elasticsearch(os.environ.get('ELASTICSEARCH_URL', 'http://user:pwd@server_ip:port/'))

In [None]:
# Connectivity check: query the cluster for its basic info; being the last
# expression of the cell, the response is displayed as the cell output.
es.info()

---
### Criação de índice

In [None]:
#Drop the index, if it exists, so that it can be recreated from scratch.
#An explicit existence check is used instead of a bare "except: pass" so that
#real failures (connection, authentication, permissions) are not silently hidden.
indice= "cnes_dss"
doc_type="cnes-type"
if es.indices.exists(index=indice):
    es.indices.delete(index=indice)

In [None]:
#Document type definition (Elasticsearch mapping for "cnes-type").
#The mapping is assembled from field-name lists grouped by field type; the resulting
#dict holds the same properties and specs as a hand-written literal would (only the
#insertion order of the property keys differs).

#identifier-like codes and competence (period) fields: exact-match keywords
_keyword_fields = [
    'REGSAUDE', 'MICR_REG', 'DISTRSAN', 'DISTRADM', 'CODUFMUN', 'CNES',
    'CO_BANCO', 'COD_IR', 'CPF_CNPJ',
    'COMPETEN', 'ano_competen', 'def_competen', 'mes_competen',
]

#geographic coordinates
_geo_point_fields = ['mun_coordenadas']

#free text, each with a "keyword" sub-field (ignore_above 256)
_text_fields = [
    'ALVARA', 'CONTRATE', 'CONTRATM', 'CO_AGENC', 'C_CORREN', 'TPGESTAO',
    'def_av_acred', 'def_av_pnass', 'def_niv_dep', 'def_orgexped', 'def_pf_pj',
    'mun_AMAZONIA', 'mun_CAPITAL', 'mun_FRONTEIRA', 'mun_MUNNOME', 'mun_MUNNOMEX',
    'uf_NOME_UF', 'uf_SIGLA_UF',
]

#floating point fields
_float_fields = [
    'AP02CV06', 'AV_ACRED', 'AV_PNASS', 'CLASAVAL', 'CLIENTEL',
    'DT_ACRED', 'DT_EXPED', 'DT_PNASS', 'DT_PUBLE', 'DT_PUBLM',
    'NIV_HIER', 'ORGEXPED', 'TURNO_AT',
    'mun_AREA', 'mun_LATITUDE', 'mun_LONGITUDE',
]

#integer fields; numbered families are generated instead of being listed one by one
_long_fields = (
    #AP01CV01 .. AP07CV06, except AP02CV06 which is mapped as float
    ['AP%02dCV%02d' % (grp, col)
     for grp in range(1, 8) for col in range(1, 7) if (grp, col) != (2, 6)]
    + ['ATENDAMB', 'ATENDHOS', 'ATEND_PR', 'ATIVIDAD',
       'CENTRCIR', 'CENTRNEO', 'CENTROBS',
       'CNPJ_MAN', 'COD_CEP', 'COLETRES']
    #COMISS01 .. COMISS12
    + ['COMISS%02d' % num for num in range(1, 13)]
    + ['COMISSAO', 'DT_ATUAL', 'ESFERA_A']
    #GESPRG1E, GESPRG1M .. GESPRG6E, GESPRG6M
    + ['GESPRG%d%s' % (num, suffix) for num in range(1, 7) for suffix in 'EM']
    + ['LEITHOSP', 'NATUREZA', 'NIVATE_A', 'NIVATE_H', 'NIV_DEP', 'PF_PJ']
    #QTINST01 .. QTINST37
    + ['QTINST%02d' % num for num in range(1, 38)]
    #QTLEITnn exists only for these numbers
    + ['QTLEIT%02d' % num
       for num in (5, 6, 7, 8, 9, 19, 20, 21, 22, 23, 32, 34, 38, 39, 40)]
    + ['QTLEITP1', 'QTLEITP2', 'QTLEITP3']
    + ['RES_BIOL', 'RES_COMU', 'RES_QUIM', 'RES_RADI', 'RETENCAO']
    #SERAP01P, SERAP01T .. SERAP11P, SERAP11T
    + ['SERAP%02d%s' % (num, suffix) for num in range(1, 12) for suffix in 'PT']
    + ['SERAPOIO', 'TP_PREST', 'TP_UNID', 'URGEMERG', 'VINC_SUS']
    + ['mun_ALTITUDE', 'mun_CSAUDCOD', 'mun_MSAUDCOD', 'mun_RSAUDCOD',
       'mun_codigo_adotado', 'uf_CODIGO_UF']
)

#every field gets its own (fresh) spec dict
_properties = {}
for _es_type, _field_names in (
        ('keyword', _keyword_fields),
        ('geo_point', _geo_point_fields),
        ('long', _long_fields),
        ('float', _float_fields)):
    for _field in _field_names:
        _properties[_field] = {'type': _es_type}
for _field in _text_fields:
    _properties[_field] = {
        'type': 'text',
        'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}},
    }

cnes_type = {"mappings": {'cnes-type': {'properties': _properties}}}

In [None]:
# Create the Elasticsearch index, sending the mapping dict as the request body
es.indices.create(
    index=indice,
    body=cnes_type,
)

---
### Carrega dados preparados/transformados do Dataiku

In [None]:
# Dataiku dataset object for "DATA_prep"; it is read chunk by chunk further
# down with iter_dataframes() when the documents are indexed.
data_prepared = dataiku.Dataset("DATA_prep")

---
### Recupera métricas (record_count) do dataset no Dataiku

In [None]:
def get_metric(project_name,dataset_name,metric_ids):
    """Compute metrics on a Dataiku dataset and return the first requested one.

    Parameters
    ----------
    project_name
        Project key passed to ``client.get_project``.
    dataset_name
        Dataset name passed to ``project.get_dataset``.
    metric_ids
        Collection of metric ids to compute, e.g. ``['records:COUNT_RECORDS']``.

    Returns
    -------
    dict
        ``{'metric': <metricId>, 'value': <int>}`` for the first entry of the
        computed results whose id is in ``metric_ids``.

    Raises
    ------
    IndexError
        If none of the requested metrics appears in the computed results.
    """
    client = dataiku.api_client()
    current_project = client.get_project(project_name)
    dataset = current_project.get_dataset(dataset_name)
    metrics = dataset.compute_metrics(partition='ALL', metric_ids=metric_ids)

    # Return as soon as the first requested metric is found instead of
    # materialising every match only to keep element [0].
    for m in metrics["result"]["computed"]:
        if m["metricId"] in metric_ids:
            return {'metric': m["metricId"], 'value': int(m["value"])}

    # Same exception type the former `[...][0]` lookup produced, but with a
    # message that says what is actually missing.
    raise IndexError(
        "none of the requested metrics %r were computed for dataset %s.%s"
        % (list(metric_ids), project_name, dataset_name)
    )

In [None]:
def record_count(project_name,dataset_name):
    """Return the ``records:COUNT_RECORDS`` metric value of a Dataiku dataset.

    Delegates to ``get_metric`` and unwraps the ``'value'`` entry of its result.
    """
    count_metric = get_metric(project_name, dataset_name, ['records:COUNT_RECORDS'])
    return count_metric['value']

In [None]:
# Total number of records in the prepared dataset (used for progress reporting)
nrows = record_count(
    project_name='ETLCNES',
    dataset_name='DATA_prep',
)

---
### Gera json para indexação

In [None]:
def geraJson(df):
    """Convert a DataFrame into a dict of per-row records.

    The frame is transposed so that each original row becomes a column,
    serialised with ``to_json`` and parsed back with ``json.loads``. The
    result has the shape ``{row_label: {column: value, ...}, ...}``.
    """
    transposed = df.T
    serialised = transposed.to_json()
    records = json.loads(serialised)
    return records

### Indexação em lote

In [None]:
# Number of rows per DataFrame read from the Dataiku dataset
chunksize = 10000

# Total number of chunks to index. Ceiling integer division so that a final
# partial chunk is counted (plain `/` yields a float that `%i` truncates).
nchunks = (nrows + chunksize - 1) // chunksize

# Print the total number of documents to be indexed
print("Documentos: %i\n"%nrows)

# One result per chunk, as returned by helpers.bulk
res_bulk=[]

# 1-based counter so the progress line runs from 1/nchunks to nchunks/nchunks
for chunk,df in enumerate(data_prepared.iter_dataframes(chunksize=chunksize), start=1):

    # Build the JSON-ready records of the current chunk (one entry per row)
    data_json = geraJson(df)

    # Print the current chunk number and the total number of chunks
    print("Chunk: %i/%i"%(chunk,nchunks))

    # One bulk "index" action per document of the current chunk
    lista = [
        {
            '_op_type': 'index',
            '_index': indice,
            '_type': doc_type,
            '_source': item
        }
        for item in data_json.values()
    ]

    # Index every document of the current chunk; helpers.bulk sends them in
    # batches of 1000 and, with both raise_* flags off, reports failures in
    # its return value instead of raising.
    res = helpers.bulk(client=es, actions=lista, chunk_size=1000, raise_on_error=False, raise_on_exception = False)
    res_bulk.append(res)

    print(res)

In [None]:
# One row per indexed chunk. Passing the column names to the constructor
# (instead of assigning `.columns` afterwards) also works when `res_bulk` is
# empty, where the assignment would fail with a length-mismatch ValueError.
res_df = pd.DataFrame(res_bulk, columns=['indexed_chunksize', 'errors'])

# Write recipe outputs
res_Elasticsearch = dataiku.Dataset("bulk_elasticsearch")
res_Elasticsearch.write_with_schema(res_df)