# Fuzzy Name Matching in Elastic
For Language Analysis
1. Install RNI Plugin
2. Create a new index
3. Create a document_type with an RNI field type
4. Add a handful of records
5. Example Queries

----
### 1) Install RNI Plugin

In [2]:
# Simple Plugin Installation
# ./bin/plugin --install rni-es --url file://path/rni-es-7.13.0.zip

# Add your license file
# cp /path/rlp-license.xml plugins/analysis-rbl-je/rbl-je-7.12.1/licenses/ 

----
### 2) Create an empty index

In [None]:
# Python client for Elasticsearch (install it with: pip install elasticsearch)
import json

from elasticsearch import Elasticsearch

# Global settings for Elasticsearch, shared by the cells that follow
es_host = "_enter_your_elastic_hostname_here_"  # e.g. "localhost"
es_port = "9200"
index_name = "fuzzy_match_demo"
document_type = "customers"

# Client handle for the Elasticsearch server configured above
es = Elasticsearch(host=es_host, port=es_port)

In [None]:
# Start from a clean slate: delete any existing index with this name.
# ignore=[404] suppresses the "index missing" error when there is nothing to delete.
# Comment out the next two lines if an existing index should be kept.
delete_response = es.indices.delete(index=index_name, ignore=[404])
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("delete {} {}".format(index_name, delete_response))

# Create the index. ignore=[400] suppresses the "index already exists" error.
create_response = es.indices.create(index=index_name, ignore=[400])
print("create {} {}".format(index_name, create_response))

----
### 3) Create a document_type with an RNI field type

In [None]:
# Declare "full_name" as an RNI name field ("rni_name") on the document type.
# The mapping body is keyed by document_type (rather than a hard-coded
# "customers") so it always agrees with the doc_type argument.
es.indices.put_mapping(index=index_name, doc_type=document_type, body={
    document_type: {
        "properties": {
            "full_name": {"type": "rni_name"}
        }
    }
})

In [None]:
# Confirm the mapping was created by pretty-printing what the server reports
mapping = es.indices.get_mapping(index=index_name)
# print(...) with one argument works on both Python 2 and Python 3
print(json.dumps(mapping, indent=2))

----
### 4) Add a handful of records

In [None]:
# Add a few people records.
# Each entry is (document id, full name, hair color, nationality); the name's
# language, script and entity type are the same for every record.
people_records = [
    (1, "Christopher Mack", "Blonde", "US"),
    (2, "Kris Mac", "Black", "DE"),
    (3, "Bill Swanson", "Blonde", "US"),
]

for doc_id, person_name, hair, country in people_records:
    index_response = es.index(index=index_name, doc_type=document_type, id=doc_id, body={
        # RNI name field: the name text plus its language/script/entity hints
        "full_name": {
            "data": person_name,
            "language": "eng",
            "script": "Latn",
            "entityType": "PERSON",
        },
        "hair_color": hair,
        "nationality": country,
    })
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("add document {} {}".format(index_name, index_response))

In [None]:
# Refresh so the newly indexed documents become available for search
refresh_response = es.indices.refresh(index=index_name)
print("refresh {} {}".format(index_name, refresh_response))

# Now the documents can be counted.
# The message is formatted into one string so it prints as text (not as a
# tuple) on Python 2 as well as Python 3.
doc_count = es.count(index=index_name)['count']
print("{} documents in index".format(doc_count))

----
### 5) Example Queries
 - High Recall (with lucene scores) 
 - Rescore (with name similarity scores)
 - Combined with other fields

In [None]:
# High-recall query: a plain "match" on the RNI name field.
rosette_result = es.search(index=index_name, doc_type=document_type, body={
    "query": {
        "match": {
            "full_name": "Christopher Mac"
        }
    }
})
# print(...) with one argument works on both Python 2 and Python 3
print(json.dumps(rosette_result, indent=2))

In [None]:
# Rescore query: run the same "match" on the name field, then rescore the top
# window_size hits with the "name_score" function against the query name.
rosette_result = es.search(index=index_name, doc_type=document_type, body={
    "query": {
        "match": {
            "full_name": "Christopher Mac"
        }
    },
    "rescore": {
        "window_size": 200,
        "query": {
            "rescore_query": {
                "function_score": {
                    "name_score": {
                        "field": "full_name",
                        "query_name": "Christopher Mac"
                    }
                }
            },
            # Weight the original query score by 0.0 and the rescore score by 1.0
            "query_weight": 0.0,
            "rescore_query_weight": 1.0
        }
    }
})
# print(...) with one argument works on both Python 2 and Python 3
print(json.dumps(rosette_result, indent=2))

In [None]:
# Combined query: the name match AND a match on another field, then rescored
# with "name_score".
# The two "match" clauses are wrapped in a bool/must query. Writing them as two
# "match" keys in one dict does not work: a Python dict keeps only the last
# value for a repeated key, so the full_name clause would be silently dropped.
rosette_result = es.search(index=index_name, doc_type=document_type, body={
    "query": {
        "bool": {
            "must": [
                {"match": {"full_name": "Christopher Mac"}},
                {"match": {"hair_color": "Blonde"}}
            ]
        }
    },
    "rescore": {
        "window_size": 200,
        "query": {
            "rescore_query": {
                "function_score": {
                    "name_score": {
                        "field": "full_name",
                        "query_name": "Christopher Mac"
                    }
                }
            },
            # Weight the original query score by 0.25 and the rescore score by 0.5
            "query_weight": 0.25,
            "rescore_query_weight": 0.5
        }
    }
})
# print(...) with one argument works on both Python 2 and Python 3
print(json.dumps(rosette_result, indent=2))

----
### 6) Other Queries


In [None]:
# Match using a structured name: the query value is a JSON string carrying the
# name text together with its language, script and entity type.
# The query targets "full_name", the only RNI field mapped and indexed in this
# notebook; the previous field name "people" does not exist in this index.
rosette_result = es.search(index=index_name, doc_type=document_type, body={
    "query": {
        "match": {
            "full_name": '{"data" : "Christopher Mac", "language" : "eng" , "script" : "Latn", "entityType" : "PERSON"}'
        }
    }
})
# print(...) with one argument works on both Python 2 and Python 3
print(json.dumps(rosette_result, indent=2))
