# In [None]:
from flask import Flask, request, redirect, url_for, render_template, send_from_directory, Request, jsonify, session
import os
import pandas as pd
from werkzeug.utils import secure_filename
import dedupe
import pickle
import csv
import re
import numpy as np
import pysolr
import sodaclient
import recordlinkage
from unidecode import unidecode

# Directory that uploaded files are saved into, and the file extensions
# accepted for upload.
UPLOAD_FOLDER = '/tmp/'
ALLOWED_EXTENSIONS = {'csv'}

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# The secret key signs the session cookie, so it should not be fixed in source
# for a real deployment. Read it from the environment and fall back to the
# historical development value so existing setups keep working unchanged.
app.secret_key = os.environ.get('FLASK_SECRET_KEY', 'super secret key')
# NOTE(review): SESSION_TYPE looks like a Flask-Session setting, but no such
# extension is initialised in the visible code -- confirm whether it is used.
app.config['SESSION_TYPE'] = 'filesystem'

# def allowed_file(filename):
#     return '.' in filename and \
#            filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

@app.route('/')
@app.route('/login.html')
def login(name=None):
    """Render the login page (served at both the site root and /login.html)."""
    context = {'name': name}
    return render_template('login.html', **context)

@app.route('/index.html')
def index(name=None):
    """Render the index page, passing ``name`` through to the template."""
    context = {'name': name}
    return render_template('index.html', **context)

# @app.route('/charts.html')
# def charts(name=None):
#     return render_template('charts.html', name=name)

@app.route('/register.html')
def register(name=None):
    """Render the registration page, passing ``name`` through to the template."""
    context = {'name': name}
    return render_template('register.html', **context)

@app.route('/success.html')
def success():
    """Render the success page for the matching method named in the query string.

    The ``method`` query argument doubles as the session key under which that
    method stored its suggestion. The template receives ``method`` (with
    ``'rl'`` spelled out as "Record Linkage"), ``response`` and ``user_addr``.
    """
    requested = request.args.get('method')
    stored_response = session.get(requested)
    # Only the record-linkage key gets a human-readable label; other method
    # names are shown as passed.
    label = "Record Linkage" if requested == 'rl' else requested
    return render_template(
        'success.html',
        method=label,
        response=stored_response,
        user_addr=session.get('user_addr'),
    )

@app.route('/failure.html')
def failure():
    """Render the static failure page."""
    template_name = 'failure.html'
    return render_template(template_name)

@app.route('/rlUI.html')
def rlUI():
    """Suggest a known company address for the user's entry via record linkage.

    The user-entered fields are read from the session, written to
    ``user_input_file.csv`` and linked against ``companies_final.csv`` with
    progressively looser indexing strategies: an exact block on all five
    fields, blocks on name and/or addr, sorted-neighbourhood indexes, and
    finally a full index. The first-level labels of the matching pairs are
    looked up in ``companies_dict.csv`` and the rendered rows are collected in
    ``results``.

    With exactly one candidate, that candidate is the suggestion; with two or
    three, the one with the smallest word-level edit distance to the user
    entry is chosen; otherwise the "not found" message is kept.

    Side effects: overwrites ``user_input_file.csv``, stores the suggestion in
    ``session['rl']`` and the raw entry in ``session['user_addr']``.

    Returns:
        The rendered ``rlUI.html`` template. All local variables are passed as
        template context, so top-level local names are kept stable.
    """

    # initialize responses
    response_rl = ("No matching address was found!")

    # Get the user-entered address
    name = session.get('name', None)
    addr = session.get('addr', None)
    city = session.get('city', None)
    ctry = session.get('ctry', None)
    code = str(session.get('code', None))

    # write the user entry to a csv (the with-block closes the file on exit)
    with open('user_input_file.csv', 'w', newline='') as csvfile:
        fieldnames = ['id', 'name', 'addr', 'city', 'ctry', 'code']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerow({'id': "1", 'name': name, 'addr': addr, 'city': city, 'ctry': ctry, 'code': code})

    dfA = pd.read_csv("companies_final.csv")
    dfB = pd.read_csv('user_input_file.csv')
    dfD = pd.read_csv("companies_dict.csv")
    # DataFrame.drop returns a new frame; the result has to be assigned for
    # the 'id' column to actually be removed.
    dfA = dfA.drop('id', axis=1)
    dfB = dfB.drop('id', axis=1)

    # Shared accumulator: every returnResults* call appends the rendered
    # dictionary rows of its matches here.
    results = []

    ############# SHARED HELPERS
    def buildCompare(name_threshold=0.95):
        """Return a Compare object with one feature per address field.

        Only the name threshold differs between callers (the full-index pass
        uses a looser 0.90).
        """
        compare = recordlinkage.Compare()
        compare.string('name', 'name', method='jarowinkler', threshold=name_threshold)
        compare.string('addr', 'addr', method='jarowinkler', threshold=0.95)
        compare.exact('city', 'city')
        compare.exact('ctry', 'ctry')
        compare.string('code', 'code', method='jarowinkler', threshold=0.90)
        return compare

    def selectMatches(features):
        """Return rows whose feature sum is > 4, or failing that, > 3."""
        scores = features.sum(axis=1)
        matches = features[scores > 4]
        if len(matches) == 0:
            matches = features[scores > 3]
        return matches

    def collectResults(pair_index):
        """Append the dictionary row for each first-level label of *pair_index*."""
        grab_ids = list(pair_index.get_level_values(0))
        for grab_id in grab_ids:
            result = dfD.loc[grab_id].to_string(header=False, index=False)
            results.append(result)
        print(grab_ids)
        return results

    ############# BLOCK INDEXING
    def runBlock():
        """Try exact matches on name and/or addr, falling back to runSort."""
        blockNamePairs = recordlinkage.BlockIndex(on=['name']).index(dfA, dfB)
        blockAddrPairs = recordlinkage.BlockIndex(on=['addr']).index(dfA, dfB)
        blockNAPairs = recordlinkage.BlockIndex(on=['name', 'addr']).index(dfA, dfB)

        # Bit 0: some record matches on name; bit 1: some record matches on addr.
        block = 0
        if len(blockNamePairs) > 0:
            block += 1
        if len(blockAddrPairs) > 0:
            block += 2

        if len(blockNAPairs) > 0:
            # the same record matches on both name and addr
            print("they match")
            print("block name and addr match")
            return returnResultsMI(blockNAPairs)

        if block == 0:
            # run sorted neighborhood both
            print("no exact matches")
            return runSort()
        if block == 1:
            # grab the id and contents of name match
            print("block name match")
            if len(blockNamePairs) > 1:
                return runCompName(blockNamePairs)
            return returnResultsMI(blockNamePairs)
        if block == 2:
            # grab the id and contents of addr match
            print("block addr match")
            if len(blockAddrPairs) > 1:
                return runCompAddr(blockAddrPairs)
            return returnResultsMI(blockAddrPairs)
        # block == 3: name and addr match different records, inconclusive
        print("block different name and addr match")
        return runSort()

    ############# SORTED NEIGHBORHOOD INDEXING
    def runSort():
        """Try sorted-neighbourhood candidates on name and/or addr."""
        sortedNameIndexer = recordlinkage.SortedNeighbourhoodIndex(on='name')
        sortedNamePairs = sortedNameIndexer.index(dfA, dfB)
        sortedAddrIndexer = recordlinkage.SortedNeighbourhoodIndex(on='addr')
        sortedAddrPairs = sortedAddrIndexer.index(dfA, dfB)

        # Same bit encoding as in runBlock.
        sort = 0
        if len(sortedNamePairs) > 0:
            sort += 1
        if len(sortedAddrPairs) > 0:
            sort += 2

        if sort == 0:
            # no candidates at all: compare against every record
            print("full index NEXT")
            runFull()
            # sort is an int, so it must be converted before concatenation
            print("sort is " + str(sort))
            return results
        if sort == 1:
            # compare with name pairs
            print("sort name match")
            return runCompName(sortedNamePairs)
        if sort == 2:
            # compare with addr pairs
            print("sort addr match")
            return runCompAddr(sortedAddrPairs)
        # sort == 3: compare with both pair sets
        print("sort both match")
        return runCompBoth(sortedNamePairs, sortedAddrPairs)

    def runCompBoth(name_pairs, addr_pairs):
        """Score both candidate sets and pick whichever is unambiguous."""
        compare = buildCompare()
        matchesName = selectMatches(compare.compute(name_pairs, dfA, dfB))
        matchesAddr = selectMatches(compare.compute(addr_pairs, dfA, dfB))
        nameMatch = len(matchesName) > 0
        addrMatch = len(matchesAddr) > 0
        print(len(matchesName))
        print(len(matchesAddr))

        ########## FSM
        print("let's check " + str(nameMatch) + str(addrMatch))
        if not nameMatch and not addrMatch:
            print("cb Full Index NEXT")
            return runFull()
        if not nameMatch:
            # only the addr candidates produced matches
            print("cb sort addr match")
            print(matchesAddr)
            if len(matchesAddr) > 2:
                return runFull()
            return returnResultsDF(matchesAddr)
        if not addrMatch:
            # only the name candidates produced matches
            print("cb sort name match")
            if len(matchesName) > 2:
                return runFull()
            return returnResultsDF(matchesName)
        # both produced matches: prefer whichever side is unique
        print(len(matchesName) + len(matchesAddr))
        if len(matchesName) == 1:
            return returnResultsDF(matchesName)
        if len(matchesAddr) == 1:
            print("cb sort addr and sort name")
            return returnResultsDF(matchesAddr)
        return runFull()

    ######### Specify to neighborhood & Compare
    def runCompName(name_pairs):
        """Score name-derived candidate pairs; fall back to runFull if unclear."""
        matchesName = selectMatches(buildCompare().compute(name_pairs, dfA, dfB))
        nameMatch = len(matchesName) > 0
        # Only name pairs are scored here, so there is never an addr match.
        addrMatch = False
        print(len(matchesName))

        ########## FSM
        print("let's check " + str(nameMatch) + str(addrMatch))
        if not nameMatch:
            print("cn Full Index NEXT")
            return runFull()
        print("cn sort name match")
        if len(matchesName) > 2:
            return runFull()
        return returnResultsDF(matchesName)

    ######### Specify to neighborhood & Compare
    def runCompAddr(addr_pairs):
        """Score addr-derived candidate pairs; fall back to runFull if unclear."""
        matchesAddr = selectMatches(buildCompare().compute(addr_pairs, dfA, dfB))
        # Only addr pairs are scored here, so there is never a name match.
        nameMatch = False
        addrMatch = len(matchesAddr) > 0
        print(len(matchesAddr))

        ########## FSM
        print("let's check " + str(nameMatch) + str(addrMatch))
        if not addrMatch:
            print("ca Full Index NEXT")
            return runFull()
        print("ca sort addr match")
        print(matchesAddr)
        if len(matchesAddr) > 2:
            return runFull()
        return returnResultsDF(matchesAddr)

    ####### If nothing else finds matches, run FULL INDEX
    def runFull():
        """Compare the user entry against every record (looser name threshold)."""
        fullIndexer = recordlinkage.FullIndex()
        fullIndexPairs = fullIndexer.index(dfA, dfB)
        featuresFull = buildCompare(name_threshold=0.90).compute(fullIndexPairs, dfA, dfB)
        scores = featuresFull.sum(axis=1)

        matchesFullAll = featuresFull[scores > 4]
        if len(matchesFullAll) > 0:
            print("full match all")
            return returnResultsDF(matchesFullAll)
        matchesFull = featuresFull[scores > 3]
        if len(matchesFull) > 0:
            print("full match")
            print(matchesFull)
            return returnResultsDF(matchesFull)
        return results

    ######## RETURN FROM MULTIINDEX
    def returnResultsMI(pairs):
        """Collect results for a MultiIndex of candidate pairs."""
        return collectResults(pairs)

    def returnResultsDF(pairs):
        """Collect results for a feature DataFrame indexed by candidate pairs."""
        return collectResults(pairs.index)

    # First look for records identical to the entry on all five fields.
    blockIndexer = recordlinkage.BlockIndex(on=['name', 'addr', 'city', 'ctry', 'code'])
    blockIndexPairs = blockIndexer.index(dfA, dfB)
    if len(blockIndexPairs) > 0:
        # returnResultsMI is the MultiIndex variant defined above
        print(returnResultsMI(blockIndexPairs))
    else:
        runBlock()

    INSERTION_PENALTY = 1
    DELETION_PENALTY = 1
    # This substitution penalty differentiates from Levenshtein cost (would be 1)
    SUBSTITUTION_PENALTY = 2
    ALLOWED_LEVELS = ["word", "char"]
    LEVEL = "word"

    def compute_cost(D, i, j, token_X, token_Y):
        """Return the cheapest way to reach cell (i, j) of the edit table."""
        relative_subst_cost = 0 if token_X == token_Y else SUBSTITUTION_PENALTY
        return min(D[i-1, j] + INSERTION_PENALTY, D[i, j-1] + DELETION_PENALTY, D[i-1, j-1] + relative_subst_cost)

    def tokenize_string(string, level="word"):
        """Split *string* into words (on single spaces) or characters."""
        assert level in ALLOWED_LEVELS
        # compare by value; identity comparison on str literals is unreliable
        if level == "word":
            return string.split(" ")
        else:
            return list(string)

    def minimum_edit_distance(string1, string2, level="word"):
        """The function uses the dynamic programming approach from Wagner-Fischer to compute the minimum edit distance
        between two sequences.
        :param string1 first sequence
        :param string2 second sequence
        :param level defines on which granularity the algorithm will be applied. "word" specifies the token to
        be sequential words while "char" applies the algorithm on a character-by-character level
        :return tuple of (tokens of string2, edit distance)"""
        # Call tokenize string on the two address strings that were passed to the method
        string1_tokens = tokenize_string(string1, level)
        string2_tokens = tokenize_string(string2, level)
        n = len(string1_tokens)
        m = len(string2_tokens)
        # Row/column 0 stand for the empty prefix, so the table needs one more
        # row and column than there are tokens; otherwise the first token of
        # each sequence is never compared.
        D = np.zeros((n + 1, m + 1))

        for i in range(n + 1):
            for j in range(m + 1):
                if j == 0:
                    D[i, j] = i
                elif i == 0:
                    D[i, j] = j
                else:
                    D[i, j] = compute_cost(D, i, j, string1_tokens[i - 1], string2_tokens[j - 1])

        return string2_tokens, D[n, m]

    def preProcess(column):
        """Normalise *column* to lower-case ASCII; return None if nothing is left."""
        # convert any unicode data into ASCII characters
        column = unidecode(column)
        # ignore new lines
        column = re.sub('\n', ' ', column)
        # ignore special characters
        column = re.sub('-', '', column)
        column = re.sub('/', ' ', column)
        column = re.sub("'", '', column)
        column = re.sub(",", '', column)
        column = re.sub(":", ' ', column)
        # ignore extra white space
        column = re.sub('  +', ' ', column)
        # ignore casing
        column = column.strip().strip('"').strip("'").lower().strip()
        if not column:
            column = None
        return column

    if len(results) == 0:
        response_rl = "No matching address was found!"
    if len(results) == 1:
        response_rl = results[0]
    user_entry = name + " " + addr + " " + city + " " + ctry + " " + code
    user_proc = preProcess(user_entry)
    min_dist = 9999
    # NOTE(review): four or more candidates leave the "not found" message in
    # place -- presumably treated as too ambiguous; confirm this is intended.
    if 1 < len(results) < 4:
        for result in results:
            result_proc = preProcess(result)
            # preProcess yields None for empty text; compare as empty strings
            dist = minimum_edit_distance(result_proc or "", user_proc or "")[1]
            if (dist < min_dist):
                min_dist = dist
                response_rl = result
    print(response_rl)

    session['rl'] = response_rl
    session['user_addr'] = user_entry
    return render_template('rlUI.html', **locals())

@app.route('/SODAUI.html')
def SODAUI():
    """Suggest a known company address for the user's entry via the SoDA service.

    The user-entered fields are read from the session and looked up against
    the SoDA lexicons in three steps, each tried only while no suggestion has
    been found:

    1. reverse lookup (``rlookup``) of the name and addr fields;
    2. annotation (``annot``) of the whole entry against the name/addr
       (and, if needed, city/ctry/code) lexicons;
    3. name annotations combined with the addr reverse lookup.

    Suggestions are read from the ``NAME`` column of ``companies_dict.tsv``.
    The numeric suffix of a SoDA id (the part after the underscore) is used as
    a 1-based row number into that table.

    Side effects: stores the suggestion in ``session['SoDA']`` and the raw
    entry in ``session['user_addr']``.

    Returns:
        The rendered ``SODAUI.html`` template, with all local variables passed
        as template context.
    """

    # initialize responses
    response_soda = ("No matching address was found!")
    notFound = response_soda

    # Get the user-entered address
    name = session.get('name', None)
    addr = session.get('addr', None)
    city = session.get('city', None)
    ctry = session.get('ctry', None)
    code = session.get('code', None)

    # SoDA
    user_entry = name + " " + addr + " " + city + " " + ctry + " " + code

    # Establish a connection to the soda web client
    client = sodaclient.SodaClient("http://localhost:8080")
    # DataFrame.from_csv was removed in pandas 1.0; read_csv with these
    # arguments reproduces its defaults (first column as index, parsed dates).
    df = pd.read_csv('companies_dict.tsv', sep='\t', index_col=0, parse_dates=True)

    # Edit Distance
    INSERTION_PENALTY = 1
    DELETION_PENALTY = 1
    # This substitution penalty differentiates from Levenshtein cost (would be 1)
    SUBSTITUTION_PENALTY = 2
    ALLOWED_LEVELS = ["word", "char"]
    LEVEL = "word"
    # Suggestions further than this many word edits from the entry are rejected.
    MAX_EDIT_DISTANCE = 25.0

    def compute_cost(D, i, j, token_X, token_Y):
        """Return the cheapest way to reach cell (i, j) of the edit table."""
        relative_subst_cost = 0 if token_X == token_Y else SUBSTITUTION_PENALTY
        return min(D[i-1, j] + INSERTION_PENALTY, D[i, j-1] + DELETION_PENALTY, D[i-1, j-1] + relative_subst_cost)

    def tokenize_string(string, level="word"):
        """Split *string* into words (on single spaces) or characters."""
        assert level in ALLOWED_LEVELS
        # compare by value; identity comparison on str literals is unreliable
        if level == "word":
            return string.split(" ")
        else:
            return list(string)

    def minimum_edit_distance(string1, string2, level="word"):
        """The function uses the dynamic programming approach from Wagner-Fischer to compute the minimum edit distance
        between two sequences.
        :param string1 first sequence
        :param string2 second sequence
        :param level defines on which granularity the algorithm will be applied. "word" specifies the token to
        be sequential words while "char" applies the algorithm on a character-by-character level
        :return tuple of (tokens of string2, edit distance)"""
        # Call tokenize string on the two address strings that were passed to the method
        string1_tokens = tokenize_string(string1, level)
        string2_tokens = tokenize_string(string2, level)
        n = len(string1_tokens)
        m = len(string2_tokens)
        # Row/column 0 stand for the empty prefix, so the table needs one more
        # row and column than there are tokens; otherwise the first token of
        # each sequence is never compared.
        D = np.zeros((n + 1, m + 1))
        for i in range(n + 1):
            for j in range(m + 1):
                if j == 0:
                    D[i, j] = i
                elif i == 0:
                    D[i, j] = j
                else:
                    D[i, j] = compute_cost(D, i, j, string1_tokens[i - 1], string2_tokens[j - 1])
        return string2_tokens, D[n, m]

    def dict_entry(raw_id):
        """Return the NAME cell of the dictionary row numbered *raw_id* (1-based)."""
        return df.iloc[raw_id - 1]['NAME']

    def within_threshold(candidate):
        """Return True when *candidate* is close enough to the user entry."""
        return minimum_edit_distance(candidate, user_entry)[1] <= MAX_EDIT_DISTANCE

    def has_id(annotation_result, wanted_id):
        """Return True when any annotation in the result carries *wanted_id*."""
        return any(entry['id'] == wanted_id for entry in annotation_result['annotations'])

    def best_joint_match(name_entries, addr_entries):
        """Return the suggestion for the name/addr pair with the highest summed confidence.

        A pair is a name entry and an addr entry whose ids share the same
        numeric suffix. Returns ``notFound`` when no pair has a positive
        summed confidence.
        """
        best = notFound
        highest_confidence = 0
        for name_entry in name_entries:
            suffix = name_entry['id'].split('_')[1]
            wanted_addr_id = "ADDR_" + suffix
            entry_raw_id = int(suffix)
            for addr_entry in addr_entries:
                if addr_entry['id'] == wanted_addr_id:
                    conf = name_entry['confidence'] + addr_entry['confidence']
                    # track the running maximum so the best pair wins, not
                    # simply the last one seen
                    if conf > highest_confidence:
                        highest_confidence = conf
                        best = dict_entry(entry_raw_id)
        return best

    ## STEP 1
    name_rlook = (client.rlookup('companies_name', name, 's3sort'))
    addr_rlook = (client.rlookup('companies_addr', addr, 's3sort'))
    # Case 1: a single name hit, accepted if close enough to the entry
    if len(name_rlook['entries']) == 1:
        name_id = name_rlook['entries'][0]['id']
        raw_id = int(name_id.split('_')[1])
        response_soda = dict_entry(raw_id)
        if not within_threshold(response_soda):
            response_soda = notFound
    # Case 2: name and addr hits that refer to the same record
    if response_soda == notFound:
        response_soda = best_joint_match(name_rlook['entries'], addr_rlook['entries'])
    # Case 3: a single addr hit, accepted if close enough to the entry
    if response_soda == notFound:
        if len(addr_rlook['entries']) == 1:
            addr_id = addr_rlook['entries'][0]['id']
            raw_id = int(addr_id.split('_')[1])
            response_soda = dict_entry(raw_id)
            if not within_threshold(response_soda):
                response_soda = notFound

    ## STEP 2
    # Case 1: a single name annotation confirmed by another field
    if response_soda == notFound:
        name_annot = client.annot('companies_name', user_entry, 'stem2')
        addr_annot = client.annot('companies_addr', user_entry, 'stem2')
        if len(name_annot['annotations']) == 1:
            # name_id must be taken from the annotation before raw_id is
            # derived from it
            name_id = name_annot['annotations'][0]['id']
            suffix = name_id.split('_')[1]
            raw_id = int(suffix)
            # First, check if there is a matching address with the same ID
            if has_id(addr_annot, "ADDR_" + suffix):
                response_soda = dict_entry(raw_id)
            # Next, check city, then country, finally postal code; each
            # lexicon is only queried while nothing has been found yet.
            for prefix, lexicon in (("CITY_", 'companies_city'),
                                    ("CTRY_", 'companies_ctry'),
                                    ("CODE_", 'companies_code')):
                if response_soda != notFound:
                    break
                field_annot = client.annot(lexicon, user_entry, 'stem2')
                if has_id(field_annot, prefix + suffix):
                    response_soda = dict_entry(raw_id)

    # Case 2: name and addr annotations that refer to the same record
    if response_soda == notFound:
        response_soda = best_joint_match(name_annot['annotations'], addr_annot['annotations'])

    # Case 3: a single addr annotation, accepted if close enough to the entry
    if response_soda == notFound:
        if len(addr_annot['annotations']) == 1:
            addr_id = addr_annot['annotations'][0]['id']
            raw_id = int(addr_id.split('_')[1])
            response_soda = dict_entry(raw_id)
            if not within_threshold(response_soda):
                # use the shared sentinel so Step 3 still runs and the user
                # sees the same message as every other miss
                response_soda = notFound

    ## STEP 3
    # Case 1: name annotations paired with addr reverse-lookup hits
    if response_soda == notFound:
        response_soda = best_joint_match(name_annot['annotations'], addr_rlook['entries'])

    session['SoDA'] = response_soda
    session['user_addr'] = user_entry
    return render_template('SODAUI.html', **locals())

@app.route('/pysolrUI.html', methods=['GET', 'POST'])
def pysolrUI():
    """Suggest a known company address for the user's entry via a Solr search.

    On POST the five form fields are stored in the session. The address is
    then read back from the session, the Solr core is cleared and re-indexed
    from ``companies_final.csv``, and the first search hit (if any) becomes
    the suggestion.

    Side effects: rewrites the whole Solr core on every request, stores the
    suggestion in ``session['pysolr']`` and the raw entry in
    ``session['user_addr']``.

    Returns:
        The rendered ``pysolrUI.html`` template, with all local variables
        passed as template context.
    """

    # initialize responses
    response_pysolr = ("No matching address was found!")

    fields = ['id', 'name', 'addr', 'city', 'ctry', 'code']
    address_fields = fields[1:]

    if request.method == 'POST':
        # remember the submitted address for this and the other method pages
        for field in address_fields:
            session[field] = request.form[field]

    # Get the user-entered address
    name = session.get('name', None)
    addr = session.get('addr', None)
    city = session.get('city', None)
    ctry = session.get('ctry', None)
    code = session.get('code', None)

    user_entry = name + " " + addr + " " + city + " " + ctry + " " + code

    # Pysolr
    # Create a list of dictionaries (one per csv row) to index to solr
    with open('companies_final.csv', mode='r', newline='') as infile:
        reader = csv.reader(infile)
        next(reader, None)  # skip the header row
        dict_list = [
            {field: rows[i] for i, field in enumerate(fields)}
            for rows in reader
        ]
    conn = pysolr.Solr('http://localhost:8984/solr/new_core')
    # Clear what is currently in the index and add the dictionary of addresses.
    # Commit explicitly: newer pysolr releases no longer commit by default, and
    # uncommitted documents would not be visible to the search below.
    conn.delete(q="*:*", commit=True)
    conn.add(dict_list, commit=True)
    # Query solr for the user input
    # NOTE(security): the query is assembled from raw user input, so Solr query
    # syntax inside a field value is interpreted by Solr; consider escaping.
    query_str = "name:" + "'" + name + "' " + "addr:" + "'" + addr + "' " + "city:" + "'" + city + "' " + "ctry:" + "'" + ctry + "' " + "code:" + "'" + code + "'"
    results = conn.search(query_str)

    def field_values(value):
        """Return *value* as a list, whether Solr stored it single- or multi-valued."""
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    # Only the first (highest ranked) hit is used.
    for result in results:
        parts = []
        for field in address_fields:
            # str() guards against non-string field types (e.g. numeric codes)
            parts.extend(str(value) for value in field_values(result.get(field, [])))
        response_pysolr = " ".join(parts)
        break
    session['pysolr'] = response_pysolr
    session['user_addr'] = user_entry
    return render_template('pysolrUI.html', **locals())

@app.route('/dedupeUI.html')
def dedupeUI():
    """Suggest a known company address for the user's entry via dedupe.

    The user-entered fields are read from the session and written to
    ``user_input_file.csv``. That file and ``companies_final.csv`` are
    normalised and linked with the pre-trained settings in
    ``data_matching_learned_settings``. The first reported match is looked up
    again in the raw ``companies_final.csv`` so the suggestion keeps its
    original spelling.

    Side effects: overwrites ``user_input_file.csv``, stores the suggestion in
    ``session['dedupe']`` and the raw entry in ``session['user_addr']``.

    Returns:
        The rendered ``dedupeUI.html`` template, with all local variables
        passed as template context.
    """

    # initialize responses
    response_dedupe = ("No matching address was found!")

    # Get the user-entered address
    name = session.get('name', None)
    addr = session.get('addr', None)
    city = session.get('city', None)
    ctry = session.get('ctry', None)
    code = session.get('code', None)

    user_entry = name + " " + addr + " " + city + " " + ctry + " " + code

    # Write the user input to a file (the with-block closes it on exit)
    user_input_file = 'user_input_file.csv'
    with open(user_input_file, 'w', newline='') as csvfile:
        fieldnames = ['id', 'name', 'addr', 'city', 'ctry', 'code']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerow({'id': "1", 'name': name, 'addr': addr, 'city': city, 'ctry': ctry, 'code': code})

    def preProcess(column):
        """Normalise *column* to lower-case ASCII; return None if nothing is left."""
        # convert any unicode data into ASCII characters
        column = unidecode(column)
        # ignore new lines
        column = re.sub('\n', ' ', column)
        # ignore special characters
        column = re.sub('-', '', column)
        column = re.sub('/', ' ', column)
        column = re.sub("'", '', column)
        column = re.sub(",", '', column)
        column = re.sub(":", ' ', column)
        # ignore extra white space
        column = re.sub('  +', ' ', column)
        # ignore casing
        column = column.strip().strip('"').strip("'").lower().strip()
        if not column:
            column = None
        return column

    def readData(filename):
        """Read *filename* into ``{filename + row_number: normalised row dict}``."""
        data_d = {}
        with open(filename) as f:
            reader = csv.DictReader(f)
            for i, row in enumerate(reader):
                data_d[filename + str(i)] = {k: preProcess(v) for (k, v) in row.items()}
        return data_d

    data_entry = readData(user_input_file)
    data_1 = readData("companies_final.csv")
    with open('data_matching_learned_settings', 'rb') as sf:
        linker = dedupe.StaticRecordLink(sf)
    try:
        match = linker.match(data_1, data_entry)
    except dedupe.core.BlockingError:
        match = []
    # An empty match list means nothing linked; the "not found" message is
    # kept instead of indexing into the empty list.
    if match:
        # presumably match[0][0] is the (data_1 key, data_entry key) pair of
        # the first link -- verify against the dedupe version in use
        record_id = int(data_1[match[0][0][0]]['id'])
        with open('companies_final.csv', 'r') as my_file:
            rows = list(csv.reader(my_file))
        # rows[0] is the header, so this assumes record ids equal the 1-based
        # data row number. Columns 1-5 are name, addr, city, ctry, code.
        response_dedupe = " ".join(rows[record_id][1:6])
    session['dedupe'] = response_dedupe
    session['user_addr'] = user_entry
    return render_template('dedupeUI.html', **locals())


# @app.route('/tmp/<filename>')
# def uploaded_file(filename):
#     return send_from_directory(app.config['UPLOAD_FOLDER'],
#                                filename)

def _allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/upload.html', methods=['GET', 'POST'])
def upload(name=None):
    """Render the upload page and, on POST, save an uploaded csv file.

    The form parts ``file`` and ``file2`` are examined in that order. A
    missing part or an empty filename flashes a message and redirects back to
    the page. The first part with an allowed extension is saved into
    ``app.config['UPLOAD_FOLDER']`` under a sanitised name, after which the
    request redirects back to the upload page.

    NOTE(review): because the handler returns after the first saved file,
    ``file2`` is only looked at when ``file`` has a disallowed extension --
    confirm whether both files are meant to be saved.

    Returns:
        A redirect response after a POST that flashed a message or saved a
        file, otherwise the rendered ``upload.html`` template.
    """
    # flash is not part of this file's top-level flask import, so it is
    # imported here to keep the handler self-contained.
    from flask import flash

    if request.method == 'POST':
        for part in ('file', 'file2'):
            # check if the post request has the file part
            if part not in request.files:
                flash('No file part')
                return redirect(request.url)
            uploaded = request.files[part]
            # if user does not select file, browser also
            # submit a empty part without filename
            if uploaded.filename == '':
                flash('No selected file')
                return redirect(request.url)
            if uploaded and _allowed_file(uploaded.filename):
                filename = secure_filename(uploaded.filename)
                uploaded.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
                # No 'uploaded_file' endpoint is registered by the visible
                # code, so url_for('uploaded_file') would fail; return to the
                # upload page instead.
                flash('File uploaded: ' + filename)
                return redirect(request.url)
    return render_template('upload.html', name=name)

@app.route('/tables.html')
def tables(name=None):
    """Render ``tables.html`` with the records of ``companies_final.csv``.

    The first CSV line is skipped.  Each remaining line contributes one dict
    holding its first six columns under the keys id, name, addr, city, ctry
    and code; the list of dicts is passed to the template as ``dict_list``.
    """
    columns = ['id', 'name', 'addr', 'city', 'ctry', 'code']
    with open('companies_final.csv', mode='r') as source:
        lines = csv.reader(source)
        next(lines)  # discard the first line
        records = [
            {columns[pos]: line[pos] for pos in range(6)}
            for line in lines
        ]
    return render_template('tables.html', dict_list=records)

@app.route('/forgot-password.html')
def forgot(name=None):
    """Serve the forgot-password page, forwarding ``name`` to the template."""
    context = {'name': name}
    return render_template('forgot-password.html', **context)

@app.route('/search.html')
def search():
    """Serve the search form without any template variables."""
    page = render_template('search.html')
    return page

@app.route('/search.html', methods=['GET', 'POST'])
def search_submit():
    """Serve the search form for requests routed to this GET/POST rule.

    No template variables are supplied, because the function has no local
    variables to expose.
    """
    template_context = {}
    return render_template('search.html', **template_context)


@app.route('/searchAll.html', methods=['GET', 'POST'])
def searchAll():
    """Match the user-entered address with Solr, SoDA, record linkage and dedupe.

    On POST the form fields name, addr, city, ctry and code are stored in the
    session.  The address is then read back from the session, so a GET reuses
    the values stored there.  Each matcher's answer is saved in the session
    and ``searchAll.html`` is rendered with every local variable of this
    function as template context.
    """

    # initialize responses
    response_pysolr = ("No matching address was found!")
    response_soda = ("No matching address was found!")
    notFound = response_soda
    response_dedupe = ("No matching address was found!")
    response_rl = ("No matching address was found!")

    if request.method == 'POST':
        # Remember the submitted address in the session.
        for field in ('name', 'addr', 'city', 'ctry', 'code'):
            session[field] = request.form[field]

    # Get the user-entered address.  Values missing from the session fall back
    # to '' so the string concatenations below do not fail on None and the
    # postal code does not turn into the literal text 'None'.
    name = session.get('name') or ''
    addr = session.get('addr') or ''
    city = session.get('city') or ''
    ctry = session.get('ctry') or ''
    code = str(session.get('code') or '')

    # Pysolr
    # Create a list of dictionaries to index to solr
    dict_list = []
    fields = ['id', 'name', 'addr', 'city', 'ctry', 'code']
    with open('companies_final.csv', mode='r') as infile:
        reader = csv.reader(infile)
        next(reader)  # skip the first line
        for rows in reader:
            dict_list.append({fields[i]: rows[i] for i in range(6)})
    solr = pysolr.Solr
    conn = solr('http://localhost:8984/solr/new_core')
    # Clear what is currently in the index and add the list of addresses
    conn.delete(q="*:*")
    conn.add(dict_list)
    # Query solr for the user input
    # NOTE(review): the user's text goes into the query string unescaped;
    # confirm whether Solr query syntax characters need escaping here.
    query_str = "name:'{}' addr:'{}' city:'{}' ctry:'{}' code:'{}'".format(
        name, addr, city, ctry, code)
    results = conn.search(query_str)
    # Only the first hit is reported.
    for result in results:
        response_pysolr = (" ".join(result['name'] + result['addr'] + result['city'] + result['ctry'] + result['code']))
        break

    # SoDA
    user_entry = name + " " + addr + " " + city + " " + ctry + " " + code
    # Establish a connection to the soda web client
    client = sodaclient.SodaClient("http://localhost:8080")
    # DataFrame.from_csv is no longer available in current pandas; read_csv
    # with index_col=0 and parse_dates=True reproduces its defaults.
    df = pd.read_csv('companies_dict.tsv', sep='\t', index_col=0, parse_dates=True)
    
    # Edit Distance
    INSERTION_PENALTY = 1
    DELETION_PENALTY = 1
    # This substitution penalty differentiates from Levenshtein cost (would be 1)
    SUBSTITUTION_PENALTY = 2
    ALLOWED_LEVELS = ["word", "char"]
    LEVEL = "word"
    def compute_cost(D, i, j, token_X, token_Y):
        """Return the cheapest way to reach cell (i, j) of the cost matrix D.

        The candidates are the cell above plus INSERTION_PENALTY, the cell to
        the left plus DELETION_PENALTY, and the diagonal cell plus
        SUBSTITUTION_PENALTY (or plus 0 when the two tokens are equal).
        """
        relative_subst_cost = 0 if token_X == token_Y else SUBSTITUTION_PENALTY
        return min(D[i-1, j] + INSERTION_PENALTY, D[i, j-1] + DELETION_PENALTY, D[i-1, j-1] + relative_subst_cost)
    def tokenize_string(string, level="word"):
        """Split ``string`` on single spaces ("word") or into characters ("char")."""
        assert level in ALLOWED_LEVELS
        # Compare by value; ``is`` tests object identity and only worked by
        # accident of string interning.
        if level == "word":
            return string.split(" ")
        return list(string)
    def minimum_edit_distance(string1, string2, level="word"):
        """The function uses the dynamic programming approach from Wagner-Fischer to compute the minimum edit distance
        between two sequences.
        :param string1 first sequence
        :param string2 second sequence
        :param level defines on which granularity the algorithm will be applied. "word" specifies the token to
        be sequential words while "char" applies the algorithm on a character-by-character level
        :return tuple of (tokens of string2, distance as a numpy float)"""
        # Call tokenize string on the two address strings that were passed to the method
        string1_tokens = tokenize_string(string1, level)
        string2_tokens = tokenize_string(string2, level)
        n = len(string1_tokens)
        m = len(string2_tokens)
        # One extra row and column represent the empty prefix.  With an n x m
        # matrix the first token of each string was never compared and an
        # empty token list raised IndexError.
        D = np.zeros((n + 1, m + 1))
        D[:, 0] = np.arange(n + 1) * INSERTION_PENALTY
        D[0, :] = np.arange(m + 1) * DELETION_PENALTY
        for i in range(1, n + 1):
            for j in range(1, m + 1):
                D[i, j] = compute_cost(D, i, j, string1_tokens[i - 1], string2_tokens[j - 1])
        return string2_tokens, D[n, m]

    
    # SoDA matching: try progressively looser strategies until one of them
    # replaces the notFound placeholder in response_soda.
    name_rlook = (client.rlookup('companies_name', name, 's3sort'))
    addr_rlook = (client.rlookup('companies_addr', addr, 's3sort'))

    def _soda_row_name(entry_id):
        # The number after the first underscore of an id, minus one, is the
        # row position in df whose NAME column is suggested to the user.
        return df.iloc[int(entry_id.split('_')[1]) - 1]['NAME']

    def _soda_within_threshold(candidate):
        # Keep a single-hit suggestion only if its edit distance to the full
        # user entry does not exceed 25.
        if minimum_edit_distance(candidate, user_entry)[1] > 25.0:
            return notFound
        return candidate

    def _soda_best_pair(name_hits, addr_hits):
        # Pair each name hit with the address hit whose id is "ADDR_<n>" for
        # the same <n>, and suggest the pair with the highest summed
        # confidence.  The best score is tracked across all pairs, and the
        # loop variables no longer overwrite the user's ``name``/``addr``,
        # which are needed again further down.
        best_name_id = None
        best_conf = 0
        for name_hit in name_hits:
            wanted_addr_id = "ADDR_" + name_hit['id'].split('_')[1]
            for addr_hit in addr_hits:
                if addr_hit['id'] != wanted_addr_id:
                    continue
                conf = name_hit['confidence'] + addr_hit['confidence']
                if conf > best_conf:
                    best_conf = conf
                    best_name_id = name_hit['id']
        if best_name_id is None:
            return notFound
        return _soda_row_name(best_name_id)

    ## STEP 1: reverse lookup of the name and address fields
    # Case 1: exactly one name hit
    if len(name_rlook['entries']) == 1:
        response_soda = _soda_within_threshold(
            _soda_row_name(name_rlook['entries'][0]['id']))
    # Case 2: name and address hits that point at the same row
    if response_soda == notFound:
        response_soda = _soda_best_pair(name_rlook['entries'], addr_rlook['entries'])
    # Case 3: exactly one address hit
    if response_soda == notFound and len(addr_rlook['entries']) == 1:
        response_soda = _soda_within_threshold(
            _soda_row_name(addr_rlook['entries'][0]['id']))

    if response_soda == notFound:
        ## STEP 2: annotate the whole user entry
        name_annot = client.annot('companies_name', user_entry, 'stem2')
        addr_annot = client.annot('companies_addr', user_entry, 'stem2')
        # Case 1: exactly one name annotation, confirmed by another field
        if len(name_annot['annotations']) == 1:
            # Read the id first; the row number used to be derived from a
            # stale name_id left over from an earlier step.
            name_id = name_annot['annotations'][0]['id']
            id_suffix = name_id.split('_')[1]
            # Check address, then city, country and postal code.  The extra
            # dictionaries are only queried when the previous check failed.
            confirmations = (
                ("ADDR_", None),
                ("CITY_", 'companies_city'),
                ("CTRY_", 'companies_ctry'),
                ("CODE_", 'companies_code'),
            )
            for prefix, dictionary in confirmations:
                if dictionary is None:
                    annot = addr_annot
                else:
                    annot = client.annot(dictionary, user_entry, 'stem2')
                wanted_id = prefix + id_suffix
                if any(entry['id'] == wanted_id for entry in annot['annotations']):
                    response_soda = _soda_row_name(name_id)
                    break

        # Case 2: name and address annotations that point at the same row
        if response_soda == notFound:
            response_soda = _soda_best_pair(name_annot['annotations'],
                                            addr_annot['annotations'])

        # Case 3: exactly one address annotation.  A rejected candidate goes
        # back to notFound (it used to become "None found!", which skipped
        # Step 3 and leaked a different message to the user).
        if response_soda == notFound and len(addr_annot['annotations']) == 1:
            response_soda = _soda_within_threshold(
                _soda_row_name(addr_annot['annotations'][0]['id']))

        ## STEP 3
        # Case 1: name annotations paired with address reverse-lookup hits
        if response_soda == notFound:
            response_soda = _soda_best_pair(name_annot['annotations'],
                                            addr_rlook['entries'])
    
    # Record linkage
    
    
    # write the user entry to a csv; the with block closes the file, so no
    # explicit close() is needed afterwards
    with open ('user_input_file.csv', 'w', newline='') as csvfile:
        fieldnames = ['id', 'name', 'addr', 'city', 'ctry', 'code']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerow({'id' : "1", 'name': name, 'addr': addr, 'city':city, 'ctry':ctry, 'code':code})

    # dfA: known companies, dfB: the single user record, dfD: the table the
    # final suggestions are looked up in
    dfA = pd.read_csv("companies_final.csv")
    dfB = pd.read_csv('user_input_file.csv')
    dfD = pd.read_csv("companies_dict.csv")
    # The 'id' columns stay in place.  DataFrame.drop returns a new frame, so
    # the former un-assigned drop('id', axis=1) calls had no effect.
    # NOTE(review): assign the result if the column is really meant to go.
    
    
   #############   BLOCK INDEXING
    def runBlock():
        """Look for exact matches on name and/or addr and dispatch accordingly.

        Candidate pairs are built by blocking on name, on addr, and on both.
        A pair agreeing on both fields is returned directly.  A hit on only
        one field is returned when unique and otherwise refined by the
        matching comparison step.  No hit, or hits on different records for
        name and addr, falls through to sorted-neighbourhood indexing.

        Every branch now returns what the step it delegates to returns
        (previously some branches returned the result and others None).
        """
        #Next, check for exact match in name
        blockNamePairs = recordlinkage.BlockIndex(on=['name']).index(dfA, dfB)
        #Next, check for exact match in addr
        blockAddrPairs = recordlinkage.BlockIndex(on=['addr']).index(dfA, dfB)
        #Next, check for exact match in name AND addr
        blockNAPairs = recordlinkage.BlockIndex(on=['name','addr']).index(dfA, dfB)

        name_hit = len(blockNamePairs) > 0
        addr_hit = len(blockAddrPairs) > 0

        if len(blockNAPairs) > 0:
            #grab the id and contents of the name+addr match
            print("they match")
            print("block name and addr match")
            return(returnResultsMI(blockNAPairs))
        if name_hit and addr_hit:
            #matches both, but on different records: inconclusive
            print("block different name and addr match")
            return runSort()
        if name_hit:
            #grab the id and contents of name match
            print("block name match")
            if len(blockNamePairs) > 1:
                return runCompName(blockNamePairs)
            return(returnResultsMI(blockNamePairs))
        if addr_hit:
            #grab the id and contents of addr match
            print("block addr match")
            if len(blockAddrPairs) > 1:
                return runCompAddr(blockAddrPairs)
            return(returnResultsMI(blockAddrPairs))
        #no exact matches: run sorted neighborhood
        print("no exact matches")
        return runSort()


    ###############
    
    #############   SORTED NEIGHBORHOOD INDEXING
    def runSort():
        """Build sorted-neighbourhood candidate pairs and pick the next step.

        ``sort`` encodes which index produced pairs: 1 = name only,
        2 = addr only, 3 = both, 0 = neither (fall back to the full index).
        Returns whatever the step it delegates to returns.
        """
        sortedNameIndexer = recordlinkage.SortedNeighbourhoodIndex(on='name')
        sortedNamePairs = sortedNameIndexer.index(dfA, dfB)
        sortedAddrIndexer = recordlinkage.SortedNeighbourhoodIndex(on='addr')
        sortedAddrPairs = sortedAddrIndexer.index(dfA, dfB)

        sort = 0
        if len(sortedNamePairs) > 0:
            sort += 1
        if len(sortedAddrPairs) > 0:
            sort += 2

        if sort == 0:
            #nothing found: run the full index
            print("full index NEXT")
            found = runFull()
            # str() is required: concatenating the int directly raised TypeError
            print("sort is " + str(sort))
            return found
        if sort == 1:
            #compare with name pairs
            print("sort name match")
            return runCompName(sortedNamePairs)
        if sort == 2:
            #compare with addr pairs
            print("sort addr match")
            return runCompAddr(sortedAddrPairs)
        #sort == 3: compare with both sets of pairs
        print("sort both match")
        return runCompBoth(sortedNamePairs, sortedAddrPairs)

    def runCompBoth(name_pairs,addr_pairs):
        """Score name-derived and addr-derived candidate pairs and choose a result.

        Both pair sets are compared on name, addr, city, ctry and code.  For
        each set the rows whose feature sum exceeds 4 are kept; if there are
        none, rows whose sum exceeds 3 are kept instead.

        :param name_pairs candidate pairs produced by indexing on name
        :param addr_pairs candidate pairs produced by indexing on addr
        :return the value of returnResultsDF for the chosen matches, or the
        value of runFull when the matches are missing or ambiguous
        """
        compare = recordlinkage.Compare()

        compare.string('name', 'name', method='jarowinkler', threshold=0.95)
        compare.string('addr', 'addr', method='jarowinkler', threshold=0.95)
        compare.exact('city', 'city')
        compare.exact('ctry', 'ctry')
        compare.string('code', 'code', method='jarowinkler', threshold=0.90)

        def _strongest(features):
            # Prefer rows with a feature sum above 4; otherwise accept rows
            # with a sum above 3.
            totals = features.sum(axis=1)
            strict = features[totals > 4]
            if len(strict) > 0:
                return strict
            return features[totals > 3]

        # The comparison vectors for name and for addr
        featuresName = compare.compute(name_pairs, dfA, dfB)
        featuresAddr = compare.compute(addr_pairs, dfA, dfB)

        ########## Classification
        matchesName = _strongest(featuresName)
        nameMatch = len(matchesName) > 0
        print(len(matchesName))

        matchesAddr = _strongest(featuresAddr)
        addrMatch = len(matchesAddr) > 0
        print(len(matchesAddr))

        ########## FSM
        print("let's check " + str(nameMatch) + str(addrMatch))
        if not nameMatch and not addrMatch:
            #nothing usable: run the full index
            print("cb Full Index NEXT")
            return runFull()
        if not nameMatch:
            #grab the id and contents of addr match
            print("cb sort addr match")
            print(matchesAddr)
            if len(matchesAddr) > 2:
                return runFull()
            return(returnResultsDF(matchesAddr))
        if not addrMatch:
            #grab the id and contents of name match
            print("cb sort name match")
            if len(matchesName) > 2:
                return runFull()
            return(returnResultsDF(matchesName))
        # Both sets matched.  This message used to sit after a return
        # statement and was never printed.
        print("cb sort addr and sort name")
        print(len(matchesName) + len(matchesAddr))
        if len(matchesName) == 1:
            return(returnResultsDF(matchesName))
        if len(matchesAddr) == 1:
            return(returnResultsDF(matchesAddr))
        return runFull()
                    
    ######### Specify to neighborhood & Compare
    def runCompName(name_pairs):
        """Score name-derived candidate pairs and return the matching records.

        Pairs are compared on name, addr, city, ctry and code.  Rows whose
        feature sum exceeds 4 are kept; if there are none, rows whose sum
        exceeds 3 are kept instead.  Up to two surviving rows are returned
        through returnResultsDF.  No rows or more than two rows fall back to
        runFull.

        Address pairs are never classified here, so the former branches that
        depended on an address match (and referenced an undefined
        ``matchesAddr``) were unreachable and have been removed.

        :param name_pairs candidate pairs produced by indexing on name
        """
        compare = recordlinkage.Compare()

        compare.string('name', 'name', method='jarowinkler', threshold=0.95)
        compare.string('addr', 'addr', method='jarowinkler', threshold=0.95)
        compare.exact('city', 'city')
        compare.exact('ctry', 'ctry')
        compare.string('code', 'code', method='jarowinkler', threshold=0.90)

        # The comparison vectors for name
        featuresName = compare.compute(name_pairs, dfA, dfB)

        ########## Classification
        totals = featuresName.sum(axis=1)
        matchesName = featuresName[totals > 4]
        if len(matchesName) == 0:
            matchesName = featuresName[totals > 3]
        nameMatch = len(matchesName) > 0
        addrMatch = False  # kept only for the debug line below
        print(len(matchesName))

        ########## FSM
        print("let's check " + str(nameMatch) + str(addrMatch))
        if not nameMatch:
            #nothing usable: run the full index
            print("cn Full Index NEXT")
            return runFull()
        #grab the id and contents of name match
        print("cn sort name match")
        if len(matchesName) > 2:
            return runFull()
        return(returnResultsDF(matchesName))
    
    ######### Specify to neighborhood & Compare
    def runCompAddr(addr_pairs):
        """Score addr-derived candidate pairs and return the matching records.

        Pairs are compared on name, addr, city, ctry and code.  Rows whose
        feature sum exceeds 4 are kept; if there are none, rows whose sum
        exceeds 3 are kept instead.  Up to two surviving rows are returned
        through returnResultsDF.  No rows or more than two rows fall back to
        runFull.

        Name pairs are never classified here, so the former branches that
        depended on a name match (and referenced an undefined
        ``matchesName``) were unreachable and have been removed.

        :param addr_pairs candidate pairs produced by indexing on addr
        """
        compare = recordlinkage.Compare()

        compare.string('name', 'name', method='jarowinkler', threshold=0.95)
        compare.string('addr', 'addr', method='jarowinkler', threshold=0.95)
        compare.exact('city', 'city')
        compare.exact('ctry', 'ctry')
        compare.string('code', 'code', method='jarowinkler', threshold=0.90)

        # The comparison vectors for addr
        featuresAddr = compare.compute(addr_pairs, dfA, dfB)

        ########## Classification
        totals = featuresAddr.sum(axis=1)
        matchesAddr = featuresAddr[totals > 4]
        if len(matchesAddr) == 0:
            matchesAddr = featuresAddr[totals > 3]
        addrMatch = len(matchesAddr) > 0
        nameMatch = False  # kept only for the debug line below
        print(len(matchesAddr))

        ########## FSM
        print("let's check " + str(nameMatch) + str(addrMatch))
        if not addrMatch:
            #nothing usable: run the full index
            print("ca Full Index NEXT")
            return runFull()
        #grab the id and contents of addr match
        print("ca sort addr match")
        print(matchesAddr)
        if len(matchesAddr) > 2:
            return runFull()
        return(returnResultsDF(matchesAddr))
    
    ####### If nothing else finds matches, run FULL INDEX
    def runFull():
        """Compare the user record against every pair from the full index.

        Pairs are scored on name, addr, city, ctry and code.  Rows whose
        feature sum exceeds 4 are returned through returnResultsDF; if there
        are none, rows whose sum exceeds 3 are returned instead.  Returns
        None when neither group has any rows.
        """
        comparer = recordlinkage.Compare()
        comparer.string('name', 'name', method='jarowinkler', threshold=0.90)
        comparer.string('addr', 'addr', method='jarowinkler', threshold=0.95)
        comparer.exact('city', 'city')
        comparer.exact('ctry', 'ctry')
        comparer.string('code', 'code', method='jarowinkler', threshold=0.90)

        candidate_pairs = recordlinkage.FullIndex().index(dfA, dfB)
        scores = comparer.compute(candidate_pairs, dfA, dfB)

        # Try the stricter cut first, then the looser one.
        top_rows = scores[scores.sum(axis=1) > 4]
        if len(top_rows) > 0:
            print("full match all")
            return(returnResultsDF(top_rows))

        near_rows = scores[scores.sum(axis=1) > 3]
        if len(near_rows) > 0:
            print("full match")
            print(near_rows)
            return(returnResultsDF(near_rows))
        return None

    ######## RETURN FROM MULTIINDEX

    def returnResultsMI(pairs):
        """Append the dfD rows named by a pair index to ``results``.

        Column 0 of the frame built from ``pairs`` supplies the row labels
        (presumably the dfA side of each pair; verify against the indexer).
        Each labelled dfD row is rendered as text, appended to the shared
        ``results`` list and printed.

        :param pairs index of candidate pairs
        :return the shared ``results`` list
        """
        first_level = pairs.to_frame(index = False)[0]
        print(first_level)
        grab_ids = [first_level[pos] for pos in range(len(first_level))]
        for record_id in grab_ids:
            text = dfD.loc[record_id].to_string(header = False, index = False)
            results.append(text)
            print(text)
        print(grab_ids)
        return results

    def returnResultsDF(pairs):
        """Append the dfD rows named by a feature frame's index to ``results``.

        Column 0 of the frame built from ``pairs.index`` supplies the row
        labels (presumably the dfA side of each pair; verify against the
        indexer).  Each labelled dfD row is rendered as text and appended to
        the shared ``results`` list.

        :param pairs frame of comparison features indexed by candidate pair
        :return the shared ``results`` list
        """
        pair_index = pairs.index
        print(type(pairs))
        print(pair_index)
        first_level = pair_index.to_frame(index = False)[0]
        grab_ids = [first_level[pos] for pos in range(len(first_level))]
        for record_id in grab_ids:
            text = dfD.loc[record_id].to_string(header = False, index = False)
            results.append(text)
        print(grab_ids)
        return results
    
    # Collect record-linkage suggestions.  The helper functions above append
    # to this list.
    results = []
    blockIndexer = recordlinkage.BlockIndex(on=['name', 'addr', 'city', 'ctry', 'code'])
    blockIndexPairs = blockIndexer.index(dfA, dfB)
    if len(blockIndexPairs) > 0:
        # All five fields agree.  The pairs are an index, so they go through
        # returnResultsMI (the former call to an undefined ``returnResults``
        # raised NameError).
        print(returnResultsMI(blockIndexPairs))
    else:
        runBlock()
    
    # These definitions repeat the edit-distance helpers defined earlier in
    # this function and rebind the same names.
    INSERTION_PENALTY = 1
    DELETION_PENALTY = 1
    # This substitution penalty differentiates from Levenshtein cost (would be 1)
    SUBSTITUTION_PENALTY = 2
    ALLOWED_LEVELS = ["word", "char"]
    LEVEL = "word"

    def compute_cost(D, i, j, token_X, token_Y):
        """Return the cheapest way to reach cell (i, j) of the cost matrix D.

        The candidates are the cell above plus INSERTION_PENALTY, the cell to
        the left plus DELETION_PENALTY, and the diagonal cell plus
        SUBSTITUTION_PENALTY (or plus 0 when the two tokens are equal).
        """
        relative_subst_cost = 0 if token_X == token_Y else SUBSTITUTION_PENALTY
        return min(D[i-1, j] + INSERTION_PENALTY, D[i, j-1] + DELETION_PENALTY, D[i-1, j-1] + relative_subst_cost)
    def tokenize_string(string, level="word"):
        """Split ``string`` on single spaces ("word") or into characters ("char")."""
        assert level in ALLOWED_LEVELS
        # Compare by value; ``is`` tests object identity and only worked by
        # accident of string interning.
        if level == "word":
            return string.split(" ")
        return list(string)
    def minimum_edit_distance(string1, string2, level="word"):
        """The function uses the dynamic programming approach from Wagner-Fischer to compute the minimum edit distance
        between two sequences.
        :param string1 first sequence
        :param string2 second sequence
        :param level defines on which granularity the algorithm will be applied. "word" specifies the token to
        be sequential words while "char" applies the algorithm on a character-by-character level
        :return tuple of (tokens of string2, distance as a numpy float)"""
        # Call tokenize string on the two address strings that were passed to the method
        string1_tokens = tokenize_string(string1, level)
        string2_tokens = tokenize_string(string2, level)
        n = len(string1_tokens)
        m = len(string2_tokens)
        # One extra row and column represent the empty prefix.  With an n x m
        # matrix the first token of each string was never compared and an
        # empty token list raised IndexError.
        D = np.zeros((n + 1, m + 1))
        D[:, 0] = np.arange(n + 1) * INSERTION_PENALTY
        D[0, :] = np.arange(m + 1) * DELETION_PENALTY

        for i in range(1, n + 1):
            for j in range(1, m + 1):
                D[i, j] = compute_cost(D, i, j, string1_tokens[i - 1], string2_tokens[j - 1])

        return string2_tokens, D[n, m]
    def preProcess(column):
        """Normalise one text value for matching.

        The value is transliterated with ``unidecode``.  Newlines, slashes and
        colons become spaces; hyphens, apostrophes and commas are removed.
        Runs of spaces are collapsed, surrounding whitespace and quotes are
        stripped, and the text is lower-cased.  Returns None when nothing is
        left.
        """
        text = unidecode(column)
        substitutions = (
            ('\n', ' '),   # ignore new lines
            ('-', ''),     # ignore special characters
            ('/', ' '),
            ("'", ''),
            (",", ''),
            (":", ' '),
            ('  +', ' '),  # ignore extra white space
        )
        for pattern, replacement in substitutions:
            text = re.sub(pattern, replacement, text)
        # ignore casing and surrounding quotes
        text = text.strip().strip('"').strip("'").lower().strip()
        return text if text else None
    
    
    # Turn the record-linkage candidates into a single response: none -> the
    # not-found message, one -> that candidate, two or three -> the candidate
    # closest to the user entry by edit distance.  With four or more
    # candidates response_rl keeps its earlier value.
    user_entry = " ".join([name, addr, city, ctry, code])
    user_proc = preProcess(user_entry)
    min_dist = 9999
    candidate_count = len(results)
    if candidate_count == 0:
        response_rl = "No matching address was found!"
    elif candidate_count == 1:
        response_rl = results[0]
    elif candidate_count < 4:
        for candidate in results:
            candidate_dist = minimum_edit_distance(preProcess(candidate), user_proc)[1]
            if candidate_dist < min_dist:
                min_dist, response_rl = candidate_dist, candidate
    print(response_rl)

    session.update({'rl': response_rl, 'user_addr': user_entry})
    # Dedupe
    # The user input was already written to this file above
    user_input_file = 'user_input_file.csv'
#     with open (user_input_file, 'w', newline='') as csvfile:
#         fieldnames = ['id', 'name', 'addr', 'city', 'ctry', 'code']
#         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
#         writer.writeheader()
#         writer.writerow({'id' : "1", 'name': name, 'addr': addr, 'city':city, 'ctry':ctry, 'code':code})
#     csvfile.close()
#     def preProcess(column):
#         # convert any unicode data into ASCII characters
#         column = unidecode(column)
#         # ignore new lines
#         column = re.sub('\n', ' ', column)
#         # ignore special characters
#         column = re.sub('-', '', column)
#         column = re.sub('/', ' ', column)
#         column = re.sub("'", '', column)
#         column = re.sub(",", '', column)
#         column = re.sub(":", ' ', column)
#         # ignore extra white space
#         column = re.sub('  +', ' ', column)
#         # ignore casing
#         column = column.strip().strip('"').strip("'").lower().strip()
#         if not column :
#             column = None
#         return column
    def readData(filename):
        """Load a CSV file into a dict of cleaned records.

        Every data row becomes a dict mapping column name to the value
        returned by ``preProcess``.  Records are keyed by ``filename``
        followed by the zero-based row number.
        """
        with open(filename) as handle:
            return {
                filename + str(row_number): {
                    column: preProcess(value)
                    for column, value in record.items()
                }
                for row_number, record in enumerate(csv.DictReader(handle))
            }
    data_entry = readData(user_input_file)
    data_1 = readData("companies_final.csv")
    with open('data_matching_learned_settings', 'rb') as sf :
        linker = dedupe.StaticRecordLink(sf)
    try:
        match = linker.match(data_1, data_entry)
        id = int(data_1[match[0][0][0]]['id'])
        with open('companies_final.csv', 'r') as my_file:
            reader = csv.reader(my_file)
            rows = list(reader)
            response_dedupe = ((rows[id][1] + " " + rows[id][2] + " " + rows[id][3] + " " + rows[id][4] + " " + rows[id][5]))
            my_file.close()
    except dedupe.core.BlockingError:
        response_dedupe = ("No matching address was found!")
    session['pysolr'] = response_pysolr
    session['SoDA'] = response_soda
    session['dedupe'] = response_dedupe
    session['user_addr'] = user_entry
    return render_template('searchAll.html', **locals())

In [None]:
# Start the Flask application's built-in server when this file is run directly.
if __name__ == "__main__":
    app.run()

INFO:werkzeug: * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
INFO:werkzeug:127.0.0.1 - - [31/Jul/2018 13:05:00] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/Jul/2018 13:05:00] "GET /favicon.ico HTTP/1.1" 404 -
INFO:werkzeug:127.0.0.1 - - [31/Jul/2018 14:01:23] "GET /index.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/Jul/2018 14:01:29] "GET /tables.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/Jul/2018 14:02:07] "GET /search.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.016 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.038 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%271+MOBILE+LIMITED%27+addr%3A%2730+CITY+RD%27+city%3A%27LONDON%27+ctry%3A%27%27+code%3A%27%27&wt=json' (get) with body '' in 0.005 seconds, with status

block name match
0    0
Name: 0, dtype: int64
1 MOBILE LIMITED 30 CITY ROAD LONDON EC1Y 2AB
[0]
1 MOBILE LIMITED 30 CITY ROAD LONDON EC1Y 2AB


INFO:werkzeug:127.0.0.1 - - [31/Jul/2018 14:04:01] "GET /failure.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/Jul/2018 14:04:06] "GET /search.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/Jul/2018 14:04:09] "GET /tables.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/Jul/2018 14:14:08] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/Jul/2018 14:14:19] "GET /index.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/Jul/2018 14:14:20] "GET /search.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.008 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.036 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%271+MOBILE+LIMITED%27+addr%3A%2730+CITY+RD%27+city%3A%27LONDON%27+ctry%3A%27%27+code%3A%27%27&wt=json' (get) with body '' in 0.003 seconds,

block name match
0    0
Name: 0, dtype: int64
1 MOBILE LIMITED 30 CITY ROAD LONDON EC1Y 2AB
[0]
1 MOBILE LIMITED 30 CITY ROAD LONDON EC1Y 2AB


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 09:46:00] "GET /login.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 09:46:00] "GET /static/vendor/bootstrap/css/bootstrap.min.css HTTP/1.1" 304 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 09:46:00] "GET /static/vendor/font-awesome/css/font-awesome.min.css HTTP/1.1" 304 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 09:46:00] "GET /static/vendor/jquery/jquery.min.js HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 09:46:00] "GET /static/css/sb-admin.css HTTP/1.1" 304 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 09:46:00] "GET /static/vendor/bootstrap/js/bootstrap.bundle.min.js HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 09:46:00] "GET /static/vendor/jquery-easing/jquery.easing.min.js HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 09:46:09] "GET /login.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 09:46:10] "GET /index.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 09:46:10] "G

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


  return self.new_target(*args, **kwargs)
INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:13:25] "POST /searchAll.html HTTP/1.1" 200 -


No matching address was found!


INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.079 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%271+MOBILE+LTD%27+addr%3A%2730+CITY+RD%27+city%3A%27LONDON%27+ctry%3A%27%27+code%3A%27%27&wt=json' (get) with body '' in 0.003 seconds, with status 200


no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records


No matching address was found!


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:13:30] "POST /searchAll.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.011 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.074 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%271+MOBILE+LTD%27+addr%3A%2730+CITY+RD%27+city%3A%27LONDON%27+ctry%3A%27UK%27+code%3A%27%27&wt=json' (get) with body '' in 0.004 seconds, with status 200


no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records


No matching address was found!


INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:13:53] "POST /searchAll.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.070 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%271+MOBILE+LTD%27+addr%3A%2730+CITY+RD%27+city%3A%27LONDON%27+ctry%3A%27%27+code%3A%27%27&wt=json' (get) with body '' in 0.004 seconds, with status 200


no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records


No matching address was found!


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:14:23] "POST /searchAll.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.009 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.037 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%271+MOBILE+LTD%27+addr%3A%2730+CITY+RD%27+city%3A%27LONDON%27+ctry%3A%27%27+code%3A%27%27&wt=json' (get) with body '' in 0.003 seconds, with status 200
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:14:56] "POST /pysolrUI.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:15:07] "GET /SODAUI.html HTTP/1.1" 200 -


no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:15:08] "GET /rlUI.html HTTP/1.1" 200 -


No matching address was found!


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:15:09] "GET /dedupeUI.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:15:13] "GET /success.html?method=dedupe HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records


No matching address was found!


INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:15:36] "POST /searchAll.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:16:15] "GET /failure.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:16:22] "GET /search.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:16:24] "GET /tables.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:19:51] "GET /search.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.033 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%271+MOBILE%27+addr%3A%2730+CITY+RD%27+city%3A%27LONDON%27+ctry%3A%27%27+code%3A%27%27&wt=json' (get) with body '' in 0.003 s

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records


No matching address was found!


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:20:01] "POST /searchAll.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:20:15] "GET /login.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:22:36] "GET /index.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:22:37] "GET /search.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.036 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%271+MOBILE+LTD%27+addr%3A%2730+CITY+RD%27+city%3A%27LONDON%27+ctry%3A%27%27+code%3A%27%27&wt=json' (get) with body '' in 0.004 seconds, with status 200


no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records


No matching address was found!


INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:23:49] "POST /searchAll.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.034 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%271+MOBILE+LTD%27+addr%3A%2730+CITY+RD%27+city%3A%27LONDON%27+ctry%3A%27UK%27+code%3A%27%27&wt=json' (get) with body '' in 0.003 seconds, with status 200


no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records


No matching address was found!


INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:23:53] "POST /searchAll.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.039 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%271+MOBILE+LTD%27+addr%3A%2730+CITY+RD%27+city%3A%27LONDON%27+ctry%3A%27UK%27+code%3A%27EC1Y+2AB%27&wt=json' (get) with body '' in 0.004 seconds, with status 200


no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records


full match
       0    1  2  3    4
0 0  1.0  0.0  1  1  1.0
<class 'pandas.core.frame.DataFrame'>
MultiIndex(levels=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 1

INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:24:02] "POST /searchAll.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.035 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%271+MOBILE+LIMITED+COMPANY%27+addr%3A%2730+CITY+RD%27+city%3A%27LONDON%27+ctry%3A%27%27+code%3A%27%27&wt=json' (get) with body '' in 0.003 seconds, with status 200
[2018-08-01 10:24:52,272] ERROR in app: Exception on /searchAll.html [POST]
Traceback (most recent call last):
  File "/anaconda/lib/python3.6/site-packages/flask/app.py", line 1982, in wsgi_app
    response = self.full_dispatch_request()
  File "/anaconda/lib/python3.6/site-packages/flask/app.py", l

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:37:49] "GET /rlUI.html HTTP/1.1" 200 -


No matching address was found!


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:37:52] "GET /dedupeUI.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:38:00] "GET /success.html?method=dedupe HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records


No matching address was found!


INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:38:20] "POST /searchAll.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:39:00] "GET /failure.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:39:06] "GET /search.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 10:39:07] "GET /tables.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 11:07:26] "GET /login.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 11:07:33] "GET /index.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 11:07:46] "GET /tables.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 11:08:02] "GET /search.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.011 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 11:08:33] "GET /rlUI.html HTTP/1.1" 200 -


No matching address was found!


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 11:08:34] "GET /dedupeUI.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 11:08:43] "GET /success.html?method=dedupe HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records


No matching address was found!


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 11:09:09] "POST /searchAll.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 11:09:38] "GET /failure.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 11:09:45] "GET /search.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 11:09:50] "GET /tables.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 12:37:25] "GET /login.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 12:51:43] "GET /index.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 12:52:00] "GET /tables.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 12:52:16] "GET /search.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.012 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.035 seconds, with status 200
INFO:pysolr:Finished 'http://lo

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 12:52:49] "GET /rlUI.html HTTP/1.1" 200 -


No matching address was found!


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 12:52:50] "GET /dedupeUI.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 12:52:51] "GET /success.html?method=dedupe HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records


No matching address was found!


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 12:53:09] "POST /searchAll.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 12:53:48] "GET /failure.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 12:53:55] "GET /search.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 12:53:59] "GET /tables.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:13:51] "GET /login.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:14:01] "GET /index.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:14:16] "GET /tables.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:14:28] "GET /search.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.011 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.037 seconds, with status 200
INFO:pysolr:Finished 'http://lo

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:15:04] "GET /rlUI.html HTTP/1.1" 200 -


No matching address was found!


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:15:05] "GET /dedupeUI.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:15:12] "GET /success.html?method=dedupe HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records


No matching address was found!


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:15:29] "POST /searchAll.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:16:00] "GET /failure.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:16:07] "GET /search.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:16:09] "GET /tables.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 13:19:47] "GET /login.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 14:05:44] "GET /index.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 14:06:02] "GET /tables.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 14:06:18] "GET /search.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.011 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.033 seconds, with status 200
INFO:pysolr:Finished 'http://lo

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 14:06:51] "GET /rlUI.html HTTP/1.1" 200 -


No matching address was found!


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records
INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 14:06:52] "GET /dedupeUI.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 14:06:55] "GET /success.html?method=dedupe HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.011 seconds, with status

no exact matches
sort both match
0
0
let's check FalseFalse
cb Full Index NEXT


INFO:dedupe.api:((SimplePredicate: (sameFiveCharStartPredicate, name), SimplePredicate: (tokenFieldPredicate, ctry)), (SimplePredicate: (firstTokenPredicate, addr), SimplePredicate: (hundredIntegerPredicate, name)), (LevenshteinSearchPredicate: (4, name), SimplePredicate: (alphaNumericPredicate, addr)), (SimplePredicate: (commonThreeTokens, name), SimplePredicate: (tokenFieldPredicate, ctry)))
INFO:dedupe.api:0 records
INFO:dedupe.api:100 records
INFO:dedupe.api:200 records
INFO:dedupe.api:300 records
INFO:dedupe.api:400 records
INFO:dedupe.api:500 records
INFO:dedupe.api:600 records
INFO:dedupe.api:700 records
INFO:dedupe.api:800 records


No matching address was found!


INFO:dedupe.api:900 records
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 14:07:14] "POST /searchAll.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 14:07:50] "GET /failure.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 14:07:56] "GET /search.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Aug/2018 14:07:59] "GET /tables.html HTTP/1.1" 200 -
