In [None]:
from flask import Flask, request, redirect, url_for, render_template, send_from_directory, Request, jsonify, session
import os
import pandas as pd
from werkzeug.utils import secure_filename
import dedupe
import pickle
import csv
import re
import numpy as np
import pysolr
import sodaclient
from unidecode import unidecode

# Directory that uploaded files are written to and served back from.
UPLOAD_FOLDER = '/tmp/'
# Extensions (lower-case, without the dot) accepted by allowed_file().
ALLOWED_EXTENSIONS = {'csv'}

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Flask signs the session cookie with this key, so it should not be fixed in
# source control.  Deployments can provide FLASK_SECRET_KEY; the previous
# literal stays as the fallback so existing setups behave exactly as before.
app.secret_key = os.environ.get('FLASK_SECRET_KEY', 'super secret key')
# NOTE(review): SESSION_TYPE looks like a Flask-Session setting, and nothing
# visible in this file initialises that extension -- confirm it is needed.
app.config['SESSION_TYPE'] = 'filesystem'

def allowed_file(filename):
    """Tell whether *filename* carries an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively.  A name without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS

@app.route('/')
@app.route('/login.html')
def login(name=None):
    """Render ``login.html`` for both the site root and ``/login.html``.

    ``name`` is handed to the template unchanged.  Neither URL rule has a
    ``name`` placeholder, so it keeps its default when reached via a route.
    """
    context = {'name': name}
    return render_template('login.html', **context)

@app.route('/index.html')
def index(name=None):
    """Render ``index.html``, handing ``name`` through to the template."""
    page = render_template('index.html', name=name)
    return page

@app.route('/charts.html')
def charts(name=None):
    """Render ``charts.html`` with ``name`` as its only context variable."""
    context = dict(name=name)
    return render_template('charts.html', **context)

@app.route('/register.html')
def register(name=None):
    """Render ``register.html``; ``name`` is forwarded to the template."""
    template = 'register.html'
    return render_template(template, name=name)

@app.route('/success.html')
def success():
    """Render ``success.html`` for the result of one matching method.

    The ``method`` query parameter is used as the session key from which
    the stored response is read.  The template receives ``method``,
    ``response`` and ``user_addr``; each is ``None`` when absent.
    """
    method = request.args.get('method')
    return render_template(
        'success.html',
        method=method,
        response=session.get(method),
        user_addr=session.get('user_addr'),
    )

@app.route('/failure.html')
def failure():
    """Render ``failure.html``; no context variables are supplied."""
    page = render_template('failure.html')
    return page

@app.route('/SODAUI.html')
def SODAUI():
    """Look up the address held in the session with the SoDA annotator.

    The five address fields are read from the session, joined into one
    string and sent to the ``companies_name`` and ``companies_addr``
    lexicons.  A suggestion is produced only when exactly one name
    annotation comes back and an address annotation shares its numeric id.

    Side effects:
        Stores the suggestion (or the "not found" message) in
        ``session['SoDA']`` and the joined user entry in
        ``session['user_addr']``.

    Returns:
        The rendered ``SODAUI.html`` template.  All local variables are
        passed to the template through ``locals()``.
    """
    # initialize responses
    response_soda = "No matching address was found!"

    # Get the user-entered address.  A field that was never submitted comes
    # back as None; treat it as empty so that building ``user_entry`` cannot
    # fail with a TypeError when this page is opened without a prior search.
    name = session.get('name') or ''
    addr = session.get('addr') or ''
    city = session.get('city') or ''
    ctry = session.get('ctry') or ''
    code = session.get('code') or ''

    # SODA
    user_entry = " ".join((name, addr, city, ctry, code))
    # Establish a connection to the soda web client
    client = sodaclient.SodaClient("http://localhost:8080/")
    resp_name = client.annot('companies_name', user_entry, 'stem1')
    resp_addr = client.annot('companies_addr', user_entry, 'stem1')
    # case 1- there is only one match in the name dictionary
    if len(resp_name['annotations']) == 1:
        name_id = resp_name['annotations'][0]['id']
        # presumably ids look like "<PREFIX>_<number>"; the address id is
        # rebuilt from the numeric part -- verify against the lexicon data
        addr_id = "ADDR_" + name_id.split('_')[1]
        # First, check if there is a matching address with the same ID
        for entry in resp_addr['annotations']:
            if entry['id'] == addr_id:
                # the full name and address to recommend to the user
                response_soda = (resp_name['annotations'][0]['coveredText']
                                 + " " + entry['coveredText'])
    session['SoDA'] = response_soda
    session['user_addr'] = user_entry
    return render_template('SODAUI.html', **locals())

@app.route('/pysolrUI.html', methods=['GET', 'POST'])
def pysolrUI():
    """Search the company list in Solr for the user-entered address.

    On POST the form fields ``name``, ``addr``, ``city``, ``ctry`` and
    ``code`` are first copied into the session.  The address is then read
    back from the session, the Solr core is cleared and refilled from
    ``companies_final.csv``, and the core is queried.

    Side effects:
        Deletes and re-adds every document in the ``new_core`` Solr core,
        stores the first hit (or the "not found" message) in
        ``session['pysolr']`` and the joined user entry in
        ``session['user_addr']``.

    Returns:
        The rendered ``pysolrUI.html`` template.  All local variables are
        passed to the template through ``locals()``.
    """
    # initialize responses
    response_pysolr = "No matching address was found!"

    if request.method == 'POST':
        for field in ('name', 'addr', 'city', 'ctry', 'code'):
            session[field] = request.form[field]

    # Get the user-entered address.  Missing session values are treated as
    # empty strings so a plain GET without a prior search no longer fails
    # with a TypeError while the strings below are concatenated.
    name = session.get('name') or ''
    addr = session.get('addr') or ''
    city = session.get('city') or ''
    ctry = session.get('ctry') or ''
    code = session.get('code') or ''

    user_entry = " ".join((name, addr, city, ctry, code))

    # Pysolr
    # Build one document per CSV data row (the header row is skipped)
    fields = ['id', 'name', 'addr', 'city', 'ctry', 'code']
    with open('companies_final.csv', mode='r') as infile:
        reader = csv.reader(infile)
        next(reader)
        dict_list = [
            {field_name: row[position]
             for position, field_name in enumerate(fields)}
            for row in reader
        ]
    conn = pysolr.Solr('http://localhost:8984/solr/new_core')
    # Clear what is currently in the index and add the dictionary of addresses
    # NOTE(review): this rebuilds the whole core on every request.
    conn.delete(q="*:*")
    conn.add(dict_list)
    # Query solr for the user input
    # NOTE(review): values are wrapped in single quotes and not escaped; the
    # Solr standard query parser documents double quotes for phrases and
    # backslash escaping for special characters -- confirm the intended
    # query semantics before changing this string.
    query_str = " ".join(
        "{}:'{}'".format(field_name, value)
        for field_name, value in (('name', name), ('addr', addr),
                                  ('city', city), ('ctry', ctry),
                                  ('code', code))
    )
    results = conn.search(query_str)
    # Only the first hit is used as the suggestion.
    for result in results:
        # NOTE(review): '+' followed by join gives a space-separated address
        # only if each field comes back as a list of values -- confirm
        # against the core's schema.
        response_pysolr = " ".join(result['name'] + result['addr']
                                   + result['city'] + result['ctry']
                                   + result['code'])
        break
    session['pysolr'] = response_pysolr
    session['user_addr'] = user_entry
    return render_template('pysolrUI.html', **locals())

@app.route('/dedupeUI.html')
def dedupeUI():
    """Match the address held in the session against the company list with dedupe.

    The user entry is written to ``user_input_file.csv``, both it and
    ``companies_final.csv`` are loaded and normalised, and a
    ``dedupe.StaticRecordLink`` built from ``data_matching_learned_settings``
    links the two.  The first linked company row, if any, becomes the
    suggestion.

    Side effects:
        Overwrites ``user_input_file.csv``, stores the suggestion (or the
        "not found" message) in ``session['dedupe']`` and the joined user
        entry in ``session['user_addr']``.

    Returns:
        The rendered ``dedupeUI.html`` template.  All local variables are
        passed to the template through ``locals()``.
    """
    # initialize responses
    response_dedupe = "No matching address was found!"

    # Get the user-entered address.  Missing session values are treated as
    # empty strings so that opening this page without a prior search does
    # not fail with a TypeError during concatenation.
    name = session.get('name') or ''
    addr = session.get('addr') or ''
    city = session.get('city') or ''
    ctry = session.get('ctry') or ''
    code = session.get('code') or ''

    user_entry = " ".join((name, addr, city, ctry, code))

    # Write the user input to a file
    # NOTE(review): the file name is fixed, so concurrent requests overwrite
    # each other's input.
    user_input_file = 'user_input_file.csv'
    with open(user_input_file, 'w', newline='') as csvfile:
        fieldnames = ['id', 'name', 'addr', 'city', 'ctry', 'code']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerow({'id': "1", 'name': name, 'addr': addr,
                         'city': city, 'ctry': ctry, 'code': code})

    def preProcess(column):
        """Normalise one CSV cell; return None for a missing or empty cell."""
        # DictReader fills absent trailing cells with None
        if column is None:
            return None
        # convert any unicode data into ASCII characters
        column = unidecode(column)
        # ignore new lines
        column = column.replace('\n', ' ')
        # ignore special characters
        column = column.replace('-', '')
        column = column.replace('/', ' ')
        column = column.replace("'", '')
        column = column.replace(",", '')
        column = column.replace(":", ' ')
        # ignore extra white space
        column = re.sub('  +', ' ', column)
        # ignore casing
        column = column.strip().strip('"').strip("'").lower().strip()
        if not column:
            column = None
        return column

    def readData(filename):
        """Load a CSV into ``{filename + row_number: normalised_row_dict}``."""
        data_d = {}
        with open(filename) as f:
            reader = csv.DictReader(f)
            for i, row in enumerate(reader):
                data_d[filename + str(i)] = {
                    k: preProcess(v) for (k, v) in row.items()
                }
        return data_d

    data_entry = readData(user_input_file)
    data_1 = readData("companies_final.csv")
    with open('data_matching_learned_settings', 'rb') as sf:
        linker = dedupe.StaticRecordLink(sf)
    try:
        match = linker.match(data_1, data_entry)
    except dedupe.core.BlockingError:
        match = []
    # An empty result means nothing was linked; indexing it unconditionally
    # used to raise an uncaught IndexError instead of reporting "not found".
    if match:
        row_index = int(data_1[match[0][0][0]]['id'])
        with open('companies_final.csv', 'r') as my_file:
            rows = list(csv.reader(my_file))
        # NOTE(review): this treats the record's ``id`` value as its row
        # position in the file (header at index 0) -- confirm the ids are
        # sequential and 1-based.
        response_dedupe = " ".join(rows[row_index][1:6])
    session['dedupe'] = response_dedupe
    session['user_addr'] = user_entry
    return render_template('dedupeUI.html', **locals())


@app.route('/tmp/<filename>')
def uploaded_file(filename):
    """Send back the file called *filename* from the configured upload folder.

    NOTE(review): the upload folder is the shared ``/tmp/`` directory, so
    this route can presumably serve any file located there, not only
    uploads -- confirm that this is acceptable.
    """
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename)

@app.route('/upload.html', methods=['GET', 'POST'])
def upload(name=None):
    """Show the upload form and store a posted CSV file.

    On POST the form parts ``file`` and ``file2`` are examined in that
    order.  A missing part or an empty filename flashes a message and
    redirects back to the form.  The first part whose filename passes
    ``allowed_file`` is saved under its sanitised name in the upload folder
    and the client is redirected to the URL serving it.

    NOTE(review): because a successful save returns immediately, ``file2``
    is only considered when ``file`` has a disallowed extension -- confirm
    whether both files were meant to be stored.

    Returns:
        A redirect as described above, or the rendered ``upload.html``
        template for GET requests and for POSTs where neither part had an
        allowed extension.
    """
    # ``flash`` is not among the names imported from flask at the top of the
    # file; without this import every validation failure raised NameError.
    from flask import flash

    if request.method == 'POST':
        for part_name in ('file', 'file2'):
            # check if the post request has the file part
            if part_name not in request.files:
                flash('No file part')
                return redirect(request.url)
            posted = request.files[part_name]
            # if the user does not select a file, the browser still submits
            # an empty part without a filename
            if posted.filename == '':
                flash('No selected file')
                return redirect(request.url)
            if posted and allowed_file(posted.filename):
                filename = secure_filename(posted.filename)
                posted.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
                return redirect(url_for('uploaded_file', filename=filename))
    return render_template('upload.html', name=name)

@app.route('/tables.html')
def tables(name=None):
    """Render ``tables.html`` with the rows of ``companies_final.csv``.

    The first CSV row is skipped as a header.  Each remaining row becomes a
    dict keyed by ``id``, ``name``, ``addr``, ``city``, ``ctry`` and ``code``
    (taken from the first six columns); the list of dicts is passed to the
    template as ``dict_list``.  ``name`` is accepted but not used.
    """
    columns = ['id', 'name', 'addr', 'city', 'ctry', 'code']
    with open('companies_final.csv', mode='r') as source:
        records = csv.reader(source)
        next(records)  # drop the header row
        dict_list = [
            {column: record[position]
             for position, column in enumerate(columns)}
            for record in records
        ]
    return render_template('tables.html', dict_list=dict_list)

@app.route('/forgot-password.html')
def forgot(name=None):
    """Render ``forgot-password.html``, forwarding ``name`` to the template."""
    context = {'name': name}
    return render_template('forgot-password.html', **context)

@app.route('/navbar.html')
def navbar(name=None):
    """Render ``navbar.html`` with ``name`` as its only context variable."""
    page = render_template('navbar.html', name=name)
    return page

@app.route('/cards.html')
def cards(name=None):
    """Render ``cards.html``; ``name`` is handed through to the template."""
    template = 'cards.html'
    return render_template(template, name=name)

@app.route('/search.html')
def search():
    """Render ``search.html`` without any context variables."""
    page = render_template('search.html')
    return page

@app.route('/search.html', methods=['GET', 'POST'])
def search_submit():
    """Render ``search.html`` for the rule that also accepts POST.

    The function has no local variables, so the template receives no
    context.

    NOTE(review): ``search`` is registered on the same URL for GET, so this
    view is presumably only reached by POST requests -- confirm.
    """
    return render_template('search.html')


@app.route('/searchAll.html', methods=['GET', 'POST'])
def searchAll():
    """Run the Solr, SoDA and dedupe look-ups for the user-entered address.

    On POST the form fields ``name``, ``addr``, ``city``, ``ctry`` and
    ``code`` are first copied into the session.  The address is then read
    back from the session and passed to the three matchers in turn.

    Side effects:
        Deletes and re-adds every document in the ``new_core`` Solr core,
        overwrites ``user_input_file.csv``, and stores the three suggestions
        (or "not found" messages) in ``session['pysolr']``,
        ``session['SoDA']`` and ``session['dedupe']`` plus the joined user
        entry in ``session['user_addr']``.

    Returns:
        The rendered ``searchAll.html`` template.  All local variables are
        passed to the template through ``locals()``.
    """
    # initialize responses
    response_pysolr = "No matching address was found!"
    response_soda = "No matching address was found!"
    response_dedupe = "No matching address was found!"

    if request.method == 'POST':
        for field in ('name', 'addr', 'city', 'ctry', 'code'):
            session[field] = request.form[field]

    # Get the user-entered address.  Missing session values are treated as
    # empty strings so a plain GET without a prior search no longer fails
    # with a TypeError while the strings below are concatenated.
    name = session.get('name') or ''
    addr = session.get('addr') or ''
    city = session.get('city') or ''
    ctry = session.get('ctry') or ''
    code = session.get('code') or ''

    user_entry = " ".join((name, addr, city, ctry, code))

    # Pysolr
    # Build one document per CSV data row (the header row is skipped)
    fields = ['id', 'name', 'addr', 'city', 'ctry', 'code']
    with open('companies_final.csv', mode='r') as infile:
        reader = csv.reader(infile)
        next(reader)
        dict_list = [
            {field_name: row[position]
             for position, field_name in enumerate(fields)}
            for row in reader
        ]
    conn = pysolr.Solr('http://localhost:8984/solr/new_core')
    # Clear what is currently in the index and add the dictionary of addresses
    # NOTE(review): this rebuilds the whole core on every request.
    conn.delete(q="*:*")
    conn.add(dict_list)
    # Query solr for the user input
    # NOTE(review): values are wrapped in single quotes and not escaped; the
    # Solr standard query parser documents double quotes for phrases and
    # backslash escaping for special characters -- confirm the intended
    # query semantics before changing this string.
    query_str = " ".join(
        "{}:'{}'".format(field_name, value)
        for field_name, value in (('name', name), ('addr', addr),
                                  ('city', city), ('ctry', ctry),
                                  ('code', code))
    )
    results = conn.search(query_str)
    # Only the first hit is used as the suggestion.
    for result in results:
        # NOTE(review): '+' followed by join gives a space-separated address
        # only if each field comes back as a list of values -- confirm
        # against the core's schema.
        response_pysolr = " ".join(result['name'] + result['addr']
                                   + result['city'] + result['ctry']
                                   + result['code'])
        break

    # SODA
    # Establish a connection to the soda web client
    client = sodaclient.SodaClient("http://localhost:8080")
    resp_name = client.annot('companies_name', user_entry, 'stem1')
    resp_addr = client.annot('companies_addr', user_entry, 'stem1')
    # case 1- there is only one match in the name dictionary
    if len(resp_name['annotations']) == 1:
        name_id = resp_name['annotations'][0]['id']
        # presumably ids look like "<PREFIX>_<number>"; the address id is
        # rebuilt from the numeric part -- verify against the lexicon data
        addr_id = "ADDR_" + name_id.split('_')[1]
        # First, check if there is a matching address with the same ID
        for entry in resp_addr['annotations']:
            if entry['id'] == addr_id:
                # the full name and address to recommend to the user
                response_soda = (resp_name['annotations'][0]['coveredText']
                                 + " " + entry['coveredText'])

    # Dedupe
    # Write the user input to a file
    # NOTE(review): the file name is fixed, so concurrent requests overwrite
    # each other's input.
    user_input_file = 'user_input_file.csv'
    with open(user_input_file, 'w', newline='') as csvfile:
        fieldnames = ['id', 'name', 'addr', 'city', 'ctry', 'code']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerow({'id': "1", 'name': name, 'addr': addr,
                         'city': city, 'ctry': ctry, 'code': code})

    def preProcess(column):
        """Normalise one CSV cell; return None for a missing or empty cell."""
        # DictReader fills absent trailing cells with None
        if column is None:
            return None
        # convert any unicode data into ASCII characters
        column = unidecode(column)
        # ignore new lines
        column = column.replace('\n', ' ')
        # ignore special characters
        column = column.replace('-', '')
        column = column.replace('/', ' ')
        column = column.replace("'", '')
        column = column.replace(",", '')
        column = column.replace(":", ' ')
        # ignore extra white space
        column = re.sub('  +', ' ', column)
        # ignore casing
        column = column.strip().strip('"').strip("'").lower().strip()
        if not column:
            column = None
        return column

    def readData(filename):
        """Load a CSV into ``{filename + row_number: normalised_row_dict}``."""
        data_d = {}
        with open(filename) as f:
            reader = csv.DictReader(f)
            for i, row in enumerate(reader):
                data_d[filename + str(i)] = {
                    k: preProcess(v) for (k, v) in row.items()
                }
        return data_d

    data_entry = readData(user_input_file)
    data_1 = readData("companies_final.csv")
    with open('data_matching_learned_settings', 'rb') as sf:
        linker = dedupe.StaticRecordLink(sf)
    try:
        match = linker.match(data_1, data_entry)
    except dedupe.core.BlockingError:
        match = []
    # An empty result means nothing was linked; indexing it unconditionally
    # used to raise an uncaught IndexError instead of reporting "not found".
    if match:
        row_index = int(data_1[match[0][0][0]]['id'])
        with open('companies_final.csv', 'r') as my_file:
            rows = list(csv.reader(my_file))
        # NOTE(review): this treats the record's ``id`` value as its row
        # position in the file (header at index 0) -- confirm the ids are
        # sequential and 1-based.
        response_dedupe = " ".join(rows[row_index][1:6])
    session['pysolr'] = response_pysolr
    session['SoDA'] = response_soda
    session['dedupe'] = response_dedupe
    session['user_addr'] = user_entry
    return render_template('searchAll.html', **locals())

In [None]:
# Start the Flask server only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    app.run()

INFO:werkzeug: * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
INFO:werkzeug:127.0.0.1 - - [24/Jul/2018 14:49:36] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jul/2018 14:49:37] "GET /index.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jul/2018 14:49:38] "GET /search.html HTTP/1.1" 200 -
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<delete><q' in 0.010 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/update/?commit=true' (post) with body '<add><doc>' in 0.034 seconds, with status 200
INFO:pysolr:Finished 'http://localhost:8984/solr/new_core/select/?q=name%3A%27%27+addr%3A%27%27+city%3A%27%27+ctry%3A%27%27+code%3A%27%27&wt=json' (get) with body '' in 0.002 seconds, with status 200
INFO:werkzeug:127.0.0.1 - - [24/Jul/2018 14:49:40] "POST /pysolrUI.html HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jul/2018 14:49:42] "GET /success.html?method=pysolr HTTP/1.1" 200 -
