In [1]:
import pandas as pd 
import time
import traceback

from ast import literal_eval
import numpy as np
from datetime import datetime
from pymongo import MongoClient

import AllSample as ass
import Filter2 as fil2
import TopWords as top
import WordCount3 as woc3
import WordTF_IDF as tf_idf

from flask import Flask, render_template, request, jsonify

In [2]:
# Load every collection the Flask app serves into memory, printing the
# elapsed time (in minutes) after each stage.
t0 = time.time()
client = MongoClient()
db = client["test_database"]
collection = db["test_collection"]

# Handles to the MongoDB collections read below.
reviewsMongo = db["reviews_mini"]
#restaurantsMongo = db.restaurants_change
restaurantsMongoA = db["restaurants_change_a"]
restaurantsMongoB = db["restaurants_change_b"]
reviews_plainMongo = db["reviews_plain"]

# Reviews collection -> DataFrame; the Mongo "_id" column is removed and
# "stars" is stored as int8.
allDataLem = pd.DataFrame(list(reviewsMongo.find({})))
t1 = time.time()
print(f"Database converted in: {(t1 - t0) / 60}")
allDataLem.pop("_id")
allDataLem = allDataLem.astype({"stars": np.int8})
t2 = time.time()
print(f"allDataLem done in: {(t2 - t0) / 60}")

# Both restaurant-name collections, again without "_id".
restaurantNamesA = pd.DataFrame(list(restaurantsMongoA.find({})))
restaurantNamesA.pop("_id")
restaurantNamesB = pd.DataFrame(list(restaurantsMongoB.find({})))
restaurantNamesB.pop("_id")
#Change the values here for testing purposes!
# From here on the two names hold JSON *strings* (records orientation),
# not DataFrames.
restaurantNamesA = restaurantNamesA.to_json(orient="records")
restaurantNamesB = restaurantNamesB.to_json(orient="records")

t3 = time.time()
print(f"Restaurants done in: {(t3 - t2) / 60}")

# Plain-text reviews, prepared the same way as allDataLem.
reviewsPlain = pd.DataFrame(list(reviews_plainMongo.find({})))
reviewsPlain.pop("_id")
reviewsPlain = reviewsPlain.astype({"stars": np.int8})
t4 = time.time()
print(f"Just reviews in: {(t4 - t3) / 60}")

print(f"All done in: {(t4 - t0) / 60}")

Database converted in: 1.2586257775624594
allDataLem done in: 1.3668813904126484
Restaurants done in: 0.005828666687011719
Just reviews in: 0.9225619832674662
All done in: 2.2952720403671263


In [3]:
# Module-level slot that the /filter endpoint reads and reassigns (via
# `global lastDF`) to remember the frame of the last restaurant it served.
# It starts as a single placeholder row whose "new_name" is NaN.
lastDF = pd.DataFrame(columns=['new_name'], index=['A'])
# The Flask application every route below is registered on.
app = Flask(__name__)

@app.route("/")
def index():
    """Serve the site root by rendering the ``/index2.html`` template."""
    landingTemplate = "/index2.html"
    return render_template(landingTemplate)
    
@app.route("/AllSample")
def allSample():
    """Serve ``/AllSample`` by rendering the ``AllSample2.html`` template."""
    sampleTemplate = "AllSample2.html"
    return render_template(sampleTemplate)

@app.route("/AllSample2")
def allSample2():
    """Serve ``/AllSample2`` by rendering the ``AllSample3.html`` template."""
    sampleTemplate = "AllSample3.html"
    return render_template(sampleTemplate)

@app.route("/restaurantsNames", methods=['POST'])
def restaurantsNames():
    """Answer a POST with the module-level ``restaurantNamesA`` passed through ``jsonify``.

    ``restaurantNamesA`` is already a JSON string (it is produced with
    ``to_json(orient='records')`` in the loading cell), so the response body
    is that string wrapped once more as a JSON string literal.
    """
    payload = restaurantNamesA
    return jsonify(payload)

@app.route("/restaurantsNames2", methods=['POST'])
def restaurantsNames2():
    """Answer a POST with the module-level ``restaurantNamesB`` passed through ``jsonify``.

    ``restaurantNamesB`` is already a JSON string (it is produced with
    ``to_json(orient='records')`` in the loading cell), so the response body
    is that string wrapped once more as a JSON string literal.
    """
    payload = restaurantNamesB
    return jsonify(payload)

@app.route("/topWords", methods=['GET', 'POST'])
def endpointTW():
    """Return the top-words result for the reviews matching the posted filters.

    Expected form fields: ``newName``, ``rating``, ``dateLow``, ``dateUp``.

    Returns:
        ``str(top.main(...))`` computed on the ``lemmas`` column of the
        filtered reviews; ``"No data for those filters."`` when nothing
        matches; a one-line ``"Python  eroor: ..."`` string when anything in
        the pipeline raises (including a missing form field); or a hint
        string for non-POST requests.
    """
    if request.method != 'POST':
        return "Send a proper POST request!"
    try:
        # Read the form first so a malformed request fails before the
        # full review frame is copied.
        newName = request.form['newName']
        rating = request.form['rating']
        dateLow = request.form['dateLow']
        dateUp = request.form['dateUp']
        # fil2.main gets its own copy so the shared frame is never touched.
        filtered = fil2.main(allDataLem.copy(), "", newName, rating, dateLow, dateUp)
        if filtered.empty:
            return "No data for those filters."
        status = top.main(filtered["lemmas"])
    except Exception:
        # `Exception` rather than a bare except: KeyboardInterrupt and
        # SystemExit must still be able to stop the server. The message text
        # is kept byte-for-byte because the front end receives it verbatim.
        return "Python  eroor: " + traceback.format_exc().splitlines()[-1]
    return str(status)
    
def _tfIdfFilterPlan(granulation, rating, dateLow, dateUp):
    """Return the filter arguments for one granulation, or None if it is unknown.

    The tuple is ``(baseLow, baseUp, cmpRating, cmpLow, cmpUp, emptyMessage)``:
    the date bounds of the base (restaurant-wide) filter, the rating and date
    bounds of the comparison filter, and the message to return when the
    comparison filter matches nothing.
    """
    # The Month variants take the first five characters of each bound
    # (e.g. "2016-" for a "YYYY-MM-DD" value) and pad them to Jan 1 / Dec 31.
    yearStart = dateLow[0:5] + '01-01'
    yearEnd = dateUp[0:5] + '12-31'
    noPeriod = "No data for those time periods."
    plans = {
        'Year': ("", "", "", dateLow, dateUp, noPeriod),
        'Month': (yearStart, yearEnd, "", dateLow, dateUp, noPeriod),
        'Rating': (dateLow, dateUp, rating, "", "", "No data for those ratings."),
        'YearPlus': ("", "", rating, dateLow, dateUp, noPeriod),
        'MonthPlus': (yearStart, yearEnd, rating, dateLow, dateUp, noPeriod),
    }
    return plans.get(granulation)


@app.route("/tf_idf", methods=['GET', 'POST'])
def endpointTF_IDF():
    """Compare a subset of a restaurant's reviews against a wider base set via TF-IDF.

    Expected form fields: ``newName``, ``rating``, ``dateLow``, ``dateUp``,
    ``granulation`` (one of ``Year``, ``Month``, ``Rating``, ``YearPlus``,
    ``MonthPlus``).

    The base set is the restaurant's reviews under the granulation's base date
    bounds; the comparison set is the base set narrowed by the granulation's
    rating/date bounds. Both are handed to ``tf_idf.main``.

    Returns:
        ``str(tf_idf.main(...))`` on success; a short explanatory string when
        a filter matches nothing, when both sets are identical, or when the
        granulation is not recognised; a one-line ``"Python  eroor: ..."``
        string when anything raises; or a hint string for non-POST requests.
    """
    if request.method != 'POST':
        return "Send a proper POST request!"
    try:
        newName = request.form['newName']
        rating = request.form['rating']
        dateLow = request.form['dateLow']
        dateUp = request.form['dateUp']
        granulation = request.form['granulation']

        plan = _tfIdfFilterPlan(granulation, rating, dateLow, dateUp)
        if plan is None:
            # Previously no branch ran, `status` stayed unbound and the final
            # return raised outside the try block (HTTP 500).
            return "Unknown granulation."
        baseLow, baseUp, cmpRating, cmpLow, cmpUp, emptyMessage = plan

        currentDF = fil2.main(allDataLem.copy(), "", newName, "", baseLow, baseUp)
        if currentDF.empty:
            return "No data for those filters."
        # Snapshot the base set before the second filter call receives the
        # same frame object.
        filtered0 = currentDF.copy()
        filtered1 = fil2.main(currentDF, "", "", cmpRating, cmpLow, cmpUp)
        if filtered1.empty:
            return emptyMessage
        if filtered1.equals(filtered0):
            return "Nothing to compare with."
        status = tf_idf.main(filtered0, filtered1, granulation)
    except Exception:
        # `Exception` rather than a bare except so KeyboardInterrupt and
        # SystemExit still propagate; message text kept byte-for-byte.
        return "Python  eroor: " + traceback.format_exc().splitlines()[-1]
    return str(status)
    
@app.route("/yearView")
def endpointYV():
    """Serve ``/yearView`` by rendering the ``/SingleRestaurant2.html`` template."""
    yearTemplate = "/SingleRestaurant2.html"
    return render_template(yearTemplate)

@app.route("/allRestReviews")
def endpointARR():
    """Serve ``/allRestReviews`` by rendering the ``/SingleRestaurant3.html`` template."""
    reviewsTemplate = "/SingleRestaurant3.html"
    return render_template(reviewsTemplate)

@app.route("/plainReviews", methods=['GET', 'POST'])
def endpointRP():
    """Return up to five randomly chosen plain-text reviews matching the posted filters.

    Expected form fields: ``newName``, ``rating``, ``dateLow``, ``dateUp``,
    ``searchWord`` (may be empty).

    Returns:
        A JSON list laid out as ``[stars, date, text, stars, date, text, ...]``
        (every value stringified); a short explanatory string when nothing
        matches; a one-line ``"Python  error: ..."`` string when anything
        raises; or a hint string for non-POST requests.
    """
    if request.method != 'POST':
        return "Send a proper POST request!"
    try:
        newName = request.form['newName']
        rating = request.form['rating']
        dateLow = request.form['dateLow']
        dateUp = request.form['dateUp']
        searchWord = request.form['searchWord']
        filtered = fil2.main(reviewsPlain.copy(), "", newName, rating, dateLow, dateUp)
        if filtered.empty:
            return "No data for those filters."
        if searchWord != "":
            # regex=False: the user types a word, not a pattern, so input
            # such as "(" or "$5" must be matched literally instead of
            # failing or matching something else.
            # na=False: rows without text simply do not match.
            hits = filtered['text'].str.contains(searchWord, regex=False, na=False)
            filtered = filtered[hits]
            if filtered.empty:
                return "Search word not found. The search is case sensitive!"
        rowCount = filtered.shape[0]
        if rowCount >= 5:
            # Five distinct row positions, drawn without replacement.
            positions = np.random.choice(rowCount, 5, replace=False)
        else:
            positions = range(rowCount)
        status = []
        for i in positions:
            status.append(str(filtered["stars"].iloc[i]))
            status.append(str(filtered["date"].iloc[i]))
            status.append(str(filtered["text"].iloc[i]))
    except Exception:
        # `Exception` rather than a bare except so KeyboardInterrupt and
        # SystemExit still propagate.
        return "Python  error: " + traceback.format_exc().splitlines()[-1]
    return jsonify(status)

@app.route("/plainReviewsAll", methods=['GET', 'POST'])
def endpointRPA():
    """Return one page (20 reviews) of a restaurant's plain-text reviews, newest first.

    Form fields used: ``newName`` and ``tryCount`` (1-based page number;
    values below 1 are treated as page 1). Other posted fields are ignored.

    Returns:
        A JSON list laid out as ``[stars, date, text, stars, date, text, ...]``
        (every value stringified); a short explanatory string when the
        restaurant has no reviews or the page is past the end; a one-line
        ``"Python  error: ..."`` string when anything raises (for example a
        non-numeric ``tryCount``); or a hint string for non-POST requests.
    """
    if request.method != 'POST':
        return "Send a proper POST request!"
    try:
        newName = request.form['newName']
        tryCount = request.form['tryCount']
        filtered = fil2.main(reviewsPlain.copy(), "", newName, "", "", "")
        if filtered.empty:
            return "No data for those filters."
        pageSize = 20
        # Clamp so that 0 or a negative page cannot turn into a negative
        # slice that silently reads from the end of the frame.
        page = max(int(tryCount), 1)
        minStart = pageSize * (page - 1)
        # iloc's stop bound is exclusive. The former `20 * page - 1` returned
        # only 19 rows and skipped rows 19, 39, ... on every page.
        maxEnd = minStart + pageSize
        # Non-inplace sort: `filtered` may be a slice of another frame.
        filtered = filtered.sort_values(by=['date'], ascending=False).iloc[minStart:maxEnd]
        if filtered.empty:
            return "No more reviews to show. Press the button again to view latest reviews!"
        status = []
        for i in range(filtered.shape[0]):
            status.append(str(filtered["stars"].iloc[i]))
            status.append(str(filtered["date"].iloc[i]))
            status.append(str(filtered["text"].iloc[i]))
    except Exception:
        # `Exception` rather than a bare except so KeyboardInterrupt and
        # SystemExit still propagate.
        return "Python  error: " + traceback.format_exc().splitlines()[-1]
    return jsonify(status)
    
@app.route("/filter", methods=['GET', 'POST'])
def endpointFilter():
    """Return a restaurant's reviews in a date range together with their word counts.

    Expected form fields: ``newName``, ``dateLow``, ``dateUp``, ``granulation``.

    The module-level ``lastDF`` caches the reviews of the most recently
    requested restaurant, filtered by name only. The date range is applied to
    a copy on every request, so successive requests for the same restaurant
    with different ranges all start from the complete set. (Caching the
    date-filtered frame, as before, made a later wider range return only the
    rows of the earlier narrower one.)

    Returns:
        ``jsonify`` of two JSON strings (the filtered reviews and the
        ``woc3.main`` result, both in records orientation);
        ``"No data for those filters."`` when nothing matches; a one-line
        ``"Python  eroor: ..."`` string when anything raises; or a hint
        string for non-POST requests.

    NOTE(review): ``lastDF`` is shared, unsynchronised state and the app is
    started with ``threaded=True``; concurrent requests can overwrite each
    other's cache entry.
    """
    global lastDF
    if request.method != 'POST':
        return "Send a proper POST request!"
    try:
        newName = request.form['newName']
        dateLow = request.form['dateLow']
        dateUp = request.form['dateUp']
        granulation = request.form['granulation']
        if str(lastDF["new_name"].iloc[0]) == newName:
            # Cache hit: work on a copy so the cached frame stays intact.
            byName = lastDF.copy()
        else:
            # Name-only filter (empty rating and date arguments).
            byName = fil2.main(allDataLem.copy(), "", newName, "", "", "")
            if byName.empty:
                return "No data for those filters."
            lastDF = byName.copy()
        filtered = fil2.main(byName, "", "", "", dateLow, dateUp)
        # Check for emptiness before counting words so woc3.main never
        # receives an empty frame.
        if filtered.empty:
            return "No data for those filters."
        result = woc3.main(filtered, granulation)
    except Exception:
        # `Exception` rather than a bare except so KeyboardInterrupt and
        # SystemExit still propagate; message text kept byte-for-byte.
        return "Python  eroor: " + traceback.format_exc().splitlines()[-1]
    return jsonify(filtered.to_json(orient='records'), result.to_json(orient='records'))

@app.route("/monthView")
def endpointMV():
    """Serve ``/monthView`` by rendering the ``/SingleYear.html`` template."""
    monthTemplate = "/SingleYear.html"
    return render_template(monthTemplate)
    
if __name__ == "__main__":
    # Start the development server: debugger on, auto-reloader off,
    # one thread per request.
    app.run(threaded=True, use_reloader=False, debug=True)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [17/Jan/2020 17:27:11] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:27:30] "[37mGET /AllSample HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:27:30] "[37mPOST /restaurantsNames HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:27:36] "[37mGET /AllSample2 HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:27:37] "[37mPOST /restaurantsNames2 HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:27:41] "[37mGET /AllSample HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:27:42] "[37mPOST /restaurantsNames HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:27:45] "[37mGET /yearView?%22Sidelines%20Tavern%20&%20Grill%22%20-%2085248 HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:27:46] "[37mPOST /filter HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:29:10] "[37mGET /yearView?%22Sidelines%20Tavern%20&%20Grill%22%20-%2085248 HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:29:10] "[37mGET /s

127.0.0.1 - - [17/Jan/2020 17:46:57] "[37mPOST /plainReviews HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:47:13] "[37mPOST /plainReviews HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:47:35] "[37mPOST /plainReviews HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:47:41] "[37mPOST /plainReviews HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:47:58] "[37mPOST /plainReviews HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:48:19] "[37mPOST /plainReviews HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:48:28] "[37mPOST /plainReviews HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:48:33] "[37mPOST /plainReviews HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:48:48] "[37mPOST /plainReviews HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:48:56] "[37mPOST /topWords HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:48:58] "[37mGET /monthView?%22Bacchanal%20Buffet%22%20-%2089109__split__2016 HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2020 17:48:58] "[37mPOST /filter 

In [34]:
# Write two restaurant-name frames into their MongoDB collections.
# NOTE(review): this cell carries execution count 34 and reads
# `restaurantNames`, which no visible cell of this notebook defines. It
# appears to be indexable with [0] and [1], each yielding a DataFrame;
# confirm where it comes from before running this cell.
restaurantsMongoA = db.restaurants_change_a
restaurantsMongoB = db.restaurants_change_b

# Each DataFrame row becomes one document (to_dict('records')).
# NOTE(review): nothing here clears the collections first, so re-running the
# cell presumably inserts the same documents again. Verify before re-executing.
restaurantsMongoA.insert_many(restaurantNames[0].to_dict('records'))
restaurantsMongoB.insert_many(restaurantNames[1].to_dict('records'))

<pymongo.results.InsertManyResult at 0x22ed2524308>

In [17]:
# Scratch copy of the plain reviews for the experiments in the cells below;
# a deep copy, so changes to it do not touch `reviewsPlain`.
reviewsPlainProba = reviewsPlain.copy(deep=True)
reviewsPlainProba

Unnamed: 0,stars,date,text,new_name
0,5,2017-08-02,Dont miss the peppercorn steak . The peppercor...,"""Mon Ami Gabi"" - 89109"
1,3,2013-03-16,This is a great place to come during the summe...,"""Mon Ami Gabi"" - 89109"
2,4,2013-01-04,Mon Ami Gabi is practically a landmark. In fac...,"""Mon Ami Gabi"" - 89109"
3,4,2010-07-02,Having done Vegas (at least once) every year f...,"""Mon Ami Gabi"" - 89109"
4,5,2017-08-04,"To start with, I'm French and I'm a Vegas loca...","""Mon Ami Gabi"" - 89109"
...,...,...,...,...
3346019,1,2017-08-18,Bar is absolute trash. Came in spend over 150 ...,"""Timbers Rancho"" - 89130"
3346020,4,2017-10-13,Nice place! I am a non-smoker and although a f...,"""Timbers Rancho"" - 89130"
3346021,5,2009-05-22,Are you kidding me?? 1 star? \r\r\r\n\r\r\r\nO...,"""Budweiser Racing Track Bar & Grill"" - 89119"
3346022,2,2013-05-14,$14.35 for a beer and a coke. \r\r\r\n\r\r\r\n...,"""Budweiser Racing Track Bar & Grill"" - 89119"


In [21]:
# Assemble a restaurant name that contains both a double quote and an
# apostrophe, then check that fil2.main can filter on it.
stringa1 = '"McDonald\''
stringa2 = 's'
stringa3 = f"{stringa1}{stringa2}"
print(stringa3)
# Full key passed as the name filter: "McDonald's" - M4C 1H9, limited to
# August 2016.
zaProbi = fil2.main(reviewsPlainProba, "", f'{stringa3}" - M4C 1H9', "", '2016-08-01', '2016-08-31')
zaProbi

"McDonald's


Unnamed: 0,stars,date,text,new_name
3339556,1,2016-08-31,Wrote them up on their horrible customer servi...,"""McDonald's"" - M4C 1H9"


In [54]:
# Time a literal substring filter that drops the non-matching rows in place.
t0 = time.time()
# The result of this call is discarded; it only adds to the measured time.
reviewsPlainProba["text"].str.contains("good")

# Index labels of the rows whose text does not contain the literal "Having".
indexNames = reviewsPlainProba.loc[
    reviewsPlainProba["text"].str.contains("Having", regex=False).eq(False)
].index
# Mutates reviewsPlainProba: only the rows containing "Having" remain.
reviewsPlainProba.drop(index=indexNames, inplace=True)

t1 = time.time()
print(f"Clocks at: {t1 - t0}")
reviewsPlainProba

Clocks at: 7.203769207000732


Unnamed: 0,stars,date,text,new_name
3,4,2010-07-02,Having done Vegas (at least once) every year f...,"""Mon Ami Gabi"" - 89109"
49,4,2010-12-29,"What is French for ""OMG I would totally go bac...","""Mon Ami Gabi"" - 89109"
208,5,2015-06-15,"We enjoyed dinner here prior to seeing O, and ...","""Mon Ami Gabi"" - 89109"
248,5,2016-09-05,Having a great time hanging out at the patio b...,"""Mon Ami Gabi"" - 89109"
393,3,2014-02-28,"Having Mon Ami Gabi bookmarked for awhile, I f...","""Mon Ami Gabi"" - 89109"
...,...,...,...,...
3344906,2,2010-07-03,Eek me thinks not is right! \r\r\r\n\r\r\r\n...,"""La Belle Province"" - H1W 2G2"
3345002,4,2011-06-16,I had dinner with two friends here the other n...,"""DiMartino's Italian Restaurant"" - 89123"
3345070,1,2015-12-29,Having been a Starbucks Barista myself that wo...,"""Starbucks"" - M8Y 0B6"
3345185,4,2014-08-19,"A newcomer to the Spring Mountain scene, I'm g...","""A & K Chinese Restaurant"" - 89146"


In [5]:
# Time the boolean-mask variant of the same "Having" filter; this one
# rebinds reviewsPlainProba to the selected rows instead of dropping in place.
t0 = time.time()
havingMask = reviewsPlainProba['text'].str.contains("Having")
reviewsPlainProba = reviewsPlainProba[havingMask]
t1 = time.time()
print(f"Clocks at: {t1 - t0}")
reviewsPlainProba

NameError: name 'reviewsPlainProba' is not defined