# My Web App

In [None]:
import pickle
import os
import re
import pandas as pd

from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import SGDClassifier

from nltk.corpus import stopwords
# English stop-word list from NLTK; it is pickled in the export step below so
# that the tokenizer can reuse it without NLTK being installed.
# NOTE: stopwords.words() raises LookupError when the 'stopwords' corpus has
# not been downloaded yet (nltk.download('stopwords')).
stop = stopwords.words('english')

In [None]:
# load the data
# 'movie_data.csv' is read from the working directory; the training cell uses
# its 'review' column as input text and its 'sentiment' column as the label.
df = pd.read_csv('movie_data.csv', encoding='utf-8')

In [None]:
# Train a sentiment model with a Stochastic Gradient Descent classifier.
#
# The tokenizer and vectorizer are defined here, before their first use, so the
# notebook survives "Restart Kernel -> Run All".


def tokenizer(text, stop_words=None):
    """Split a raw review into lower-cased word tokens plus emoticons.

    HTML tags are stripped, emoticons such as ``:)`` or ``;-(`` are collected
    (with the ``-`` nose removed) and appended after the words, every run of
    non-word characters is collapsed to one space, and stop words are dropped.

    Parameters
    ----------
    text : str
        Raw document text.
    stop_words : collection of str, optional
        Tokens to discard. Defaults to the notebook-level ``stop`` list.

    Returns
    -------
    list of str
    """
    if stop_words is None:
        stop_words = stop
    # Raw strings keep the regex escapes from being read as (invalid) string
    # escapes, which recent Python versions warn about.
    text = re.sub(r'<[^>]*>', '', text)
    # Match on the original casing: the pattern lists upper-case D and P, which
    # can never match once the text has been lower-cased.
    emoticons = re.findall(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    # The explicit ' ' separator stops the first emoticon from being glued onto
    # the last word when the text ends in a word character.
    text = (re.sub(r'[\W]+', ' ', text.lower())
            + ' ' + ' '.join(emoticons).replace('-', ''))
    return [w for w in text.split() if w not in stop_words]


# converts document into word vector
vect = HashingVectorizer(decode_error='ignore',
                         n_features=2**21,
                         preprocessor=None,
                         tokenizer=tokenizer)

# NOTE(review): scikit-learn 1.1 renamed loss='log' to loss='log_loss' and
# removed the old spelling in 1.3 -- change the value when upgrading.
classifier = SGDClassifier(loss='log', random_state=1, max_iter=1)
X_train = vect.transform(df['review'].values)
y_train = df['sentiment'].values

classifier.fit(X_train, y_train)

## Export the Model

In [None]:
# Create the directory (and sub-directory) that receives the pickled objects.
# exist_ok=True makes the cell safe to re-run and removes the race between
# checking for the directory and creating it.
dest = os.path.join('model', 'pickles')
os.makedirs(dest, exist_ok=True)

In [None]:
# Serialize the stop-word list and the trained classifier.
# The with-blocks flush and close each file even if pickle.dump() raises;
# the bare open() calls used before left the handles open.
with open(os.path.join(dest, 'stopwords.pkl'), 'wb') as stop_file:
    pickle.dump(stop, stop_file, protocol=4)
with open(os.path.join(dest, 'classifier.pkl'), 'wb') as clf_file:
    pickle.dump(classifier, clf_file, protocol=4)

## Load the Model

In [None]:
# load and reuse the pickles
from sklearn.feature_extraction.text import HashingVectorizer
import re
import os
import pickle

cur_dir = os.getcwd()

# Reload the pickled stop-word list. The with-block closes the file handle,
# which the previous bare open() call never did.
# NOTE: pickle.load() can execute arbitrary code -- only load files that this
# notebook produced itself.
with open(os.path.join('model', 'pickles', 'stopwords.pkl'), 'rb') as stop_file:
    stop = pickle.load(stop_file)

def tokenizer(text, stop_words=None):
    """Split a raw review into lower-cased word tokens plus emoticons.

    HTML tags are stripped, emoticons such as ``:)`` or ``;-(`` are collected
    (with the ``-`` nose removed) and appended after the words, every run of
    non-word characters is collapsed to one space, and stop words are dropped.

    Parameters
    ----------
    text : str
        Raw document text.
    stop_words : collection of str, optional
        Tokens to discard. Defaults to the module-level ``stop`` list.

    Returns
    -------
    list of str
    """
    if stop_words is None:
        stop_words = stop
    # Raw strings keep the regex escapes from being read as (invalid) string
    # escapes, which recent Python versions warn about.
    text = re.sub(r'<[^>]*>', '', text)
    # Match on the original casing: the pattern lists upper-case D and P, which
    # can never match once the text has been lower-cased.
    emoticons = re.findall(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    # The explicit ' ' separator stops the first emoticon from being glued onto
    # the last word when the text ends in a word character.
    text = (re.sub(r'[\W]+', ' ', text.lower())
            + ' ' + ' '.join(emoticons).replace('-', ''))
    return [w for w in text.split() if w not in stop_words]

# converts document into word vector
vect = HashingVectorizer(decode_error='ignore',
                         n_features=2**21,
                         preprocessor=None,
                         tokenizer=tokenizer)

# Reload the pickled classifier; the with-block closes the file handle that the
# previous bare open() call leaked.
# NOTE: pickle.load() can execute arbitrary code -- only load trusted files.
with open(os.path.join('model', 'pickles', 'classifier.pkl'), 'rb') as clf_file:
    classifier = pickle.load(clf_file)

In [None]:
# Try the reloaded model on two hand-written reviews.
import numpy as np
label = {0: 'negative', 1: 'positive'}

# predict() yields the class label; predict_proba() yields the per-class
# probabilities, of which the largest one is reported as a percentage.

example1 = ['Nice movie']
X = vect.transform(example1)
predicted = label[classifier.predict(X)[0]]
confidence = np.max(classifier.predict_proba(X)) * 100
print('Prediction 1: %s\nProbability 1: %.2f%%' % (predicted, confidence))

example2 = ['Terrible film']
X = vect.transform(example2)
predicted = label[classifier.predict(X)[0]]
confidence = np.max(classifier.predict_proba(X)) * 100
print('Prediction 2: %s\nProbability 2: %.2f%%' % (predicted, confidence))

# Create Database

In [None]:
# check current directory
# The SQLite files and the 'model' directory used in this notebook are given
# as relative paths, so they resolve against the directory shown here.
os.getcwd()

In [None]:
import sqlite3
import os

# The test cell, the web app and the model-update cell all open
# 'reviews.sqlite', so the database has to be created under that exact name
# (it was previously created as 'MyReviewDB.sqlite', which nothing else reads).
db_file = 'reviews.sqlite'

# Start from an empty database.
# NOTE: this discards any feedback rows that were collected earlier.
if os.path.exists(db_file):
    os.remove(db_file)

# create connection
conn = sqlite3.connect(db_file)
try:
    # create cursor
    c = conn.cursor()

    # execute commands
    c.execute('CREATE TABLE review_db (review TEXT, sentiment INTEGER, date TEXT)')

    example1 = 'I love this movie'
    c.execute("INSERT INTO review_db (review, sentiment, date) VALUES (?, ?, DATETIME('now'))", (example1, 1))

    example2 = 'I hate this movie'
    c.execute("INSERT INTO review_db (review, sentiment, date) VALUES (?, ?, DATETIME('now'))", (example2, 0))

    conn.commit()
finally:
    # Release the connection even if one of the statements above fails.
    conn.close()

## Test DB

In [None]:
# Open the review database and fetch every row whose date lies between
# 2018-01-01 10:10:10 and the current time.
conn = sqlite3.connect('reviews.sqlite')
try:
    c = conn.cursor()
    c.execute("SELECT * FROM review_db WHERE date BETWEEN '2018-01-01 10:10:10' AND DATETIME('now')")
    results = c.fetchall()
finally:
    # Close the connection even when the query raises (e.g. missing table);
    # previously a failure here left it open.
    conn.close()

In [None]:
# Show the rows fetched from review_db by the query cell.
print(results)

In [None]:
# Number of rows the query returned.
len(results)

# Develop Web App

In [None]:
%%writefile mywebapp.py
# define app that will be deployed on a server and save it in a file
# class ReviewForm(Form):
#    moviereview = TextAreaField('', [validators.DataRequired(), validators.length(min=15)])

# import class Flask
from flask import Flask, render_template, request
from wtforms import Form, TextAreaField, validators
import pickle
import sqlite3
import os
import numpy as np

# load and reuse the pickles
from sklearn.feature_extraction.text import HashingVectorizer
import re
import os
import pickle

cur_dir = os.getcwd()

# Load the pickled stop-word list. The with-block closes the file handle,
# which the previous bare open() call never did.
# NOTE: pickle.load() can execute arbitrary code -- only load trusted files.
with open(os.path.join('model', 'pickles', 'stopwords.pkl'), 'rb') as stop_file:
    stop = pickle.load(stop_file)

def tokenizer(text, stop_words=None):
    """Split a raw review into lower-cased word tokens plus emoticons.

    HTML tags are stripped, emoticons such as ``:)`` or ``;-(`` are collected
    (with the ``-`` nose removed) and appended after the words, every run of
    non-word characters is collapsed to one space, and stop words are dropped.

    Parameters
    ----------
    text : str
        Raw document text.
    stop_words : collection of str, optional
        Tokens to discard. Defaults to the module-level ``stop`` list.

    Returns
    -------
    list of str
    """
    if stop_words is None:
        stop_words = stop
    # Raw strings keep the regex escapes from being read as (invalid) string
    # escapes, which recent Python versions warn about.
    text = re.sub(r'<[^>]*>', '', text)
    # Match on the original casing: the pattern lists upper-case D and P, which
    # can never match once the text has been lower-cased.
    emoticons = re.findall(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    # The explicit ' ' separator stops the first emoticon from being glued onto
    # the last word when the text ends in a word character.
    text = (re.sub(r'[\W]+', ' ', text.lower())
            + ' ' + ' '.join(emoticons).replace('-', ''))
    return [w for w in text.split() if w not in stop_words]

# converts document into word vector
vect = HashingVectorizer(decode_error='ignore',
                         n_features=2**21,
                         preprocessor=None,
                         tokenizer=tokenizer)

# Load the pickled classifier; the with-block closes the file handle that the
# previous bare open() call leaked.
# NOTE: pickle.load() can execute arbitrary code -- only load trusted files.
with open(os.path.join('model', 'pickles', 'classifier.pkl'), 'rb') as clf_file:
    classifier = pickle.load(clf_file)

# SQLite file that receives user feedback; resolved against the directory the
# server process is started from.
db = os.path.join(os.getcwd(), 'reviews.sqlite')

def classify(document):
    """Predict the sentiment of a single review.

    Returns a ``(label, probability)`` tuple where ``label`` is 'negative' or
    'positive' and ``probability`` is the largest class probability reported
    by the classifier for this document.
    """
    features = vect.transform([document])
    predicted_class = classifier.predict(features)[0]
    confidence = np.max(classifier.predict_proba(features))
    names = {0: 'negative', 1: 'positive'}
    return names[predicted_class], confidence

def train(document, y):
    """Update the in-memory classifier with one labelled review.

    ``document`` is vectorized and passed, together with the label ``y``, to
    the classifier's ``partial_fit``.
    """
    classifier.partial_fit(vect.transform([document]), [y])

def sqlite_entry(path, document, y):
    """Append one labelled review to the ``review_db`` table.

    Parameters
    ----------
    path : str
        Path of the SQLite database file.
    document : str
        Review text, stored in the ``review`` column.
    y : int
        Sentiment label, stored in the ``sentiment`` column.

    The ``date`` column is filled by SQLite's ``DATETIME('now')``.

    Raises
    ------
    sqlite3.Error
        If the INSERT fails (for example when the table does not exist). The
        connection is closed in that case too; previously it was left open.
    """
    conn = sqlite3.connect(path)
    try:
        c = conn.cursor()
        c.execute("INSERT INTO review_db (review, sentiment, date)"
                  " VALUES (?, ?, DATETIME('now'))", (document, y))
        conn.commit()
    finally:
        conn.close()


# Create the Flask application instance; the @app.route decorators below
# register the view functions on it.
app = Flask(__name__)

@app.route('/', methods=['GET', 'POST'])
def index():
    """Render 'default.html'.

    On a POST that carries a 'review' field, its value is handed to the
    template as ``form``; otherwise ``form`` is None.
    """
    posted_review = None
    if request.method == 'POST':
        posted_review = request.form.get('review')
    return render_template('default.html', form=posted_review)


@app.route('/results', methods=['POST'])
def results():
    """Classify the submitted review and render 'results.html'.

    The template receives the review text (``content``), the predicted label
    (``prediction``) and the probability as a percentage rounded to two
    decimals (``probability``).
    """
    # The route only accepts POST, so the former method check was always true
    # and the fallback return after it (which referenced an undefined `name`)
    # could never run; both have been removed.
    review = request.form['review']
    y, proba = classify(review)
    return render_template('results.html',
                           content=review,
                           prediction=y,
                           probability=round(proba*100, 2))

@app.route('/bye', methods=['POST'])
def feedback():
    """Record the user's verdict on a prediction and render 'bye.html'.

    Reads 'feedback_button', 'review' and 'prediction' from the posted form.
    The predicted label is turned back into 0/1 and flipped when the user
    marked it 'Incorrect'; the resulting label is used to update the
    classifier and is stored in the database.
    """
    # Local import: abort is only needed for the validation below.
    from flask import abort

    # Named `verdict` so the local no longer shadows this view function.
    verdict = request.form['feedback_button']
    review = request.form['review']
    prediction = request.form['prediction']

    inv_label = {'negative': 0, 'positive': 1}
    # The value comes from the client and may be tampered with; answer with
    # 400 Bad Request instead of failing with a KeyError (HTTP 500).
    if prediction not in inv_label:
        abort(400)
    y = inv_label[prediction]
    if verdict == 'Incorrect':
        y = 1 - y
    train(review, y)
    sqlite_entry(db, review, y)
    return render_template('bye.html')

if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off by default
    # and opt in explicitly for local development with FLASK_DEBUG=1.
    app.run(debug=os.environ.get('FLASK_DEBUG', '0') == '1')

In [None]:
# Run the script written by the %%writefile cell; it starts the Flask server,
# so this cell keeps running until the server process is stopped.
!python mywebapp.py

# Updating the movie review classifier

In [None]:
# Define a function to update the classifier with the data stored in the local SQLite database
import pickle
import sqlite3
import numpy as np

# Vectorizer used to turn the stored reviews into feature vectors before the
# classifier is updated. It relies on the `tokenizer` function and the
# HashingVectorizer import from the earlier cells.
vect = HashingVectorizer(
    tokenizer=tokenizer,
    preprocessor=None,
    n_features=2**21,
    decode_error='ignore',
)

def update_model(db_path, model, batch_size=10000):
    """Incrementally train ``model`` on every row of the ``review_db`` table.

    Parameters
    ----------
    db_path : str
        Path of the SQLite database file.
    model : estimator
        Object exposing ``partial_fit(X, y, classes=...)``; updated in place.
    batch_size : int, default 10000
        Number of rows fetched and fitted per step.

    Returns
    -------
    None
        The model passed in is modified in place.
    """
    # Both sentiment classes are passed on every call so a batch that contains
    # only one of them is still accepted by partial_fit.
    classes = np.array([0, 1])

    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute('SELECT * from review_db')

        results = c.fetchmany(batch_size)
        while results:
            # Column 0 is the review text, column 1 the sentiment label.
            X = [row[0] for row in results]
            y = np.array([row[1] for row in results]).astype(int)

            X_train = vect.transform(X)
            # Train the estimator that was passed in; the previous code used
            # the global `clf` here and silently ignored the `model` argument.
            model.partial_fit(X_train, y, classes=classes)
            results = c.fetchmany(batch_size)
    finally:
        # Release the connection even if reading or fitting fails.
        conn.close()
    return None

In [None]:
import os
cur_dir = os.getcwd()

# Load the current classifier. The with-blocks in this cell close the pickle
# files, which the previous bare open() calls left open.
# NOTE: pickle.load() can execute arbitrary code -- only load trusted files.
model_path = os.path.join(cur_dir, 'model/pickles', 'classifier.pkl')
with open(model_path, 'rb') as clf_file:
    clf = pickle.load(clf_file)
db = os.path.join(cur_dir, 'reviews.sqlite')

update_model(db_path=db, model=clf, batch_size=10000)

# update your classifier.pkl file
with open(model_path, 'wb') as clf_file:
    pickle.dump(clf, clf_file, protocol=4)