# E9-2 My Web App

This notebook demonstrates the procedures needed for creating and deploying web application on Flask in Python.
The aplication runs a machine learning model, built in another application. Afgter mnew predictions have been made, it offers storing the new data into an SQLite database.

In [1]:
import pickle
import os
import re
import pandas as pd

from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import SGDClassifier

In [2]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to C:\Users\Andreas Heick
[nltk_data]     Laptop\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:

from nltk.corpus import stopwords
stop = stopwords.words('english')

In [4]:
# load the data
df = pd.read_csv('./movie_data_small.csv', encoding='utf-8')

In [5]:
df.shape


(101, 2)

In [6]:
df.head()

Unnamed: 0,review,sentiment
0,"In 1974, the teenager Martha Moxley (Maggie Gr...",1
1,OK... so... I really like Kris Kristofferson a...,0
2,"***SPOILER*** Do not read this, if you think a...",0
3,hi for all the people who have seen this wonde...,1
4,"I recently bought the DVD, forgetting just how...",0


### We have a model already created 

In [7]:
# This model is trained by Stochastic Gradient Descent classifier
# classifier = SGDClassifier(loss='log', random_state=1, max_iter=1)
# X_train = df['review'].values
# y_train = df['sentiment'].values

# X_train = vect.transform(X_train)
# classifier.fit(X_train, y_train)

## To Export the Model

### 1. Create directory on the local storage

In [8]:
# Create dir and subdir for pickled objects (export of the built model)
dest = os.path.join('model', 'pickles')
if not os.path.exists(dest):
    os.makedirs(dest)

In [9]:
open(os.path.join(dest, 'classifier.pkl'), 'r')

<_io.TextIOWrapper name='model\\pickles\\classifier.pkl' mode='r' encoding='cp1252'>

### 2. Store the model and other supplementary files
Here we have stored the model __classifier__ and the stop-words dictionnaire __stop__ in one file each.

In [10]:
# serialize the model
pickle.dump(stop, open(os.path.join(dest, 'stopwords.pkl'), 'wb'), protocol=4)
# pickle.dump(classifier, open(os.path.join(dest, 'classifier.pkl'), 'wb'), protocol=4)

## To Load the Model in Another Application

In [1]:
# load and reuse the pickles
from sklearn.feature_extraction.text import HashingVectorizer
import re
import os
import pickle

cur_dir = os.getcwd()

# Read the stored files from the directory
stop = pickle.load(open(os.path.join('model', 'pickles', 'stopwords.pkl'), 'rb'))
classifier = pickle.load(open(os.path.join('model', 'pickles', 'classifier.pkl'), 'rb'))



In [2]:
# Preprocess the text
def tokenizer(text):
    text = re.sub('<[^>]*>', '', text)
    emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)', text.lower())
    text = re.sub('[\W]+', ' ', text.lower()) + ' '.join(emoticons).replace('-', '')
    tokenized = [w for w in text.split() if w not in stop]
    return tokenized

# Converts it into word vector
vect = HashingVectorizer(decode_error='ignore',n_features=2**21, preprocessor=None, tokenizer=tokenizer)


In [3]:
# Reuse the restored model for new prediction
import numpy as np
label = {0:'negative', 1:'positive'}

# predict() returns predicted label
# predict_proba(X) returns probability

example1 = ['Nice movie']
X = vect.transform(example1)
print('Prediction 1: %s\nProbability 1: %.2f%%' %(label[classifier.predict(X)[0]], np.max(classifier.predict_proba(X))*100))

example2 = ['Terrible film']
X = vect.transform(example2)
print('Prediction 2: %s\nProbability 2: %.2f%%' %(label[classifier.predict(X)[0]], np.max(classifier.predict_proba(X))*100))

Prediction 1: positive
Probability 1: 58.55%
Prediction 2: negative
Probability 2: 93.44%


# Create Database

In [4]:
# check current directory
os.getcwd()

'C:\\Users\\Andreas Heick Laptop\\Documents\\VisualStudio Projects\\SOFT - 1 Semester\\Artificial Intelligence\\Week 14'

In [5]:
import sqlite3
import os

if os.path.exists('MyReviewDB.sqlite'):
    os.remove('MyReviewDB.sqlite')

In [6]:
# create connection
conn = sqlite3.connect('MyReviewDB.sqlite')

# create cursor
c = conn.cursor()

In [7]:
# execute commands to create and read some test data
c.execute('CREATE TABLE review_db (review TEXT, sentiment INTEGER, date TEXT)')

example1 = 'I love this movie'
c.execute("INSERT INTO review_db (review, sentiment, date) VALUES (?, ?, DATETIME('now'))", (example1, 1))

example2 = 'I hate this movie'
c.execute("INSERT INTO review_db (review, sentiment, date) VALUES (?, ?, DATETIME('now'))", (example2, 0))

<sqlite3.Cursor at 0x20c43858880>

In [8]:
conn.commit()
conn.close()

## Test DB

In [11]:
# Open DB
conn = sqlite3.connect('MyReviewDB.sqlite')
c = conn.cursor()

c.execute("SELECT * FROM review_db WHERE date BETWEEN '2018-01-01 10:10:10' AND DATETIME('now')")
results = c.fetchall()

conn.close()

In [12]:
print(results)

[('I love this movie', 1, '2020-04-01 12:34:14'), ('I hate this movie', 0, '2020-04-01 12:34:14')]


In [13]:
len(results)

2

# Develop Web App

In [14]:
%%writefile mywebapp.py
# define app that will be deployed on a server and save it in a file
# class ReviewForm(Form):
#    moviereview = TextAreaField('', [validators.DataRequired(), validators.length(min=15)])

# import class Flask
from flask import Flask, render_template, request
from wtforms import Form, TextAreaField, validators
import sqlite3
import numpy as np

# load and reuse the pickles
from sklearn.feature_extraction.text import HashingVectorizer
import re
import os
import pickle

cur_dir = os.getcwd()
stop = pickle.load(open(os.path.join('model', 'pickles', 'stopwords.pkl'), 'rb'))

def tokenizer(text):
    text = re.sub('<[^>]*>', '', text)
    emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)', text.lower())
    text = re.sub('[\W]+', ' ', text.lower()) \
                   + ' '.join(emoticons).replace('-', '')
    tokenized = [w for w in text.split() if w not in stop]
    return tokenized

# converts document into word vector
vect = HashingVectorizer(decode_error='ignore',
                         n_features=2**21,
                         preprocessor=None,
                         tokenizer=tokenizer)
classifier = pickle.load(open(
                os.path.join('model', 
                'pickles', 
                'classifier.pkl'), 'rb'))

db = os.path.join(os.getcwd(), 'reviews.sqlite')

def classify(document):
    label = {0: 'negative', 1: 'positive'}
    X = vect.transform([document])
    y = classifier.predict(X)[0]
    proba = np.max(classifier.predict_proba(X))
    return label[y], proba

def train(document, y):
    X = vect.transform([document])
    classifier.partial_fit(X, [y])

def sqlite_entry(path, document, y):
    conn = sqlite3.connect(path)
    c = conn.cursor()
    c.execute("INSERT INTO review_db (review, sentiment, date)"\
    " VALUES (?, ?, DATETIME('now'))", (document, y))
    conn.commit()
    conn.close()


# create an instance (our app)
app = Flask(__name__)

@app.route('/', methods=['GET', 'POST'])
def index():
    form = None
    if request.method == 'POST' and 'review' in request.form:
        form = request.form['review']
    return render_template('default.html', form=form)


@app.route('/results', methods=['POST'])
def results():
    form = request.form
    if request.method == 'POST':
        review = request.form['review']
        y, proba = classify(review)
        return render_template('results.html', content=review, prediction=y, probability=round(proba*100, 2))
    return render_template('results.html', name=name)

@app.route('/bye', methods=['POST'])
def feedback():
    feedback = request.form['feedback_button']
    review = request.form['review']
    prediction = request.form['prediction']

    inv_label = {'negative': 0, 'positive': 1}
    y = inv_label[prediction]
    if feedback == 'Incorrect':
        y = int(not(y))
    train(review, y)
    sqlite_entry(db, review, y)
    return render_template('bye.html')

if __name__ == '__main__':
    app.run(debug=True)

Writing mywebapp.py


In [16]:
#!pip3 install --upgrade pip
#!pip3 install --upgrade Flask
#!pip3 install numpy
!pip install sklearn

Collecting sklearn
  Downloading sklearn-0.0.tar.gz (1.1 kB)
Building wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py): started
  Building wheel for sklearn (setup.py): finished with status 'done'
  Created wheel for sklearn: filename=sklearn-0.0-py2.py3-none-any.whl size=1320 sha256=cc723641cdf0191499209152211fa2f523bfa333f917196a36dbf4b2d6320c25
  Stored in directory: c:\users\andreas heick laptop\appdata\local\pip\cache\wheels\46\ef\c3\157e41f5ee1372d1be90b09f74f82b10e391eaacca8f22d33e
Successfully built sklearn
Installing collected packages: sklearn
Successfully installed sklearn-0.0


In [None]:
!python mywebapp.py
# !FLASK_APP=mywebapp.py flask run

# Updating the movie review classifier

In [None]:
# Define a function to update the classifier with the data stored in the local SQLite database
import pickle
import sqlite3
import numpy as np

# import HashingVectorizer from local dir
# from vectorizer import vect

# converts document into word vector
vect = HashingVectorizer(decode_error='ignore',
                         n_features=2**21,
                         preprocessor=None,
                         tokenizer=tokenizer)

def update_model(db_path, model, batch_size=10000):

    conn = sqlite3.connect(db_path)
    c = conn.cursor()
    c.execute('SELECT * from review_db')
    
    results = c.fetchmany(batch_size)
    
    while results:
        data = np.array(results)
        X = data[:, 0]
        y = data[:, 1].astype(int)
    
        classes = np.array([0, 1])
        X_train = vect.transform(X)
        clf.partial_fit(X_train, y, classes=classes)
        results = c.fetchmany(batch_size)
    
    conn.close()
    return None

In [None]:
import os
cur_dir = os.getcwd()

clf = pickle.load(open(os.path.join(cur_dir, 'model/pickles', 'classifier.pkl'), 'rb'))
db = os.path.join(cur_dir, 'reviews.sqlite')

update_model(db_path=db, model=clf, batch_size=10000)

# update your classifier.pkl file
pickle.dump(clf, open(os.path.join(cur_dir, 'model/pickles', 'classifier.pkl'), 'wb') , protocol=4)