In [4]:
import os
import pickle
import re

from sklearn.feature_extraction.text import HashingVectorizer

# Load the pickled stop-word collection that `tokenizer` filters against.
# NOTE: pickle.load can execute arbitrary code -- only unpickle files that
# come from a trusted source.
with open(
    os.path.join("movieclassifier", "pkl_objects", "stopwords.pkl"), "rb"
) as stop_file:
    # The context manager closes the file handle as soon as loading finishes.
    stop = pickle.load(stop_file)


def tokenizer(text):
    """Strip HTML markup from ``text`` and split it into tokens.

    Words are lowercased and separated on runs of non-word characters.
    Emoticons such as ``:)``, ``;-(`` or ``=D`` are collected before the
    punctuation is discarded and appended after the words, with any ``-``
    removed. Tokens contained in the module-level ``stop`` collection are
    dropped.

    Parameters
    ----------
    text : str
        Raw document text, possibly containing HTML tags.

    Returns
    -------
    list of str
        The remaining tokens, words first and emoticons last.
    """
    # Raw strings keep the regex escapes (\), \(, \W) away from Python's own
    # string-escape processing, which warns about unknown escape sequences.
    text = re.sub(r"<[^>]*>", "", text)
    emoticons = re.findall(r"(?::|;|=)(?:-)?(?:\)|\(|D|P)", text)
    # NOTE(review): no separator is inserted before the emoticons, so when the
    # cleaned text ends in a word character the first emoticon is glued to the
    # last word (e.g. "movie:)"). Left unchanged on purpose: presumably the
    # pickled classifier was trained with this exact tokenization -- confirm
    # before changing it.
    text = re.sub(r"[\W]+", " ", text.lower()) + " ".join(emoticons).replace("-", "")
    return [w for w in text.split() if w not in stop]


# Hashing vectorizer that turns raw documents into sparse feature vectors.
vect = HashingVectorizer(
    tokenizer=tokenizer,  # custom cleaning / stop-word removal defined above
    preprocessor=None,
    n_features=1 << 21,  # 2 ** 21 hash buckets
    decode_error="ignore",
)

In [7]:
# Load the pickled classifier used below for predict / predict_proba.
# NOTE: pickle.load can execute arbitrary code -- only unpickle files that
# come from a trusted source.
with open(
    os.path.join("movieclassifier", "pkl_objects", "classifier.pkl"), "rb"
) as clf_file:
    # The context manager closes the file handle as soon as loading finishes.
    clf = pickle.load(clf_file)

In [12]:
import numpy as np

# Map the classifier's integer classes to readable sentiment names.
label = {0: "negative", 1: "positive"}

example = ["I love this movie"]

# Vectorize the sample review with the hashing vectorizer.
X = vect.transform(example)

# Look up the predicted class name, then take the highest class probability
# and express it as a percentage.
predicted_sentiment = label[clf.predict(X)[0]]
confidence_pct = np.max(clf.predict_proba(X)) * 100

report_template = "Prediction: {pred}\nProbability: {prob:.2f}%"
print(report_template.format(pred=predicted_sentiment, prob=confidence_pct))

Prediction: positive
Probability: 88.25%


In [14]:
import os
import sqlite3

# Start from a fresh database file so that re-running this cell does not fail
# on CREATE TABLE for a table that already exists.
if os.path.exists("reviews.sqlite"):
    os.remove("reviews.sqlite")

example1 = "I love this movie"
example2 = "I disliked this movie"

conn = sqlite3.connect("reviews.sqlite")
try:
    c = conn.cursor()
    c.execute("CREATE TABLE review_db(review TEXT, sentiment INTEGER, date TEXT)")
    # One parameterized statement for both sample rows; SQLite stamps each row
    # with the insertion time via DATETIME('now').
    c.executemany(
        "INSERT INTO review_db(review, sentiment, date) VALUES (?, ?, DATETIME('now'))",
        [(example1, 1), (example2, 0)],
    )
    conn.commit()
finally:
    # Always release the database file, even if a statement above raised.
    conn.close()

In [22]:
# Read back every review dated between 2017-01-01 and the current time as
# reported by SQLite's DATETIME('now').
conn = sqlite3.connect("reviews.sqlite")
try:
    c = conn.cursor()
    c.execute(
        "SELECT * FROM review_db WHERE date BETWEEN '2017-01-01 00:00:00' AND DATETIME('now')"
    )
    results = c.fetchall()
finally:
    # Release the database file even if the query fails.
    conn.close()
print(results)

[('I love this movie', 1, '2022-01-31 09:13:36'), ('I disliked this movie', 0, '2022-01-31 09:13:36')]
