In [None]:
# Import the necessary libraries
import nltk
from nltk.corpus import movie_reviews, wordnet, stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer

import re
import random
import numpy as np

In [None]:
movie_reviews.categories()

In [None]:
movie_reviews.fileids('pos')

In [None]:
movie_reviews.fileids('pos')[0]

In [None]:
movie_reviews.words(movie_reviews.fileids('pos')[0])

In [None]:
# Load the movie reviews dataset: pair every file id with its category,
# then materialise each file's word list next to that category label.
labelled_fileids = (
    (fileid, category)
    for category in movie_reviews.categories()
    for fileid in movie_reviews.fileids(category)
)
documents = [
    (list(movie_reviews.words(review_id)), review_label)
    for review_id, review_label in labelled_fileids
]

In [None]:
documents[0][1], documents[0][0]

In [None]:
documents[-1][1], documents[-1][0]

In [None]:
len(documents), type(documents)

In [None]:
# Keep a 200-document subset: the first 100 and the last 100 entries of the
# list, which was built category by category.
# NOTE(review): presumably this gives 100 reviews per class (one category at
# the head of the list, the other at the tail) — confirm against the order
# returned by movie_reviews.categories().
documents = documents[:100] + documents[-100:]

In [None]:
# Shuffle the documents in place with a fixed seed so that a fresh
# "Restart & Run All" reproduces the same ordering, and therefore the same
# rows in the seeded train/test split further down. A dedicated Random
# instance is used so the global `random` state is left untouched.
random.Random(42).shuffle(documents)

In [None]:
len(documents)

In [None]:
# Module-level token normalisers, created once so they can be shared by the
# text-cleaning code instead of being rebuilt for every review.
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

def clean_review(review, remove_stopwords=False, normalize=None):
    """
    Clean a raw review string.

    Steps always applied:
    1. Replace every character that is not an ASCII letter with a space
    2. Lower-case the text
    3. Tokenize with word_tokenize and re-join the tokens with single spaces

    Optional steps (both off by default, so a plain ``clean_review(text)``
    call behaves exactly as before):
    4. Drop English stop words when ``remove_stopwords`` is true
    5. Stem (``normalize="stem"``) or lemmatize (``normalize="lemma"``)
       each remaining token

    Args:
        review: the review text that will be cleaned
        remove_stopwords: when true, remove tokens found in
            ``stopwords.words("english")``
        normalize: ``None`` (default, no normalisation), ``"stem"`` to use
            the module-level ``stemmer`` or ``"lemma"`` to use the
            module-level ``lemmatizer``
    Returns:
        the cleaned review as a single space-separated string
    Raises:
        ValueError: if ``normalize`` is not ``None``, ``"stem"`` or ``"lemma"``
    """
    if normalize not in (None, "stem", "lemma"):
        raise ValueError('normalize must be None, "stem" or "lemma"')

    letters_only = re.sub("[^A-Za-z]", " ", review)
    tokens = word_tokenize(letters_only.lower())

    if remove_stopwords:
        # Build the stop-word set once per call rather than once per token.
        stop_words = set(stopwords.words("english"))
        tokens = [token for token in tokens if token not in stop_words]

    if normalize == "stem":
        tokens = [stemmer.stem(token) for token in tokens]
    elif normalize == "lemma":
        tokens = [lemmatizer.lemmatize(token) for token in tokens]

    return " ".join(tokens)

In [None]:
t = " ".join(documents[0][0])
t

In [None]:
clean_review(t)

In [None]:
# One cleaned text and one label per (tokens, category) pair, in the same order.
rev_text = [clean_review(" ".join(tokens)) for tokens, _ in documents]
labels = [category for _, category in documents]

In [None]:
# !pip install spacy

In [None]:
# !python -m spacy download en_core_web_sm

In [None]:
# Import the necessary libraries
import spacy

In [None]:
# Load a pre-trained spaCy pipeline; its doc.vector is what the cells below
# use as the document embedding.
# NOTE(review): the small ("sm") English pipeline reportedly ships without
# static word vectors, so doc.vector may not be a true word-embedding
# average — confirm against the spaCy model docs (en_core_web_md/lg include
# vectors).
nlp = spacy.load("en_core_web_sm")

In [None]:
text_data = "This is a sentence.This is another sentence."
doc = nlp(text_data)
doc.vector

In [None]:
# Convert the text data to a matrix of document embeddings
def get_features(rev_text):
    """Embed each text as its spaCy document vector.

    Args:
        rev_text: an iterable of strings. A single bare string is treated as
            one document rather than being iterated character by character.
    Returns:
        A list holding one ``doc.vector`` per input text, in input order
        (an empty list for empty input).
    """
    if isinstance(rev_text, str):
        rev_text = [rev_text]
    # nlp.pipe streams the texts through the pipeline in batches and yields
    # the docs in input order, avoiding one full nlp(text) call per document.
    return [doc.vector for doc in nlp.pipe(rev_text)]

In [None]:
feature_matrix = get_features(rev_text)

In [None]:
# Collect the per-document vectors into a single NumPy array
X = np.array(feature_matrix)

# Encode the label data numerically: "pos" -> 1, anything else -> 0
y = np.array([int(label_name == "pos") for label_name in labels])

In [None]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets: 20% of the rows are held
# out for testing, and random_state is fixed so the split is repeatable for
# a given X and y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, Flatten, Input

# Define the deep learning model.
# Each row of X is already a dense, real-valued document vector, so it is fed
# straight into the fully connected layers. An Embedding layer is a lookup
# table indexed by integer ids; stacking it on these float features would
# treat the feature values themselves as row indices, so it is not used here.
# (Embedding and Flatten stay imported in case other cells rely on them.)
model = Sequential()
model.add(Input(shape=(X.shape[1],)))
model.add(Dense(units=64, activation="relu"))
model.add(Dropout(rate=0.2))
# Single sigmoid unit: one probability per document for the binary label.
model.add(Dense(units=1, activation="sigmoid"))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Train the model for 100 epochs in batches of 32.
# NOTE(review): the test split is passed as validation_data here and is also
# what model.evaluate scores afterwards, so the final "test" metrics come from
# data that was already monitored during training — consider carving a
# separate validation split out of the training data.
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

In [None]:
# Score the trained model on the held-out test split and report both numbers
test_loss_and_accuracy = model.evaluate(X_test, y_test)
loss, accuracy = test_loss_and_accuracy
print("Test loss:", loss)
print("Test accuracy:", accuracy)

In [None]:
# Apply the same clean_review preprocessing that the training texts went
# through before embedding, so the model sees this input in the form it was
# trained on.
model.predict(np.array(get_features([clean_review("This is a senetcne")])))

## Deploy

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install flask

In [None]:
from flask import Flask, request, jsonify
import numpy as np

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    """Score one review posted as JSON of the form ``{"data": "<text>"}``.

    The text is passed through clean_review before embedding, matching how
    the training texts were prepared.

    Returns:
        ``{"result": ...}`` with model.predict's output as nested lists, or a
        400 response with an ``error`` message when the body is not a JSON
        object carrying a string ``data`` field, or when nothing is left of
        the text after cleaning.
    """
    # silent=True yields None for a missing/invalid JSON body instead of raising.
    payload = request.get_json(silent=True)
    data = payload.get('data') if isinstance(payload, dict) else None
    if not isinstance(data, str):
        return jsonify({'error': "request body must be JSON with a string 'data' field"}), 400

    cleaned = clean_review(data)
    if not cleaned:
        # clean_review keeps letters only, so e.g. digits-only input ends up empty.
        return jsonify({'error': "'data' contains no alphabetic text to score"}), 400

    output = model.predict(np.array(get_features([cleaned])))
    return jsonify({'result': output.tolist()})

In [None]:
# Start the Flask app's built-in server on port 5000 when this code runs as
# the main module.
# NOTE(review): inside a notebook kernel __name__ is presumably "__main__" as
# well, so running this cell would block the kernel until the server is
# stopped — confirm that is intended.
if __name__ == '__main__':
    app.run(port=5000)