In [884]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import random
import json
from sklearn.preprocessing import OneHotEncoder

In [885]:
# Define hyper parameters
oov_token = "<OOV>"
max_words = 20
embedding_dim = 16
vocab_size = 5000

In [None]:
# Prepare data
with open("./sarcasm.json", 'r') as f:
    datastore = json.load(f)

sentences = []
labels = []
for item in datastore:
    sentences.append(item['headline'])
    labels.append(item['is_sarcastic'])
    print(item)

In [887]:
# Use tokenizer
tokenizer = Tokenizer(vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentences)

In [888]:
# Use pad_sequences
padded_sequences = pad_sequences(sequences, maxlen=max_words)

In [889]:
# Split train/test
train_ratio = .6
sequences_len = len(padded_sequences)

dataset = [[info[0], info[1]] for info in zip(padded_sequences, labels)]
random.shuffle(dataset)

x_train = np.asarray(list(map(lambda x: x[0], dataset[:int(sequences_len*train_ratio)])))
x_test = np.asarray(list(map(lambda x: x[0], dataset[int(sequences_len*train_ratio):])))
y_train = np.asarray(list(map(lambda y: y[1], dataset[:int(sequences_len*train_ratio)])))
y_test = np.asarray(list(map(lambda y: y[1], dataset[int(sequences_len*train_ratio):])))

In [898]:
# Create RNN with Embedding and two layer LSTM
model = tf.keras.models.Sequential(layers=
    [
        tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_words),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(24, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ]
)

In [899]:
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])

In [900]:
model.summary()

Model: "sequential_83"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_98 (Embedding)    (None, 20, 16)            80000     
                                                                 
 bidirectional_34 (Bidirecti  (None, 20, 128)          41472     
 onal)                                                           
                                                                 
 bidirectional_35 (Bidirecti  (None, 128)              98816     
 onal)                                                           
                                                                 
 dropout_21 (Dropout)        (None, 128)               0         
                                                                 
 dense_163 (Dense)           (None, 24)                3096      
                                                                 
 dropout_22 (Dropout)        (None, 24)              

In [901]:
class myCallBack(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, log={}):
        if log.get('accuracy') > 0.91 and log.get('val_accuracy') > 0.84:
            self.model.stop_training = True

mycallback = myCallBack()
model.fit(x_train, y_train, epochs=1000, validation_data=(x_test, y_test), callbacks=[mycallback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000


<keras.callbacks.History at 0x190656550>

In [902]:
new_test = ['I like you', 'I feel bad']
# tokenized_test = tokenizer.texts_to_sequences(new_test)
# padded_test = pad_sequences(tokenized_test, max_words)
prediction=model.predict(x_test)

model.metrics
tp = 0 
tn = 0
fp = 0
fn = 0
for info in zip(prediction.round(), y_test):
    if info[1] == 1:
        if info[0] == 1:
            tp += 1
        if info[0] == 0:
            fp += 1
    if info[1] == 0:
        if info[0] == 0:
            tn += 1
        if info[0] == 1:
            fp += 1

prec = tp/(tp + fn)
recall = tp/(tp + fp)

print('prec:', prec, 'recall:', recall, 'accuracy:', (tp + tn)/(tp + tn + fp + fn))

print(fn, fp)

prec: 1.0 recall: 0.6929507885236557 accuracy: 0.8487457880943466
0 1616


In [None]:
import io
import uvicorn
import numpy as np
import nest_asyncio
from enum import Enum
from fastapi import FastAPI, HTTPException
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse

# Assign an instance of the FastAPI class to the variable "app".
# You will interact with your api using this instance.
app = FastAPI(title='Deploying a ML Model with FastAPI')

# By using @app.get("/") you are allowing the GET method to work for the / endpoint.
@app.get("/")
def home():
    return "Welcome to the sarcasm predictor. Now head over to http://localhost:8000/docs."

# This endpoint handles all the logic necessary for the object detection to work.
# It requires the desired model and the image in which to perform object detection.
@app.post("/predict") 
def prediction(sentence: str):

    # PREPROCESS DATA
    sequence = tokenizer.texts_to_sequences([sentence])
    padding = pad_sequences(sequence, maxlen=max_words)
    
    # RUN PREDICTION MODEL
    prediction = model.predict(padding).round()
    
    # RETURN PREDICTION
    if prediction == 0:
        return "Not Sarcastic"
    if prediction == 1:
        return "Sarcastic"
        

# Allows the server to be run in this interactive environment
nest_asyncio.apply()

# Host depends on the setup you selected (docker or virtual env)
host = "127.0.0.1"

# Spin up the server!    
uvicorn.run(app, host=host, port=8000)

INFO:     Started server process [75404]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:50311 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:50311 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     127.0.0.1:50312 - "POST /predict?sentence=stock%20analysts%20confused%2C%20frightened%20by%20boar%20market HTTP/1.1" 200 OK
INFO:     127.0.0.1:50312 - "POST /predict?sentence=stock%20analysts%20confused%2C%20frightened%20by%20boar%20market HTTP/1.1" 200 OK
INFO:     127.0.0.1:50320 - "POST /predict?sentence=bloomberg%27s%20program%20to%20build%20better%20cities%20just%20got%20bigger HTTP/1.1" 200 OK
