## Read in Data + Install Packages

In [None]:
!pip install tensorflow tensorflow-gpu pandas matplotlib sklearn

In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf

In [None]:
# Load the training CSV. The path resolves to toxic_speech_data/train.csv/train.csv:
# the middle component is a directory that is itself named 'train.csv'.
df = pd.read_csv(os.path.join('toxic_speech_data','train.csv', 'train.csv'))

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Print the frame summary (columns, dtypes, non-null counts).
df.info()

## Data Preprocessing

In [None]:
from tensorflow.keras.layers import TextVectorization

In [None]:
# Every column from the third onward forms the label matrix; the
# 'comment_text' column is the raw model input.
label_columns = df.columns[2:]
labels = df[label_columns].values
data = df['comment_text']

In [None]:
# Peek at the raw comment text.
data.head()

In [None]:
# First five rows of the label matrix.
labels[:5]

In [None]:
# Confirm the container types of the labels and the inputs.
print(f'Data type of labels: {type(labels)}')
print(f'Data type of data: {type(data)}')

In [None]:
# Vocabulary size cap: passed to TextVectorization as `max_tokens`, and used
# (plus one) as the number of rows in the Embedding layer.
MAX_FEATURES = 10000

In [None]:
# Turn raw text into integer token ids, with the vocabulary capped at
# MAX_FEATURES and every output sequence fixed to 1000 ids.
vectorizer = TextVectorization(
    max_tokens=MAX_FEATURES,
    output_mode='int',
    output_sequence_length=1000,
)

In [None]:
# Display the layer object.
vectorizer

In [None]:
# Build the vocabulary from the full set of comments.
vectorizer.adapt(data.values)

In [None]:
# Sanity check: the first four token ids the vectorizer produces for a short sentence
vectorizer('Hello, it is me')[:4]

In [None]:
# Vectorize every comment in a single call
vectorized_text = vectorizer(data.values)

In [None]:
# Inspect the first five vectorized comments.
vectorized_text[:5]

In [None]:
# tf.data input pipeline: from_tensor_slices -> cache -> shuffle -> batch -> prefetch
dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, labels))
dataset = dataset.cache()
# Shuffle once and keep that order on every pass. With the default
# reshuffle_each_iteration=True the element order changes each time the
# dataset is iterated, so the position-based take()/skip() splits built from
# it would select different examples on every pass and leak held-out data
# into training.
dataset = dataset.shuffle(160000, reshuffle_each_iteration=False)
dataset = dataset.batch(16)
dataset = dataset.prefetch(8)

In [None]:
# 70% / 15% / 15% split, counted in batches.
# NOTE: take()/skip() select batches by position, so the three splits are
# disjoint only if `dataset` yields batches in the same order each time it
# is iterated.
n_batches = len(dataset)
train_batches = int(n_batches * .7)
holdout_batches = int(n_batches * .15)

train = dataset.take(train_batches)
val = dataset.skip(train_batches).take(holdout_batches)
test = dataset.skip(int(n_batches * .85)).take(holdout_batches)

## Building Sequential Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding

In [None]:
# Text classifier: embedding -> bidirectional LSTM -> dense stack -> 6 sigmoid outputs
model = Sequential([
    # Embedding table with MAX_FEATURES+1 rows of 32-dimensional vectors
    Embedding(MAX_FEATURES+1, 32),
    # Sequence encoder that reads the comment in both directions
    Bidirectional(LSTM(32, activation='tanh')),
    # Fully connected feature extractor
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    # Output layer: six sigmoid units
    Dense(6, activation='sigmoid'),
])

In [None]:
# Binary cross-entropy loss over the sigmoid outputs, optimized with Adam.
model.compile(loss='BinaryCrossentropy', optimizer='Adam')

In [None]:
# Print the layer-by-layer summary.
model.summary()

In [None]:
# Train for a single epoch on `train`, validating on `val`.
history = model.fit(train, epochs=1, validation_data=val)

In [None]:
# Values recorded by the fit call above.
history.history

## Test Predictions

In [None]:
# Vectorize one hand-written comment for a spot check.
input_text = vectorizer('I hate you stupid man.')

In [None]:
# Wrap the single vectorized comment in a list so the model receives a batch of one.
res = model.predict(np.array([input_text]))

In [None]:
# Label names, from the same columns used to build `labels`.
df.columns[2:]

In [None]:
# Threshold the scores at 0.5 to get 0/1 flags.
(res > 0.5).astype(int)

In [None]:
# Pull a single batch from the test split.
batch_X, batch_y = test.as_numpy_iterator().next()

In [None]:
# Thresholded predictions for that batch.
(model.predict(batch_X) > 0.5).astype(int)

In [None]:
# Shape of the single-comment prediction computed earlier.
res.shape

## Evaluation

In [None]:
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy

In [None]:
# Streaming metrics that are accumulated batch by batch over the test split.
pre = Precision()
re = Recall()
# Each output is an independent 0/1 decision, so accuracy has to be computed
# element-wise. CategoricalAccuracy compares argmax positions (it targets
# one-hot, single-label problems) and is meaningless on the flattened
# multi-label vectors fed to these metrics.
acc = tf.keras.metrics.BinaryAccuracy()

In [None]:
# Run every test batch through the model and fold the results into the metrics.
for X_true, y_true in test.as_numpy_iterator():
    yhat = model.predict(X_true)

    # Collapse labels and predictions to 1-D before updating the metrics
    flat_true = y_true.flatten()
    flat_pred = yhat.flatten()

    for metric in (pre, re, acc):
        metric.update_state(flat_true, flat_pred)

In [None]:
# Report the metric values accumulated over the test batches.
print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')

In [None]:
# Persist the trained model to demo.h5.
model.save('demo.h5')

## Setup Gradio

In [None]:
# Install Gradio (and jinja2) for the interactive demo built below.
!pip install gradio jinja2

In [None]:
import gradio as gr

In [None]:
def score_comment(comment):
    """Score one comment and return a text report with one line per label.

    The comment is vectorized as a batch of one and passed to the model. For
    each label column (``df.columns[2:]``) the report holds a line of the form
    ``<label>: True`` or ``<label>: False``, where the flag is whether that
    label's score is above 0.5. Every line ends with a newline.
    """
    batch = vectorizer([comment])
    scores = model.predict(batch)[0]

    lines = [
        '{}: {}\n'.format(label, scores[pos]>0.5)
        for pos, label in enumerate(df.columns[2:])
    ]
    return ''.join(lines)

In [None]:
# Text-in / text-out demo wrapped around score_comment.
# The legacy `gr.inputs` namespace is not available in current Gradio releases
# (and the install above is unpinned); the component is exposed as `gr.Textbox`.
interface = gr.Interface(
    fn=score_comment,
    inputs=gr.Textbox(lines=2, placeholder='Comment to score'),
    outputs='text',
)

In [None]:
# Start the demo app. NOTE(review): share=True presumably requests a publicly
# reachable link from Gradio — confirm this is intended before running.
interface.launch(share=True)