# 0. Install Dependencies and Bring in Data

In [None]:
!pip install tensorflow tensorflow-gpu pandas matplotlib sklearn

In [None]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np

In [None]:
# Join the dataset folder and file names into a relative path. The result is
# only displayed; it is not assigned to anything.
os.path.join('jigsaw-toxic-comment-classification-challenge', 'train.csv', 'train.csv')

In [None]:

# Read the training CSV into a DataFrame.
# NOTE(review): the directory is a hard-coded absolute Windows path, so this
# cell only works on a machine with the same layout.
csv_path = os.path.join(
    r'E:\CommentToxicity\jigsaw-toxic-comment-classification-challenge',
    'train.csv',
    'train.csv',
)
df = pd.read_csv(csv_path)

In [None]:
# Preview the first rows of the loaded DataFrame.
df.head()

# 1. Preprocess

In [None]:
# Preview the last rows of the DataFrame.
df.tail()

In [None]:
# Show the raw comment text stored in the row at position 3.
df.iloc[3]['comment_text']

In [None]:
# Show the values of the columns from index 2 onward for the row at position 5.
df[df.columns[2:]].iloc[5]

In [None]:
# Preview the rows whose 'toxic' column equals 1.
df[df['toxic']==1].head()

In [None]:
# Text vectorization layer used for preprocessing
from tensorflow.keras.layers import TextVectorization


In [None]:

# Features are the raw comment strings; targets are every column from
# index 2 onward, taken as a NumPy array.
X = df.loc[:, 'comment_text']
y = df.loc[:, df.columns[2:]].to_numpy()


In [None]:
# List every column name in the DataFrame.
df.columns

In [None]:
# Column names from index 2 onward: the same slice that is used to build y.
df.columns[2:]

In [None]:
# The DataFrame restricted to the columns from index 2 onward.
df[df.columns[2:]]

In [None]:
# The same columns as a NumPy array (this is the expression assigned to y).
df[df.columns[2:]].values


In [None]:
# Vocabulary size cap; passed to TextVectorization as max_tokens.
MAX_FEATURES = 200_000

In [None]:
# Text-to-integer layer: at most MAX_FEATURES tokens in the vocabulary, and
# every comment is emitted as a sequence of exactly 1800 integer ids.
vectorizer = TextVectorization(
    max_tokens=MAX_FEATURES,
    output_mode='int',
    output_sequence_length=1800,
)


In [None]:
# Fit the vectorizer's vocabulary on the raw comment strings.
vectorizer.adapt(X.values)


In [None]:
# Convert every comment into its integer token-id sequence.
vectorized_text = vectorizer(X.values)

In [None]:
# Inspect the vectorized comments.
vectorized_text

In [None]:
# Build the tf.data input pipeline from the vectorized comments and the labels.
dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
dataset = dataset.cache()
# Shuffle with a fixed order. The train/val/test partitions are carved out of
# this dataset with take()/skip(); if it reshuffled on every iteration (the
# default), examples would move between partitions each pass and training
# data would leak into the validation and test sets.
dataset = dataset.shuffle(160000, reshuffle_each_iteration=False)
dataset = dataset.batch(16)  # 16 examples per batch
dataset = dataset.prefetch(8)  # prepare up to 8 batches ahead of the consumer


In [None]:
# Pull one batch from the pipeline as NumPy arrays to inspect it.
dataset.as_numpy_iterator().next()

In [None]:
# Unpack one batch into features and labels and look at their shapes.
# NOTE(review): a notebook cell presumably echoes only its last expression, so
# batch_X.shape is evaluated but likely not shown; confirm.
batch_X, batch_Y = dataset.as_numpy_iterator().next()
batch_X.shape

batch_Y.shape


In [None]:
# Number of batches that make up 70% of the dataset, truncated to an int.
int(len(dataset)*.7) 

In [None]:
# Partition the batched dataset by batch count: the first ~70% for training,
# the next ~20% for validation, and the final ~10% for testing.
n_batches = len(dataset)
train_end = int(n_batches * .7)
train = dataset.take(train_end)
val = dataset.skip(train_end).take(int(n_batches * .2))
test = dataset.skip(int(n_batches * .9)).take(int(n_batches * .1))

# 2. Create Sequential Model

In [None]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding 


In [None]:
# Sequential classifier: token embedding -> bidirectional LSTM -> three dense
# feature layers -> six sigmoid outputs.
model = Sequential([
    # One 32-dimensional vector per token id; the table has MAX_FEATURES+1 rows.
    Embedding(MAX_FEATURES+1, 32),
    # Reads the sequence in both directions with a 32-unit tanh LSTM.
    Bidirectional(LSTM(32, activation='tanh')),
    # Fully connected feature extractor: 128 -> 256 -> 128 units.
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    # Six independent sigmoid outputs, each in the range 0..1.
    Dense(6, activation='sigmoid'),
])

In [None]:
# Configure training with binary cross-entropy loss and the Adam optimizer.
# No metrics argument is given.
model.compile(loss='BinaryCrossentropy', optimizer='Adam')

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Train for one epoch on the training split and evaluate on the validation
# split at the end of the epoch. The keyword is validation_data; the previous
# spelling "validati-on_data" was a syntax error.
history = model.fit(train, epochs=1, validation_data=val)

In [None]:
# Inspect the values recorded by fit().
history.history 


In [None]:
from matplotlib import pyplot as plt


In [None]:
# Plot the recorded training history. DataFrame.plot() opens its own figure, so
# the size is passed to it directly; a separate plt.figure(figsize=...) call
# would only leave an empty extra figure and the size would be ignored.
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.show()

In [None]:
# Grab one batch from the test split.
batch = test.as_numpy_iterator().next()

In [None]:
# Unpack one test batch into features (batch_X) and labels (batch_Y).
batch_X,batch_Y = test.as_numpy_iterator().next()

In [None]:
# Ground-truth labels of the batch.
batch_Y

In [None]:
# Model outputs for the batch, thresholded at 0.5 and cast to 0/1 integers.
(model.predict(batch_X) >0.5).astype(int)

In [None]:
# Fetch one batch from the test split and display it.
test.as_numpy_iterator().next()

# 3. Make Predictions

In [None]:
# Vectorize a single hand-written comment with the fitted vectorizer.
input_text = vectorizer('You freaking suck! I am going to hit you.')

In [None]:
# Column names from index 2 onward.
df.columns[2:]

In [None]:
# Inspect the token ids produced for the sample comment.
input_text

In [None]:
# Add a leading axis so the single sample forms a batch of one, then predict.
model.predict(np.expand_dims(input_text,0))

In [None]:
# Run the same prediction again and keep the result in res.
res = model.predict(np.expand_dims(input_text,0))

In [None]:
# Shape of the prediction array.
res.shape

# 4. Evaluate Model

In [None]:
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy#import some metrics

In [None]:
# Streaming metrics that are accumulated batch by batch over the test split.
pre = Precision()
re = Recall()
# The targets are independent 0/1 labels that get flattened before scoring, so
# accuracy has to be measured per label at a 0.5 threshold. CategoricalAccuracy
# compares argmax positions, which is meaningless for this multi-label data.
acc = tf.keras.metrics.BinaryAccuracy()

In [None]:
# Stream the test split through the model and accumulate the three metrics.
for batch in test.as_numpy_iterator():
    X_true, y_true = batch
    # Flatten labels and predictions to 1-D so that every individual label
    # value is scored as one sample.
    y_true = y_true.flatten()
    yhat = model.predict(X_true).flatten()

    for metric in (pre, re, acc):
        metric.update_state(y_true, yhat)

In [None]:
# Report the accumulated metrics. Adjacent f-string literals are joined by the
# parser; a single-quoted string cannot span several lines, which is why the
# previous form was a syntax error.
print(
    f'Precision: {pre.result().numpy()}, '
    f'Recall:{re.result().numpy()}, '
    f'Accuracy:{acc.result().numpy()}'
)

# 5. Test and Gradio

In [None]:
# Install Gradio and jinja2 for the interactive demo built in this section.
!pip install gradio jinja2

In [None]:
import tensorflow as tf
import gradio as gr

In [None]:
# Save the trained model to 'toxicity.h5' in the current working directory.
model.save('toxicity.h5')

In [None]:
# Reload the model from 'toxicity.h5', rebinding the name `model`.
model = tf.keras.models.load_model('toxicity.h5')

In [None]:
# Vectorize another sample comment.
input_str = vectorizer('hey i freaken hate you!')

In [None]:
# Predict on the single sample after adding a leading batch axis.
res = model.predict(np.expand_dims(input_str,0))

In [None]:
# Column names from index 2 onward.
df.columns[2:]

In [None]:
# Inspect the prediction array.
res

In [None]:
# Column names from index 2 onward; score_comment iterates over the same slice.
df.columns[2:]

In [None]:
def score_comment(comment):
    """Return a text report with one ``<column>: <True/False>`` line per label.

    The comment is vectorized with the module-level ``vectorizer`` and scored
    by the module-level ``model``. A label is reported as True when its
    predicted value is greater than 0.5.

    Args:
        comment: The raw comment string to score.

    Returns:
        A newline-terminated line for each column of ``df`` from index 2
        onward, in column order.
    """
    # Wrap the comment in a list so the vectorizer produces a batch of one.
    scores = model.predict(vectorizer([comment]))[0]
    report_lines = [
        '{}: {}\n'.format(label, scores[position]>0.5)
        for position, label in enumerate(df.columns[2:])
    ]
    return ''.join(report_lines)

In [None]:
# Gradio UI: a two-line textbox feeds score_comment and the returned report is
# shown as plain text. The component is created with gr.Textbox because the
# legacy gr.inputs namespace no longer exists in current Gradio releases.
interface = gr.Interface(
    fn=score_comment,
    inputs=gr.Textbox(lines=2, placeholder='Comment to score'),
    outputs='text',
)

In [None]:
# Start the Gradio app. share=True asks Gradio for a public share link in
# addition to the local server (per Gradio's launch() documentation).
interface.launch(share=True)