## Install Dependencies and bring data

In [143]:
import tensorflow as tf
import pandas as pd
from matplotlib.pyplot import matplotlib as plt
import numpy as np
import os

In [10]:
df = pd.read_csv(os.path.join('Data/train.csv'))

In [96]:
df

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
159566,ffe987279560d7ff,""":::::And for the second time of asking, when ...",0,0,0,0,0,0
159567,ffea4adeee384e90,You should be ashamed of yourself \n\nThat is ...,0,0,0,0,0,0
159568,ffee36eab5c267c9,"Spitzer \n\nUmm, theres no actual article for ...",0,0,0,0,0,0
159569,fff125370e4aaaf3,And it looks like it was actually you who put ...,0,0,0,0,0,0


## Preprocess

In [17]:
from tensorflow.keras.layers import TextVectorization

In [27]:
X = df['comment_text']
y = df[df.columns[2:]].values

In [30]:
X.head(5)

0    Explanation\nWhy the edits made under my usern...
1    D'aww! He matches this background colour I'm s...
2    Hey man, I'm really not trying to edit war. It...
3    "\nMore\nI can't make any real suggestions on ...
4    You, sir, are my hero. Any chance you remember...
Name: comment_text, dtype: object

In [32]:
y

array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]], dtype=int64)

In [34]:
MAX_FEATURES = 200000

In [38]:
vectorizer = TextVectorization(max_tokens=MAX_FEATURES,output_sequence_length=1800,output_mode='int')

In [41]:
vectorizer.adapt(X.values)

In [51]:
vectorizer('hello world i am happy toady')[:6] ## numerical representation of this text

<tf.Tensor: shape=(6,), dtype=int64, numpy=array([  288,   263,     8,    74,   443, 72331], dtype=int64)>

In [53]:
vectorizer_text = vectorizer(X.values)

In [56]:
vectorizer_text

<tf.Tensor: shape=(159571, 1800), dtype=int64, numpy=
array([[  645,    76,     2, ...,     0,     0,     0],
       [    1,    54,  2489, ...,     0,     0,     0],
       [  425,   441,    70, ...,     0,     0,     0],
       ...,
       [32445,  7392,   383, ...,     0,     0,     0],
       [    5,    12,   534, ...,     0,     0,     0],
       [    5,     8,   130, ...,     0,     0,     0]], dtype=int64)>

In [62]:
dataset = tf.data.Dataset.from_tensor_slices((vectorizer_text, y))
dataset = dataset.cache()
dataset = dataset.shuffle(160000)
dataset = dataset.batch(16) ## 16 rows in 1 batch
dataset = dataset.prefetch(8)

In [69]:
batch_X, batch_y = dataset.as_numpy_iterator().next()

In [106]:
batch_X.shape

(16, 1800)

In [99]:
batch_y.shape

(16, 6)

In [115]:
train = dataset.take(int(len(dataset)*.7))
test = dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))
val = dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))

## Create Sequential Model

In [126]:
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Embedding, Dense
from tensorflow.keras import layers, Sequential

In [127]:
model = Sequential()
model.add(Embedding(MAX_FEATURES+1,32))
model.add((LSTM(32,activation='tanh')))
model.add(Dense(128,activation='relu'))
model.add(Dense(256,activation='relu'))
model.add(Dense(128,activation='relu'))
model.add(Dense(6,activation='sigmoid'))

In [131]:
model.compile(loss='binary_crossentropy',optimizer='Adam')

In [132]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, None, 32)          6400032   
                                                                 
 bidirectional_1 (Bidirecti  (None, 64)                16640     
 onal)                                                           
                                                                 
 dense (Dense)               (None, 128)               8320      
                                                                 
 dense_1 (Dense)             (None, 256)               33024     
                                                                 
 dense_2 (Dense)             (None, 128)               32896     
                                                                 
 dense_3 (Dense)             (None, 6)                 774       
                                                      

In [None]:
history = model.fit(train,epochs=10,validation_data=val)

## Make Predictions

In [135]:
input_text = vectorizer('You freaking suck! I am going to hit you.')

In [138]:
res = model.predict(np.expand_dims(input_text,0))



In [141]:
df

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
159566,ffe987279560d7ff,""":::::And for the second time of asking, when ...",0,0,0,0,0,0
159567,ffea4adeee384e90,You should be ashamed of yourself \n\nThat is ...,0,0,0,0,0,0
159568,ffee36eab5c267c9,"Spitzer \n\nUmm, theres no actual article for ...",0,0,0,0,0,0
159569,fff125370e4aaaf3,And it looks like it was actually you who put ...,0,0,0,0,0,0


In [150]:
res.shape

(1, 6)

In [148]:
batch_X, batch_y = test.as_numpy_iterator().next()

In [149]:
(model.predict(batch_X) > 0.5).astype(int)



array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

## Evaluate the model

In [145]:
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy

In [146]:
pre = Precision()
re = Recall()
acc = CategoricalAccuracy()

In [156]:
for batch in test.as_numpy_iterator(): 
    # Unpack the batch 
    X_true, y_true = batch
    # Make a prediction 
    yhat = model.predict(X_true)
    
    # Flatten the predictions
    y_true = y_true.flatten()
    yhat = yhat.flatten()
    
    pre.update_state(y_true, yhat)
    re.update_state(y_true, yhat)
    acc.update_state(y_true, yhat)



In [157]:
print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')

Precision: 0.8044539093971252, Recall:0.7022636532783508, Accuracy:0.4799618422985077


In [165]:
import gradio as gr

In [159]:
model.save('toxicity.h5')

  saving_api.save_model(


In [160]:
model = tf.keras.models.load_model('toxicity.h5')

In [161]:
def score_comment(comment):
    vectorized_comment = vectorizer([comment])
    results = model.predict(vectorized_comment)
    
    text = ''
    for idx, col in enumerate(df.columns[2:]):
        text += '{}: {}\n'.format(col, results[0][idx]>0.5)
    
    return text

In [168]:
interface = gr.Interface(
    fn=score_comment,
    inputs=gr.Textbox(lines=2, placeholder='Comment to score'),
    outputs=gr.Textbox()
)

In [None]:
interface.launch(share=True)

Running on local URL:  http://127.0.0.1:7860


Exception in thread Thread-620 (_do_normal_analytics_request):
Traceback (most recent call last):
  File "c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\httpx\_transports\default.py", line 69, in map_httpcore_exceptions
    yield
  File "c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\httpx\_transports\default.py", line 233, in handle_request
    resp = self._pool.handle_request(req)
  File "c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\httpcore\_sync\connection_pool.py", line 216, in handle_request
    raise exc from None
  File "c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\httpcore\_sync\connection_pool.py", line 196, in handle_request
    response = connection.handle_request(
  File "c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\httpcore\_sync\connection.py", line 99, in handle_request
    raise exc
  File "


Could not create share link. Missing file: c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\gradio\frpc_windows_amd64_v0.2. 

Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps: 

1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_windows_amd64.exe
2. Rename the downloaded file to: frpc_windows_amd64_v0.2
3. Move the file to this location: c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\gradio






Exception in thread Thread-622 (_do_normal_analytics_request):
Traceback (most recent call last):
  File "c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\httpx\_transports\default.py", line 69, in map_httpcore_exceptions
Exception in thread Thread-623 (_do_normal_analytics_request):
Traceback (most recent call last):
  File "c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\httpx\_transports\default.py", line 69, in map_httpcore_exceptions
    yield
  File "c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\httpx\_transports\default.py", line 233, in handle_request
    yield
  File "c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\httpx\_transports\default.py", line 233, in handle_request
    resp = self._pool.handle_request(req)
  File "c:\Users\VAIBHAV SHARMA\AppData\Local\Programs\Python\Python310\lib\site-packages\httpcore\_sync\connection_pool.py", line

