### Install Dependencies and Bring in Data

In [1]:
!pip install tensorflow pandas matplotlib scikit-learn



In [2]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np

In [3]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization

In [4]:
# Load your CSVs
train_df = pd.read_csv(r"C:\Users\DELL\Documents\CommentToxicity\toxic-comment-classificationdata\train.csv")
test_df = pd.read_csv(r"C:\Users\DELL\Documents\CommentToxicity\toxic-comment-classificationdata\test.csv")
test_labels = pd.read_csv(r"C:\Users\DELL\Documents\CommentToxicity\toxic-comment-classificationdata\test_labels.csv")


In [9]:
# Inputs and labels
X = train_df['comment_text']
y = train_df.iloc[:, 2:]  # toxic, severe_toxic, obscene, threat, insult, identity_hate



In [10]:
# Vectorize text
MAX_FEATURES = 200000
vectorizer = TextVectorization(max_tokens=MAX_FEATURES, output_sequence_length=1800, output_mode='int')
vectorizer.adapt(X.values)

In [11]:
X_vec = vectorizer(X.values)

In [16]:
# Create dataset
dataset = tf.data.Dataset.from_tensor_slices((X_vec, y.values))
dataset = dataset.shuffle(160000).batch(32).prefetch(tf.data.AUTOTUNE)


In [18]:
# Train/Validation split
train_size = int(0.8 * len(dataset))
train = dataset.take(train_size)
val = dataset.skip(train_size)

In [21]:
###CREATE SEQUENTIAL MODEL


In [20]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding

In [22]:
model = Sequential()
# Create the embedding layer 
model.add(Embedding(MAX_FEATURES+1, 32))
# Bidirectional LSTM Layer
model.add(Bidirectional(LSTM(32, activation='tanh')))
# Feature extractor Fully connected layers
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
# Final layer 
model.add(Dense(6, activation='sigmoid'))


In [23]:

model.compile(loss='BinaryCrossentropy', optimizer='Adam')

In [24]:
model.summary()

In [28]:
history = model.fit(train, epochs=1, validation_data=val)


[1m3989/3989[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3731s[0m 935ms/step - loss: 0.0454 - val_loss: 0.0400


In [30]:
from matplotlib import pyplot as plt


In [None]:
plt.figure(figsize=(8,5))
pd.DataFrame(history.history).plot()
plt.show()

In [36]:
##MAKE PREDICTIONS

In [33]:
input_text = vectorizer('You freaking suck! I am going to hit you.')


In [None]:
res = model.predict(input_text)


In [None]:
(res > 0.5).astype(int)


In [None]:
batch_X, batch_y = test.as_numpy_iterator().next()


In [None]:
(model.predict(batch_X) > 0.5).astype(int)

In [None]:
res.shape


In [42]:
###EVALUATE MODEL

In [43]:
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy



In [44]:
pre = Precision()
re = Recall()
acc = CategoricalAccuracy()

In [None]:
for batch in test.as_numpy_iterator(): 
    # Unpack the batch 
    X_true, y_true = batch
    # Make a prediction 
    yhat = model.predict(X_true)
    
    # Flatten the predictions
    y_true = y_true.flatten()
    yhat = yhat.flatten()
    
    pre.update_state(y_true, yhat)
    re.update_state(y_true, yhat)
    acc.update_state(y_true, yhat)


In [46]:
print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')

Precision: 0.0, Recall:0.0, Accuracy:0.0


In [47]:
### Test and Gradio

In [48]:
!pip install gradio jinja2

Collecting gradio
  Downloading gradio-5.42.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting brotli>=1.1.0 (from gradio)
  Downloading Brotli-1.1.0-cp312-cp312-win_amd64.whl.metadata (5.6 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.6.1-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.11.1 (from gradio)
  Downloading gradio_client-1.11.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.11.1-cp312-cp312-win_amd64.whl.metadata (43 kB)
Collecting pydantic<2.12,>=2.0 (from gradio)
  Downloading pydantic-2.11.7-py3-none-any.whl.metadata (67 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py

In [49]:
import tensorflow as tf
import gradio as gr


In [None]:
model.save('toxicity.h5')


In [None]:
model = tf.keras.models.load_model('toxicity.h5')


In [None]:
input_str = vectorizer('hey i freaken hate you!')

In [None]:
res = model.predict(np.expand_dims(input_str,0))

In [None]:
res


In [None]:
def score_comment(comment):
    vectorized_comment = vectorizer([comment])
    results = model.predict(vectorized_comment)
    
    text = ''
    for idx, col in enumerate(df.columns[2:]):
        text += '{}: {}\n'.format(col, results[0][idx]>0.5)
    
    return text

In [None]:
interface = gr.Interface(fn=score_comment, 
                         inputs=gr.inputs.Textbox(lines=2, placeholder='Comment to score'),
                        outputs='text')

In [None]:
interface.launch(share=True)