In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.models import load_model
import joblib

In [9]:
df = pd.read_csv('dataset/train.csv')

In [12]:
df.head(-1)

Unnamed: 0,comment_text,toxic
0,Explanation\nWhy the edits made under my usern...,0
1,D'aww! He matches this background colour I'm s...,0
2,"Hey man, I'm really not trying to edit war. It...",0
3,"""\nMore\nI can't make any real suggestions on ...",0
4,"You, sir, are my hero. Any chance you remember...",0
...,...,...
159565,The numbers in parentheses are the additional ...,0
159566,""":::::And for the second time of asking, when ...",0
159567,You should be ashamed of yourself \n\nThat is ...,0
159568,"Spitzer \n\nUmm, theres no actual article for ...",0


In [11]:
df = df[['comment_text', 'toxic']]

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159571 entries, 0 to 159570
Data columns (total 2 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   comment_text  159571 non-null  object
 1   toxic         159571 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 2.4+ MB


### Preprocessing

In [14]:
X = df['comment_text'].astype(str)
y = df['toxic']

In [15]:
# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

### Feature Extraction

In [16]:
# Tokenization
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)

In [17]:
# Padding sequences
max_len = 200
X_pad = pad_sequences(X_seq, maxlen=max_len)

### Training

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)

In [19]:
embedding_dim = 100
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len))
model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(units=1, activation='sigmoid'))

In [20]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])







In [21]:
epochs = 5
batch_size = 32
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

Epoch 1/5












Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x226745b81d0>

In [22]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')

Test Loss: 0.13287819921970367, Test Accuracy: 0.9570421576499939


In [23]:
model.save('model_v2.h5')

  saving_api.save_model(


In [33]:
!tensorflowjs_converter --input_format=keras /model_v2.h5 /model


Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\habee\AppData\Local\Programs\Python\Python311\Scripts\tensorflowjs_converter.exe\__main__.py", line 4, in <module>
  File "C:\Users\habee\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflowjs\__init__.py", line 21, in <module>
    from tensorflowjs import converters
  File "C:\Users\habee\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflowjs\converters\__init__.py", line 21, in <module>
    from tensorflowjs.converters.converter import convert
  File "C:\Users\habee\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflowjs\converters\converter.py", line 37, in <module>
    from tensorflowjs.converters import tf_saved_model_conversion_v2
  File "C:\Users\habee\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflowjs\converters\tf_saved_model_conversion_v2.py", line 28, in

### Prediction

In [40]:
joblib.dump(tokenizer,'app/tokenizer.pkl')

['app/tokenizer.pkl']

In [41]:
joblib.dump(label_encoder,'app/label_encoder.pkl')

['app/label_encoder.pkl']

In [30]:
Toxic_model = load_model('app/model.h5')

In [35]:
new_comment = "You are the worst person I have ever met. I hate you."

new_comment_seq = tokenizer.texts_to_sequences([new_comment])
new_comment_pad = pad_sequences(new_comment_seq, maxlen=max_len)

predictions = model.predict(new_comment_pad)

predicted_label = label_encoder.inverse_transform([1 if pred > 0.5 else 0 for pred in predictions])

print("Predicted Label:", predicted_label)

Predicted Label: [1]


### Tensorflowjs

In [5]:
!pip install tensorflowjs

^C


In [1]:
import tensorflow as tf




In [5]:
from tensorflow.keras.models import model_from_json

In [2]:
model = tf.keras.models.load_model('model.h5')




In [6]:
# !tensorflowjs_converter --input_format=keras model.h5 model.json

In [7]:
model_json = model.to_json()

# Write the JSON string to a file
with open('model.json', 'w') as json_file:
    json_file.write(model_json)