In [None]:
import pandas as pd
import numpy as np 

import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation
from tensorflow.keras.layers import Bidirectional, GlobalMaxPool1D
from tensorflow.keras.models import Model
from tensorflow.keras import initializers, regularizers, constraints, optimizers, layers

# Importing Dataset

In [None]:
# Read the zipped competition CSVs directly into DataFrames.
# `train` is the labelled split; `test` is the split predicted at the end.
train = pd.read_csv(
    "../input/jigsaw-toxic-comment-classification-challenge/train.csv.zip"
)
test = pd.read_csv(
    "../input/jigsaw-toxic-comment-classification-challenge/test.csv.zip"
)

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

# Check for null values

In [None]:
# Per-column count of missing values: training frame first, then test frame.
for frame in (train, test):
    print(frame.isna().sum())

# Separate x and y component from training set

In [None]:
# The six binary target columns predicted jointly for every comment.
label_columns = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]

# Raw comment text is the only model input; labels exist for the training split only.
x_train = train["comment_text"]
y_train = train[label_columns]
x_test = test["comment_text"]

In [None]:
# Longest training comment, measured in characters.
# Note this is a character count, not a token count, so it is only a rough
# guide for the token-level `maxlen` chosen when padding.
# The vectorized `.str.len()` replaces the Python-level `map(len)` and yields
# NaN for a missing value instead of raising TypeError.
print(x_train.str.len().max())

# Tokenize the input comments

In [None]:
# Vocabulary cap handed to the tokenizer; also reused as the embedding's input size.
max_feature = 20000

# The vocabulary is learned from the training comments only.
tokenizer = Tokenizer(num_words=max_feature)
tokenizer.fit_on_texts(x_train.tolist())

# Turn every comment into a list of integer word indices using that vocabulary.
tokenized_train = tokenizer.texts_to_sequences(x_train)
tokenized_test = tokenizer.texts_to_sequences(x_test)

In [None]:
# Show the integer sequence produced for the first training comment.
tokenized_train[0:1]

# Apply Padding
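Padding brings every comment to the same length so that the sequences can be batched together. With the `pad_sequences` defaults used below, comments shorter than 100 tokens (= `maxlen`) get zeros added at the beginning, and comments longer than 100 tokens are truncated from the beginning, so only their last 100 tokens are kept.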

In [None]:
# Fixed sequence length (in tokens) that the model consumes.
maxlen = 100

# Shorter sequences get leading zeros and longer ones lose their leading tokens;
# "pre" is the library default for both options and is spelled out here.
# Note: x_train / x_test are rebound from the raw text Series to padded integer
# arrays, so the tokenizer cell cannot be re-run after this one without first
# re-running the cell that selects the "comment_text" columns.
x_train = pad_sequences(
    tokenized_train, maxlen=maxlen, padding="pre", truncating="pre"
)
x_test = pad_sequences(
    tokenized_test, maxlen=maxlen, padding="pre", truncating="pre"
)

# Building the Model

1. INPUT LAYER

In [None]:
# Symbolic model input: one padded sequence of `maxlen` token ids per sample.
# NOTE(review): the name `input` shadows Python's built-in input(). It is kept
# here because the embedding cell and the Model(...) cell further down refer to
# it; rename it in all three places together if this is cleaned up.
input = Input(shape = (maxlen,))

2. EMBEDDING

In [None]:
# Size of the dense vector learned for each token id.
embed_size = 128

# Look up a trainable vector for every token id; ids range over the
# `max_feature`-word vocabulary used by the tokenizer.
x = Embedding(input_dim=max_feature, output_dim=embed_size)(input)

3. LSTM LAYER

In [None]:
# 60-unit LSTM. return_sequences=True keeps the output of every time step
# (instead of only the last one) so the following pooling layer can reduce
# over the whole sequence.
x = LSTM(units=60, return_sequences=True, name="lstm_layer")(x)

4. MAX POOLING (To flatten the inputs for dense layer)

In [None]:
# Collapse the time dimension by taking, for each LSTM feature, its maximum
# over all time steps; the result is one fixed-size vector per sample.
pooling_layer = GlobalMaxPool1D()
x = pooling_layer(x)

In [None]:
# Regularisation: drop 10% of the pooled features during training.
# Re-running this cell on its own stacks one more Dropout layer onto `x`;
# rebuild the model from the input cell instead.
x = Dropout(rate=0.1)(x)

5. FIRST DENSE LAYER

In [None]:
# Fully connected hidden layer of 50 ReLU units on top of the pooled features.
x = Dense(units=50, activation="relu")(x)

In [None]:
# Regularisation: drop 10% of the hidden-layer activations during training.
x = Dropout(rate=0.1)(x)

6. SECOND DENSE LAYER

In [None]:
# Output head: one sigmoid unit per label column, so each label gets its own
# independent probability (multi-label, not mutually exclusive classes).
# The width is taken from y_train rather than hard-coding 6, so the head always
# matches the label columns selected from the training frame.
x = Dense(y_train.shape[1], activation = "sigmoid")(x)

7. COMPILING LAYERS

In [None]:
# Wrap the layer graph, from the input tensor to the sigmoid head, into a model.
model = Model(inputs=input, outputs=x)

# Binary cross-entropy scores each label's sigmoid output independently;
# the Adam optimizer is used with its default settings.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Fit Model to the Data

In [None]:
# Training hyper-parameters.
batch_size = 32
epochs = 2

# validation_split=0.1 holds out 10% of the training samples for validation.
# Keras documents that split as taken from the end of the arrays, before any
# shuffling. The returned History object is left as the cell's output.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

# Predicting output of test set

In [None]:
# Score the padded test comments: one row per comment, one sigmoid
# probability per output unit of the model.
y_pred = model.predict(x=x_test, batch_size=32)