In [2]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the data. The file is read as UTF-8 first; if it contains bytes that
# are not valid UTF-8, fall back to latin1 (which accepts any byte sequence).
DATA_PATH = '/content/Complaint_Dataset.csv'
try:
    raw_data = pd.read_csv(DATA_PATH, encoding='utf-8')
except UnicodeDecodeError:
    raw_data = pd.read_csv(DATA_PATH, encoding='latin1')  # or try 'iso-8859-1'

# Rows without a description or a category cannot be used for training:
# pandas represents missing cells as float NaN, which the text tokenizer and
# the label encoder are not expected to handle. Drop them explicitly instead
# of failing later with an obscure error. `raw_data` is kept untouched so the
# original frame can still be inspected.
data = raw_data.dropna(subset=['Description', 'Category'])
descriptions = data['Description'].astype(str)  # guard against non-string cells

# Preprocess the data: build the word index from every description
tokenizer = Tokenizer()
tokenizer.fit_on_texts(descriptions)

# Convert texts to integer sequences and right-pad them with zeros so that
# every row has the length of the longest description
sequences = tokenizer.texts_to_sequences(descriptions)
padded_sequences = pad_sequences(sequences, padding='post')

# Encode the category names as integer class ids
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(data['Category'])

# Train-test split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)


In [3]:
from tensorflow.keras.layers import Embedding, MultiHeadAttention, Dense, LayerNormalization, Dropout, GlobalAveragePooling1D, Input
from tensorflow.keras.models import Model

# Define a custom transformer layer
class TransformerBlock(tf.keras.layers.Layer):
    """Single transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalization.

    Args:
        embed_dim: Size of the last axis of the block's input and output. The
            residual additions in `call` require the input's last axis to have
            this size. It is also passed to `MultiHeadAttention` as `key_dim`.
        num_heads: Number of attention heads.
        ff_dim: Number of units in the hidden layer of the feed-forward net.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Standard Keras layer arguments (`name`, `dtype`,
            `trainable`, ...), forwarded to the base class. Accepting them is
            what allows the layer to be re-created from a saved config.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so `get_config` can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim),  # project back to embed_dim for the residual add
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to `inputs`.

        Args:
            inputs: Tensor whose last axis has size `embed_dim`
                (presumably shaped (batch, sequence, embed_dim) -- confirm
                against the caller).
            training: Forwarded to the dropout layers. `None` (the default)
                lets Keras decide from the surrounding fit/predict context.

        Returns:
            A tensor with the same shape as `inputs`.
        """
        # Self-attention: the sequence is both query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be saved and reloaded."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

# Define the model
embed_dim = 64  # Embedding size for each token
num_heads = 4  # Number of attention heads
ff_dim = 128  # Hidden layer size in feed forward network inside transformer

# Sizes taken from the preprocessing cell
sequence_length = X_train.shape[1]
vocab_size = len(tokenizer.word_index) + 1
num_classes = len(label_encoder.classes_)

# Token ids -> embeddings -> one transformer block
inputs = Input(shape=(sequence_length,))
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embed_dim)(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(embedding_layer)

# Classification head: average over the sequence axis, then a small dense
# layer with dropout on both sides, applied in the order listed
for head_layer in (
    GlobalAveragePooling1D(),
    Dropout(0.1),
    Dense(64, activation="relu"),
    Dropout(0.1),
):
    x = head_layer(x)
outputs = Dense(num_classes, activation="softmax")(x)

model = Model(inputs=inputs, outputs=outputs)

# Compile the model (labels are integer class ids, hence the sparse loss)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model, reporting metrics on the held-out split after every epoch
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    validation_data=(X_test, y_test),
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
# Function to predict category for new descriptions
def predict_category(description, classifier=None):
    """Predict the complaint category for a single free-text description.

    Args:
        description: The complaint text to classify.
        classifier: Optional Keras model to run the prediction with. When
            omitted, the notebook-level `model` is used, so existing calls
            with a single argument behave as before. Passing a model here
            avoids having to redefine this function for a reloaded model.

    Returns:
        The category label, decoded with the notebook-level `label_encoder`.
    """
    if classifier is None:
        classifier = model
    # Tokenize with the fitted tokenizer and pad to the training sequence length.
    sequence = tokenizer.texts_to_sequences([description])
    padded_sequence = pad_sequences(sequence, maxlen=padded_sequences.shape[1], padding='post')
    prediction = classifier.predict(padded_sequence)
    # A single row is predicted, so the flat argmax is the class id.
    return label_encoder.inverse_transform([prediction.argmax()])[0]

# Test the model: run one hand-written, multi-paragraph complaint through
# predict_category and print the label it returns.
test_description = '''I am writing to formally bring to your attention the disturbing issue of defamatory content being posted against our esteemed institution, Maharana Pratap Group of Institutions (mpgi_official), on the social media platform Instagram. The Instagram handle in question, @mpgi_kingdom, has been consistently posting derogatory and distasteful jokes targeting our faculty and students.

MPGI takes great pride in upholding a respectful and inclusive environment for all members of our community, and it is deeply concerning to see such harmful content circulating online. The defamatory nature of these posts not only tarnishes the reputation of our institution but also has the potential to harm the well-being and morale of our faculty, staff, and students.

We have attempted to address this issue directly with the owner of the Instagram handle, but our attempts have been unsuccessful in halting the dissemination of offensive content. Therefore, we are seeking your assistance in taking appropriate action to address this matter and ensure that such defamatory posts are promptly removed from the platform.

Enclosed with this letter are examples of the defamatory posts for your review and investigation. We kindly request that the cyber cell investigate this matter thoroughly and take necessary actions in accordance with the relevant laws and regulations governing cyber defamation.

We understand the gravity of this situation and trust that you will handle this matter.'''
# Classify the sample text and show the decoded category name.
predicted_category = predict_category(test_description)
print(f'Predicted Category: {predicted_category}')


Predicted Category: Defamation


In [17]:
# Persist the trained model (architecture, weights and optimizer state) to a
# single HDF5 file in the working directory.
model.save(
    'loan_fraud_model.h5',
    save_format='h5',
    include_optimizer=True,
)


  saving_api.save_model(


In [18]:
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import get_custom_objects

# Make the custom layer known to Keras by adding it to the global
# custom-object registry under the class name stored in the saved file.
custom_object_registry = get_custom_objects()
custom_object_registry['TransformerBlock'] = TransformerBlock

# Restore the saved model; the custom layer is resolved through the registry
loaded_model = load_model('loan_fraud_model.h5')


In [19]:
# Function to predict category for new descriptions
def predict_category(description, classifier=None):
    """Predict the complaint category for a single free-text description.

    Args:
        description: The complaint text to classify.
        classifier: Optional Keras model to run the prediction with. When
            omitted, the notebook-level `loaded_model` (the model restored
            from disk) is used, so existing calls with a single argument
            behave as before.

    Returns:
        The category label, decoded with the notebook-level `label_encoder`.
    """
    if classifier is None:
        classifier = loaded_model
    # Tokenize with the fitted tokenizer and pad to the training sequence length.
    sequence = tokenizer.texts_to_sequences([description])
    padded_sequence = pad_sequences(sequence, maxlen=padded_sequences.shape[1], padding='post')
    prediction = classifier.predict(padded_sequence)
    # A single row is predicted, so the flat argmax is the class id.
    return label_encoder.inverse_transform([prediction.argmax()])[0]

# Test the model: run a short one-sentence complaint through the
# predict_category defined in this cell and print the label it returns.
test_description = "Victim received a call from an unknown number claiming to be a loan officer."
# Classify the sample text and show the decoded category name.
predicted_category = predict_category(test_description)
print(f'Predicted Category: {predicted_category}')


Predicted Category: Online Loan Fraud
