In [None]:
!pip install tensorflow_text

In [None]:
!pip install nlpaug

In [None]:
#importing required libraries
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import pandas as pd
import numpy as np

In [None]:
# Read the labelled case descriptions into a DataFrame and preview the first five rows
df = pd.read_csv(filepath_or_buffer="/content/TRAIN_DATA.csv")
df.head()

In [None]:
# Increase the size of our dataset: request two WordNet-synonym variants (n=2)
# of every description and add them under the same label.
# NOTE: this cell rebinds df, so re-running it augments the already augmented data.
import nlpaug.augmenter.word as naw

aug = naw.SynonymAug(aug_src='wordnet', aug_max=2)

# Collect the new rows in a plain list and concatenate once. DataFrame.append is
# deprecated (removed in pandas 2.0) and copies the whole frame on every call,
# which made the original row-by-row loop quadratic.
augmented_rows = []
for row in df.values.tolist():
  # row[0] is the first column (Type) and row[1] the second (Description).
  for sent in aug.augment(row[1], n=2):
    augmented_rows.append([row[0], sent])

if augmented_rows:
  df = pd.concat(
      [df, pd.DataFrame(augmented_rows, columns=df.columns)],
      ignore_index=True,
  )

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
  df = df.append(new_row_df, ignore_index=True)


In [None]:
# Summary statistics of the dataset after augmentation.
df.describe()

Unnamed: 0,Type,Description
count,600,600
unique,4,563
top,CyberBullying,I found a website that is giving away copies o...
freq,150,3


In [None]:
# Per-category summary, useful for checking how balanced the classes are.
df.groupby("Type").describe()


Unnamed: 0_level_0,Description,Description,Description,Description
Unnamed: 0_level_1,count,unique,top,freq
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
CyberBullying,150,142,Some person created a fake account on my name ...,2
EmployeeRights,150,140,Employees at the company are not provided with...,2
IPR,150,135,I found a website that is giving away copies o...,3
TrafficViolation,150,146,"After consuming alcohol, the driver took contr...",2


In [None]:
#creating labels for each type
label_ids = {'CyberBullying': 0, 'TrafficViolation': 1, 'EmployeeRights': 2, 'IPR': 3}
df['Labels'] = df['Type'].map(label_ids)
# Series.map leaves NaN for any value missing from the mapping; fail here rather
# than letting NaN labels reach the one-hot encoding step later on.
assert df['Labels'].notna().all(), "Found Type values with no label mapping"

In [None]:
# Plain-text preview of the first ten rows, now including the Labels column
print(df.iloc[:10])

               Type                                        Description  Labels
0     CyberBullying  I'm experiencing cyberbullying through continu...       0
1     CyberBullying  I received threats and abusive comments on my ...       0
2               IPR  They published, performed, and displayed my wo...       3
3    EmployeeRights  Many employees are recruited without the provi...       2
4  TrafficViolation  A driver caused a big accident because they we...       1
5    EmployeeRights  Many workers have said that the company didn't...       2
6     CyberBullying  The continuous cyber bullying took a toll on m...       0
7  TrafficViolation  The driver's drunken state while operating the...       1
8               IPR  I found my artwork being used on a commercial ...       3
9  TrafficViolation  The driver's careless maneuvers behind the whe...       1


In [None]:
#splitting dataset into training and testing sets
from sklearn.model_selection import train_test_split

# One-hot encode the integer labels (four classes) for the categorical cross-entropy loss.
y = tf.keras.utils.to_categorical(df["Labels"].values, num_classes=4)

# A fixed random_state makes the split (and the reported metrics) repeatable, and
# stratifying on the integer labels keeps the class proportions equal in both parts.
# NOTE(review): df already contains the synonym-augmented rows, so variants of one
# original sentence can land on both sides of this split and inflate the test score.
X_train, X_test, y_train, y_test = train_test_split(
    df['Description'], y, random_state=42, stratify=df["Labels"]
)
print(X_train.head(5))
len(X_train)

403    Many workers have spoken up about not getting ...
565    I was riding my cycle when the car came skiddi...
295    Someone created a fake social medium account u...
344    Employees are exposed to dangerous chemicals a...
51     I experienced feelings of anxiety, depression,...
Name: Description, dtype: object


450

In [None]:
# TF Hub layers used for text preprocessing and for encoding.
# The preprocessing layer is created first, then the encoder.
preprocess_url = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
bert_preprocess = hub.KerasLayer(preprocess_url)

encoder_url = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"
bert_encoder = hub.KerasLayer(encoder_url)

In [None]:
# an example to show the vector outputs
def get_sentence_embedding(sentences):
  """Return the encoder's 'pooled_output' for the given sentences.

  The sentences are passed through bert_preprocess and the result is fed
  to bert_encoder; only the 'pooled_output' entry of its outputs is returned.
  """
  encoder_outputs = bert_encoder(bert_preprocess(sentences))
  return encoder_outputs['pooled_output']

# Two sample complaints to embed; the cell displays the resulting tensor
sample_sentences = [
    "I met with an accident because the car drive was under influence of alcohol.",
    "They ganged up on me on social media platforms",
]
get_sentence_embedding(sample_sentences)

<tf.Tensor: shape=(2, 768), dtype=float32, numpy=
array([[-0.77890766, -0.36578044, -0.8429882 , ..., -0.7866387 ,
        -0.59821194,  0.88224465],
       [-0.8202585 , -0.3553189 , -0.3471778 , ..., -0.15828101,
        -0.6589485 ,  0.8642833 ]], dtype=float32)>

In [None]:
# BERT front end: raw string input -> preprocessing -> encoder outputs
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name="text")
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

# Classification head applied to the pooled output: dropout, two ReLU layers,
# then a 4-way softmax. Layers are created in the same order as they are applied.
head_layers = (
    tf.keras.layers.Dropout(0.1, name='dropout'),
    tf.keras.layers.Dense(512, activation="relu"),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(4, activation="softmax", name="output"),
)
l = outputs['pooled_output']
for head_layer in head_layers:
  l = head_layer(l)

# Final model: text in, class probabilities out
model = tf.keras.Model(inputs=text_input, outputs=l)

In [None]:
# Print the layers with their output shapes and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 text (InputLayer)           [(None,)]                    0         []                            
                                                                                                  
 keras_layer (KerasLayer)    {'input_type_ids': (None,    0         ['text[0][0]']                
                             128),                                                                
                              'input_mask': (None, 128)                                           
                             , 'input_word_ids': (None,                                           
                              128)}                                                               
                                                                                              

In [None]:
# Metrics reported during training and evaluation
METRICS = [
    metric_cls(name=metric_name)
    for metric_cls, metric_name in (
        (tf.keras.metrics.CategoricalAccuracy, "accuracy"),
        (tf.keras.metrics.Precision, "precision"),
        (tf.keras.metrics.Recall, "recall"),
    )
]

# Adam optimizer with categorical cross-entropy on the one-hot targets
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=METRICS)

In [None]:
# Train for ten epochs on the training split
model.fit(x=X_train, y=y_train, epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7d58729e53c0>

In [None]:
# Score the held-out split; returns the loss followed by the compiled metrics
model.evaluate(x=X_test, y=y_test)



[0.6019114255905151, 0.7666666507720947, 0.8014184236526489, 0.753333330154419]

In [None]:
def predict_class(case_des):
  """Predict the class id for one or more case descriptions.

  Args:
    case_des: Batch of description strings in a form accepted by model.predict
      (the notebook passes a one-element list).

  Returns:
    The predicted class index when a single description is given, otherwise
    an array holding one class index per description.
  """
  probabilities = model.predict(case_des)
  # Take the argmax along the class axis. Without an axis NumPy flattens the whole
  # (batch, classes) array, which only yields a valid class id for a batch of one.
  class_ids = np.atleast_1d(np.argmax(probabilities, axis=-1))
  return class_ids[0] if class_ids.size == 1 else class_ids

In [None]:
def response(sent):
  """Classify a case description and print the matching category and sections.

  Args:
    sent: The case description as a single string.

  Returns:
    None. The reply is printed with an "Assistant: " prefix.
  """
  # Class ids follow the label mapping used for training:
  # 0 CyberBullying, 1 TrafficViolation, 2 EmployeeRights, 3 IPR.
  # The IPR reply previously carried the wrong abbreviation "(IPC)".
  # NOTE(review): the act names 'Minimunwagesact' and 'Indiancoprightact' look
  # misspelled; confirm the intended wording before changing this user-facing text.
  replies = {
      0: "This case involves CYBER BULLYING. \nPredicted Sections: IPC-507, IT_Act- 66E, 66A, 67, 67A",
      1: "This case involves TRAFFIC VIOLATION.\nPredicted Sections: Motorvehicleact - 185, IPC- 304A (Drink and drive)",
      2: "This case is related to EMPLOYEE RIGHTS.\nPredicted Sections: Minimunwagesact-7, paymentofwagesact-20",
      3: "This case is related to INTELLECTUAL PROPERTY RIGHTS(IPR).\nPredicted Sections: Indiancoprightact- 51,63,63A,65,66,67",
  }
  val = predict_class([sent])
  # Any id other than 0, 1 or 2 falls back to the IPR reply, as the original else-branch did.
  output = replies.get(int(val), replies[3])
  print("Assistant: ", output)

In [None]:
print("Assistant: Hi! Add your case here, I'll help you identify the involved Sections.")
# Read into user_input rather than `str`: binding the name `str` at cell level
# shadows the built-in type for every cell that runs afterwards.
while True:
  user_input = input("You: ").strip()
  if user_input.lower() == "bye":
    break
  if not user_input:
    # Nothing to classify; prompt again instead of sending an empty string to the model.
    continue
  response(user_input)
print("Assistant: Okay Bye!")



Assistant: Hi! Add your case here, I'll help you identify the involved Sections.
You: My friend was involved in a hit-and-run accident.
Assistant:  This case is related to INTELLECTUAL PROPERTY RIGHTS(IPC).
Predicted Sections: Indiancoprightact- 51,63,63A,65,66,67
You: bye
Assistant: Okay Bye!


In [None]:
# Save the trained model to a single file with the .h5 extension.
# NOTE(review): the model contains hub.KerasLayer objects; reloading presumably needs
# custom_objects={'KerasLayer': hub.KerasLayer} and tensorflow_text imported. Confirm.
model.save("legalsortmodel.h5")