<a href="https://colab.research.google.com/github/coderanandmaurya/University-Query-Priority-Classification/blob/main/Classification_UQPC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
# Raw GitHub URLs of the training and test CSV splits.
train_data = (
    'https://raw.githubusercontent.com/coderanandmaurya/University-Query-Priority-Classification/refs/heads/main/university_query_train.csv'
)
test_data = (
    'https://raw.githubusercontent.com/coderanandmaurya/University-Query-Priority-Classification/refs/heads/main/university_query_test.csv'
)

In [31]:
import pandas as pd

In [32]:
# Both splits are read with the same four columns; any other CSV column is skipped.
selected_columns = ['Student_Query', 'Department', 'Days_To_Deadline', 'Priority_Label']

train_df = pd.read_csv(train_data, usecols=selected_columns)
test_df = pd.read_csv(test_data, usecols=selected_columns)

In [33]:
# Summarise the training frame. It is bound to `train_df` when the CSV is read;
# no variable named `train` is defined, so the old call failed on a fresh kernel.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Student_Query     5000 non-null   object
 1   Department        5000 non-null   object
 2   Days_To_Deadline  5000 non-null   int64 
 3   Priority_Label    5000 non-null   object
dtypes: int64(1), object(3)
memory usage: 156.4+ KB


In [34]:
# Summarise the test frame. It is bound to `test_df` when the CSV is read;
# no variable named `test` is defined, so the old call failed on a fresh kernel.
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Student_Query     1000 non-null   object
 1   Department        1000 non-null   object
 2   Days_To_Deadline  1000 non-null   int64 
 3   Priority_Label    1000 non-null   object
dtypes: int64(1), object(3)
memory usage: 31.4+ KB


In [35]:
# Show one random training row. The frame is named `train_df`; `train` is not defined.
train_df.sample()

Unnamed: 0,Student_Query,Department,Days_To_Deadline,Priority_Label
2105,How to reset my university portal password?,Library,16,Medium


In [36]:
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Dropout
from tensorflow.keras.models import Model

In [37]:
# Longest training query, measured in whitespace-separated words.
query_word_counts = train_df["Student_Query"].map(lambda query: len(query.split()))
max_len = query_word_counts.max()
max_len

12

In [38]:
max_words = 10000  # vocabulary cap passed to the tokenizer (also the text Embedding's input_dim)
max_len = 15       # hard-coded padded length; replaces any earlier value of max_len

# The vocabulary is fitted on the training queries only, with "<OOV>" configured
# as the out-of-vocabulary token.
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(train_df["Student_Query"])

# Convert each query to a list of integer token ids.
train_sequences = tokenizer.texts_to_sequences(train_df["Student_Query"])
test_sequences = tokenizer.texts_to_sequences(test_df["Student_Query"])

# Pad every sequence at the end ("post") to exactly max_len entries.
X_train_text = pad_sequences(train_sequences, maxlen=max_len, padding="post")
X_test_text = pad_sequences(test_sequences, maxlen=max_len, padding="post")

In [39]:
# Display the padded training token-id matrix produced by pad_sequences.
X_train_text

array([[13,  8, 55, ...,  0,  0,  0],
       [ 3, 74, 75, ...,  0,  0,  0],
       [13,  8, 30, ...,  0,  0,  0],
       ...,
       [12, 32, 11, ...,  0,  0,  0],
       [13,  8, 30, ...,  0,  0,  0],
       [19, 20, 21, ...,  0,  0,  0]], dtype=int32)

In [40]:
# Encode department names as integer ids. The encoder is fitted on the training
# split and then reused, unchanged, for the test split.
# Note: the "Department" column of both frames is overwritten in place with the ids.
dept_encoder = LabelEncoder()

train_department_ids = dept_encoder.fit_transform(train_df["Department"])
test_department_ids = dept_encoder.transform(test_df["Department"])

train_df["Department"] = train_department_ids
test_df["Department"] = test_department_ids

# Number of distinct departments seen in training; sizes the department Embedding.
num_departments = len(dept_encoder.classes_)

In [41]:
# Standardise Days_To_Deadline. The scaler is fitted on the training split only
# and the same fitted scaler is applied to both splits.
days_column = ["Days_To_Deadline"]

scaler = StandardScaler()
scaler.fit(train_df[days_column])

X_train_days = scaler.transform(train_df[days_column])
X_test_days = scaler.transform(test_df[days_column])

In [42]:
# Map priority labels to integer class ids (encoder fitted on the training split).
label_encoder = LabelEncoder()
y_train_ids = label_encoder.fit_transform(train_df["Priority_Label"])
y_test_ids = label_encoder.transform(test_df["Priority_Label"])

# Number of distinct priority classes; sizes the softmax output layer.
num_classes = len(label_encoder.classes_)

# One-hot encode the class ids to match the categorical_crossentropy loss.
y_train = tf.keras.utils.to_categorical(y_train_ids, num_classes)
y_test = tf.keras.utils.to_categorical(y_test_ids, num_classes)

In [43]:
# Text branch: padded token ids -> 128-dimensional embeddings -> 64-unit LSTM.
text_input = Input(shape=(max_len,), name="text_input")

# `input_length` is intentionally not passed: the sequence length is already
# fixed by the Input shape above, and Keras 3 reports the argument as deprecated.
embedding = Embedding(input_dim=max_words,
                      output_dim=128)(text_input)

lstm_out = LSTM(64)(embedding)



In [44]:
# Department branch: one integer id per sample -> 8-dimensional embedding,
# flattened so it can be concatenated with the other branches.
dept_input = Input(shape=(1,), name="dept_input")

dept_embedding = Embedding(
    input_dim=num_departments,
    output_dim=8,
)(dept_input)

dept_flat = tf.keras.layers.Flatten()(dept_embedding)

In [45]:
# Numeric branch: a single scalar feature per sample, fed to the model without further layers.
days_input = Input(shape=(1,), name="days_input")

In [46]:
# Merge the three branches into one feature vector.
merged_branches = [lstm_out, dept_flat, days_input]
concat = Concatenate()(merged_branches)

# Classification head: 64-unit ReLU layer, 30% dropout, then a softmax over the classes.
dense = Dense(units=64, activation="relu")(concat)
drop = Dropout(rate=0.3)(dense)

output = Dense(units=num_classes, activation="softmax")(drop)

In [47]:
# Assemble the three-input model; the input order here is the order `fit` must use.
model_inputs = [text_input, dept_input, days_input]
model = Model(inputs=model_inputs, outputs=output)

# categorical_crossentropy pairs with one-hot targets and a softmax output layer.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

In [48]:
# Feature lists follow the order text, department, days.
train_inputs = [X_train_text, train_df["Department"], X_train_days]
test_inputs = [X_test_text, test_df["Department"], X_test_days]

# NOTE(review): the test split is passed as validation data, so the val_* numbers
# reported per epoch are test-set metrics; there is no separate validation split.
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(test_inputs, y_test),
    epochs=10,
    batch_size=32,
)

Epoch 1/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 35ms/step - accuracy: 0.7119 - loss: 0.5498 - val_accuracy: 1.0000 - val_loss: 1.1647e-04
Epoch 2/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - accuracy: 1.0000 - loss: 4.6781e-04 - val_accuracy: 1.0000 - val_loss: 2.9815e-05
Epoch 3/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 1.0000 - loss: 2.4654e-04 - val_accuracy: 1.0000 - val_loss: 1.1924e-05
Epoch 4/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 1.0000 - loss: 1.4372e-04 - val_accuracy: 1.0000 - val_loss: 5.6450e-06
Epoch 5/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 30ms/step - accuracy: 1.0000 - loss: 1.0987e-04 - val_accuracy: 1.0000 - val_loss: 3.2681e-06
Epoch 6/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 1.0000 - loss: 4.1636e-05 - val_accuracy: 1.0000 