## Before Running

In [2]:
# Before running!
# !pip install tensorflow==2.10.1
# !pip install keras==2.10.0
# !pip install scapy==3.6.1

# You need to do this once
# spacy.cli.download("en_core_web_lg")

Collecting spacy
  Downloading spacy-3.6.1-cp310-cp310-win_amd64.whl (12.0 MB)
     ---------------------------------------- 0.0/12.0 MB ? eta -:--:--
      --------------------------------------- 0.2/12.0 MB 6.9 MB/s eta 0:00:02
     -- ------------------------------------- 0.8/12.0 MB 9.8 MB/s eta 0:00:02
     ---- ----------------------------------- 1.3/12.0 MB 10.4 MB/s eta 0:00:02
     ------ --------------------------------- 1.9/12.0 MB 11.3 MB/s eta 0:00:01
     ------- -------------------------------- 2.3/12.0 MB 10.6 MB/s eta 0:00:01
     ---------- ----------------------------- 3.2/12.0 MB 11.9 MB/s eta 0:00:01
     ------------- -------------------------- 4.0/12.0 MB 12.7 MB/s eta 0:00:01
     --------------- ------------------------ 4.8/12.0 MB 13.3 MB/s eta 0:00:01
     ----------------- ---------------------- 5.4/12.0 MB 13.3 MB/s eta 0:00:01
     ------------------- -------------------- 5.9/12.0 MB 13.0 MB/s eta 0:00:01
     ---------------------- ----------------- 6.8/1

## Imports

In [1]:
import spacy
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import pandas as pd
import pickle

# Load pre-trained spaCy model
nlp = spacy.load("en_core_web_lg")

In [13]:
# Read the data from the Excel file into a DataFrame
data = pd.read_excel("response.xlsx") 
data

Unnamed: 0,Responses,Class
0,I'm looking for some information about Deakin ...,Courses
1,I have a question about the admission requirem...,Courses
2,I want to know more about Deakin scholarships....,General
3,I would like to know more about the campus clu...,Activities
4,I am interested in studying psychology at Deak...,Courses
...,...,...
733,What facilities are available on Campus?,Campus
734,How many Campuses Does Deakin University have?,Campus
735,On which campus I can study IT?,Campus
736,Where Can I stay near Deakin?,Accomodation


In [3]:
data["Class"].value_counts()

Class
Courses         448
Accomodation    162
Activities       58
General          43
Campus           27
Name: count, dtype: int64

In [4]:
# Extract sentences and classes from the DataFrame
sentences = data["Responses"].tolist()
classes = data["Class"].tolist()

# Lowercase sentences
sentences = [sentence.lower() for sentence in sentences]

## PreProcessing

In [5]:
# Tokenize sentences and extract word vectors
X = np.array([nlp(sentence).vector for sentence in sentences])

# Convert class labels to numerical labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(classes)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Saving the label encoder

In [6]:
import pickle
# Save the encoder
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)

# Load the encoder
with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

## Training the model

In [22]:
# Build a neural network model
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(512, input_dim=X_train.shape[1], activation='relu'))
model.add(tf.keras.layers.Dense(256, input_dim=X_train.shape[1], activation='relu'))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(5, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=15, batch_size=1, verbose=1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print("Test Accuracy:", accuracy)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test Accuracy: 0.9391891956329346


In [23]:
# Save the entire model as a `.keras` zip archive.
model.save('Classification_93.keras')

In [24]:
model = tf.keras.models.load_model('Classification_93.keras')

In [26]:
sentence = nlp('Where can I stay at deakin?'.lower()).vector
predicted_labels = model.predict(np.array([sentence]))
predicted_label = label_encoder.inverse_transform(predicted_labels.argmax(axis=1))[0]
print(predicted_label)

predicted_prob = predicted_labels[0][predicted_labels.argmax(axis=1)]
print(predicted_prob)

Accomodation
[0.9999138]


## Flask Function Check

In [31]:
import requests

res = requests.post(
    'http://10.104.23.130:8891/classifyResponse',
    json={
        'sentence': 'I want to know about courses related to AI and Data Science',
        'threshold': 0.95
    }
)
res.json()

{'abv_thresh': True, 'label': 'Courses'}

In [33]:
res.json()['label']

'Courses'