## Before Running

In [2]:
# Before running!
# !pip install tensorflow==2.10.1
# !pip install keras==2.10.0
# !pip install scapy==3.6.1

# You need to do this once
# spacy.cli.download("en_core_web_lg")

Collecting spacy
  Downloading spacy-3.6.1-cp310-cp310-win_amd64.whl (12.0 MB)
     ---------------------------------------- 0.0/12.0 MB ? eta -:--:--
      --------------------------------------- 0.2/12.0 MB 6.9 MB/s eta 0:00:02
     -- ------------------------------------- 0.8/12.0 MB 9.8 MB/s eta 0:00:02
     ---- ----------------------------------- 1.3/12.0 MB 10.4 MB/s eta 0:00:02
     ------ --------------------------------- 1.9/12.0 MB 11.3 MB/s eta 0:00:01
     ------- -------------------------------- 2.3/12.0 MB 10.6 MB/s eta 0:00:01
     ---------- ----------------------------- 3.2/12.0 MB 11.9 MB/s eta 0:00:01
     ------------- -------------------------- 4.0/12.0 MB 12.7 MB/s eta 0:00:01
     --------------- ------------------------ 4.8/12.0 MB 13.3 MB/s eta 0:00:01
     ----------------- ---------------------- 5.4/12.0 MB 13.3 MB/s eta 0:00:01
     ------------------- -------------------- 5.9/12.0 MB 13.0 MB/s eta 0:00:01
     ---------------------- ----------------- 6.8/1

## Imports

In [1]:
import spacy
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import pandas as pd
import pickle

# Load pre-trained spaCy model
nlp = spacy.load("en_core_web_lg")

In [60]:
# Read the data from the Excel file into a DataFrame
data = pd.read_excel("Questiondata.xlsx") 

# Rename headers
data.columns = ['Responses', 'Class']

data

Unnamed: 0,Responses,Class
0,What are the available accommodation options f...,Accomodation
1,Could you provide me with information about th...,Accomodation
2,What are the housing costs like for students w...,Accomodation
3,Can you recommend any off-campus accommodation...,Accomodation
4,What are the facilities and amenities offered ...,Accomodation
...,...,...
674,Bachelor of Zoology and Animal Science,Courses
675,Diploma of AI,Courses
676,Bachelor of AI,Courses
677,Masters of AI,Courses


In [61]:
data["Class"].value_counts()

Class
Courses         408
General          91
Accomodation     90
Activities       90
Name: count, dtype: int64

In [62]:
# Extract sentences and classes from the DataFrame
sentences = data["Responses"].tolist()
classes = data["Class"].tolist()

# Lowercase sentences
sentences = [sentence.lower() for sentence in sentences]

## PreProcessing

In [63]:
import re
import string

# Remove punctuation
processed_sentences = [sentence.translate(str.maketrans('', '', string.punctuation)) for sentence in sentences]
print(processed_sentences[np.random.randint(0, len(X))])
# Tokenize sentences and extract word vectors
X = np.array([nlp(sentence).vector for sentence in processed_sentences])

are there any information sessions about deakinres coming up


In [64]:
# Convert class labels to numerical labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(classes)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

### Saving the label encoder

In [7]:
import pickle
# Save the encoder
with open('label_encoder2.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)

# Load the encoder
with open('label_encoder2.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

## Training the model

In [65]:
# Callback
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# Build a neural network model
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(512, input_dim=X_train.shape[1], activation='relu'))
model.add(tf.keras.layers.Dense(256, input_dim=X_train.shape[1], activation='relu'))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(5, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(
    X_train, y_train, 
    epochs=15, batch_size=1, 
    verbose=1, 
    validation_data=(X_test, y_test),
    callbacks=[callback]
)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print("Test Accuracy:", accuracy)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Test Accuracy: 0.9264705777168274


In [9]:
# Save the entire model as a `.keras` zip archive.
model.save('Classification.keras')

In [24]:
model = tf.keras.models.load_model('Classification.keras')

In [66]:
sentence = nlp('Where can I stay at deakin?'.lower()).vector
predicted_labels = model.predict(np.array([sentence]))
predicted_label = label_encoder.inverse_transform(predicted_labels.argmax(axis=1))[0]
print(predicted_label)

predicted_prob = predicted_labels[0][predicted_labels.argmax(axis=1)]
print(predicted_prob)

Accomodation
[0.99999917]


In [68]:
sentence = nlp('Master of AI'.lower()).vector
predicted_labels = model.predict(np.array([sentence]))
predicted_label = label_encoder.inverse_transform(predicted_labels.argmax(axis=1))[0]
print(predicted_label)

predicted_prob = predicted_labels[0][predicted_labels.argmax(axis=1)]
print(predicted_prob)

Courses
[1.]


## Flask Function Check

In [31]:
import requests

res = requests.post(
    'http://10.104.23.130:8891/classifyResponse',
    json={
        'sentence': 'I want to know about courses related to AI and Data Science',
        'threshold': 0.95
    }
)
res.json()

{'abv_thresh': True, 'label': 'Courses'}

In [33]:
res.json()['label']

'Courses'

In [25]:
from keras.utils import pad_sequences
# Pad sequences to a fixed length
max_sequence_length = 20  # Choose an appropriate value
X = pad_sequences(X, maxlen=max_sequence_length, padding='post', dtype='float32')

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

In [32]:
from tensorflow.keras.layers import GRU, Dropout, Dense

mega_model = tf.keras.Sequential()
mega_model.add(GRU(128, input_shape=(20, X.shape[1],), return_sequences=True))
mega_model.add(GRU(64, return_sequences=True))
mega_model.add(Dropout(0.2))
mega_model.add(Dense(4, activation='softmax'))

mega_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

mega_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_1 (GRU)                 (None, 20, 128)           165120    
                                                                 
 gru_2 (GRU)                 (None, 20, 64)            37248     
                                                                 
 dropout_1 (Dropout)         (None, 20, 64)            0         
                                                                 
 dense_3 (Dense)             (None, 20, 4)             260       
                                                                 
Total params: 202,628
Trainable params: 202,628
Non-trainable params: 0
_________________________________________________________________


In [35]:
mega_model.fit(X_train, y_train, epochs=11, validation_data=(X_test, y_test))

Epoch 1/11


ValueError: in user code:

    File "c:\Users\sudak\anaconda3\envs\tf\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\sudak\anaconda3\envs\tf\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\sudak\anaconda3\envs\tf\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\sudak\anaconda3\envs\tf\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\sudak\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\sudak\anaconda3\envs\tf\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_3" is incompatible with the layer: expected shape=(None, 20, 300), found shape=(None, 300)


In [46]:
courseName = pd.read_csv('https://raw.githubusercontent.com/VediYD/pepper-bot/dev-gpt-dev/textbyID.csv')
courseName

Unnamed: 0,ID,CourseName,locationText
0,H911,Doctor of Medicine,This course is offered with blended delivery
1,M539,Executive Graduate Certificate of Sport Business,This course is offered Online only
2,M709,Executive Master of Sport Business,This course is offered Online only
3,E506,"Graduate Certificate of Adult, Vocational Educ...",This course is offered Online only
4,H575,Graduate Certificate of Advanced Nursing,This course is offered Online only
...,...,...,...
326,S369,Bachelor of Zoology and Animal Science,This course is offered at Geelong Waurn Ponds ...
327,A221,Diploma of Arabic,This course is offered at Melbourne Burwood Ca...
328,A222,Diploma of Chinese,This course is offered at Melbourne Burwood an...
329,A223,Diploma of Indonesian,This course is offered at Melbourne Burwood an...


In [47]:
courseName.to_csv('courseName.csv', index=False)