In [1]:
import json

import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Embedding, LSTM, Dense,Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [2]:
# Load the labelled query dataset; later cells read its 'query' and 'action' columns.
df = pd.read_csv("short.csv")

In [3]:
# Class-balance check: number of rows carrying each action label.
df["action"].value_counts()

action
find_doctor         4000
find_medicalshop    4000
find_hospital       4000
find_lab            4000
book_appointment    4000
my_appointments     4000
my_records          4000
Name: count, dtype: int64

In [4]:
# Keep the original string labels in their own column so this cell is idempotent:
# re-running it always fits the encoder on the action *names*, never on integers
# produced by a previous run (which would wipe the names from `classes_`).
if 'action_name' not in df.columns:
    df['action_name'] = df['action']

# Map each action name to an integer id; `label_encoder.classes_[id]` recovers the name.
label_encoder = LabelEncoder()
df['action'] = label_encoder.fit_transform(df['action_name'])

In [5]:
# Hold out 20% of the rows for evaluation. Stratifying on the label keeps the
# per-class proportions the same in both splits; random_state pins the shuffle.
train_data, test_data = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['action'],
)

In [6]:
# Build the vocabulary from the training queries only, so no test-set words leak in.
# The reserved out-of-vocabulary token gives words unseen at fit time a real index
# instead of letting texts_to_sequences drop them silently.
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(train_data['query'])

# +1 because word indices start at 1; index 0 is left free for padding.
total_words = len(tokenizer.word_index) + 1

In [7]:
# Convert the queries of each split into lists of vocabulary indices
# using the tokenizer fitted on the training queries.
train_sequences, test_sequences = (
    tokenizer.texts_to_sequences(split['query'])
    for split in (train_data, test_data)
)

In [8]:
# With no maxlen given, the training sequences are padded to a common length.
train_padded_sequences = pad_sequences(train_sequences)

# Force the test sequences to the same width the model will be built for.
max_len = train_padded_sequences.shape[1]
test_padded_sequences = pad_sequences(test_sequences, maxlen=max_len)

In [9]:
# Distinct values currently stored in the 'action' column.
df["action"].unique()

array([1, 4, 2, 3, 0, 5, 6])

In [10]:
# One-hot encode the integer labels of both splits; the vector width is the
# number of distinct actions in the full frame so both splits share one layout.
num_classes = len(df['action'].unique())
train_labels, test_labels = (
    tf.keras.utils.to_categorical(split['action'], num_classes=num_classes)
    for split in (train_data, test_data)
)

In [11]:
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# masks and batch shuffling are repeatable after Restart & Run All.
# NOTE(review): bit-exact GPU runs may additionally need op determinism enabled.
tf.keras.utils.set_random_seed(42)

# Query classifier: token ids -> 64-d embeddings -> LSTM summary -> dropout -> softmax.
model = Sequential([
    # One embedding row per vocabulary index; input width equals the padded length.
    Embedding(total_words, 64, input_length=train_padded_sequences.shape[1]),
    # Only the final LSTM state is passed on (return_sequences is left at its default).
    LSTM(64),
    Dropout(0.2),
    # One output unit per distinct action value in the frame.
    Dense(len(set(df['action'])), activation='softmax'),
])

In [12]:
# Categorical cross-entropy matches the one-hot label vectors fed to fit().
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [13]:
# Train for two epochs, reporting loss/accuracy on the held-out split after each.
# NOTE(review): the held-out test split doubles as validation data here.
model.fit(
    x=train_padded_sequences,
    y=train_labels,
    epochs=2,
    verbose=1,
    validation_data=(test_padded_sequences, test_labels),
)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x262ae354f50>

In [24]:
# Queries to classify with the trained model.
# NOTE(review): both entries are empty strings, so the model will only see padding;
# replace them with real query text for a meaningful prediction.
new_queries = ["", ""]

In [25]:
# Apply the same preprocessing as the training data: tokenize with the fitted
# tokenizer, then pad to the width the model was built for.
new_sequences = tokenizer.texts_to_sequences(new_queries)
new_padded_sequences = pad_sequences(
    new_sequences,
    maxlen=train_padded_sequences.shape[1],
)

In [26]:
# Score matrix from the softmax layer: one row per query, one column per encoded action.
predictions = model.predict(new_padded_sequences)

# Pick the highest-scoring column of every row in one NumPy call (instead of a
# TensorFlow op per row), then map the encoded ids back to the action names.
predicted_classes = label_encoder.classes_[predictions.argmax(axis=1)].tolist()



In [27]:
# Display the decoded action name predicted for each entry of new_queries.
predicted_classes

['find_doctor', 'find_doctor']

In [18]:
# Score the trained model on the held-out split; with the compile settings used
# above the result is [loss, accuracy].
model.evaluate(x=test_padded_sequences, y=test_labels)



[0.78941810131073, 0.6489285826683044]

In [19]:
# Persist the trained network.
model.save('action.keras')

# The model alone cannot serve predictions: inference also needs the exact
# vocabulary, the id -> action-name mapping and the padded sequence length.
# Save them next to the model so it is usable outside this kernel.
with open('action_tokenizer.json', 'w', encoding='utf-8') as tokenizer_file:
    tokenizer_file.write(tokenizer.to_json())

with open('action_meta.json', 'w', encoding='utf-8') as meta_file:
    json.dump(
        {
            'classes': label_encoder.classes_.tolist(),
            'max_len': int(train_padded_sequences.shape[1]),
        },
        meta_file,
    )