In [20]:
import pickle

import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [21]:
# Load the labelled dataset; later cells read its 'query' (text) and 'when' (label) columns.
df = pd.read_csv('trail6.csv')

In [23]:
# Inspect how many rows fall under each 'when' label before encoding.
df['when'].value_counts()

when
no_time           27999
today              4000
time               4000
tomorrow           4000
no_timeno_time        1
Name: count, dtype: int64

In [24]:
# Encode the string labels in 'when' as integer class ids.
#
# The untouched strings are stashed in 'when_raw' the first time this cell runs,
# and the encoder is always fitted from that column. This makes the cell safe to
# re-run: without it, a second run would fit on the already-encoded integers,
# turning `label_encoder.classes_` into ints and breaking the index -> label
# lookup used at prediction time (and the pickled encoder).
#
# NOTE(review): the value counts above show a single 'no_timeno_time' row, which
# looks like a data-entry error. It is still encoded as its own class here —
# confirm and clean the source CSV if so.
if 'when_raw' not in df.columns:
    df['when_raw'] = df['when']
label_encoder = LabelEncoder()
df['when'] = label_encoder.fit_transform(df['when_raw'])

In [25]:
# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
train_data, test_data = train_test_split(
    df,
    random_state=42,
    test_size=0.2,
)

In [26]:
# Build the vocabulary from the training queries only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_data['query'])

# Vocabulary size handed to the Embedding layer: one slot per known word plus one extra.
total_words = 1 + len(tokenizer.word_index)

In [27]:
# Convert each split's queries to lists of integer word ids using the fitted tokenizer.
train_sequences, test_sequences = (
    tokenizer.texts_to_sequences(split['query'])
    for split in (train_data, test_data)
)

In [28]:
# Pad the training sequences to a common length, then pad the test sequences
# to that same width so both arrays share the same second dimension.
train_padded_sequences = pad_sequences(train_sequences)
test_padded_sequences = pad_sequences(
    test_sequences,
    maxlen=train_padded_sequences.shape[1],
)

In [45]:
# Padded sequence length (second axis of the training array); reused as `maxlen` / `input_length` elsewhere.
train_padded_sequences.shape[1]

32

In [29]:
# Distinct encoded class ids present in 'when' after label encoding.
df['when'].unique()

array([0, 3, 2, 4, 1])

In [30]:
# One-hot encode the integer class ids of each split; the width is the number
# of distinct classes in the full frame so both splits get the same shape.
train_labels, test_labels = (
    tf.keras.utils.to_categorical(split['when'], num_classes=len(df['when'].unique()))
    for split in (train_data, test_data)
)

In [31]:
# Text classifier: embedding -> single LSTM -> dropout -> softmax over the label classes.
model = Sequential([
    Embedding(total_words, 64, input_length=train_padded_sequences.shape[1]),
    LSTM(64),
    Dropout(0.2),
    # One output unit per distinct encoded label.
    Dense(len(set(df['when'])), activation='softmax'),
])

In [32]:
# Categorical cross-entropy pairs with the one-hot labels produced by `to_categorical`.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [33]:
# Train for two epochs. The held-out split is passed as validation data, so the
# same rows are later used by `model.evaluate` as well.
model.fit(
    train_padded_sequences,
    train_labels,
    validation_data=(test_padded_sequences, test_labels),
    epochs=2,
    verbose=1,
)

Epoch 1/2


Epoch 2/2


<keras.callbacks.History at 0x225deade410>

In [34]:
# Ad-hoc example queries used to spot-check the trained model.
new_queries = [
    "find a hospital in ranchi for tommorow 10 pm",
    "i have to refer my medical records please find some",
]

In [35]:
# Run the new queries through the same tokenizer and pad them to the training width.
new_sequences = tokenizer.texts_to_sequences(new_queries)
new_padded_sequences = pad_sequences(
    new_sequences,
    maxlen=train_padded_sequences.shape[1],
)

In [36]:
# Predict class probabilities, take the most likely class index per query in one
# batched argmax, and map each index back to its original label string.
predictions = model.predict(new_padded_sequences)
predicted_classes = [
    label_encoder.classes_[class_index]
    for class_index in tf.argmax(predictions, axis=1).numpy()
]



In [37]:
# Show the decoded label predicted for each example query.
predicted_classes

['no_time', 'no_time']

In [38]:
# Score the model on the held-out split; with metrics=['accuracy'] compiled in,
# the result is [loss, accuracy].
model.evaluate(test_padded_sequences, test_labels)



[0.00022131412697490305, 1.0]

In [39]:
# Persist the trained model to disk under 'when.keras'.
model.save('when.keras')

In [43]:
# Persist the fitted label encoder so that code loading the saved model can map
# predicted class indices back to the original 'when' strings.
# (`pickle` is imported in the notebook's top import cell.)
# Only unpickle this file from a trusted location: loading a pickle executes code.
with open('when_label_encoder.pickle', 'wb') as handle:
    pickle.dump(label_encoder, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [44]:
# Persist the fitted tokenizer so new queries can be tokenized with the same vocabulary.
with open('when_tokenizer.pickle', 'wb') as handle:
    handle.write(pickle.dumps(tokenizer, protocol=pickle.HIGHEST_PROTOCOL))