In [52]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

In [37]:
import pickle
import numpy as np

In [1]:
sample_list = ['Abdominal pain',
 'Abdominal redness',
 'Abdominal swelling',
 'Abnormal sweating',
 'Acne',
 'Allergy',
 'Anal Fissure',
 'Anal pain',
 'Anemia',
 'Anhedonia',
 'Ankle pain',
 'Anxiety',
 'Appendicitis',
 'Arm pain',
 'Arm swelling',
 'Arm weakness',
 'Armpit pain',
 'Armpit swelling',
 'Attention deficit',
 'Back pain',
 'Bad breath',
 'Bad or bitter taste',
 'Black or tarry stool',
 'Bladder Infection (UTI)',
 'Blood in stool',
 'Blood in urine',
 'Blurry vision',
 'Bronchitis',
 'Bulging eye',
 'Burning or painful urination',
 'Buttocks pain',
 'Calf pain',
 'Calf swelling',
 'Cheek pain',
 'Cheek swelling',
 'Chest pain',
 'Chills',
 'Confusion',
 'Constipation',
 'Cool bluish skin',
 'Cough',
 'COVID-19',
 'Decreased appetite',
 'Decreased hearing',
 'Decreased urination',
 'Decreased vision',
 'Dental pain',
 'Diarrhea',
 'Difficulty urinating',
 'Dizziness',
 'Drooping eyelid',
 'Dry mouth',
 'Dry mucous membranes',
 'Dry skin',
 'Ear discharge',
 'Ear pain',
 'Ear pressure',
 'Ear swelling',
 'Elbow pain',
 'Emotional stress',
 'Erectile dysfunction',
 'Excessive thirst',
 'Excessive urination',
 'Eye deviation',
 'Eye discharge',
 'Eye dryness',
 'Eye floaters',
 'Eye Infection',
 'Eye pain',
 'Eye redness',
 'Eyelid pain',
 'Eyelid redness',
 'Eyelid swelling',
 'Facial droop',
 'Facial lesions',
 'Facial numbness or tingling',
 'Facial pain',
 'Facial swelling',
 'Fainting (passing out)',
 'Fatigue',
 'Feeling cold',
 'Feeling down',
 'Feeling faint',
 'Fever',
 'Finger discoloration',
 'Finger pain',
 'Flank pain',
 'Flashing lights in vision',
 'Flatulence',
 'Food Poisoning',
 'Foot fungus',
 'Foot numbness or tingling',
 'Foot pain',
 'Foot redness',
 'Foot sores',
 'Foot swelling',
 'Forearm pain',
 'Foreign body in the eye',
 'Frequent burping',
 'Frequent night urination',
 'Frequent urination',
 'Gastroenteritis',
 'Genital lesions',
 'Goiter',
 'Groin pain',
 'Groin swelling',
 'Hair loss',
 'Hand numbness or tingling',
 'Hand pain',
 'Hand redness',
 'Hand swelling',
 'Headache',
 'Heart palpitations']


In [20]:
data = ' '.join(sample_list)

In [21]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])

In [103]:
pickle.dump(tokenizer,open('tokenizer.pkl','wb'))

In [27]:
sequences = tokenizer.texts_to_sequences([data])[0]

In [28]:
sequences[:20]

[14, 1, 14, 6, 14, 2, 36, 37, 38, 39, 22, 40, 22, 1, 41, 42, 43, 1, 44, 45]

In [29]:
vocab_size = len(tokenizer.word_index)+1

In [30]:
vocab_size

126

In [34]:
sequence_col = []
for i in range(1,len(sequences)):
    words_num = sequences[i-1:i+1]
    sequence_col.append(words_num)

In [42]:
X = []
y = []

for seq in sequence_col:
    X.append(seq[0])
    y.append(seq[1])

X = np.array(X)
y = np.array(y)

In [43]:
y = to_categorical(y, num_classes=vocab_size)
y

array([[0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [45]:
pred_model = Sequential([
    Embedding(vocab_size, 10, input_length=1),
    LSTM(1000, return_sequences=True),
    LSTM(1000),
    Dense(1000, activation="relu"),
    Dense(vocab_size, activation="softmax")
])

In [47]:
 pred_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 1, 10)             1260      
                                                                 
 lstm (LSTM)                 (None, 1, 1000)           4044000   
                                                                 
 lstm_1 (LSTM)               (None, 1000)              8004000   
                                                                 
 dense (Dense)               (None, 1000)              1001000   
                                                                 
 dense_1 (Dense)             (None, 126)               126126    
                                                                 
Total params: 13,176,386
Trainable params: 13,176,386
Non-trainable params: 0
_________________________________________________________________


In [51]:
model_checkpoint = ModelCheckpoint('model.h5', monitor='loss', verbose= 1, save_best_only=True, model='auto')
reduce_func = ReduceLROnPlateau(monitor='loss', factor=0.2, patience = 3, min_lr=0.0001, verbose =1 )

In [54]:
pred_model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001))

In [55]:
pred_model.fit(X,y, epochs=100, batch_size=64, callbacks=[model_checkpoint, reduce_func])

Epoch 1/100
Epoch 1: loss improved from inf to 4.83441, saving model to model.h5
Epoch 2/100
Epoch 2: loss improved from 4.83441 to 4.81746, saving model to model.h5
Epoch 3/100
Epoch 3: loss improved from 4.81746 to 4.77955, saving model to model.h5
Epoch 4/100
Epoch 4: loss improved from 4.77955 to 4.66912, saving model to model.h5
Epoch 5/100
Epoch 5: loss improved from 4.66912 to 4.49544, saving model to model.h5
Epoch 6/100
Epoch 6: loss improved from 4.49544 to 4.48415, saving model to model.h5
Epoch 7/100
Epoch 7: loss improved from 4.48415 to 4.42255, saving model to model.h5
Epoch 8/100
Epoch 8: loss improved from 4.42255 to 4.42183, saving model to model.h5
Epoch 9/100
Epoch 9: loss improved from 4.42183 to 4.39060, saving model to model.h5
Epoch 10/100
Epoch 10: loss improved from 4.39060 to 4.33681, saving model to model.h5
Epoch 11/100
Epoch 11: loss improved from 4.33681 to 4.29496, saving model to model.h5
Epoch 12/100
Epoch 12: loss improved from 4.29496 to 4.22654, sav

<keras.callbacks.History at 0x12325e18088>

In [56]:
pred_model.fit(X,y, epochs=100, batch_size=64, callbacks=[model_checkpoint, reduce_func])

Epoch 1/100
Epoch 1: loss improved from 1.83580 to 1.83034, saving model to model.h5
Epoch 2/100
Epoch 2: loss improved from 1.83034 to 1.82797, saving model to model.h5
Epoch 3/100
Epoch 3: loss improved from 1.82797 to 1.82065, saving model to model.h5
Epoch 4/100
Epoch 4: loss improved from 1.82065 to 1.80829, saving model to model.h5
Epoch 5/100
Epoch 5: loss improved from 1.80829 to 1.80267, saving model to model.h5
Epoch 6/100
Epoch 6: loss improved from 1.80267 to 1.79532, saving model to model.h5
Epoch 7/100
Epoch 7: loss improved from 1.79532 to 1.79209, saving model to model.h5
Epoch 8/100
Epoch 8: loss improved from 1.79209 to 1.78137, saving model to model.h5
Epoch 9/100
Epoch 9: loss improved from 1.78137 to 1.77750, saving model to model.h5
Epoch 10/100
Epoch 10: loss improved from 1.77750 to 1.77482, saving model to model.h5
Epoch 11/100
Epoch 11: loss improved from 1.77482 to 1.76259, saving model to model.h5
Epoch 12/100
Epoch 12: loss improved from 1.76259 to 1.75135,

<keras.callbacks.History at 0x122418a91c8>

In [57]:
pred_model.fit(X,y, epochs=100, batch_size=64, callbacks=[model_checkpoint, reduce_func])

Epoch 1/100
Epoch 1: loss improved from 1.36114 to 1.35504, saving model to model.h5
Epoch 2/100
Epoch 2: loss did not improve from 1.35504
Epoch 3/100
Epoch 3: loss improved from 1.35504 to 1.35043, saving model to model.h5
Epoch 4/100
Epoch 4: loss did not improve from 1.35043
Epoch 5/100
Epoch 5: loss improved from 1.35043 to 1.34793, saving model to model.h5
Epoch 6/100
Epoch 6: loss improved from 1.34793 to 1.34293, saving model to model.h5
Epoch 7/100
Epoch 7: loss improved from 1.34293 to 1.33880, saving model to model.h5
Epoch 8/100
Epoch 8: loss improved from 1.33880 to 1.33606, saving model to model.h5
Epoch 9/100
Epoch 9: loss did not improve from 1.33606
Epoch 10/100
Epoch 10: loss did not improve from 1.33606
Epoch 11/100
Epoch 11: loss improved from 1.33606 to 1.33270, saving model to model.h5
Epoch 12/100
Epoch 12: loss improved from 1.33270 to 1.33023, saving model to model.h5
Epoch 13/100
Epoch 13: loss improved from 1.33023 to 1.32544, saving model to model.h5
Epoch 1

<keras.callbacks.History at 0x122418da508>

In [82]:
pred_model.fit(X,y, epochs=200, batch_size=64, callbacks=[model_checkpoint, reduce_func])

Epoch 1/200
Epoch 1: loss improved from 1.16786 to 1.16603, saving model to model.h5
Epoch 2/200
Epoch 2: loss did not improve from 1.16603
Epoch 3/200
Epoch 3: loss improved from 1.16603 to 1.16565, saving model to model.h5
Epoch 4/200
Epoch 4: loss improved from 1.16565 to 1.16153, saving model to model.h5
Epoch 5/200
Epoch 5: loss did not improve from 1.16153
Epoch 6/200
Epoch 6: loss improved from 1.16153 to 1.16062, saving model to model.h5
Epoch 7/200
Epoch 7: loss did not improve from 1.16062
Epoch 8/200
Epoch 8: loss improved from 1.16062 to 1.16018, saving model to model.h5
Epoch 9/200
Epoch 9: loss improved from 1.16018 to 1.15721, saving model to model.h5
Epoch 10/200
Epoch 10: loss did not improve from 1.15721
Epoch 11/200
Epoch 11: loss improved from 1.15721 to 1.15570, saving model to model.h5
Epoch 12/200
Epoch 12: loss did not improve from 1.15570
Epoch 13/200
Epoch 13: loss improved from 1.15570 to 1.15327, saving model to model.h5
Epoch 14/200
Epoch 14: loss did not i

<keras.callbacks.History at 0x1232a773f48>

In [115]:
def pred_next_words(text):
    for _ in range(3):
        text_to_seq = tokenizer.texts_to_sequences([text])[0]
        text_to_seq = np.array(text_to_seq)
        # print(text_to_seq)
        predicted_word = []
        # print(pred_model.predict(text_to_seq))
        preds = np.argmax(pred_model.predict(text_to_seq), axis=-1)
        preds = np.argsort(pred_model.predict(text_to_seq), axis=-1)[:,-5:]
        # print(preds)
        for key,value in tokenizer.word_index.items():
            if value in preds:
                predicted_word.append(key)
                # break
        return predicted_word
        
    

In [119]:
word_to_search = "Eye"
next_predictions = pred_next_words(word_to_search)
[f"{word_to_search} {i}" for i in next_predictions]

['Eye pain', 'Eye redness', 'Eye frequent', 'Eye infection', 'Eye discharge']

In [121]:
word_to_search = input()
next_predictions = pred_next_words(word_to_search)
[f"{word_to_search} {i}" for i in next_predictions]

 Food


['Food blurry',
 'Food cool',
 'Food difficulty',
 'Food dizziness',
 'Food poisoning']