In [None]:
import numpy as np
import json
import re
import tensorflow as tf
import random
import spacy
import nltk 
from nltk.tokenize import  word_tokenize

In [None]:
# Load the intent definitions (utterances and responses) from the JSON file.
# The file contains Turkish text, so decode it explicitly as UTF-8 instead of
# relying on the platform's default encoding.
with open('/content/drive/MyDrive/uygulamalar/chatbot/data_chatbot.json', encoding='utf-8') as f:
    intents = json.load(f)

In [None]:
def preprocessing(line):
    """Normalise a raw utterance before tokenisation.

    Every character that is not an ASCII letter, a Turkish letter
    (ç, ğ, ı, İ, ö, ş, ü in either case), or one of ``. ? ! '`` is replaced
    by a space, and runs of spaces are then collapsed into a single space.

    Args:
        line: The raw text (str).

    Returns:
        str: The cleaned text.
    """
    # `A-Z` (not `A-z`): the latter range also admits `[ \ ] ^ _` and the backtick.
    # Turkish letters are listed explicitly so dataset words are not broken apart.
    line = re.sub(r"[^a-zA-ZçÇğĞıİöÖşŞüÜ.?!']", ' ', line)
    line = re.sub(r'[ ]+', ' ', line)
    return line

In [None]:
# Walk the JSON intents once and build:
#   inputs     - every example utterance
#   targets    - the intent name for each utterance (parallel to `inputs`)
#   classes    - intent names in first-seen order, without duplicates
#   intent_doc - intent name -> list of candidate responses
inputs, targets = [], []
classes = []
intent_doc = {}

for intent in intents['intents']:
    label = intent['intent']

    if label not in classes:
        classes.append(label)
    intent_doc.setdefault(label, [])

    inputs.extend(intent['text'])
    targets.extend(label for _ in intent['text'])

    intent_doc[label].extend(intent['responses'])


In [None]:
# Show the example utterances collected from the intents file.
print(inputs)

['Merhaba', 'Selamun Aleykum', 'Selam', 'Merhabalar', 'Hey', 'Faturamın miktarını öğrenmek istiyorum', 'Son fatura bilgimi öğrenmek istiyorum', 'Faturamı öğrenmek istiyorum', 'Faturam ne kadar', 'Faturam kaç para', 'Paket bilgilerimi öğrenmek istiyorum', 'Paket kalan miktarını öğrenmek istiyorum', 'Paketimden kalan kullanımı öğrenmek', 'Paket kullanım miktarı', 'Yardımın için teşekkürler, hoşçakal', 'Teşekkür ederim, hoşçakal', 'Teşekkürler hoşça kalın', 'Teşekkürler güle güle', 'Teşekkürler güle güle', 'Paket satın almak istiyorum', 'Paket yenilemek istiyorum', 'Paket yapmak istiyorum ', 'Arıza var', 'İnternet sorunu var', 'İnternet yavaşlığı ', 'Hat çekmiyor', 'İnternet çekmiyor', 'İnternet ayarları', 'İnternet ayarlarımı yapmak istiyorum', 'Net ayarları']


In [None]:
def bag_of_words(s, words):
    """Encode sentence ``s`` as a binary bag-of-words vector over ``words``.

    The sentence is tokenised with NLTK, each token is lower-cased and stemmed,
    and position ``i`` of the result is 1 when ``words[i]`` equals one of the
    stems, otherwise 0.

    NOTE(review): ``stemmer`` is not defined anywhere in the visible notebook;
    it must be an object exposing ``stemWord`` before this function is called.

    Args:
        s: The sentence to encode (str).
        words: Sequence of vocabulary stems that defines the vector layout.

    Returns:
        numpy.ndarray: 0/1 vector with one entry per element of ``words``.
    """
    s_words = nltk.word_tokenize(s)
    # A set gives constant-time membership tests instead of a nested scan.
    stems = {stemmer.stemWord(word.lower()) for word in s_words}
    # The file imports numpy as `np`; the bare name `numpy` is not bound.
    return np.array([1 if w in stems else 0 for w in words])

In [None]:
def tokenize_data(input_list):
    """Fit a Keras tokenizer on the utterances and encode them as padded id sequences.

    Args:
        input_list: List of raw utterance strings.

    Returns:
        tuple: ``(tokenizer, input_seq)`` where ``tokenizer`` is the fitted
        ``Tokenizer`` and ``input_seq`` is the array returned by
        ``pad_sequences`` (sequences left-padded to a common length).
    """
    # Use Keras' default punctuation filter rather than `filters=''`; with an
    # empty filter, punctuation stays glued to words, so "teşekkürler," and
    # "teşekkürler" end up as two unrelated vocabulary entries.
    tokenizer = tf.keras.preprocessing.text.Tokenizer(oov_token='<unk>')

    tokenizer.fit_on_texts(input_list)

    input_seq = tokenizer.texts_to_sequences(input_list)

    input_seq = tf.keras.preprocessing.sequence.pad_sequences(input_seq, padding='pre')

    return tokenizer, input_seq

# preprocess input data
tokenizer, input_tensor = tokenize_data(inputs)

In [None]:
# Number of example utterances available for training.
print(len(inputs))

30


In [None]:
# Show the raw utterances again for reference.
print(inputs)

['Merhaba', 'Selamun Aleykum', 'Selam', 'Merhabalar', 'Hey', 'Faturamın miktarını öğrenmek istiyorum', 'Son fatura bilgimi öğrenmek istiyorum', 'Faturamı öğrenmek istiyorum', 'Faturam ne kadar', 'Faturam kaç para', 'Paket bilgilerimi öğrenmek istiyorum', 'Paket kalan miktarını öğrenmek istiyorum', 'Paketimden kalan kullanımı öğrenmek', 'Paket kullanım miktarı', 'Yardımın için teşekkürler, hoşçakal', 'Teşekkür ederim, hoşçakal', 'Teşekkürler hoşça kalın', 'Teşekkürler güle güle', 'Teşekkürler güle güle', 'Paket satın almak istiyorum', 'Paket yenilemek istiyorum', 'Paket yapmak istiyorum ', 'Arıza var', 'İnternet sorunu var', 'İnternet yavaşlığı ', 'Hat çekmiyor', 'İnternet çekmiyor', 'İnternet ayarları', 'İnternet ayarlarımı yapmak istiyorum', 'Net ayarları']


In [None]:
def create_categorical_target(targets):
    """One-hot encode intent labels.

    Labels receive consecutive integer ids in order of first appearance. The
    id list is printed, then converted to a one-hot int32 array.

    Args:
        targets: Iterable of intent labels, one per training utterance.

    Returns:
        tuple: ``(categorical_tensor, index_to_label)`` - the one-hot array and
        a dict mapping each integer id back to its label.
    """
    label_to_index = {}
    for trg in targets:
        # len() is evaluated before insertion, so new labels get the next free id.
        label_to_index.setdefault(trg, len(label_to_index))

    categorical_target = [label_to_index[trg] for trg in targets]
    print(categorical_target)

    categorical_tensor = tf.keras.utils.to_categorical(
        categorical_target,
        num_classes=len(label_to_index),
        dtype='int32',
    )
    index_to_label = {index: label for label, index in label_to_index.items()}
    return categorical_tensor, index_to_label

# preprocess output data
target_tensor, trg_index_word = create_categorical_target(targets)

[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6]


In [None]:
# Report the shapes of the encoded inputs and the one-hot targets.
print('input shape: {} and output shape: {}'.format(input_tensor.shape, target_tensor.shape))

input shape: (30, 5) and output shape: (30, 7)


In [None]:
# hyperparameters
# Data-derived sizes
vocab_size = len(tokenizer.word_index) + 1  # word_index ids start at 1, so add one slot for id 0
target_length = target_tensor.shape[1]      # width of the one-hot target rows

# Tunable settings
embed_dim = 512  # embedding vector size
units = 128      # LSTM units (per direction) and hidden Dense width
epochs = 5       # training epochs

In [None]:
# Vocabulary size passed to the Embedding layer.
print(vocab_size)

52


In [None]:
# build RNN Model with tensorflow
# Embedding -> bidirectional LSTM -> hidden Dense (ReLU) with dropout -> softmax over intents.
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(vocab_size, embed_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units, dropout=0.2)),
    tf.keras.layers.Dense(units, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(target_length, activation='softmax')
])

# `learning_rate` replaces the deprecated `lr` keyword, which triggers a
# deprecation warning in this notebook's recorded output.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
# Targets are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 512)         26624     
_________________________________________________________________
bidirectional (Bidirectional (None, 256)               656384    
_________________________________________________________________
dense (Dense)                (None, 128)               32896     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 7)                 903       
Total params: 716,807
Trainable params: 716,807
Non-trainable params: 0
_________________________________________________________________


  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [None]:
# Stop early when the training loss has not improved for 4 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(patience=4, monitor='loss')

# train the model
model.fit(
    x=input_tensor,
    y=target_tensor,
    epochs=epochs,
    callbacks=[early_stop],
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f00308fb910>

In [None]:
# `predict` needs input data; calling it with no arguments raises TypeError.
# Run the model on the encoded training utterances as a sanity check.
model.predict(input_tensor)

TypeError: ignored

In [None]:
# Inspect the word -> integer id mapping learned by the tokenizer.
print(tokenizer.word_index)

{'<unk>': 1, 'istiyorum': 2, 'öğrenmek': 3, 'paket': 4, 'i̇nternet': 5, 'güle': 6, 'teşekkürler': 7, 'miktarını': 8, 'faturam': 9, 'kalan': 10, 'hoşçakal': 11, 'yapmak': 12, 'var': 13, 'çekmiyor': 14, 'ayarları': 15, 'merhaba': 16, 'selamun': 17, 'aleykum': 18, 'selam': 19, 'merhabalar': 20, 'hey': 21, 'faturamın': 22, 'son': 23, 'fatura': 24, 'bilgimi': 25, 'faturamı': 26, 'ne': 27, 'kadar': 28, 'kaç': 29, 'para': 30, 'bilgilerimi': 31, 'paketimden': 32, 'kullanımı': 33, 'kullanım': 34, 'miktarı': 35, 'yardımın': 36, 'için': 37, 'teşekkürler,': 38, 'teşekkür': 39, 'ederim,': 40, 'hoşça': 41, 'kalın': 42, 'satın': 43, 'almak': 44, 'yenilemek': 45, 'arıza': 46, 'sorunu': 47, 'yavaşlığı': 48, 'hat': 49, 'ayarlarımı': 50, 'net': 51}


In [None]:
# Download the Punkt tokenizer data that NLTK's word_tokenize relies on.
import nltk
nltk.download('punkt')
  

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# Install the SpeechRecognition package (the recorded run resolved to version 3.8.1).
!pip install SpeechRecognition



Collecting SpeechRecognition
[?25l  Downloading https://files.pythonhosted.org/packages/26/e1/7f5678cd94ec1234269d23756dbdaa4c8cfaed973412f88ae8adf7893a50/SpeechRecognition-3.8.1-py2.py3-none-any.whl (32.8MB)
[K     |████████████████████████████████| 32.8MB 73kB/s 
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.8.1


In [None]:
# Install PyAudio (the recorded run built version 0.2.11 from source).
!pip install PyAudio


Collecting PyAudio
  Using cached https://files.pythonhosted.org/packages/ab/42/b4f04721c5c5bfc196ce156b3c768998ef8c0ae3654ed29ea5020c749a6b/PyAudio-0.2.11.tar.gz
Building wheels for collected packages: PyAudio
  Building wheel for PyAudio (setup.py) ... [?25l[?25hdone
  Created wheel for PyAudio: filename=PyAudio-0.2.11-cp37-cp37m-linux_x86_64.whl size=52575 sha256=e956649058a53d79ce144bcffd13e3f291d553f011a4b2a560edf20ec1ade660
  Stored in directory: /root/.cache/pip/wheels/f4/a8/a4/292214166c2917890f85b2f72a8e5f13e1ffa527c4200dcede
Successfully built PyAudio
Installing collected packages: PyAudio
Successfully installed PyAudio-0.2.11


In [None]:
# Install system audio packages via apt: ALSA and PortAudio development files, plus ffmpeg.
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg



Reading package lists... Done
Building dependency tree       
Reading state information... Done
libasound2-dev is already the newest version (1.1.3-5ubuntu0.5).
ffmpeg is already the newest version (7:3.4.8-0ubuntu0.2).
Suggested packages:
  portaudio19-doc
The following NEW packages will be installed:
  libportaudio2 libportaudiocpp0 portaudio19-dev
0 upgraded, 3 newly installed, 0 to remove and 39 not upgraded.
Need to get 184 kB of archives.
After this operation, 891 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libportaudio2 amd64 19.6.0-1 [64.6 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libportaudiocpp0 amd64 19.6.0-1 [15.1 kB]
Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 portaudio19-dev amd64 19.6.0-1 [104 kB]
Fetched 184 kB in 1s (178 kB/s)
Selecting previously unselected package libportaudio2:amd64.
(Reading database ... 160772 files and directories currently installed.)
Preparing to 

In [None]:
import speech_recognition as sr


In [None]:
# all imports
from io import BytesIO
from base64 import b64decode
from google.colab import output
from IPython.display import Javascript

# JavaScript source that record() displays in the notebook front end.
# It defines `record(time)`, which asks for microphone access, records with
# MediaRecorder for `time` milliseconds, and resolves with the captured audio
# as a data URL (FileReader.readAsDataURL).
# NOTE(review): `stream`, `recorder`, `chunks`, `blob` and `text` are assigned
# without declarations, and the media stream's tracks are never stopped after
# recording - confirm this is intended.
RECORD = """
const sleep  = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
  const reader = new FileReader()
  reader.onloadend = e => resolve(e.srcElement.result)
  reader.readAsDataURL(blob)
})
var record = time => new Promise(async resolve => {
  stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  recorder = new MediaRecorder(stream)
  chunks = []
  recorder.ondataavailable = e => chunks.push(e.data)
  recorder.start()
  await sleep(time)
  recorder.onstop = async ()=>{
    blob = new Blob(chunks)
    text = await b2text(blob)
    resolve(text)
  }
  recorder.stop()
})
"""

def record(sec=3):
  """Record audio in the browser and return it as raw bytes.

  Displays the RECORD JavaScript, calls its `record` function for
  ``sec + 1`` seconds, and base64-decodes the payload of the returned
  data URL (the part after the first comma).

  Args:
    sec: Requested recording length in seconds; one extra second is added.

  Returns:
    bytes: The decoded audio byte stream.
  """
  print("Speak Now...")
  display(Javascript(RECORD))
  duration_ms = (sec + 1) * 1000
  data_url = output.eval_js('record(%d)' % duration_ms)
  print("Done Recording !")
  payload = data_url.split(',')[1]
  return b64decode(payload)

In [None]:

# Install the Google Cloud Speech client library (the recorded run downloaded version 2.5.0).
!pip3 install google-cloud-speech

Collecting google-cloud-speech
[?25l  Downloading https://files.pythonhosted.org/packages/22/db/626e11ef366d5654a2c3dc80f8617e2fb7b0a26dd3fd6ff03318946a7bde/google_cloud_speech-2.5.0-py2.py3-none-any.whl (119kB)
[K     |████████████████████████████████| 122kB 8.0MB/s 
Collecting proto-plus>=1.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/8c/72/6f3f4cdc5bb0294f8d7f3f8aacb617b4c3cb17554ed78f7e28009162c795/proto_plus-1.19.0-py3-none-any.whl (42kB)
[K     |████████████████████████████████| 51kB 8.1MB/s 
[?25hCollecting libcst>=0.2.5
[?25l  Downloading https://files.pythonhosted.org/packages/0d/47/ad8ba60c667252be40be06073b58e3234e960ec70608579518e97b368994/libcst-0.3.19-py3-none-any.whl (513kB)
[K     |████████████████████████████████| 522kB 42.3MB/s 
Collecting pyyaml>=5.2
[?25l  Downloading https://files.pythonhosted.org/packages/7a/a5/393c087efdc78091afa2af9f1378762f9821c9c1d7a22c5753fb5ac5f97a/PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636kB)
[K     |█████

In [None]:
def response(sentence):
    """Choose a reply for ``sentence`` using the trained intent classifier.

    The sentence is encoded with the fitted Keras ``tokenizer`` so that it goes
    through the same lower-casing, splitting and out-of-vocabulary handling as
    the training utterances. (Looking up raw ``word_tokenize`` tokens in
    ``tokenizer.word_index`` missed every capitalised word, because the
    vocabulary keys are lower-case.)

    Args:
        sentence: Raw user utterance (str).

    Returns:
        tuple: ``(reply, intent)`` - a randomly chosen response for the
        predicted intent, and the predicted intent's name.
    """
    token_ids = tokenizer.texts_to_sequences([sentence])[0]
    if not token_ids:
        # Empty or punctuation-only input: feed a single unknown token so the
        # model never receives a zero-length sequence.
        token_ids = [tokenizer.word_index['<unk>']]

    # Add the batch dimension expected by the model: shape (1, sequence_length).
    sent_seq = tf.expand_dims(token_ids, 0)

    # predict the category of input sentences
    pred = model(sent_seq)
    pred_class = np.argmax(pred.numpy(), axis=1)
    intent = trg_index_word[pred_class[0]]

    # choose a random response for the predicted intent
    return random.choice(intent_doc[intent]), intent

# chat with bot
# Read user input until 'quit' (any letter case) is entered, printing the
# bot's reply and the predicted intent for each message.
print("Note: Enter 'quit' to break the loop.")
while True:
    user_text = input('You: ')
    if user_text.lower() == 'quit':
        break
    print('Bot: {} -- TYPE: {}'.format(*response(user_text)))
    print()

Note: Enter 'quit' to break the loop.


KeyboardInterrupt: ignored

In [None]:
def chat(threshold=0.70):
    """Interactive chat loop that only answers when the classifier is confident.

    Each message is encoded with the fitted ``tokenizer`` (the sequence format
    the Embedding/LSTM ``model`` was trained on), the class probabilities are
    printed, and a random response for the top intent is shown when its
    probability exceeds ``threshold``. Otherwise a fallback message is printed.
    Typing ``quit`` (any letter case) ends the loop.

    The earlier version relied on names this notebook never defines (``words``,
    ``labels``, ``data``, ``numpy``), on a ``tag`` key the intent data does not
    use, and on bag-of-words input that does not match the model.

    Args:
        threshold: Minimum probability of the top intent required to answer.
    """
    print("Chatbot ile konuşmaya başlayabilirsiniz (quit yazarak çıkabilirsiniz)!")
    unk_id = tokenizer.word_index['<unk>']
    while True:
        inp = input("You: ")
        if inp.lower() == "quit":
            break

        # Fall back to a single unknown token so the model never gets an empty sequence.
        token_ids = tokenizer.texts_to_sequences([inp])[0] or [unk_id]
        results = model.predict(np.asarray([token_ids]))[0]
        print(results)

        results_index = int(np.argmax(results))
        if results[results_index] > threshold:
            tag = trg_index_word[results_index]
            print(random.choice(intent_doc[tag]))
        else:
            print("Tam olarak anlayamadım")
chat()