In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Install required packages

In [None]:
!pip install simpletransformers
!pip install transformers["ja"]
!pip install -U spacy
!python -m spacy download ja_core_news_lg
!pip install -U sentence-transformers

### Load requried packages

In [1]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from sentence_transformers import SentenceTransformer, util
from simpletransformers.seq2seq import Seq2SeqModel
import pandas as pd
import numpy as np
import spacy
import requests
import datetime as dt
import random
import time
import torch
from scipy.special import softmax
import logging
from sklearn.utils import shuffle
import pickle
import json
import os
import warnings
warnings.filterwarnings('ignore')

## Accessories

In [3]:
FILES_DIRECTORY = './drive/MyDrive/chatbot/'

### Data pre-processing

In [4]:
# read excel file
data = pd.read_excel(FILES_DIRECTORY + '/train_intent.xlsx')
data.head()
data = shuffle(data)
# Get all intents as list
intents = data['intent'].unique().tolist()
# map labels with index of the list
data['map labels']  = data.intent.map(lambda v: intents.index(v))
#Intents training data ["text", "labels"]
train_data_intents = data[['Q', 'map labels']].rename(columns={'Q': "text",
                                                      'map labels':"labels"})
train_data_general = data[data['intent']=='general-enquiry'][['Q', 'A']].rename(columns={'Q': "input_text", 
                                                                                 'A':"target_text"})


In [5]:
# Save intents
with open(FILES_DIRECTORY +'/intents.json', 'w') as file:
    json.dump(intents, file)

### Intent Recognition Model

#### Training the model

In [None]:
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)


#defining paramenters
model_type = "bert" 
model_name = "cl-tohoku/bert-base-japanese-whole-word-masking"
output_directory = FILES_DIRECTORY + '/models/model_intent/'
cuda_available = torch.cuda.is_available()
hide_progress = False

model_args = {
    "reprocess_input_data": True,
    "output_dir": output_directory,
    "overwrite_output_dir": True,
    "train_batch_size": 32,
    "num_train_epochs": 50,
    "save_eval_checkpoints": False,
    "save_steps": -1,
    "save_model_every_epoch": False,
    "use_multiprocessing": False,
    "manual_seed": 4,
    "no_cache": True,
    "evaluate_during_training_steps": 2000,
    "silent": hide_progress,
}


model = ClassificationModel(
    model_type,
    model_name,
    num_labels=len(intents),
    args=model_args,
    use_cuda=cuda_available
) 

#training the model
model.train_model(train_data_intents)

#### Loading saved model

In [7]:
# defining accessories
cuda_available = torch.cuda.is_available()
model_directory = FILES_DIRECTORY + "/models/model_intent"
model_args = FILES_DIRECTORY + "/models/model_intent/model_args.json" 
model_type =  "bert"

model_intent = ClassificationModel(
    model_type,
    model_name = model_directory,
    args=model_args,
    use_cuda=cuda_available,
)

with open(FILES_DIRECTORY + '/intents.json') as file:
   intents = json.load(file)

### Predictions

In [8]:
def predict_intent(sentence, model_intent):
  model_intent.args.silent= True
  model_intent.args.use_multiprocessing_for_evaluation = False 
  predictions, raw_outputs = model_intent.predict([sentence])
  probabilities = softmax(raw_outputs, axis=1)
  results = {'label': intents[predictions[0]], 
           'score': probabilities[0][predictions[0]] }
  return results

### Entity Extraction

In [9]:
def get_entities(text):
  nlp = spacy.load('ja_core_news_lg') # add this while initialize the main
  doc = nlp(text, disable=["tok2vec", "tagger", "parser", "attribute_ruler", "lemmatizer"])
  entities = {}
  for ent in doc.ents:
    entities[ent.text] = ent.label_

  return entities

### Sentence BERT Transformers

#### Queries embedding/training

In [None]:
model = SentenceTransformer('sentence-transformers/paraphrase-xlm-r-multilingual-v1')

# get list of training queries and responses
train_queries = train_data_general['input_text'].values.tolist()
train_responses = train_data_general['target_text'].values.tolist()

# encode queries
embeddings = model.encode(train_queries, convert_to_tensor=True)

emb_dir = FILES_DIRECTORY + "/models/model_general/"

# Create if directory doesn't exists
if not os.path.exists(emb_dir):
    os.mkdir(emb_dir)

#Store data & embeddings on disc
with open(emb_dir + 'embeddings.pkl', "wb") as fOut:
    pickle.dump({'queries': train_queries, 'embeddings': embeddings, 'responses': train_responses}, fOut, protocol=pickle.HIGHEST_PROTOCOL)

#### Loading Model with accessories

In [11]:
model_general = SentenceTransformer('sentence-transformers/paraphrase-xlm-r-multilingual-v1')

#Load data & embeddings from disc
emb_dir = FILES_DIRECTORY + "/models/model_general/embeddings.pkl"
with open(emb_dir, "rb") as fIn:
    stored_data = pickle.load(fIn)
    stored_queries = stored_data['queries']
    stored_embeddings = stored_data['embeddings']
    stored_responses = stored_data['responses']

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/paraphrase-xlm-r-multilingual-v1
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cuda


### Responses

In [12]:
def get_greeting_response():
  greeting_responses = ['おはよう～！今日も元気？', 
                        'こんにちは、元気ですか？',
                        'おはようございます、どうすればお手伝いできますか？',

                        ] 
  return random.choice(greeting_responses) # select random response, TODO: Add more entries 

def get_weather_response(user_query):
  date = '今日は、'
  place = ''
  days_list = ['月曜日', '火曜日', '水曜日', '木曜日', '金曜日', '土曜日', '日曜日']
  entities = get_entities(user_query)
  if 'GPE' in entities.values(): #TO DO: Improvement for weather forecasting etc..
    # get first location  
    location = [key for key in entities if (entities[key] == 'GPE')][0]
    place = f'の{location}は、'
  else:
    location = '東京'
  if 'DATE' in entities.values():
    #[Monday, Sunday, ..]
    days_list = ['月曜日', '火曜日', '水曜日', '木曜日', '金曜日', '土曜日', '日曜日']
    date_ent = [key for key in entities if (entities[key] == 'DATE')][0]
  else:
    date_ent = {}
    # today
  if '今日' in user_query:
    cnt = 0
    date = '今日は、'
  #tomorrow
  elif '明日' in user_query:
    cnt = 1
    date = '明日は、'
  # day after tomorrow
  elif '明後日' in user_query:
    cnt = 2
    date = '明後日は'
  elif '次の週' in user_query:
    cnt = 7
    date = '来週は、'
  elif date_ent in days_list:
    current_weekday = dt.datetime.today().weekday()
    updated_days_list = days_list[current_weekday:] + days_list[:current_weekday]
    cnt = updated_days_list.index(date_ent)
    date = f'{date_ent}は、'
  url = f"http://api.openweathermap.org/data/2.5/weather?q={location}&lang=ja&appid=1c412139fa61a76f27f9683cf5937117"
  response = requests.get(url)
  if response.status_code == 200:
    minimum_temperature = response.json()['main']['temp_min']
    max_temperature = response.json()['main']['temp_max']
    weather = response.json()['weather'][0]['description']
    return f'{date}{place}最高気温{max_temperature}度、最低気温{minimum_temperature}度、{weather}です。'
  else:
    # City not found!
    return "都市が見つかりません！"
  
def get_time_response():
  current_time = dt.datetime.now()
  hour = current_time.strftime("%H")
  minutes = current_time.strftime("%M")
  return f'今の時間は{hour}時{minutes}分だよ' 

def get_alarm_response(user_query):
  alarm_time = '30 秒'
  print(f'{alarm_time}だね！今から数えるよ、スタート！')
  # define the countdown func.
  def countdown(t):
    while t>-1:
        mins, secs = divmod(t, 60)
        timer = '{:02d}:{:02d}'.format(mins, secs)
        print('\r', timer, end="")
        time.sleep(1)
        t -= 1
  set_timer = 30     
  # function call
  countdown(int(set_timer))
  return #f'{alarm_time}だね！今から数えるよ、スタート！'

def get_date_response(user_query):
  today = dt.datetime.today()
  month = today.month
  day = today.day
  return f'今日は{month}月{day}日だよ！'

def get_response_common_enquiry(user_query):
  return 'おかえり！今日も楽しかったかな？'

def get_response_friend_enquiry(user_query):
  return 'おかえり！今日は{user_friend_name}と遊んだかな？'

def get_response_teacher_enquiry(user_query):
  return 'おかえり！今日は{user_teacher_name}先生と仲良くできた？'

def get_response_school_enquiry(user_query):
  return 'おかえり！今日は{user_aftershcool}の日だったかな？楽しかったかな？{robot_name}に聞かせてね！'

def get_response_robot_gender_enquiry(user_query):
  return 'みんな気になるよね～。あんまり教えたくはないんだけど、{robot_name}は{gender}なんだよね。'

def get_response_robot_birthplace_enquiry(user_query):
  return '{robot_name}は{birth_place}から来たんだよ！'

def get_response_robot_age_enquiry(user_query):
  return '{robot_name}は{user_age}だよ！{user_name}と同じかな？'

def get_response_robot_favorite_enquiry(user_query):
  return '{robot_name}は{robot_favorite}が好きなんだー！'

def get_response_song_enquiry(user_query):
  return '{robot_name}は歌を歌うの好きなんだ！{song_name}でも歌おうかなー！'

def get_general_response(user_query, model, stored_embeddings, stored_responses):
  query_embedding = model.encode([user_query], convert_to_tensor=True,  show_progress_bar= False)
  hits = util.semantic_search(query_embedding, stored_embeddings, top_k=1)
  hits = hits[0]
  if hits[0]['score'] > 0.3:
    response = stored_responses[hits[0]['corpus_id']]
  else:
    response = 'すみません、わかりません！'
  return response

#TO DO: DB integration + add  new intent responses ....

## Actions

In [13]:
def action(user_query, intent, score, model, stored_embeddings, stored_responses):
  if (score < 0.4):
    return '申し訳ありませんが、わかりません。'
  elif (intent == 'time-enquiry'):
    return get_time_response()
  elif (intent == 'weather-enquiry'):
    return get_weather_response(user_query)
  elif (intent == 'alarm-enquiry'):
    return get_alarm_response(user_query)
  elif (intent == 'date-enquiry'):
    return get_date_response(user_query)
  elif (intent == 'response-common-enquiry'):
    return get_response_common_enquiry(user_query)
  elif (intent == 'response-friend-enquiry'):
    return get_response_friend_enquiry(user_query)
  elif (intent == 'response-teacher-enquiry'):
    return get_response_teacher_enquiry(user_query)
  elif (intent == 'response-school-enquiry'):
    return get_response_school_enquiry(user_query)
  elif (intent == 'robot-gender-enquiry'):
    return get_response_robot_gender_enquiry(user_query)
  elif (intent == 'robot-birthplace-enquiry'):
    return get_response_robot_birthplace_enquiry(user_query)
  elif (intent == 'robot-age-enquiry'):
    return get_response_robot_age_enquiry(user_query)
  elif (intent == 'robot-favorite-enquiry'):
    return get_response_robot_favorite_enquiry(user_query)
  elif (intent == 'song-enquiry'):
    return get_response_song_enquiry(user_query)
  elif (intent == 'general-enquiry'):
    return get_general_response(user_query, model, stored_embeddings, stored_responses)  
  else:
    return 'This feature is coming soon!'
#TO DO: Add more actions ...

## Chatbot -- Interface

In [14]:
# ChatBot
bot_name = 'bot'
user_name = 'user name'
print("let's chat! Type quit to exit.")
while True:
  sentence = input('you: ')
  if sentence =='quit':
    break
  result = predict_intent(sentence, model_intent)
  response = action(sentence, result['label'], result['score'], model_general, stored_embeddings, stored_responses)
  # replace @user with user name
  response = response.replace('@ユーザ', user_name)
  print(f"{bot_name}: {response}")

let's chat! Type quit to exit.
you: うん。あんまりできなかった。


INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


bot: 難しかったんだね。わからなかったところは見直しておくといいよ。
you: 明日の品川の天気は？


INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


bot: 都市が見つかりません！
you: 明後日の品川の天気教えて


INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


bot: 都市が見つかりません！
you: 天気はどう？


INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


bot: 今日は、最高気温289.2度、最低気温285.59度、薄い雲です。
you: 今日の天気はどう？


INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


bot: 今日は、最高気温289.2度、最低気温285.59度、薄い雲です。
you: どう？おいしいでしょ。


INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


bot: うん。すごくおいしいよ。user nameーちゃんはお料理上手だね。
you: 計算問題が苦手だな


INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


bot: やさしい問題をまずはゆっくり解いてみよう！繰り返しやるとだんだんできるようになるよ。
you: トイレ行かない


INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


bot: 眠ってるときにトイレに行きたくなるかもしれないから、先に行ってから寝ようね。
you: もったいないって何？


INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


bot: 何か食べ残したり、使い残したりするとむだになってしまうから、そうならないようにしようねということだよ。
you: quit


### Prediction on whole General Data

In [21]:
# For the purpose of evaluation ... 
res_list = []
for inp in train_data_general['input_text']:
  res_list.append(get_general_response(inp, model, stored_embeddings, stored_responses))

train_data_general['predicted'] = res_list
# Save predicted response of general to excel
train_data_general.to_excel('predicted.xlsx')