In [36]:
import numpy as np
import pandas as pd
import re
import jieba
from datetime import datetime, timedelta
from dateutil.parser import parse
import jieba.posseg as psg
import requests
import json
import pygame
from aip import AipSpeech

In [56]:
#DaBaiChatBot
'''
属性说明:
self.state1_i: 一个四维坐标，用list存储，表示当前槽位填充情况
self.intent1_i: 当前的用户意图
self.slot: 槽位信息，用字典存储，共四个key，依次是——department,hospital,time,position
self.action: 机器人的动作，包括继续提问、返回号源信息、处理未知意图
self.userPosition:用户的地址，一个字符串，格式为省市（市区） 
self.countPosition:调用askPosition()函数的次数
self.countTime:调用askTime()函数的次数
self.countHospital:调用askHospital()函数的次数
————————————————————————————————————————
函数说明:
play(self, filename):输出返回结果的语音信息
record(self,word,filename):将查询结果写入.mp3文件
NUL(self,sentence):NUL模块，理解用户语义
DST(self,intent_i,state_i):DST模块,确定当前用户的意图和槽位状态
DPL(self):DPL模块,确定下一步动作
slot_fulfill(self, sentence, state_i):提取槽位信息
diseaseToDepartment(self,description):根据症状判断科室，description表示描述症状的语句
getTime(self,sentence):从用户语句中提取挂号时间
getPosition(self,sentence):从用户语句中提取医生职位
getHospital(self,sentence):从用户语句中提取医院名称
getDepartment(self,sentence):从用户语句中提取科室名称
getSources(self,v3,data):根据槽位信息查询号源
showRegristration(self):返回查询结果
————————————————————————————————————————
必要的文件:
THUOCL_medical.txt —— 医学词库
症状关键词和疾病名称.txt —— 描述症状的关键词，按科室划分
————————————————————————————————————————
intent：三个
time：时间/号源已过期
响应函数：！！！！！！！！！！！！！！
————————————————————————————————————————
用数字表示职位高低!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
主任医师：1
副主任医师、副教授：2
主治医师、医师、暂无：3
        
'''
class DaBaiChatBot():
    def __init__(self,userPosition):
        #定义所需要的属性
        #注意下标：0——department相关,1——hospital,2——time,3——position
        self.state1_i = [0,0,0,0]
        self.intent1_i = None 
        self.slot = {
            'department':None,
            'hospital':None,
            'time':None,
            'position':None
        }
#         self.action = {
#             'askDepartment':self.askDepartment(),
#             'askHospital':self.askHospital(),
#             'askTime':self.askTime(),
#             'askPosition':self.askPosition(),
#             'showRegistration ':self.showRegistration(), #展示挂号信息 time=当天没有号源/department=推荐科室：科室名称
#             'bePerplexed':self.bePerplexed(), #听不懂你在说什么
#             'adviceDepartment': self.adviceDepartment() #None的情况
#         }
        self.action = ['askDepartment','askHospital','askTime','askPosition','showRegistration','bePerplexed','adviceDepartment']
        self.userPosition = userPosition
        self.countHospital = 0
        self.countPosition = 0
        self.countTime = 0
        
    def play(self, filename):
        #mixer的初始化
        pygame.mixer.init()  
        #载入一个音乐文件用于播放
        music = pygame.mixer.music.load('audio/'+str(filename))
        #检查是否正在播放音乐
        if pygame.mixer.music.get_busy()==False: 
            #开始播放音乐流  
            pygame.mixer.music.play()  
    
    def record(self, word, filename):
        APP_ID = '26338036'
        API_KEY = 'PncYxkfOrVIHfjLGhNgpfY2w'
        SECRET_KEY = 'nQEWzx9lBjC7RXn8jORp4nRvs7LbpAne'
        client = AipSpeech(APP_ID, API_KEY, SECRET_KEY) 
        result  = client.synthesis(word, 'zh', 1, {'vol': 5,'per':0}) 
        if not isinstance(result, dict):
            with open('audio/'+str(filename)+'.mp3', 'wb') as f:
                f.write(result)
    
    #NLU模块：识别用户语义
    def NLU(self, sentence):
    
        intent_i = None
        state_i = [0, 0, 0, 0]
        # 返回值为 state_i, intent_i
        return self.intent_recognization(sentence, state_i, intent_i)
    
    #DST模块：确定当前用户的意图和槽位状态
    def DST(self,intent_i,state_i):
        #更新用户的意图
        self.intent1_i = intent_i
        #更新用户的槽位填充情况：共有4个槽位
        self.state1_i = [self.state1_i[n] + state_i[n] for n in range(4)]
        #如果有槽位数值大于一的情况，说明用户再次提及或者修改了槽的内容，不考虑前后不一致的情况
        for n in range(4):
            if self.state1_i[n] > 1:
                self.state1_i[n] = 1
    
    #DPL模块：确定下一步动作
    def DPL(self):
        next_action = ''
        #判断当前意图,如果为空，说明当前意图不明
        if self.intent1_i == '不挂号':
            next_action = 'bePerplexed'
            return next_action
        #如果意图不为空，判断槽位填充情况
        elif self.intent1_i == '问诊':
            next_action = 'adviceDepartment'
            return next_action
        #如果槽位已填充完整，给出号源信息
        if 0 not in self.state1_i:
            next_action =  'showRegistration'
            return next_action
        #如果槽位未填充完整
        else:
            index = self.state1_i.index(0)
            actions = self.action
            next_action = actions[index]
            return next_action
        
        
    #NLG模块整合：完成用户动作
    def NLG(self,next_action):
        if next_action == 'askDepartment':
            return self.askDepartment()
        elif next_action == 'askHospital':
            return self.askHospital()
        elif next_action == 'askTime':
            return self.askTime()
        elif next_action == 'askPosition':
            return self.askPosition()
        elif next_action == 'showRegistration':
            return self.showRegistration()
        elif next_action == 'bePerplexed':
            return self.bePerplexed()
        elif next_action == 'adviceDepartment':
            return self.adviceDepartment()
    
    
    # 意图识别：挂号/症状识别/不挂号
    def intent_recognization(self, sentence, state_i, intent_i):

        order_expression = ["[挂](.*?)[的号]", "[挂号](.*?)", "[看](.*?)", "[查](.*?)", "[预约](.*?)", "[挂](.*?)", "[在](.*?)[预约]", "[在](.*?)[挂号]"]
        diagnosis_expression = ["(.*?)[什么]", "(.*?)[哪]", "(.*?)[怎样]","[治疗](.*?)"]

        for ex in order_expression:
            ex = re.compile(ex)
            sub_sentence = re.findall(ex, sentence)
            # 包含挂号关键词：挂号
            if len(sub_sentence) > 0:
                intent_i = '挂号'
                state_i = self.slot_fulfill(sentence, state_i)
            # 不包含挂号关键词
            else:
                state_i = self.slot_fulfill(sentence, state_i)
                # 包含槽位信息：挂号
                if state_i.any() != 0:
                    intent_i = '挂号'
                # 不包含槽位信息：不挂号
                else:
                    intent_i = '不挂号'
                    for ex in diagnosis_expression:
                        ex = re.compile(ex)
                        sub_sentence = re.findall(ex, sentence)
                         # 包含症状：推荐科室
                        if len(sub_sentence) > 0:
                            # 可以判断科室
                            intent_i = '问诊'
                            self.slot["department"] = self.diseaseToDepartment(sentence)
                            state_i[0] = 1

            return state_i, intent_i
    
    # 调用四个函数，返回槽位信息
    def slot_fulfill(self, sentence, state_i):
        self.getDepartment(sentence)
        # 这里的area是用户界面获取的地理位置信息
        self.getHospital(sentence)
        self.getTime(sentence)
        self.getPosition(sentence)

        # 得到当前轮的状态
        state_i = np.array([self.slot["department"]!=None, self.slot["hospital"]!=None, self.slot["time"]!=None, self.slot["position"]!=None]).astype(int)

        return state_i
    
    # 获取时间信息
    def getTime(self, sentence):

        jieba.add_word("大大后天")
        datetime_now =datetime.now()

        # 预定义两个词典，中文数词和量纲转化成数字
        UTIL_CN_NUM = {
            '零': 0, '一': 1, '二': 2, '两': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
            '0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9,
        }
        UTIL_CN_UTIL = {'十': 10, '百': 100, '千': 1000, '万': 10000}

        #对日期有效性进行判断的函数
        def check_time_valid(word):

            m = re.match('\d+$', word)     # match()匹配成功返回对象，否则返回None，
            if m:                 
                # 当正则表达式匹配成功时，判断句子的长度是否小于等于6，如果小于等于6，则返回None
                 # 小于等于6的意思是指非准确日期，比如2020
                if len(word) <= 6:
                    return None

            #把“号”和“日”统一替换为日（如果号/日 后面还跟着莫名其妙的数字都抹掉）
            word_1 = re.sub('[号|日]\d+$', '日', word)  
            if word_1 != word:      
                # 如果清洗出来的句子与原句子不同，则递归调用
                return check_time_valid(word_1)
            else:       
                # 如果清洗出来的句子与原句子相同，则返回任意一个句子
                return word_1   

        #解析年份的函数，将中文或者阿拉伯数字统一转换为阿拉伯数字的年份
        def year2dig(year):

            res = ''
            for item in year:
                # 循环遍历这个年份的每一个字符
                if item in UTIL_CN_NUM.keys():
                    # 如果这个字在UTIL_CN_NUM中，则转换为相应的阿拉伯数字
                    res = res + str(UTIL_CN_NUM[item])
                else:
                    # 否则直接相加
                    res = res + item

            m = re.match("\d+", res)
            if m:
                # 当m开头为数字时，执行下面操作，否则返回None
                if len(m.group(0)) == 2:
                    #把两位的年份表达转化为四位，eg“一五年”实际指的是2015
                    return int(datetime.today().year / 100) * 100 + int(m.group(0))
                else:
                    # 否则直接返回该年份
                    return int(m.group(0))
            else:
                return None
        #year2dig函数的处理效果：
        #year2dig("一三年") ，year2dig("一三") 结果：2013
        #year2dig("二零一五年") ，year2dig("二零一五") 结果：2015
        #year2dig("2019") 结果：2019

        #对除年份以外的其他时间的解析
        def cn2dig(src):

            if src == "":
                # 如果src为空，直接返回None
                return None
            m = re.match("\d+", src)
            if m:
                # 如果m是数字则直接返回该数字
                return int(m.group(0))
            rsl = 0
            unit = 1
            for item in src[:: -1]:
                # 从后向前遍历src
                if item in UTIL_CN_UTIL.keys():
                    # 如果item在UTIL_CN_UTIL中，则unit为这个字转换过来的阿拉伯数字
                    # 如src为"三十"，第一个item为"十"，unit为10
                    unit = UTIL_CN_UTIL[item]
                elif item in UTIL_CN_NUM.keys():
                    # 如果item不在UTIL_CN_UTIL而在UTIL_CN_NUM中，则转换为相应的阿拉伯数字并且与unit相乘
                    # 如"三十"，第二个字为"三"，num为3，rsl为30
                    num = UTIL_CN_NUM[item]
                    rsl += num * unit
                else:
                    # 如果都不在，则不是数字，返回None
                    return None

            if rsl < unit:
                # 如"十五"，先执行上面的elif，即rsl = 5，再执行if，即unit = 10，这时rsl < unit，那么执行相加操作
                rsl += unit

            return rsl
        #cn2dig函数的作用：中文表达转数字
        #cn2dig("二十三")  结果：23

        #将提取到的文本日期串进行时间转换
        def parse_datetime(msg):

            if msg is None or len(msg) == 0:
                # 如果之前清洗失误或者其他原因造成的句子为空，则返回None
                return "句子为空"

            #通过正则表达式将日期串进行切割，分成'年''月''日''时''分''秒'等具体维度，
            #然后针对每个子维度单独再进行识别

            #r表示原生字符串
            m = re.match(r"([0-9零一二两三四五六七八九十]+ 年)? ([0-9一二两三四五六七八九十]+ 月)? "
                             r"([0-9一二两三四五六七八九十]+ [号日])? ([上下午]+)?"
                             r"([0-9零一二两三四五六七八九十百]+[点:.时])?([0-9零一二三四五六七八九十百]+ 分?)?"
                             r"([0-9零一二三四五六七八九十百]+ 秒)?", msg)
            if m and m.group(0) is not None:
                res = {
                    'year': m.group(1),
                    "month": m.group(2),
                    "day": m.group(3),
                    "ampm":m.group(4),
                    "hour": m.group(5) if m.group(5) is not None else '00',                
                    "minute": m.group(6) if m.group(6) is not None else '00',
                    "second": m.group(7) if m.group(7) is not None else '00',
                }
                params = {}
                for name in res.keys():
                    if res[name] is not None and len(res[name]) != 0:
                        if name == 'year':
                            # 如果是年份，tmp就进入year2dig函数
                            tmp = year2dig(res[name][: -1])
                        else:
                            # 否则就是其他时间，则进入cn2dig函数
                            tmp = cn2dig(res[name][: -1])
                        if tmp is not None:
                            # 当tmp之中存在阿拉伯数字的时候，params就为该tmp
                            params[name] = int(tmp)

                    # 使用今天的时间格式，然后将数字全部替换为params[]中的内容
                target_date = datetime.today().replace(**params)

                #判断是不是今明两天，如果是，则上下午可能需要调整
                if res["ampm"] is None:
                    #如果是今天
                    #因为涉及到“4”和“04”的问题，所以对1-9月和10-12月分开讨论
                    if datetime_now.month<10:
                        if res["year"]==str(datetime_now.year)+" 年" and res["month"]=="0"+str(datetime_now.month)+" 月" \
                        and res["day"]==str(datetime_now.day)+" 日":
                            if datetime_now.hour<11 or (datetime_now.hour==11 and datetime_now.minute<=30):
                                res["ampm"]="上午"
                            elif datetime_now.hour<17 or (datetime_now.hour==17 and datetime_now.minute<=30):
                                res["ampm"]="下午"
                            else:
                                res["ampm"]="今天号源已过期"
                        else:
                            res["ampm"]="上午"
                    else:
                        if res["year"]==str(datetime_now.year)+" 年" and res["month"]==str(datetime_now.month)+" 月" and res["day"]==str(datetime_now.day)+" 日":
                            if datetime_now.hour<11 or (datetime_now.hour==11 and datetime_now.minute<=30):
                                res["ampm"]="上午"
                            elif datetime_now.hour<17 or (datetime_now.hour==17 and datetime_now.minute<=30):
                                res["ampm"]="下午"
                            else:
                                res["ampm"]="今天号源已过期"
                        else:
                            res["ampm"]="上午"
                return target_date.strftime("%Y-%m-%d")+ " "+ res["ampm"]
            else:
                m = re.match("([0-9零一二两三四五六七八九十]+年)?([0-9一二两三四五六七八九十]+月)?([0-9一二两三四五六七八九十]+[号日])?([上中下午晚早]+)?([0-9零一二两三四五六七八九十百]+[点:\.时])?([0-9零一二两三四五六七八九十百]+分)?([0-9零一二三四五六七八九十百]+秒)?",msg)
            if m and m.group(0) is not None:
                res = {
                    'year': m.group(1),
                    "month": m.group(2),
                    "day": m.group(3),
                    "ampm": m.group(4),
                    "hour": m.group(5) if m.group(5) is not None else '00',                
                    "minute": m.group(6) if m.group(6) is not None else '00',
                    "second": m.group(7) if m.group(7) is not None else '00',
                }
                params = {}

                for name in res.keys():
                    #print(name)
                    if res[name] is not None and len(res[name]) != 0:
                        if name == 'year':
                            # 如果是年份，tmp就进入year2dig函数
                            tmp = year2dig(res[name][: -1])
                            #print(tmp)
                        else:
                            # 否则就是其他时间，则进入cn2dig函数
                            tmp = cn2dig(res[name][: -1])
                        if tmp is not None:
                            # 当tmp之中存在阿拉伯数字的时候，params就为该tmp
                            params[name] = int(tmp)

                    # 使用今天的时间格式，然后将数字全部替换为params[]中的内容
                target_date = datetime.today().replace(**params)

                if res["ampm"] is None:
                    #如果是今天
                    #因为涉及到“4”和“04”的问题，所以对1-9月和10-12月分开讨论
                    if datetime_now.month<10:
                        if res["year"]==str(datetime_now.year)+" 年" and res["month"]=="0"+str(datetime_now.month)+" 月" \
                        and res["day"]==str(datetime_now.day)+" 日":
                            if datetime_now.hour<11 or (datetime_now.hour==11 and datetime_now.minute<=30):
                                res["ampm"]="上午"
                            elif datetime_now.hour<17 or (datetime_now.hour==17 and datetime_now.minute<=30):
                                res["ampm"]="下午"
                            else:
                                res["ampm"]="今天号源已过期"
                        else:
                            res["ampm"]="上午"
                    else:
                        if res["year"]==str(datetime_now.year)+" 年" and res["month"]==str(datetime_now.month)+" 月" and res["day"]==str(datetime_now.day)+" 日":
                            if datetime_now.hour<11 or (datetime_now.hour==11 and datetime_now.minute<=30):
                                res["ampm"]="上午"
                            elif datetime_now.hour<17 or (datetime_now.hour==17 and datetime_now.minute<=30):
                                res["ampm"]="下午"
                            else:
                                res["ampm"]="今天号源已过期"
                        else:
                            res["ampm"]="上午"
                return target_date.strftime("%Y-%m-%d")+ " "+ res["ampm"]
                #如果用户没有提供日期，则根据当前时间选择最优解

        clean_text=sentence #clea_text是处理后的数据

        #为了避免挂号的“号”字被误判为日期，将文本中与挂号相关的号字去除
        hao_lst=["专家号","普通号","的号","挂号","号源"]
        for h in hao_lst:
            if h in clean_text:
                clean_text = clean_text.replace(h,"");

        time_res = []
        word = ''
        key_date = {'今天': 0, "今日":0, '当天': 0, '明天': 1, "明日":0,'后天': 2 ,"大后天":3, "大大后天":4 }

        for k, v in psg.cut(clean_text) :
            # k: 词语, v: 词性
            if k in key_date:
                # 当k存在于key_date中时
                if word != '':
                    # 如果word不为空时, 列表中添加相应的词语
                    time_res.append(word)
                # 获取系统当前时间，并且获取句子中时间的跨度(0, 1, 2)，通过当前时间 + 时间跨度获得几天后的时间
                word = (datetime.today() + timedelta(days=key_date.get(k, 0))) \
                    .strftime('%Y {0} %m {1} %d {2} ').format('年', '月', '日')
            elif word != '':

                # 如果k不存在于key_date时，word不为空
                if v in ['m', 't']:
                    # 当词性为数字或时间时，添加至word中
                    word = word + k
                else:
                    # 当词性不为数字或时间时，将word放入time_res，同时清空word
                    if "个" not in str(word):
                        time_res.append(word)
                        word = ''
            elif v in ['m', 't']:
                if k not in ["目前","当前"] and "个" not in str(k):
                # 当k不存在于key_date中，且word为空时，如果词性是数字或时间时，word为该词语
                    word = k
        if word != '':
            # word中可能存放的值:
            #   1. 通过词性标注后获得的时间跨度后的时间
            #   2. 非key_date中的时间或数字
            # 即只有k不存在于key_date，word不为空，词性不为数字或时间时，word才为空，进入不了这个if语句
            time_res.append(word)

        # 如果返回的结果是None，则直接清洗，否则放入集合中
        result = list(filter(lambda x: x is not None, [check_time_valid(w) for w in time_res]))
        final_res = [parse_datetime(w) for w in result]
        #return [x for x in final_res if x is not None]
        if len(final_res) !=0:
            self.slot["time"] = final_res[0]
            return self
        
        # 当槽信息为空时
        else:
            try:
                dt = parse(text, fuzzy=True)
                if dt.date() < datetime_now.date():
                    self.slot['time'] = '当天号源已过期'
                    return self
                elif dt.date()>datetime_now.date(): 
                    self.slot["time"] = dt.strftime("%Y-%m-%d")+" "+"上午"
                    return self
                else:
                    if datetime_now.hour<11 or (datetime_now.hour==11 and datetime_now.minute<=30):
                        self.slot["time"] = dt.strftime("%Y-%m-%d")+ " "+ "上午"
                        return self

                    elif datetime_now.hour<17 or (datetime_now.hour==17 and datetime_now.minute<=30):
                        self.slot["time"] = dt.strftime("%Y-%m-%d")+ " "+ "下午"
                        return self

                    else:
                        self.slot["time"] == "未知"
                        return self

            except Exception as e:
                return self
#                 print("I am here!")
#                 if self.countTime > 0:
#                     self.slot['time'] ='未知'
                    
    # 获取职位信息
    def getPosition(self, sentence):    
        '''
        用数字表示职位高低
        主任医师：1
        副主任医师、副教授：2
        主治医师、医师、暂无：3
        '''
        class_one = ['主任', '专家', '教授', '正主任','研究员']
        class_two = ['副主任','副教授','副研究员']
        class_three_1 = ['主治','主管', '住院', '助教', '助理','普通号']
        class_three_2 = ['主治','主管', '住院', '助教', '助理','普通']

        for ct in class_two:
            if ct in sentence:
                self.slot['position'] = '2'
                return self

        for co in class_one:
            if co in sentence:
                self.slot['position'] = '1'
                return self
        if self.countPosition == 0:
            for cth in class_three_1:
                if cth in sentence:
                    self.slot['position'] = '3'
                    return self
        else:
            for cth in class_three_2:
                if cth in sentence:
                    self.slot['position'] = '3'
                    return self
        
        if self.countPosition > 0:
            self.slot['position'] = '未知'
        
        return self
    
    # 获取医院信息
    def getHospital(self, sentence):
        # 导入department.csv和department_dict.txt文件
        hospital_csv = pd.read_csv('hospital.csv', encoding='gbk')

        hospital_list = []
        for i in range(len(hospital_csv)):
            hcl = hospital_csv.iloc[i].tolist()
            hcl = [x for x in hcl if pd.isnull(x) == False]
            hospital_list.append(hcl)
        
        # 这里导入词典
        jieba.load_userdict("hospital_dict.txt")

        # 分词
        seg_sent = jieba.cut(sentence, cut_all=False)
        seg_list = list(seg_sent)

        # 匹配成功
        for seg in seg_list:
            for hl in hospital_list:
                for h in hl:
                    if seg==h:
                        self.slot["hospital"] = hl[0]
                        return self
                        
        if self.countHospital > 0 and self.slot['hospital']==None:
            self.slot['hospital'] = '未知'
        return self
    
    # 获取科室信息
    def getDepartment(self, sentence):
        # 导入department.csv和department_dict.txt文件
        department_csv = pd.read_csv('department.csv', encoding='gbk')
        department_list = []
        for i in range(len(department_csv)):
            dcl = department_csv.iloc[i].tolist()
            dcl = [x for x in dcl if pd.isnull(x) == False]
            department_list.append(dcl)
        # 这里导入词典
        jieba.load_userdict("department_dict.txt")

        # 分词
        seg_sent = jieba.cut(sentence, cut_all=False)
        seg_list = list(seg_sent)

        # 匹配成功
        for seg in seg_list:
            for dl in department_list:
                for d in dl:
                    if seg==d:
                        self.slot["department"] = dl[0]
        return self

    #根据症状判断科室
    def diseaseToDepartment(self,description):
        #导入医学词库，对用户语句进行分词
        jieba.load_userdict(r'THUOCL_medical.txt') 
        words = jieba.lcut(description)
        #导入描述症状的关键词
        with open('症状关键词和疾病名称.txt') as file: 
            text = file.read()
            lines = text.split('department:【')
        disease_keywords = {}
        for line in lines[1:]:
            key_value = line.split('】\n')
            key = key_value[0]
            value = key_value[1]
            disease_keywords[key] = value.split(' ')
        #判断科室：根据用户的病症描述中关键词的出现次数高低判断
        department,max_count = None,0 #分别表示科室、得分
        for key in disease_keywords:
            key_count = 0
            for w in words:
                if w in disease_keywords[key]:
                    key_count += 1
            if key_count > max_count:
                max_count = key_count
                department = key
        #如果未能判断出科室，会返回None
        return department
    
    #机器人响应函数
    def askDepartment(self):
        self.play('askDepartment.mp3')
        return "请告诉我您要就诊的科室？"
    
    def askHospital(self):
        self.countHospital += 1
        self.play('askHospital.mp3')
        return "请告诉我您要就诊的医院？"
    
    def askTime(self):
        self.countTime += 1
        self.play('askTime.mp3')
        return "请告诉我您要就诊的时间？"
    
    def askPosition(self):
        self.countPosition += 1
        self.play('askPosition.mp3')
        return "请告诉我您对医生职位的要求（普通/副主任/主任）？"
    
    def getSources(self, v3, data, time):
    # 将号源信息存储在source中 source[index] = source_list
        result = []
        source = dict()

        links = v3['链接'].tolist()
        indexes = v3['索引'].tolist()
        headers = {'Cookie':'_sid_=164965879506636228139915; c=d3dKbPgk-1649658797685-92062bf8bc08a-1556437995; _ipgeo=province%3A%E5%8C%97%E4%BA%AC%7Ccity%3A; _fmdata=UFDLemaXm%2F1%2BVbd%2FXL0c7t3rU79ppeKPBIFytkou2Af8bRbwhPOXt%2B8od%2B2rgYcgvXAMYmgEO6XmATsd1b%2B9Wa0rzWX3fyl50kn%2BkzQ2Q80%3D; searchHistory=%E5%8C%97%E4%BA%AC%E5%A4%A7%E5%AD%A6%E4%BA%BA%E6%B0%91%E5%8C%BB%E9%99%A2%2C%7C%E6%B8%85%E5%8D%8E%E5%A4%A7%E5%AD%A6%E7%8E%89%E6%B3%89%E5%8C%BB%E9%99%A2%2C%7C%2Cclear; _sh_ssid_=1653092739810; _e_m=1653092739815; Hm_lvt_3a79c3f192d291eafbe9735053af3f82=1650779687,1650870203,1650904782,1653092744; TDpx=4046; JSESSIONID=node02e2s10dqg8qfpocwmap4nl21257.node0; _ci_=jrTLBNibmmIB7q2zoffVCaZPTszf8kG9HRTdQ3r1/+l38CF2Q/FuqmxpGZNqVwxy9CmZeZQvyG5RL8LM6PouFqm0C5iOzgCh; _exp_=Zc0Zi9/Hna4iCQOuAvHpKgO8/s+vy6Ey1xlmkuWCECY=; __wyt__=!Ps8urNoJRC1VKbCbqHfltV0wSfRdKtNk4MY3ELO_j2rdF8EbRbBvZcrlqhWjB7rm4rY8ANZT6UrunVVxU4dzsZutjTeSMBadN3nk_bd0xUfjBXUMJG-6ChkCHOE2AKqRBSLy27MvOiI6gggz4N7IZ7IWuFnKCq3B5ERzxsT5MJlDQ; Hm_lpvt_3a79c3f192d291eafbe9735053af3f82=1653092759; _xid=0CVTtAY2eOCoF3EDHUTGCrwqcQmlxdVW4nxXmeHpEStt%2FsJ7ILqPxUs33k5lvsPouxddxjmgaGpVeiOFkvmiyg%3D%3D; _fm_code=9WPH1653092759119SouA6kou6',\
              'user-agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'}
        for i in range(len(links)):
            # 解析页面
            link, index = links[i], indexes[i]
            html = requests.get(link, headers=headers, timeout=30).content
            content = str(html, 'utf-8')
            info = json.loads(content)
            source_list = []
            piece = ''
            # 如果时间为固定值
            try:
                for s in info['data']['shiftSchedule']:
                    # 上午
                    if s['apm'] == 1:
                        piece = s['date']+' '+'上午'
                        source_list.append(piece)
                    # 下午
                    elif s['apm'] == 2:
                        piece = s['date']+' '+'下午'
                        source_list.append(piece)

                    # 如果时间符合条件
                    if piece==self.slot['time']:
                        result = ["医院名称："+v3.loc[v3['索引']==index, '医院名称'].values[0], "科室名称："+v3.loc[v3['索引']==index, '科室名称'].values[0], \
                                "医生姓名："+v3.loc[v3['索引']==index, '姓名'].values[0], "医生职位："+v3.loc[v3['索引']==index, '职位'].values[0], \
                                "号源信息："+self.slot['time']]
#                         print("时间是定值且符合条件")
                        return result

                if len(source_list) > 0:
                    source[index] = source_list

            # 如果查不到号源信息 
            except Exception as e:
                source[index] = source_list
#         print(source_list)       
        # 如果时间不是固定值，扩大时间范围
        if self.slot['time']=='未知':
            if len(source) > 0:
                for index in source.keys():
                    if len(source[index]) > 0:
                        for st in source[index]:
                            if st >= time:
#                                 print("时间不是定值")
                                result = ["医院名称："+v3.loc[v3['索引']==index, '医院名称'].values[0], "科室名称："+v3.loc[v3['索引']==index, '科室名称'].values[0], \
                                          "医生姓名："+v3.loc[v3['索引']==index, '姓名'].values[0], "医生职位："+v3.loc[v3['索引']==index, '职位'].values[0], \
                                          "号源信息："+st]
                                return result

        return None
    
    def showRegistration(self):
        '''v1: 已知department
           v2: 已知department、hospital
           v3: 已知department、hospital、position(职位)
           result:返回值为包含详细信息的列表
        '''
        result = []        
        data = pd.read_csv('data.csv', encoding='gbk')

        # time记录默认值填充后的时间信息
        time = ""
        
        if self.slot['hospital']=='未知':
            self.slot['hospital'] = self.userPosition
        
#         print(self.slot["time"])
        # 判断是否过期放在getTime()函数中，若为“未知”，则进行默认值填充
        if self.slot['time'] == '未知':
            now = datetime.now()
            if now.hour<11 or (now.hour==11 and now.minute<=30):
                time = now.strftime("%Y-%m-%d")+ " "+ "上午"
            elif now.hour<17 or (now.hour==17 and now.minute<=30):
                time = now.strftime("%Y-%m-%d")+ " "+ "下午"    
            else:
                time = (now+timedelta(days=1)).strftime("%Y-%m-%d")+ " "+ "上午" 
        else:
            time = self.slot['time']

        # 科室已知
        v1 = data.loc[data['科室标准名称']==self.slot["department"], :]
        # 如果医院已知
        if self.slot["hospital"] != self.userPosition:
            v2 = v1.loc[v1['医院名称']==self.slot["hospital"], :]
            # 如果职位已知
            if self.slot['position'] != '未知':
                if self.slot['position']=='1':
                    v3 = v2.loc[((v2['职位']=='主任医师')==True)]
                    result = self.getSources(v3, data, time)
                elif self.slot['position']=='2':
                    v3 = v2.loc[((v2['职位']=='副主任医师')==True) | ((v2['职位']=='副教授')==True)]
                    result = self.getSources(v3, data, time)
                elif self.slot['position']=='3':
                    v3 = v2.loc[((v2['职位']=='暂无')==True) | ((v2['职位']=='医师')==True) | ((v2['职位']=='主治医师')==True)]
                    result = self.getSources(v3, data, time)
                if result==None:
                    result =  "暂无符合条件的号源信息，请更改就诊时间"
                    self.play('noResult1.mp3')
                    return result
             # 如果职位未知
            else:
                result = self.getSources(v2, data, time)
                if result==None:
                    result = "暂无符合条件的号源信息，请更改就诊时间/医生职位"
                    self.play('noResult2.mp3')
                    return result

        # 如果医院未知
        else:
            # 如果职位已知
            if self.slot['position'] != '未知':
                if self.slot['position']=='1':
                    v2 = v1.loc[((v1['职位']=='主任医师')==True)]
                    v3 = v2.loc[v2['市（区）']== self.userPosition[2:]]
                    result = self.getSources(v3, data, time)
                elif self.slot['position']=='2':
                    v2 = v1.loc[((v1['职位']=='副主任医师')==True) | ((v1['职位']=='副教授')==True)]
                    v3 = v2.loc[v2['市（区）']== self.userPosition[2:]]
                    result = self.getSources(v3, data, time)
                elif self.slot['position']=='3':
                    v2 = v1.loc[((v1['职位']=='暂无')==True) | ((v1['职位']=='医师')==True) | ((v1['职位']=='主治医师')==True)]
                    v3 = v2.loc[v2['市（区）']== self.userPosition[2:]]
                    result = self.getSources(v3, data, time)
                if result==None:
                    result = "暂无符合条件的号源信息，请更改就诊时间/医院名称"
                    self.play('noResult3.mp3')
                    return result
            # 如果职位未知
            else:
                v2 = v1.loc[v1['市（区）']== self.userPosition[2:]]
                result = self.getSources(v2, data, time)
                if result==None:
                    result = "暂无符合条件的号源信息，请更改就诊时间/医院名称/医生职位"
                    self.play('noResult4.mp3')
                    return result
        
        result = '\n'.join(result)
        self.record(result, 'getSource')
        self.play('getSource.mp3')
        return result

    def bePerplexed(self):
        self.play('notUnderstand.mp3')
        return "对不起，大白无法理解您的意思。"
    
    def adviceDepartment(self):
        if self.slot['department']!= None:
            result = "根据您的病症，为您推荐的科室是："+self.slot['department']+"（仅供参考，具体请您询问专业医生）"
            self.record(result, 'adviceDepartment')
            self.play('adviceDepartment.mp3')
            return result
        else:
            self.play('noDepartment.mp3')
            return "对不起，大白没有诊断适合您病症的科室，请您咨询专业医生。"

In [68]:
DaBai = DaBaiChatBot('北京朝阳区')

In [69]:
#第一轮对话
userSentence = '帮我挂306医院的门诊'
#机器人动作
#NLU
state_i,intent_i = DaBai.NLU(userSentence)
#DST
DaBai.DST(intent_i=intent_i,state_i=state_i)
#DPL
next_action = DaBai.DPL()
#NLG
response = DaBai.NLG(next_action)
#打印机器人的响应语句
print(response)
print(DaBai.slot)

请告诉我您要就诊的科室？
{'department': None, 'hospital': '战略支援部队特色医学中心（原解放军第306医院）', 'time': None, 'position': None}


In [71]:
#第二轮对话
userSentence = '创口科'
#机器人动作
#NLU
state_i,intent_i = DaBai.NLU(userSentence)
#DST
DaBai.DST(intent_i=intent_i,state_i=state_i)
#DPL
next_action = DaBai.DPL()
#NLG
response = DaBai.NLG(next_action)
#打印机器人的响应语句
print(response)
print(DaBai.slot)

请告诉我您要就诊的时间？
{'department': '创口科', 'hospital': '战略支援部队特色医学中心（原解放军第306医院）', 'time': None, 'position': None}


In [74]:
#第三轮对话
userSentence = '8月24日'
#机器人动作
#NLU
state_i,intent_i = DaBai.NLU(userSentence)
#DST
DaBai.DST(intent_i=intent_i,state_i=state_i)
#DPL
next_action = DaBai.DPL()
#NLG
response = DaBai.NLG(next_action)
print(response)
print(DaBai.slot)

医院名称：战略支援部队特色医学中心（原解放军第306医院）
科室名称：创口门诊
医生姓名：创口门诊普通号
医生职位：主治医师
号源信息：2022-08-24 上午
{'department': '创口科', 'hospital': '战略支援部队特色医学中心（原解放军第306医院）', 'time': '2022-08-24 上午', 'position': '未知'}


In [73]:
#第四轮对话
userSentence = '都行'
#机器人动作
#NLU
state_i,intent_i = DaBai.NLU(userSentence)
#DST
DaBai.DST(intent_i=intent_i,state_i=state_i)
#DPL
next_action = DaBai.DPL()
#NLG
response = DaBai.NLG(next_action)
print(response)
print(DaBai.slot)

暂无符合条件的号源信息，请更改就诊时间/医生职位
{'department': '创口科', 'hospital': '战略支援部队特色医学中心（原解放军第306医院）', 'time': '2022-08-20 上午', 'position': '未知'}


In [29]:
#GUI界面展示：显示交互界面
#GUI界面展示函数：
#GUI界面——初始化ChatBot(userPosition)——交互过程：NLU-DPL-DST-NLG——打印到频幕

# from tkinter import *
import tkinter as tk
from tkinter import ttk
import time
import tkinter.font as tf

#地理的字典【根据爬到的更改一下/统一叫法，带不带“省”字“市”字，算不算市级县】
area = {
    '北京':["密云区","延庆区","朝阳区","丰台区","石景山区","海淀区","门头沟区","房山区","通州区","顺义区","昌平区","大兴区","怀柔区","平谷区","东城区","西城区"],
   
}

# 创建窗口
app = tk.Tk()
app.title('“大白”医院挂号聊天机器人')

font1 = tf.Font(family='楷体',size=12,weight=tf.NORMAL)
font2 = tf.Font(family='楷体',size=15,weight=tf.NORMAL)

w = 800
h = 660
sw = app.winfo_screenwidth()
sh = app.winfo_screenheight()
x = 200
y = (sh - h) / 2
app.geometry("%dx%d+%d+%d" % (w, h, x, y))
app.resizable(0, 0)


#选择省份
select_location=tk.Label(app, text="请选择 省/直辖市：",font=font1, fg="DimGray")
select_location.place(x=400,y=5)
select_location=tk.Label(app, text="请选择 市/区：",font=font1, fg="DimGray")
select_location.place(x=575,y=5)
province=ttk.Combobox(app)
province["value"]=(list(area.keys()))
province.place(x=400,y=25)
def province_select(*arg):
    city["value"]=area[province.get()]
    
province.bind('<<ComboboxSelected>>', province_select)
city=ttk.Combobox(app)
city.place(x=575,y=25)
def city_select(*arg):
    return city.get()
city.bind('<<ComboboxSelected>>', city_select)

#确认位置
def confirm_loc():
    if city.get()!="":
        strMsg = "大白:" + "【城市选择】"+province.get() + city.get()+'\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n'
        t1_Msg.configure(state=tk.NORMAL)
        t1_Msg.delete('0.0', "end")
        t1_Msg.insert("end", strMsg, 'green')
        t1_Msg.config(state=tk.DISABLED)
        position_str=province.get() + city.get()
        DaBai.userPosition=position_str
confirm_btn = tk.Button(text="确认", command=confirm_loc,font=font1, fg="black")
confirm_btn.place(x=750, y=20)


#初始化DaBaiChatBot(userPosition)
DaBai=DaBaiChatBot(" ")

# 北京+xx区

# 发送消息并回复
def sendMsg():
    t1_Msg.configure(state=tk.NORMAL)
    strMsg = "我: " + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '\n'
    t1_Msg.insert("end", strMsg, 'red')
    sendMsg = t2_sendMsg.get('0.0', 'end')
    t1_Msg.insert("end", sendMsg)
    t1_Msg.insert("end",  '\n')
    t1_Msg.insert("end",  '- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \n',"green")
    #t1_Msg.insert("end",  '\n')
    strMsg = "大白: " + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '\n'
    t1_Msg.insert("end", strMsg, 'green')
    
    
    #获取文本输入框的内容
    msg=t2_sendMsg.get("1.0","end")  
    #调用类和函数
    state_i,intent_i = DaBai.NLU(msg)
    #DST
    DaBai.DST(intent_i=intent_i,state_i=state_i)
    #DPL
    next_action = DaBai.DPL()
    #NLG
    response = DaBai.NLG(next_action)
            
    #在屏幕上打印响应语句
    t1_Msg.insert("end",response)
    t1_Msg.insert("end",  '\n\n')
    t1_Msg.config(state=tk.DISABLED)
    t2_sendMsg.delete('0.0', "end")
    
#清空输入框
def clearMsg():
    t2_sendMsg.delete('0.0', "end")

#清空历史消息界面
def clearHistory():
    t1_Msg.configure(state=tk.NORMAL)
    t1_Msg.delete('0.0', "end")
    strMsg = "大白: " + "【城市选择】"+province.get() + city.get()+'\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n'
    t1_Msg.insert("end", strMsg, 'green')
    t1_Msg.config(state=tk.DISABLED)
    
def newchat():
    t1_Msg.configure(state=tk.NORMAL)
    t1_Msg.delete('0.0', "end")
    tips = "大白: " + "Hi 欢迎使用大白！请先在右上角选择您的就诊地 (๑╹◡╹)ﾉ"+'\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n'
    t1_Msg.insert("end", tips, 'green')
    t1_Msg.config(state=tk.DISABLED)
    #将大白的成员变量恢复到最初状态
    DaBai.state1_i = [0,0,0,0]
    DaBai.intent1_i = None 
    DaBai.slot = {
        'department':None,
        'hospital':None,
        'time':None,
        'position':None
    }
    DaBai.action = ['askDepartment','askHospital','askTime','askPosition','showRegistration','bePerplexed','adviceDepartment']
    DaBai.userPosition = city.get()
    DaBai.countHospital = 0
    DaBai.countPosition = 0
    DaBai.countTime = 0
    
#第三步，放置部件

# 聊天消息预览窗口
t1_Msg = tk.Text(width=113, height=27)
t1_Msg.config(state=tk.DISABLED)
t1_Msg.tag_config('green', foreground='#008C00')  # 创建tag
t1_Msg.tag_config('red', foreground="red")  # 创建tag
t1_Msg.place(x=2, y=55)
t1_Msg.configure(state=tk.NORMAL)
tips = "大白: " + "Hi 欢迎使用大白！请先在右上角选择您的就诊地 (๑╹◡╹)ﾉ"+'\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n'
t1_Msg.insert("end", tips, 'green')
t1_Msg.config(state=tk.DISABLED)

#增加语音输出
#mixer的初始化
pygame.mixer.init()  
#载入一个音乐文件用于播放
music = pygame.mixer.music.load('audio/greet.mp3')
#检查是否正在播放音乐
if pygame.mixer.music.get_busy()==False: 
    #开始播放音乐流  
    pygame.mixer.music.play()  

 # 聊天消息发送窗口
t2_sendMsg = tk.Text(width=113, height=10)
t2_sendMsg.place(x=210, y=460)


# 发送按钮
sendMsg_btn = tk.Button(text=" 发送 ",font=font1, command=sendMsg, fg="black")
sendMsg_btn.place(x=650, y=555)

#清空输入按钮
clearMsg_btn = tk.Button(text=" 清空 ",font=font1,command=clearMsg, fg="black")
clearMsg_btn.place(x=720, y=555)

#清空历史按钮
clearHis_btn = tk.Button(text=" 清屏 ",font=font1,command=clearHistory, fg="black")
clearHis_btn.place(x=720, y=370)

#装饰文字
tk.Label(app, text="点击此处开启新的对话：",font=font1, fg="DimGray").place(x=570, y=603)
#新对话按钮
clearHis_btn = tk.Button(text="new",font=font1,command=newchat, fg="black")
clearHis_btn.place(x=750, y=598)

#装饰图片
photo = tk.PhotoImage(file="大白机器人.GIF")
tk.Label(app, image=photo,width=200, height=200).place(x=2,y=410)

#装饰文字
tk.Label(app, text="消息记录：",font=font2, fg="DimGray").place(x=2,y=20)

#装饰文字
tk.Label(app, text="请在此处输入：",font=font2, fg="DimGray").place(x=212,y=420)


# 主事件循环
app.mainloop()