# My dialog system

**Imports**

In [30]:
from google.cloud import speech #speech recognition from Google Cloud
import io #provides Python’s main facilities for dealing with various types of inputs and outputs 
import requests #uses the API to access data from the web  
import os #for interacting with the operating system - in our case, python files are used in the code 
import sounddevice as sd #for recording an audio file
import numpy as np #for working with numerical data
from scipy.io.wavfile import write #sample rate for the recorded file 
import json #for using the API as a json structure 
from gtts import gTTS #text-to-speech module
from playsound import playsound #part of the text-to-speech module (playing the converted to speech text) 
from eliza import eliza #ELIZA is an early natural language processing computer program created to demonstrate the superficiality of communication between humans and machines
from file_update_vaccinations import file_update_vaccinations #updates the data from the API - for vaccines
from file_update_recovered import file_update_recovered #updates the data from the API - for number of recovered 
import emorec #emotion recognition 

In [31]:
#!dir

In [32]:
#cd..

**Define the parameters of the audio file that will be recorded**

In [33]:
sr = 16000  # Sample rate
seconds = 5  # Duration of recording
filename = 'myfile.wav'

**Data integretation** - two python files are used here which take and update the data of the two APIs: first one for vaccinations and second one for the number of recovered people from the previous day (the APIs are provided by Robert Koch Institut)

In [34]:
file_update_vaccinations()
vaccinations = open('vaccinations.json')
vaccinations = json.load(vaccinations)

Up To Date


In [35]:
vaccinations

{'data': {'administeredVaccinations': 95332331,
  'vaccinated': 52010167,
  'vaccination': {'biontech': 36010517,
   'moderna': 4283289,
   'astraZeneca': 9196460,
   'janssen': 2519901},
  'delta': 55817,
  'quote': 0.625,
  'secondVaccination': {'vaccinated': 45842065,
   'vaccination': {'biontech': 35461546,
    'moderna': 4504358,
    'astraZeneca': 3356260,
    'janssen': 2519901},
   'delta': 227600,
   'quote': 0.551},
  'latestDailyVaccinations': {'date': '2021-08-09T00:00:00.000Z',
   'vaccinated': 55817,
   'firstVaccination': 55817,
   'secondVaccination': 219941},
  'indication': {'age': None,
   'job': None,
   'medical': None,
   'nursingHome': None,
   'secondVaccination': {'age': None,
    'job': None,
    'medical': None,
    'nursingHome': None}},
  'states': {'BW': {'name': 'Baden-WÃ¼rttemberg',
    'administeredVaccinations': 12593218,
    'vaccinated': 6718315,
    'vaccination': {'biontech': 4692626,
     'moderna': 537103,
     'astraZeneca': 1170311,
     'janss

In [36]:
file_update_recovered()
recovered = open('recovered.json')
recovered = json.load(recovered)

Up To Date


In [37]:
recovered

{'data': [{'recovered': 1, 'date': '2020-01-02T00:00:00.000Z'},
  {'recovered': 2, 'date': '2020-01-23T00:00:00.000Z'},
  {'recovered': 2, 'date': '2020-01-28T00:00:00.000Z'},
  {'recovered': 2, 'date': '2020-01-29T00:00:00.000Z'},
  {'recovered': 4, 'date': '2020-01-31T00:00:00.000Z'},
  {'recovered': 1, 'date': '2020-02-01T00:00:00.000Z'},
  {'recovered': 1, 'date': '2020-02-03T00:00:00.000Z'},
  {'recovered': 4, 'date': '2020-02-04T00:00:00.000Z'},
  {'recovered': 1, 'date': '2020-02-06T00:00:00.000Z'},
  {'recovered': 1, 'date': '2020-02-07T00:00:00.000Z'},
  {'recovered': 2, 'date': '2020-02-11T00:00:00.000Z'},
  {'recovered': 1, 'date': '2020-02-17T00:00:00.000Z'},
  {'recovered': 1, 'date': '2020-02-18T00:00:00.000Z'},
  {'recovered': 1, 'date': '2020-02-20T00:00:00.000Z'},
  {'recovered': 1, 'date': '2020-02-24T00:00:00.000Z'},
  {'recovered': 3, 'date': '2020-02-25T00:00:00.000Z'},
  {'recovered': 7, 'date': '2020-02-26T00:00:00.000Z'},
  {'recovered': 23, 'date': '2020-02-27T

In [38]:
#print(recovered["data"][recovered_days["genesen"]]["recovered"])

In [39]:
#recovered["data"].index({'recovered': 3093, 'date': '2020-04-15T00:00:00.000Z'})

**Dictionaries** - phrases defined for extracting the keywords from the input, searching in the json data and generating the output

In [40]:
phrases = {'hello':'Hallo! Ich bin deine neue Freundin, das Dialogsystem! Was möchtest du wissen?', 
        'continue':'Kann ich dir weiterhelfen?', 
        'goodbye':'Vielen Dank für deinen Besuch!', 
        'done':'tschüss'}

states_d = {'schleswig':'SH', 'hamburg':'HH', 'berlin':'BE', 'bayern':'BY', 
            'niedersachsen': 'NI', 'bremen': 'HB', 
            'nordrhein':'NW', 'hessen':'HE', 'rheinland':'RP', 'baden':'BW', 
            'saarland': 'SL', 'brandenburg':'BB', 'mecklenburg':'MV', 'sachsen':'SN',
            'anhalt':'ST', 'thüringen':'TH', 'deutschland':'DE', 'hier':'DE'}
state_names = {'SH':'Schleswig-Hostein', 'HH':'Hamburg', 'BE':'Berlin', 'BY':'Bayern', 
            'NI':'Niedersachsen', 'HB':'Bremen', 
            'NW': 'Nordrhein Westfalen', 'HE':'Hessen', 'RP':'Rheinland Pfalz', 'BW':'Baden Württemberg', 
            'SL':'Saarland', 'BB':'Brandenburg', 'MV':'Mecklenburg Vorpommern', 
            'SN': 'Sachsen', 'ST':'Sachsen-Anhalt', 'TH':'Thüringen', 'DE':'Deutschland'}
vaccines_d = {'biontech':'biontech', 'biontec':'biontech', 
              'moderna':'moderna', 
              'janssen':'janssen', 'jansen':'janssen',
              'delta':'delta',
              'astraZeneca':'astraZeneca', 'astra':'astraZeneca', 'zeneca':'astraZeneca'}
    
vaccine_names = {'biontech':'Biontech', 'moderna':'Moderna', 'janssen':'Janssen', 'delta':'Delta',
              'astraZeneca':'Astra Zeneca'}

recovered_d = {'genesene': 'genesen', 'genesen': 'genesen'}

recovered_days = {'genesen': -1}

emo_dict = {'happiness':'glücklich', 'neutral': 'wie immer', 'anger': 'irritiert', 'sadness': 'traurig', 
            'fear': 'ängstlich', 'boredom':'gelangweilt', 'disgust':'angeekelt'}

**Record a file** - will count as input

In [41]:
def record_file():
    data = sd.rec(int(seconds * sr), samplerate=sr, channels=1)
    sd.wait()  # Wait until recording is finished
    # Convert `data` to 16 bit integers:
    y = (np.iinfo(np.int16).max * (data/np.abs(data).max())).astype(np.int16) 
    write(filename, sr, y)

**Initialise Google Cloud** - for the module speech-to-text / speech recognition; the credentials can be accessed after creating a business Google account

In [42]:
def init_google():
    credentials=r"C:\Users\user\s_dyalog\Google Credentials.json" 
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"]=credentials

In [43]:
init_google()

**Normalize the transcribed inputs** - there should be no difference between lowercase and uppercase letters

In [44]:
def normalize(in_s):
    return in_s.lower()

**Speech recognition from Google Cloud** - the function transcribes the recorded input and returns it as text

In [45]:
def transcribe():
    client = speech.SpeechClient()
    with io.open(filename, "rb") as audio_file:
        content = audio_file.read()
    audio = speech.RecognitionAudio(content = content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code="de-DE",
    )
    response = client.recognize(config=config, audio=audio)
    for result in response.results:
        for index, alternative in enumerate(result.alternatives):
            print("Transcript {}: {}".format(index, alternative.transcript))
            return alternative.transcript
            

**Text-to-speech** - the function turns the text into speech and plays it

In [46]:
def tts(text):
    
    language = 'de'  #language (ISO Code)

    myobj1 = gTTS(text=text, lang=language, slow=False)
    file1 = str("hello" + ".mp3") #generate speech output

    myobj1.save(file1) #save as mp3 
    
    playsound(file1,True)
    os.remove(file1) #playsound

**Input function** - an audio file containing natural language will be recorded, transcribed and returned as text

In [47]:
def speech_input():
    record_file()
    text = transcribe()
    return text

In [48]:
def do_input():
    return speech_input()

**Semantic parsing** - natural language is translated into logical expressions/meaningful representations; keywords are filtered out of the input, in our case the function checks if in the input there are words related to a type of vaccination, a state from Germany or the number of recovered people (the words have been defined in the dictionaries)

In [49]:
def semantic(input_s):
    semantics = {'state':'', 'vaccine':'', 'recovered':'', 'answer':0}
    for key in recovered_d.keys():
        if key in input_s:
            semantics['recovered'] = recovered_d[key]
            break
    for key in states_d.keys():
        if key in input_s:
            semantics['state'] = states_d[key]
            break
    for key in  vaccines_d.keys():
        if key in input_s:
            semantics['vaccine'] =  vaccines_d[key]
            break
    return semantics

**Interprete the semantic content of the input** - the function verifies if in the input it was asked for a specific type of vaccine (if not, the function returns the total number of vaccines) for a specific state (if not, the function returns the vaccine from whole Germany), for the number of recovered people from the previous day. Otherwise the function returns an empty variable

In [50]:
# expects semantics: semantics[0] == bundesland, semantics[1] == vaccine, semantics[2] == recovered
def data(semantics):
    s = semantics['state']
    v = semantics['vaccine']
    r = semantics['recovered']
    
    if r: #number of recovered is asked
        semantics['answer'] = recovered["data"][recovered_days[r]]["recovered"]
    else:
        if s: # state given
            if s != 'DE':
                if v: # and vaccine given
                    semantics['answer'] = vaccinations["data"]["states"][s]['vaccination'][v]
                else: # all vaccines for state
                    semantics['answer'] = vaccinations["data"]["states"][s]['vaccinated']
            else:
                if v: # and vaccine given
                    semantics['answer'] = vaccinations["data"]['vaccination'][v]
                else: # all vaccines for Germany
                    semantics['answer'] = vaccinations['data']['vaccinated']
        else: # no state
            if v: # but vaccine
                semantics['answer'] = vaccinations["data"]['vaccination'][v]
            else: # nothing given
                semantics['answer'] = None
            
    return semantics

In [51]:
#vaccine_names

**Output function** - on the basis of the interpretation done in the semantic parsing module, the function returns an answer that counts as output. If in the input has been asked for information related to corona (vaccines or number of recovered), the function will use the API from RKI for generating the output; otherwise the output will be generated with Eliza. ELIZA is an early natural language processing computer program created to demonstrate the superficiality of communication between humans and machines

In [52]:
def output(semantics, inputs, elz):
    ret = ''
    eliza = 0 
    s = semantics['state']
    v = semantics['vaccine']
    r = semantics['recovered']
    a = semantics['answer']
    if r: #number of recovered is asked 
        r = recovered_days[r]
        ret = 'Gestern gab es {} Genesene in Deutschland'.format(a)
    else:
        if s: # state given
            s = state_names[s]
            if v: # and vaccine given
                v = vaccine_names[v]
                ret = 'Die Impfungen für {} mit {} sind {}'.format(s, v, a)
            else: # all vaccines for state
                ret = 'Die Impfungen für {} sind {}'.format(s, a)
        else: # no state
            if v: # but vaccine
                v = vaccine_names[v]
                ret = 'Die Impfungen in Deutschland mit {} sind {}'.format(v, a)
            else:
                if r: #but recovered
                    r = recovered_days[r]
                    ret = 'Gestern gab es {} Genesene in Deutschland'.format(a)
                else: # nothing given
                    ret = elz.respond(inputs)
                    eliza = 1
    return ret, eliza
    

**Initialize ELIZA** - two files are used here: a python file with the code and a txt file with all the phrases that Eliza will return

In [53]:
def init_eliza():
    root = r"C:\Users\user\s_dyalog"
    elz = eliza.Eliza()
    elz.load(root+"\eliza\deutsch.txt")
    return elz

In [54]:
elz = init_eliza()

**Turns the output into speech and plays it** - the output was generated in the previous function "output" as text

In [55]:
def output_s(text):
    print('output: '+text)
    tts(text)

**Dialog manager function** - responsible for the state and flow of the conversation; all functions are connected here. The dialog system determines also the emotions of the person that records the input, with the program EmoDB

In [56]:
def dialogmanager(elz):
    output_s(phrases['hello']) 
    input_s = do_input()
    input_s = normalize(input_s)
    while input_s and input_s != phrases['done']:
        input_s = normalize(input_s)
        emotion = emoRec.classify(filename)[0] 
        emotion_g = emo_dict[emotion]
        if (emotion_g == 'traurig'):
            output_s('ich merke du bist '+emotion_g)
            output_s('mach dir keine Sorgen! ich bin hier, um mit dir zu reden!')
        if (emotion_g == 'glücklich'):
            output_s('ich merke du bist '+emotion_g)
            output_s('wenn du glücklich bist, dann bin ich auch glücklich!')
        if (emotion_g == 'irritiert'):
            output_s('ich merke du bist '+emotion_g)
            output_s('ich kann dir gerne ein paar Atemübungen empfehlen')
        if (emotion_g == 'gelangweilt'):
            output_s('ich merke du bist '+emotion_g)
            output_s('mach lieber einen Spaziergang! wir können uns später unterhalten!')
            break
            
        semantics = semantic(input_s)
        semantics = data(semantics)
        out_string = output(semantics, input_s, elz)[0] 
       
        output_s(out_string)
        
        if (output(semantics, input_s, elz)[1] == 0):  #the question to continue will be asked only for the corona dialog, because Eliza has its own questions
            output_s(phrases['continue']) 
        input_s = do_input()
        if (input_s):
            input_s = normalize(input_s)
        else:
            output_s(phrases['goodbye'])   
    output_s('Tschüss')   

In [57]:
emoRec = emorec.EmoRec() #in order to determine the emotions, the dialog system uses a python file with the code 

In [None]:
dialogmanager(elz)

output: Hallo! Ich bin deine neue Freundin, das Dialogsystem! Was möchtest du wissen?
Transcript 0: wie heißt du
output: ich merke du bist traurig
output: mach dir keine Sorgen! ich bin hier, um mit dir zu reden!
output: Wir reden über dich, nicht über mich.
Transcript 0: Impfungen in Hamburg
output: Die Impfungen für Hamburg sind 1204585
output: Kann ich dir weiterhelfen?
Transcript 0: Amazon in Deutschland
output: ich merke du bist traurig
output: mach dir keine Sorgen! ich bin hier, um mit dir zu reden!
output: Die Impfungen für Deutschland sind 52010167
output: Kann ich dir weiterhelfen?
Transcript 0: Nächte Impfungen Genesende Menschen
output: ich merke du bist irritiert
output: ich kann dir gerne ein paar Atemübungen empfehlen
output: Gestern gab es 13 Genesene in Deutschland
output: Kann ich dir weiterhelfen?
Transcript 0: sage mir bitte ein paar Atemübungen
output: ich merke du bist glücklich
output: wenn du glücklich bist, dann bin ich auch glücklich!
output: Ich bin nicht sic