# Interaction with the World Homework (#3)
Python Computing for Data Science (c) J Bloom, UC Berkeley 2018

Due Tuesday 2pm, Feb 20, 2018

# 1) Monty: The Python Siri

Let's make a Siri-like program (call it Monty!) with the following properties:
   - record your voice command
   - use a webservice to parse that sound file into text
   - based on what the text, take three different types of actions:
       - send an email to yourself
       - do some math
       - tell a joke

So for example, if you say "Monty: email me with subject hello and body goodbye", it will email you with the appropriate subject and body. If you say "Monty: tell me a joke" then it will go to the web and find a joke and print it for you. If you say, "Monty: calculate two times three" it should response with printing the number 6.

Hint: you can use speed-to-text apps like Houndify (or, e.g., Google Speech https://cloud.google.com/speech/) to return the text (but not do the actions). You'll need to sign up for a free API and then follow documentation instructions for using the service within Python. 

In [1]:
# Populate our namespace
import pyaudio
import numpy as np
import wave
#from houndify import * 
import my_credentials 
#pip install word2number #this is a package used to convert strings of written numbers to ints
from word2number import w2n

import houndify #class provided by houndify in order to interface with thier speach to text engine
import time
from bs4 import BeautifulSoup
from urllib.request import urlopen
import json

from email_example import mail




In [2]:
#We define a function to record audio, we are recycling code from lecture

def record_audio(length_of_recording ,file_name='' ,path = '',sample_rate = 16000):
    """
    Takes input: 
    
    length_of_recording: the desired lenth in seconds of the recording
    path: the path for the .wav file to be saved, if not specified, it will 
    be saved in the current directory
    file_name: the name of the .wav file to be saved, if not specified, it will
    be named "new_wave.wav"
    
    sample_rate: Must be either 8kHz or 16kHz. If not specified, defaults to 16kHz
    
    Records audio from the building mic with sample rate of 44.1kHz for user defined length of time
    A .wav file is then 
    """
    
    #Check if user gave path/file name, if not, then given defaults
    if len(file_name) != 0:    
        WAVE_OUTPUT_FILENAME = path + file_name + '.wav'        
    else:
        WAVE_OUTPUT_FILENAME = path + "new_wave.wav"
        
    chunk = 1024
    
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = sample_rate
    RECORD_SECONDS = length_of_recording                
    p = pyaudio.PyAudio()
    print("* Starting recording")
    stream = p.open(format = FORMAT,
        channels = CHANNELS,
        rate = RATE,
        input = True,
        frames_per_buffer = chunk)
    all = []
    #for i in range(0, int(RATE / chunk * RECORD_SECONDS)):
    try:
        while True:
            data = stream.read(chunk)
            all.append(data)
    except:
        KeyboardInterrupt
            
    print("* done recording")
    stream.close()
    p.terminate()
    
    data = b"".join(all)
    wf = wave.open(WAVE_OUTPUT_FILENAME, "wb")
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()
    
    return data
    


In [3]:
def convert_to_text(file_name):
    """
    Modified code provided by Houndify that 
    uses the Houndify class to connect to thier
    speach to text engine to convert provided .wav files to 
    text.
    
    Inputs:
    file_name: name of .wav file (and path if not in same directory) to converted
    to text
    
    Returns:
    words: the text returned by Houndify parsed into a list of words
    
    """

    #fname = 'houndify_python3_sdk_1.0.0/test_audio/whatistheweatherthere.wav'
    file_name = file_name + '.wav'
    CLIENT_ID = my_credentials.Houndify_Client_ID
    CLIENT_KEY = my_credentials.Houndify_Client_Key
    AUDIO_FILE = file_name
    BUFFER_SIZE = 512

    

    client = houndify.StreamingHoundClient(CLIENT_ID, CLIENT_KEY, "monty")
    

    #checks to make sure the audio meets the correct criteria to use Houndify
    audio = wave.open(AUDIO_FILE)
    if audio.getsampwidth() != 2:
      print("%s: wrong sample width (must be 16-bit)" % file_name)
    if audio.getframerate() != 8000 and audio.getframerate() != 16000:
      print("%s: unsupported sampling frequency (must be either 8 or 16 khz)" % file_name)
    if audio.getnchannels() != 1:
      print("%s: must be single channel (mono)" % file_name)

    client.setSampleRate(audio.getframerate())
    client.start()

    while True:
      samples = audio.readframes(BUFFER_SIZE)
      if len(samples) == 0: break
      if client.fill(samples): break
      time.sleep(0.032) # simulate real-time so we can see the partial transcripts
    
    results = client.finish() # returns either final response or error
    
    #print(results)
    
    text = results['AllResults'][0]['RawTranscription']#['SpokenResponse']
    words = text.split()
    return words


In [4]:
def get_joke():
    base_url = "https://api.whatdoestrumpthink.com/api/v1/quotes/random"
    url = base_url
    json_result = json.loads(urlopen(url).read().decode("UTF-8"))
    joke = 'A quote from the President of the United States: \n' + json_result['message']
    return joke

In [7]:
def do_task(words):
    print(words)
    email_key = ['mail','email','send','subject','body']
    joke_key = ['joke']
    math_operations = {'times': '*',
                       'multiply':'*', 
                       'add':'+',
                       'plus':'+',
                       'sum':'+',
                       'subtract':'-',
                       'minus':'-',
                       'divide':'/',
                       'divided':'/',
                       'negative':'-',
                       'power':'**',
                       'squared': '**2',
                       'cubed': '**3'}

    numbers = list()

    if any(keyword in words for keyword in email_key):
        print('Sending an email!')
        
        
        try:
            body_indx = words.index('body') #extracting the body of the email
            body_text = words[body_indx+1:]
        except ValueError: 
            body_indx = len(words)
            body_text = 'No Body detected'
        
        
        try:
            subject_indx = words.index('subject') #extracting the subject of the email
            if words[body_indx-1] == 'and': #removing the 'and' before the user says the word body
                subject_text = words[subject_indx+1:body_indx-1]
            else:
                subject_text = words[subject_indx+1:body_indx]
        except ValueError:
            subject_text = 'No subject detected'        
        
        email_dict = {'subject': ' '.join(map(str,subject_text)),
                      'body': ' '.join(map(str,body_text))}
        
        
        mail(sender=my_credentials.GMAIL_USERNAME, 
        pwd=my_credentials.GMAIL_PASSWORD, 
        to=[my_credentials.GMAIL_USERNAME,],
        subject=email_dict['subject'], 
        text=email_dict['body'], 
         # list of files to attach
        )
        
        print('Your email has been sent!')
        #return email_dict
        
    elif any(keyword in words for keyword in joke_key):
        print('One joke coming up!')
        joke = get_joke()
        print(joke)
        #return joke
    
    
    elif any(keyword in words for keyword in math_operations.keys()):
        print('Let\'s do some math!')
     

        operators = list()
        indx = list()
        parsed_words = list()
        equation = list()


        # check for the word 'for' in a math expression and change to 'four'
        for jj in range(len(words)):
            if words[jj] == 'for':
                words[jj] = 'four'


        # Find all the opperators in the expression
        for word in words:
            for key in math_operations.keys():
                if word == key:
                    operators.append(word)
        print(operators)
        # Find the indices of all the opperators            
        for operator in operators:
            a = [i for i, v in enumerate(words) if v == operator]
            indx.append(np.asarray(a))
        index = sorted(list(set(np.concatenate(indx)))) #keep only the unique indices 

        # We now recombine the words in fragments seperated by the operators
        # we do this because words like 'twenty five' or 'one hundred' will get converted 
        # to numbers incorrectly if we convert each word seperatly. 
        parsed_words.append(' '.join(map(str, words[0:index[0]])))
        for ii in range(len(index)-1):
            parsed_words.append(' '.join(map(str, words[index[ii]+1:index[ii+1]])))
        parsed_words.append(' '.join(map(str, words[index[len(index)-1]+1:])))    


        for iword in range(len(parsed_words)):
            try:
                temp = w2n.word_to_num(parsed_words[iword])
            except ValueError:
                print( ' Invalid operation or number encountered. Please ask another question. ')
                return 
            equation.append(temp)    
            if iword < (len(operators)): 
                equation.append(math_operations[operators[iword]])
        print(eval(' '.join(map(str, equation)))) 
    

In [8]:
file_name = 'email2'
time_for_recording = 3
#data = record_audio(time_for_recording, file_name )
#time.sleep(time_for_recording)
#words = convert_to_text(file_name)
words = ['send', 'me', 'an', 'email', 'with', 'subject', 'this', 'is', 'really','not', 'a', 'virus', 'and', 'body', 'i', 'swear', 'this', 'is', 'maybe', 'a', 'virus']
#words1 = ['send','email','subject', 'is', 'twenty', 'five']
#words = ['what', 'is','four', 'to','the','power','of', 'two', 'plus','five','point','six']
return_str = do_task(words)

 


['send', 'me', 'an', 'email', 'with', 'subject', 'this', 'is', 'really', 'not', 'a', 'virus', 'and', 'body', 'i', 'swear', 'this', 'is', 'maybe', 'a', 'virus']
Sending an email!


ConnectionResetError: [Errno 54] Connection reset by peer

# 2) Write a program that identifies musical notes from sound (AIFF) files. 

  - Run it on the supplied sound files (12) and report your program’s results. 
  - Use the labeled sounds (4) to make sure it works correctly. The provided sound files contain 1-3 simultaneous notes from different organs.
  - Save copies of any example plots to illustrate how your program works.
  
  https://piazza.com/berkeley/spring2018/ay250class13410/resources -> Homeworks -> hw3_sound_files.zip

$\textbf{$\Large$ Explination of algorithm}$. For this problem, I will import each audio file, sum the left and right channels to make the audio mono, and then compute the power spectrum of each file. The frequncies are calculated based on the sample rate of the files. Since the human ear percieves pitch logarithmically, I convert the frequency axis to midi number. This can be done by recognizing that octives follow a geometric progression, an there are 12 semitones (half notes) in an octive. We can use this to map frequencies to midi numbers (0-127) each of which corresponds to a musical note. 
$$
m = 12\log_2\left(\frac{f}{A_4}\right)
$$
The flaw in doing this is that this scaling assumes equal temperment, meaning the 12 notes per octive on the instrument in question are equally spaced, this is not the case for most standard instruments. This means that for certian notes, if the instrument is somewhat out of tune, the note could be potentially mislabled. 

In [1]:
#populate the namespace

#pip install SoundFile #This is used to read in the audio files
import soundfile as sf
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
from scipy import fftpack as fft
from os import listdir
import os
import seaborn as sns
import scipy.signal as signal


sns.set_style('darkgrid')



Hints: You’ll want to decompose the sound into a frequency power spectrum. Use a Fast Fourier Transform. Be care about “unpacking” the string hexcode into python data structures. The sound files use 32 bit data. Play around with what happens when you convert the string data to other integer sizes, or signed vs unsigned integers. Also, beware of harmonics.

In [82]:
def return_note_name(freq):
    step = convert_to_half_steps(freq)
    notes = ['C','C#','D','D#','E','F','F#','G','G#','A','A#','B']
    octive_number = step // 12 - 1
    note_name = step % 12
    return notes[note_name]+str(octive_number)


def convert_to_half_steps(freq):
    #C0 = 16.35 #Hz, defined from the standard A440 tuning
    A4 = 440
    C0 = A4
    half_step = np.round(12*np.log2(freq/C0)) + 69
    half_step = half_step.astype(np.int16)
    return half_step

def convert_to_freq(m):
    A4 = 440
    freq = A4*2**((m-69)/12)
    return freq

def remove_dupes(notes):
    good_notes = list()
    #print(notes)
    fundamental = np.min(notes)
    good_notes.append(fundamental)
    for note in notes:
        if np.abs(1-((note/fundamental)%1)) >= 0.01:
            if (note/fundamental)%1 != 0.0:
                good_notes.append(note)
    
    return good_notes
            
def find_notes(freqs, power_spectrum):
    
    weighting = 0.2
    freqs_to_keep = list()
    final_notes = list()
    
    cut_off = np.ones(len(half_steps))*weighting*np.max(power_spectrum)
    #print(cut_off[0])
    indx = np.where(power_spectrum>cut_off)
    half_steps_to_keep = set(half_steps[indx])
    #print(half_steps_to_keep)
    for step in half_steps_to_keep:
        freqs_to_keep.append(convert_to_freq(step))
      
    freqs_no_dupes = remove_dupes(freqs_to_keep)
    
    for fn in freqs_no_dupes:
        final_notes.append(return_note_name(fn))
    return final_notes


In [67]:
#Import all the files in a directory

path = 'data/sound_files/'

data_files = {}

for file in listdir(path):
    filename, file_extension = os.path.splitext(file)
    if file_extension == '.aif':
        data, fs = sf.read(path+file, dtype='int32')
        data_files[file] = [fs,data[:,0]+data[:,1]] #add the sample rate and the data to a dictionary, also make the stereo data mono
    

    
    
N = len(data_files['1.aif'][1])
freqs = fft.rfftfreq(N, d = 1/data_files['1.aif'][0])    
    
index_keep = np.where(freqs >= 0 ) #Trim off values below the lowest musical note


#print(index_keep)

note_list = list()
half_steps = convert_to_half_steps(freqs[index_keep]) #convert frequencies to midi number

# Now create a dictionary for the power spectra

power_spectra = {}
for key in data_files.keys():

    power_spec = np.abs(fft.rfft(data_files[key][1][index_keep]))**2
    #cut_off = np.ones(len(half_steps))*.5*np.max(power_spec)
    power_spectra[key] = power_spec



  del sys.path[0]


In [85]:
#test = 'C4+A4_PopOrgan.aif'

#result = find_notes(freqs, power_spectra[test])
"""cut_off = np.ones(len(freqs))*0.2*np.max(power_spectra['11.aif'])
plt.plot(freqs, power_spectra['11.aif'])
plt.plot(freqs,cut_off)
plt.show()"""

#ind = np.where(power_spectra['11.aif']>cut_off)
#print(freqs[ind])
#find_notes(freqs,power_spectra['11.aif'])

results = {}
for key in power_spectra.keys():
    
    
    
        
    #print(key)
    result = find_notes(freqs, power_spectra[key])

    results[key] = result


print(results)

{'C4+A4_PopOrgan.aif': ['C4', 'A4'], 'A4_PopOrgan.aif': ['A4'], '8.aif': ['F4'], '9.aif': ['G3'], 'F3_PopOrgan.aif': ['F3'], '11.aif': ['E2'], '10.aif': ['C2'], '12.aif': ['C2'], '7.aif': ['A6'], '6.aif': ['C5'], '4.aif': ['C4'], 'F4_CathedralOrgan.aif': ['F4'], '5.aif': ['G2', 'D3'], '1.aif': ['C4', 'G4', 'D4'], '2.aif': ['F3'], '3.aif': ['A4']}


In [90]:
import pprint
pprint.pprint(results)

{'1.aif': ['C4', 'G4', 'D4'],
 '10.aif': ['C2'],
 '11.aif': ['E2'],
 '12.aif': ['C2'],
 '2.aif': ['F3'],
 '3.aif': ['A4'],
 '4.aif': ['C4'],
 '5.aif': ['G2', 'D3'],
 '6.aif': ['C5'],
 '7.aif': ['A6'],
 '8.aif': ['F4'],
 '9.aif': ['G3'],
 'A4_PopOrgan.aif': ['A4'],
 'C4+A4_PopOrgan.aif': ['C4', 'A4'],
 'F3_PopOrgan.aif': ['F3'],
 'F4_CathedralOrgan.aif': ['F4']}


In [59]:
np.min([  82.3125,   82.375 ,   82.375 ,   82.4375 , 246.875 ])



82.3125