In [1]:
import csv
import datetime
import random
import glob
import os
import tensorflow as tf
import tensorflow_hub as hub
from IPython.display import Audio
import numpy as np
import scipy
from scipy.io import wavfile
import soundfile as sf
import resampy
import librosa

  "class": algorithms.Blowfish,


In [1]:
  """Returns list of class names corresponding to score vector."""
def class_names_from_csv(class_map_csv_text):
    class_names = []
    with tf.io.gfile.GFile(class_map_csv_text) as csvfile:
        reader = csv.DictReader(csvfile)
    for row in reader:
        class_names.append(row['display_name'])

    return class_names



In [3]:
  """Resample waveform if required."""
def ensure_sample_rate(original_sample_rate, waveform, desired_sample_rate=16000):
    if original_sample_rate != desired_sample_rate:
        desired_length = int(round(float(len(waveform)) / original_sample_rate * desired_sample_rate))
        waveform = scipy.signal.resample(waveform, desired_length)
    return desired_sample_rate, waveform

In [4]:
def getModel():
    """Load the YAMNet model from TensorFlow Hub together with its class names.

    Returns:
        A ``(model, class_names)`` tuple, where ``class_names`` is the list
        produced by ``class_names_from_csv`` from the model's class-map file.
    """
    # Only supply the local cache directory when the caller has not already
    # configured one, so an existing TFHUB_CACHE_DIR is respected.
    os.environ.setdefault("TFHUB_CACHE_DIR", "\\Users\\chloe\\Documents\\tensorflow")
    model = hub.load('https://tfhub.dev/google/yamnet/1')
    class_map_path = model.class_map_path().numpy()
    class_names = class_names_from_csv(class_map_path)
    return model, class_names

In [5]:
SAMPLE_RATE = 16000.0


def getClassifications(model,wav_data,sr):
    """Classify audio data according to the AudioSet YAMNet classes.

    Adapted from the inference.py code in the official documentation. If the
    wave is not mono or not sampled at SAMPLE_RATE, it is fixed so it fits the
    model input before being classified.

    The output of the model is a matrix of (# time frames, # classes)
    classifier scores; the documentation recommends taking the mean across the
    0th axis to get an average across time, so the result describes the whole
    clip rather than a specific time frame.

    Args:
        model: Callable that takes the waveform and returns
            ``(scores, embeddings, spectrogram)``.
        wav_data: Array of 16-bit PCM samples; a second axis, if present, is
            averaged away (treated as channels).
        sr: Sample rate of ``wav_data``.

    Returns:
        ``(prediction, std)``: the per-class mean and the per-class standard
        deviation of the scores across time frames.
    """
    waveform = wav_data / 32768.0  # Convert to [-1.0, +1.0]
    waveform = waveform.astype('float32')
    if len(waveform.shape) > 1:
        # Collapse multi-channel audio to mono.
        waveform = np.mean(waveform, axis=1)
    if sr != SAMPLE_RATE:
        waveform = resampy.resample(waveform, sr, SAMPLE_RATE)

    scores, _embeddings, _spectrogram = model(waveform)
    prediction = np.mean(scores, axis=0)  # Averaged across time
    # Spread across time frames; this was previously np.mean, which just
    # duplicated `prediction`.
    std = np.std(scores, axis=0)
    return prediction,std

In [6]:
"""
After the model scores all the classes, this function gets the top five classifications and matches them to the class names.
The returned result is a flat list that alternates class name and probability for the top five: [name1, prob1, name2, prob2, ...].
"""
def getTopFive(predictions):
    """Return the five highest-scoring classes with their scores.

    Args:
        predictions: Per-class scores, indexed like the notebook-level
            ``class_names`` list.

    Returns:
        A flat list ``[name, score, name, score, ...]`` ordered from highest
        to lowest score (at most five name/score pairs).
    """
    ranked = np.argsort(predictions)[::-1]  # indices, best score first
    flat = []
    for idx in ranked[:5]:
        flat.extend((class_names[idx], predictions[idx]))
    return flat
    

In [7]:

# Load YAMNet once; `model` and `class_names` are used as notebook-level
# globals by the cells and helper functions in this notebook.
model, class_names = getModel()

In [17]:

# Map each file key (first CSV column, e.g. "PNET_9C4\T000146.wav") to its
# entropy value (second column). The `with` block guarantees the file is
# closed; newline="" is what the csv module expects for files it reads.
entropies = {}
with open("../../../Desktop/fwdchickenvocalisationspectralentropy/fullband_entropies.csv", newline="") as f:
    reader = csv.reader(f)
    for line in reader:
        # Spot-check output: list the keys containing "46".
        if "46" in line[0]: print(line[0])
        entropies[line[0]] = float(line[1])

PNET_9C4\T000146.wav
PNET_9C4\T000246.wav
PNET_9C4\T000346.wav
PNET_9C4\T000446.wav
PNET_9C4\T000460.wav
PNET_9C4\T000461.wav
PNET_9C4\T000462.wav
PNET_9C4\T000463.wav
PNET_9C4\T000464.wav
PNET_9C4\T000465.wav
PNET_9C4\T000466.wav
PNET_9C4\T000467.wav
PNET_9C4\T000468.wav
PNET_9C4\T000469.wav
PNET_9C4\T000546.wav
PNET_9C4\T000646.wav
PNET_9C4\T000746.wav
PNET_9C4\T000846.wav
PNET_9C4\T000946.wav


In [16]:
"PNET_9C4\T000046.wav" in entropies

False

In [18]:
# Classify every recording in PNET_9C4 and pair its entropy with the top
# predicted class name. Files without an entropy entry are printed and skipped.
files = glob.glob("../../../Desktop/fwdchickenvocalisationspectralentropy/PNET_9C4/*")
pairs = []
for file in files:
    data,sr = sf.read(file, dtype=np.int16)
    p,s = getClassifications(model,data,sr)
    prediction = getTopFive(p)
    # The entropy CSV keys look like "PNET_9C4\T000146.wav". Build that key
    # from the path components instead of splitting on "/", which only
    # produced it when glob returned a backslash before the file name.
    filen = os.path.basename(os.path.dirname(file)) + "\\" + os.path.basename(file)
    if filen not in entropies:
        print(filen)
        continue
    # prediction[0] is the name of the highest-scoring class.
    pairs.append((entropies[filen],prediction[0]))

PNET_9C4\T000046.wav
PNET_9C4\T000060.wav
PNET_9C4\T000076.wav
PNET_9C4\T000082.wav
PNET_9C4\T000113.wav
PNET_9C4\T000114.wav
PNET_9C4\T000120.wav
PNET_9C4\T000121.wav
PNET_9C4\T000137.wav
PNET_9C4\T000138.wav
PNET_9C4\T000145.wav
PNET_9C4\T000148.wav
PNET_9C4\T000247.wav
PNET_9C4\T000550.wav
PNET_9C4\T000593.wav
PNET_9C4\T000598.wav
PNET_9C4\T000754.wav
PNET_9C4\T000769.wav
PNET_9C4\T000836.wav
PNET_9C4\T000889.wav
PNET_9C4\T000945.wav
PNET_9C4\T000970.wav
PNET_9C4\T000981.wav
PNET_9C4\T000996.wav
PNET_9C4\T001000.wav


In [4]:
files = glob.glob("Sounds/*/*_split.wav")
# newline='' is required for files handed to csv.writer; without it Windows
# output gets an extra blank line after every row.
f = open("sound_metadata.csv",'w', newline='')
writer = csv.writer(f)

In [7]:
# Write one (path, label) row per file. The label is the name of the file's
# parent directory with everything from "Sounds" onward removed.
try:
    for file in files:
        # os.path handles either path separator, unlike splitting on "\\".
        label = os.path.basename(os.path.dirname(file)).split("Sounds")[0]
        writer.writerow([file,label])
finally:
    # Close (and flush) the CSV even if a row fails to write.
    f.close()
    

In [None]:
files = glob.glob("../..//../Desktop/audio/*.wav")

# false  : segments whose top label is in `names`
# total  : segments classified
# others : top labels that are not in `names`
# dd     : top label -> list of source files (one entry per segment)
false = 0
total = 0
others = []
dd = {}
for file in files:
    # The id is the second "-"-separated field of the file name.
    id_ = os.path.basename(file).split("-")[1]
    if id_ not in ids: continue
    split, sr = splitFile(file,2)
    for el in split:
        # Classify this segment; previously `prediction` was never recomputed
        # here, so a stale value from another cell was counted every time.
        p, s = getClassifications(model, el, sr)
        prediction = getTopFive(p)
        if prediction[0] in names: false +=1
        else:
            others.append(prediction[0])
        if prediction[0] not in dd: dd[prediction[0]]=[]
        dd[prediction[0]].append(file)
        total +=1
# Report NaN ratios instead of raising ZeroDivisionError when nothing matched.
(false, total,
 false/total if total else float("nan"),
 (total-false)/total if total else float("nan"))

In [None]:
from collections import Counter
# Tally the top labels that were appended to `others`.
x = Counter(others)
# Display the labels ordered from most to least frequent.
x.most_common()

In [None]:
# Source files recorded under the top label 'Water' (dd maps label -> files).
dd['Water']


In [None]:
count = 0
total = 0
# Print the top-five labels for each segment in `split`.
for i, section in enumerate(split):
    p, s = getClassifications(model, section, sr)
    print(getTopFive(p))

In [None]:
"""
Need index of Cough ID in class names (to get probability) for cough profile. I assume this won't change from run to run, but just in case, don't want to hardcode it
Also, if we decide to add more categories as labels to cough (throat clearing, for example), this will come in handy
"""
def getCoughIndex():
    """Return the position of the "Cough" class in ``class_names``.

    Looked up by name rather than hard-coded so it stays valid if the class
    list changes.

    Returns:
        The index of the first entry equal to "Cough".

    Raises:
        ValueError: If ``class_names`` has no "Cough" entry. Previously this
            returned None, which NumPy accepts as an index (it adds an axis),
            so the error would have gone unnoticed.
    """
    try:
        return class_names.index("Cough")
    except ValueError:
        raise ValueError('"Cough" not found in class_names') from None
        

In [None]:
"""
Takes a set of audio data and returns the probability of cough for each element

TODO: Future versions would have timestamps, but I don't have enough coughs with timestamps to create that kind of data
INCORPORATE standard deviations somehow
"""
COUGH_INDEX = getCoughIndex()


def getCoughProfile(audiodata, sample_rate=None):
    """Return the cough score of every frame in ``audiodata``.

    Args:
        audiodata: Iterable of audio frames, each passed to
            ``getClassifications``.
        sample_rate: Sample rate of the frames. When omitted, the
            notebook-level ``sr`` is used, as before.

    Returns:
        A list with one value per frame: the mean score at ``COUGH_INDEX``.
    """
    # Prefer an explicit rate; fall back to the global `sr` so existing calls
    # of getCoughProfile(data) keep working.
    rate = sr if sample_rate is None else sample_rate
    results = []
    for frame in audiodata:
        prediction, _std = getClassifications(model, frame, rate)
        results.append(prediction[COUGH_INDEX])
    return results

In [None]:
# glob returns paths in arbitrary order, so sort before slicing to make the
# choice of the first 25 files reproducible.
files = sorted(glob.glob("../..//../Desktop/Trainingdata/audio_data/cough/*.wav"))[0:25]
data = []
for file in files:
    split,sr = splitFile(file,2)
    data.extend(split)
# getCoughProfile reads the notebook-level `sr`, i.e. the rate of the last
# file split above.
getCoughProfile(data)

In [2]:
import json
# Load the ontology; the `with` block closes the file handle after parsing.
with open("Datasets/ontology.json") as ontology_file:
    jsondata = json.load(ontology_file)

In [3]:
# Index the ontology by node id: id -> (child_ids, name).
# Built with a comprehension so no notebook-level variable named `id` is left
# behind shadowing the builtin.
d = {el['id']: (el['child_ids'], el['name']) for el in jsondata}
    

In [11]:
# Collect the names of every node below "/m/0dgw9r" (the "Human sounds" node):
# each direct child contributes itself plus all of its descendants.
children = d["/m/0dgw9r"][0]
names = []
for el in children:
    names += get_subordinates(d, el)

names

['Human voice',
 'Speech',
 'Male speech, man speaking',
 'Female speech, woman speaking',
 'Child speech, kid speaking',
 'Conversation',
 'Narration, monologue',
 'Babbling',
 'Speech synthesizer',
 'Shout',
 'Bellow',
 'Whoop',
 'Yell',
 'Battle cry',
 'Children shouting',
 'Screaming',
 'Whispering',
 'Laughter',
 'Baby laughter',
 'Giggle',
 'Snicker',
 'Belly laugh',
 'Chuckle, chortle',
 'Crying, sobbing',
 'Baby cry, infant cry',
 'Whimper',
 'Wail, moan',
 'Sigh',
 'Singing',
 'Choir',
 'Yodeling',
 'Chant',
 'Mantra',
 'Male singing',
 'Female singing',
 'Child singing',
 'Synthetic singing',
 'Rapping',
 'Humming',
 'Groan',
 'Grunt',
 'Yawn',
 'Whistling',
 'Wolf-whistling',
 'Respiratory sounds',
 'Breathing',
 'Wheeze',
 'Snoring',
 'Gasp',
 'Pant',
 'Snort',
 'Cough',
 'Throat clearing',
 'Sneeze',
 'Sniff',
 'Human locomotion',
 'Run',
 'Shuffle',
 'Walk, footsteps',
 'Digestive',
 'Chewing, mastication',
 'Biting',
 'Gargling',
 'Stomach rumble',
 'Burping, eructation'

In [8]:
def get_subordinates(dictionary, label):
    """List the name of ``label`` followed by the names of all its descendants.

    Args:
        dictionary: Mapping of node id -> ``(child_ids, name)``.
        label: Id of the node to start from.

    Returns:
        Names in depth-first, parent-before-children order, with children
        visited in the order they are listed.
    """
    collected = []
    pending = [label]  # explicit stack instead of recursion
    while pending:
        node = pending.pop()
        kids, title = dictionary[node]
        collected.append(title)
        # Push children reversed so the first child is popped (visited) first.
        pending.extend(list(kids)[::-1])
    return collected
                
        
    

In [10]:
# Print the id and name of every node that lists "/m/09hlz4" as a child.
for key, vals in d.items():
    child_ids, label = vals
    if "/m/09hlz4" in child_ids:
        print(key, label)

/m/0dgw9r Human sounds
