In [1]:
# Cat meow classification 
# Author: Colin Pierce
# Computational Physics course at York University
# last updated 04 Aug 2022
#
#
#
#    Zzzzz  |\      _,,,--,,_        +-----------------------------+
#           /,`.-'`'   ._  \-;;,_    | Why is there always a cat   |
#          |,4-  ) )_   .;.(  `'-'   | on whatever you're editing?|
#         '---''(_/._)-'(_\_)        +-----------------------------+
#
# Art by Patrick Roberts

In [2]:
# import seaborn as sns
import matplotlib.pyplot as plt
# import plotly 
import math

import pandas as pd
import numpy as np
import os
# import playsound as playsound
import ipywidgets as widgets
from ipywidgets import *
import random 
from IPython.display import Audio

from scipy.io.wavfile import read
from scipy import signal
# import plotly.graph_objs as go
# from plotly.offline import plot, iplot

from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [3]:
# dataset = https://zenodo.org/record/4008297; https://link.springer.com/chapter/10.1007/978-3-030-67835-7_20
# consists of 4 kHz meows when the cat was being brushed, in isolation in an unfamiliar environment, or waiting for food 

# USER: this worksheet saves to ".../[root]/dataset/..."

In [4]:
# begin with EDA on audio file data to motivate the appropriate transformations (warning is just for metadata)

dic = {}
names = []
totals = [0,0,0]
times = []

root, dirs, files = next(os.walk(".\dataset"))
for i in files:
    sit = i[0] # situation of meow (B, F, I = Brushing, Food, Isolation)
    rate, data = read(".\dataset\%s"%i)
    length = len(data)/rate
    dic[i]=[rate, data, length, sit] 
    names.append(i)
    times.append(length)
    
    if sit == "B":
        totals[0]+=1
    elif sit == "F":
        totals[1]+=1
    else:
        totals[2]+=1
        
plt.hist(times, bins=20, color="green")
# plt.grid()
plt.xlabel("Length [s]")
plt.ylabel("Amount")
plt.title("Distribution of times")

stats_d = [max(times)-min(times), np.var(times), np.std(times)] # measures of dispersion

print("'Brushing' samples: %d, 'Food' samples: %d, 'Isolation' samples: %d\n"%(totals[0],totals[1],totals[2]))
print("Mean, Median sample length = %f sec, %f sec"%(round(np.average(times),2),np.median(times)))
print("Range, Variance, Standard Deviation = %f, %f, %f"%(stats_d[0],stats_d[1],stats_d[2]))
    

StopIteration: 

In [None]:
# Play random cat meow from database

rand_choice = names[random.randint(0,len(names))]
print("Sample meow: '%s'"%rand_choice)
Audio(filename = ".\dataset\%s"%rand_choice)

In [None]:
# Waveform plot for meows

def show_wave(db_index):
    name = names[db_index]
    # print("Meow choice: '%s'"%name )
    time = np.arange(0,dic[name][2],1/dic[name][0]) 
    data = dic[name][1]
    
    plt.plot(time,data)
    plt.xlabel("Time")
    plt.ylabel("Amplitude")
    plt.title("file: '%s'"%name)

interact(show_wave, db_index=widgets.IntSlider(value=0, min=0, max=len(names)-1, step=1))

In [None]:
# Spectrogram plot for meows

def show_spec(db_index):
    name = names[db_index]
    frequencies, times, spectrogram = signal.spectrogram(dic[name][1], dic[name][0])
    
    plt.pcolormesh(times, frequencies, spectrogram)#, cmap='Greys')
    # plt.imshow(spectrogram)
    # plt.colorbar()
    plt.ylabel('Frequency')
    plt.xlabel('Time')
    plt.title("file: '%s'"%name)

interact(show_spec, db_index=widgets.IntSlider(value=0, min=0, max=len(names)-1, step=1))

# Notice the harmonics in the meows (Eg, meow number 39, 81, 94 or 174)
# (This exists in the human voice as well and is what we will try to learn)

# Can click on db_index number to type meow index

In [None]:
# The data needs to be simplified. 
# The most audible harmonic is the loudest one. We will standardize the data by selecting the loudest power output per meow.
# We will approximate meow length by finding the range of the closest points within a close enough power range (80% of max)

In [None]:
# Find all neighboring points appropriately close to the max power output of the spectrogram

def find_neighbors(m, i, j): # grid-crawling helper function
    d1 = [(i+1, j), (i+1, j), (i+1,j-1), (i,j+1), (i,j-1), (i-1, j+1), (i-1, j+1), (i-1, j-1)]
    d2a = [(i+2, j), (i+2, j+1), (i+2, j+2), (i+2, j-1), (i+2, j-2), (i+1, j-2), (i+1, j+2), (i, j+2), (i, j-2)]
    d2b = [(i-1, j+2), (i-1, j-2), (i-2,j+2), (i-2,j+1), (i-2,j), (i-2,j-1), (i-2,j-2)]
    return d1 + d2a + d2b

dic2 = {} 
index=1
for j in names:
    sit = j[0] # meow class
    
    pts = [] # distributions
    pts_t = []
    powers = []
    ts = []
    frequencies, times, spectrogram = signal.spectrogram(dic[j][1], dic[j][0]) 
    
    #ind = np.unravel_index(np.argmax(spectrogram, axis=None), spectrogram.shape) 
    ind = np.where(spectrogram == spectrogram.max()) # find loudest frequency index
    ind = (ind[0][0], ind[1][0])
    
    lf = frequencies[ind[0]] # find time, frequency points
    lf_time = times[ind[1]]
    
    power = spectrogram[ind[0]][ind[1]] # find its loudness
    
    init_power = power
    neighbors = find_neighbors(spectrogram, ind[0], ind[1]) # find loudest point neighbors
    
    conv = 0 # convergence criteria
    while abs(power-init_power)<(0.8*init_power): # power criteria
        if conv>10: 
            break
        pts.append(ind) 
        pts_t.append((lf_time, lf))
        powers.append(power)
        ts.append(lf_time)
        
        neighbors = find_neighbors(spectrogram, ind[0], ind[1]) # find neighbors
        
        for pie in neighbors: # prevent backwards crawl
            if pie in pts:
                indd = neighbors.index(pie)
                del neighbors[indd]
        
        sub_powers = [] # find loudest of neighbors
        for foo in neighbors:
            sub_power = spectrogram[foo[0]][foo[1]]
            sub_powers.append(sub_power)
        
        power = max(sub_powers) # find loudest
        ind = neighbors[sub_powers.index(power)] # find loudest index
        lf = frequencies[ind[0]] # find time, frequency points
        lf_time = times[ind[1]]
        
        conv+=1    
        
    print("Completed: %d/%d    "%(index,len(names)), end="\r")
    index+=1
    
    rnge = max(ts)-min(ts)
    dic2[j]=[lf, rnge, pts, pts_t, powers, sit] 
    # key: loudest freq, range(times), pts, pts (time,freq), lf's of pts,  class

df = pd.DataFrame(dic2)
df.to_pickle("data_transform.pkl")

In [None]:
# Observe our extracted data superimposed on the spectrograms

df = pd.read_pickle("data_transform.pkl")

def show_spec_mod(db_index):
    name = names[db_index]
    frequencies, times, spectrogram = signal.spectrogram(dic[name][1], dic[name][0])
    
    ptss = df[name][2]
    xs = []
    ys = []
    
    for j in ptss:
        ys.append(frequencies[j[0]])
        xs.append(times[j[1]])
        
    coords = [(xs[i], ys[i]) for i in range(len(xs))]
    print("Points: ", end='')
    print(coords)
    
    plt.pcolormesh(times, frequencies, spectrogram)#, cmap='Greys')
    plt.scatter(xs, ys, color="red")
    plt.ylabel('Frequency')
    plt.xlabel('Time')
    plt.title("file: '%s'"%name)
    
interact(show_spec_mod, db_index=widgets.IntSlider(value=0, min=0, max=len(names)-1, step=1))

In [None]:
df

In [None]:
# Plot meow features space

import matplotlib.patches as mpatches

labels = []
for r in df:
    xs = []
    ys = []
    
    frequencies, times, spectrogram = signal.spectrogram(dic[r][1], dic[r][0])
    
    pts = df[r][2]
    for i in pts:
        ys.append(frequencies[i[0]])
        xs.append(times[i[1]])
        
    sit = df[r][5]
    labels.append(sit)
    colors = {"B":"red", "F":"blue", "I":"green"} 

    #if len(set(xs))==len(xs):
    plt.scatter(xs, ys, s=10, c=colors[sit], label = str(sit), alpha=0.75)
        
red = mpatches.Patch(color='red', label='Brushing')
blue = mpatches.Patch(color='blue', label='Feeding')
green = mpatches.Patch(color='green', label='Isolation')

plt.title("Meow feature space")
plt.xlabel("Time")
plt.ylabel("Frequency")
plt.legend(handles=[red, blue, green], loc='upper right')

In [None]:
# Plot loudest frequency and approx meow length

labels = []
for r in df:
    xs = []
    ys = []
    xs.append(df[r][1])
    ys.append(df[r][0])
    
    sit = df[r][5]
    labels.append(sit)
    colors = {"B":"red", "F":"blue", "I":"green"} 

    #if len(set(xs))==len(xs):
    plt.scatter(xs, ys, s=10, c=colors[sit], label = str(sit), alpha=0.75)
    
red = mpatches.Patch(color='red', label='Brushing')
blue = mpatches.Patch(color='blue', label='Feeding')
green = mpatches.Patch(color='green', label='Isolation')

plt.title("Power feature space")
plt.xlabel("Length")
plt.ylabel("Max power")
plt.legend(handles=[blue, red, green], loc='upper right')

In [None]:
# Transpose dataframe for KNN

pwrs = []
lens = []
clss = []

key = {"B":0, "F":1, "I":2}
for j in df:
    pwrs.append(df[j][0])
    lens.append(df[j][1])
    clss.append(key[df[j][5]])

df2 = {"power":pwrs, "lengths": lens, "class":clss}
df2 = pd.DataFrame(df2)
df2.shape
df2

In [None]:
# Build KNN classifier

X_data = df2.drop(['class'],axis=1)
data = pd.DataFrame(X_data,columns=['power', 'length'])
y_data = df2['class']

X_train, X_test, y_train, y_test = train_test_split(df2, y_data,test_size=0.2, random_state = 1)

knn = KNeighborsClassifier()

knn.fit(X_train,y_train)

ypred=knn.predict(X_test)

# Test model

res= confusion_matrix(y_test, ypred)
print('Confusion matrix:')
print(res)
print("\n")

res2 = classification_report(y_test, ypred)
print('Classification report:')
print(res2)

res3 = accuracy_score(y_test,ypred)
print("Accuracy:", res3)

In [None]:
# Redo with different train/test split

X_data = df2.drop(['class'],axis=1)
data = pd.DataFrame(X_data,columns=['power', 'length'])
y_data = df2['class']

X_train, X_test, y_train, y_test = train_test_split(df2, y_data,test_size=0.3, random_state = 3)

knn = KNeighborsClassifier()

knn.fit(X_train,y_train)

ypred=knn.predict(X_test)

# Test model

res= confusion_matrix(y_test, ypred)
print('Confusion matrix:')
print(res)
print("\n")

res2 = classification_report(y_test, ypred)
print('Classification report:')
print(res2)

res3 = accuracy_score(y_test,ypred)
print("Accuracy:", res3)

In [None]:
# Obtain similar accuracy for different splits
# The accuracy is not bad considering it was trained on just 2 columns

# The model is likely underfit and not deployable
# A future approach would be to use a different dataset and perhaps expand on the meow identifying algorithm

# This dataset is likely flawed - The meows are low resolution to begin with and the dataset's claims of correct binning is questionable