In [47]:
import librosa, os
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm


from IPython.display import Audio

##### Function to get dominant frequency from audio sample

In [5]:
def get_dom_freq(file_name):
  audio_samples, sample_rate = librosa.load(file_name, sr=None)
  w = np.fft.fft(audio_samples)
  freqs = np.fft.fftfreq(len(w))
  idx = np.argmax(np.abs(w))
  freq = freqs[idx]
  freq_in_hertz = abs(freq * sample_rate)
  return freq_in_hertz

In [2]:
audio_dataset_path='/content/drive/MyDrive/'
metadata=pd.read_excel('/content/drive/MyDrive/Data.xlsx')

##### Extract dominant frequency from each sample in dataset
Add to pandas dataframe for analysis

In [6]:
extracted_features=[]
for index_num,row in tqdm(metadata.iterrows()):
    file_name = os.path.join(os.path.abspath(audio_dataset_path),str(row["Path"]))
    final_class_labels=row["Category"]
    data=get_dom_freq(file_name)
    extracted_features.append([data,final_class_labels])

871it [06:56,  2.09it/s]


In [8]:
df=pd.DataFrame(extracted_features,columns=['feature','class'])
df.head()

Unnamed: 0,feature,class
0,438.575899,A
1,438.508303,A
2,438.182419,A
3,438.052647,A
4,437.858559,A


##### Get Mean, Median and Range of dominant frequencies for each category

In [20]:
categories = df['class'].unique()
actual_exp_vals = {'A': 440.00, 'B': 493.88, 'C_high': 523.25, 'C_low': 261.63, 
                   'D': 293.66, 'E': 329.63, 'F': 349.23, 'G': 392.00, 'backg': 'NA'}
for i in categories:
  temp_df = df[(df['class']==i)]
  print(i)
  print("Expected", actual_exp_vals[i])
  print("Mean", temp_df['feature'].mean())
  print("Median", temp_df['feature'].median())
  print("Range", temp_df['feature'].min(), "-", temp_df['feature'].max(), temp_df['feature'].max()-temp_df['feature'].min())
  print()

A
Expected 440.0
Mean 452.1485930945203
Median 438.8838955654455
Range 437.21017813552095 - 881.8124634548258 444.6022853193048

B
Expected 493.88
Mean 515.7970914521438
Median 497.3439269826157
Range 90.24435617678114 - 1512.7505098986292 1422.506153721848

C_high
Expected 523.25
Mean 755.5064335770558
Median 525.4554943783282
Range 49.68012650247528 - 2650.378205885829 2600.698079383354

C_low
Expected 261.63
Mean 262.3392614368394
Median 262.1367759614783
Range 260.5750909405855 - 265.7553241588614 5.18023321827593

D
Expected 293.66
Mean 298.1430780344393
Median 298.1437767395249
Range 295.36662168241116 - 300.2252862243014 4.858664541890221

E
Expected 329.63
Mean 329.65892819324006
Median 329.5522646696039
Range 328.768597808927 - 330.7071575622783 1.938559753351342

F
Expected 349.23
Mean 356.99625930564633
Median 357.02970485811966
Range 352.63538302485796 - 361.06228403766846 8.426901012810504

G
Expected 392.0
Mean 408.5151253597809
Median 396.1291196099269
Range 390.65195081

In [44]:
def get_top_five_freq(file_name):
  audio_samples, sample_rate = librosa.load(file_name, sr=None)
  w = np.fft.fft(audio_samples)
  freqs = np.fft.fftfreq(len(w))
  print(freqs[:10])
  print(freqs[-10:])
  for i in range(5):
    idx = np.argmax(np.abs(w))
    freq = freqs[idx]
    freq_in_hertz = abs(freq * sample_rate)
    print(freq_in_hertz)
    w = np.delete(w, idx)
    freqs = np.delete(freqs, idx)
  return freq_in_hertz

In [45]:
get_top_five_freq('/content/drive/MyDrive/Notes/C (low)/1648419817170.wav')

[0.00000000e+00 7.64561065e-06 1.52912213e-05 2.29368320e-05
 3.05824426e-05 3.82280533e-05 4.58736639e-05 5.35192746e-05
 6.11648852e-05 6.88104959e-05]
[-7.64561065e-05 -6.88104959e-05 -6.11648852e-05 -5.35192746e-05
 -4.58736639e-05 -3.82280533e-05 -3.05824426e-05 -2.29368320e-05
 -1.52912213e-05 -7.64561065e-06]
261.64502958851324
261.64502958851324
261.98220101839536
261.98220101839536
261.3078581586312


261.3078581586312