In [1]:
from typing import Optional

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from FeatureExtractor import FeatureExtractor

In [2]:
%matplotlib inline

In [3]:
# Framing parameters shared by every feature computed in this notebook (both in samples).
FRAME_SIZE = 512  # length of one analysis frame
HOP_LENGTH = 256  # step between consecutive frame starts; half of FRAME_SIZE, i.e. 50% overlap

In [4]:
# Load the UrbanSound8K metadata table; the path is relative to the notebook's working directory.
sound_db = pd.read_csv('./db/UrbanSound8K.csv')

In [5]:
# Column names, dtypes and non-null counts of the metadata table.
sound_db.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8732 entries, 0 to 8731
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   slice_file_name  8732 non-null   object 
 1   fsID             8732 non-null   int64  
 2   start            8732 non-null   float64
 3   end              8732 non-null   float64
 4   salience         8732 non-null   int64  
 5   fold             8732 non-null   int64  
 6   classID          8732 non-null   int64  
 7   class            8732 non-null   object 
dtypes: float64(2), int64(4), object(2)
memory usage: 545.9+ KB


In [6]:
# Summary statistics of the numeric metadata columns.
sound_db.describe()

Unnamed: 0,fsID,start,end,salience,fold,classID
count,8732.0,8732.0,8732.0,8732.0,8732.0,8732.0
mean,116033.493816,38.645409,42.253312,1.347,5.385937,4.592877
std,57991.017218,74.292126,74.369669,0.476043,2.84682,2.894544
min,344.0,0.0,0.105962,1.0,1.0,0.0
25%,69942.25,3.0,6.839398,1.0,3.0,2.0
50%,118279.0,10.376492,14.0,1.0,5.0,4.0
75%,166942.0,35.131372,38.866979,2.0,8.0,7.0
max,209992.0,600.125356,604.125356,2.0,10.0,9.0


In [7]:
# Preview the first rows of the metadata table.
sound_db.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [8]:
class FeatureExtractor:
    """Frame-based, time-domain audio feature helpers.

    Every method is a ``@staticmethod``; the class only serves as a namespace.

    NOTE(review): the notebook's import cell also executes
    ``from FeatureExtractor import FeatureExtractor``. Defining the class here
    rebinds that name, so the imported version is unused from this cell on.
    Confirm which of the two definitions is meant to be authoritative.
    """

    @staticmethod
    def amplitude_envelope(signal: np.ndarray, frame_size: int, hop_length: int) -> np.ndarray:
        """Return the largest sample value of each frame.

        Frames start at ``0, hop_length, 2 * hop_length, ...`` and cover up to
        ``frame_size`` samples. Frames near the end of the signal are simply
        shorter; no padding is applied. The maximum is taken over the signed
        sample values (no ``abs``).

        Args:
            signal: One-dimensional array of audio samples.
            frame_size: Maximum number of samples per frame.
            hop_length: Number of samples between consecutive frame starts.

        Returns:
            A 1-D array with one value per frame, i.e.
            ``ceil(len(signal) / hop_length)`` entries (empty for an empty
            signal).

        Raises:
            ValueError: If ``hop_length`` is 0, or if a frame is empty
                (e.g. ``frame_size`` is 0 for a non-empty signal).
        """
        samples = np.asarray(signal)
        frame_starts = range(0, len(samples), hop_length)
        # ndarray.max() reduces in compiled code and propagates NaN consistently,
        # whereas the builtin max() walks the frame element by element in Python.
        return np.array([samples[start:start + frame_size].max() for start in frame_starts])

    @staticmethod
    def root_mean_square(signal: np.ndarray, frame_size: int, hop_length: int) -> np.ndarray:
        """Return the frame-wise RMS of ``signal`` via ``librosa.feature.rms``.

        The result is returned exactly as librosa produces it. Per librosa's
        documentation that is a 2-D array of shape ``(1, n_frames)`` for a 1-D
        signal, and librosa applies its own framing defaults (such as
        centering), so ``n_frames`` need not equal the frame count of
        ``amplitude_envelope`` -- confirm before comparing frame by frame.

        Args:
            signal: Audio samples, passed to librosa as ``y``.
            frame_size: Passed to librosa as ``frame_length``.
            hop_length: Passed to librosa as ``hop_length``.
        """
        return librosa.feature.rms(y=signal, frame_length=frame_size, hop_length=hop_length)

    @staticmethod
    def zero_crossing_rate(signal: np.ndarray, frame_size: int, hop_length: int) -> np.ndarray:
        """Return the frame-wise zero-crossing rate via ``librosa.feature.zero_crossing_rate``.

        The result is returned exactly as librosa produces it (per librosa's
        documentation, a ``(1, n_frames)`` array of per-frame crossing
        fractions for a 1-D signal).

        Args:
            signal: Audio samples, passed to librosa as ``y``.
            frame_size: Passed to librosa as ``frame_length``.
            hop_length: Passed to librosa as ``hop_length``.
        """
        return librosa.feature.zero_crossing_rate(y=signal, frame_length=frame_size, hop_length=hop_length)

    @staticmethod
    def normalize_feature(feature: np.ndarray) -> float:
        """Collapse a feature array into one scalar: its Euclidean (L2) norm.

        Despite the name, nothing is rescaled. The return value is
        ``sqrt(sum(feature ** 2))`` taken over all elements, so it grows with
        the number of frames as well as with their magnitude.

        Args:
            feature: Numeric array of any shape.

        Returns:
            A NumPy floating-point scalar with the dtype that results from
            squaring and summing ``feature``.
        """
        return np.sqrt(np.sum(feature ** 2))

In [9]:
class AudioDataLoader:
    """Turns rows of a clip-metadata table into per-clip feature records."""

    def load_from_df(self, df: pd.DataFrame, max_rows: Optional[int] = 11, data_dir: str = './db') -> list:
        """Load the audio clips referenced by ``df`` and summarise their features.

        For each processed row the clip at
        ``{data_dir}/fold{fold}/{slice_file_name}`` is read with
        ``librosa.load`` (librosa's default loading options), and each feature
        is reduced to a single scalar with ``FeatureExtractor.normalize_feature``.

        Args:
            df: Metadata table with at least the columns ``slice_file_name``,
                ``fold`` and ``class``.
            max_rows: Maximum number of leading rows to process. The default
                of 11 reproduces the cap that used to be hard-coded in this
                method; pass ``None`` to process every row.
            data_dir: Directory containing the ``fold<N>`` sub-directories.

        Returns:
            A list of dicts, one per processed row and in row order, with the
            keys ``class``, ``fold``, ``amplitude_envelope``,
            ``root_mean_square`` and ``zero_crossing_rate``.

        Raises:
            ValueError: If ``max_rows`` is negative.
            KeyError: If a required column is missing from ``df``.
        """
        if max_rows is not None and max_rows < 0:
            raise ValueError(f"max_rows must be non-negative or None, got {max_rows}")

        clip_rows = df[["slice_file_name", "fold", "class"]]
        if max_rows is not None:
            clip_rows = clip_rows.iloc[:max_rows]

        entries = []
        # name=None yields plain tuples, which sidesteps the keyword column name "class".
        for file_name, fold, class_ in clip_rows.itertuples(index=False, name=None):
            # librosa.load also returns the sampling rate, which is not needed here.
            y, _ = librosa.load(f'{data_dir}/fold{fold}/{file_name}')
            entry = {'class': class_, 'fold': fold}
            entry.update(self._extract_features(y))
            entries.append(entry)

        return entries

    @staticmethod
    def _extract_features(signal: np.ndarray) -> dict:
        """Compute each frame-based feature of ``signal`` and reduce it to one scalar."""
        # Order matters: it becomes the column order of the resulting DataFrame.
        extractors = (
            ('amplitude_envelope', FeatureExtractor.amplitude_envelope),
            ('root_mean_square', FeatureExtractor.root_mean_square),
            ('zero_crossing_rate', FeatureExtractor.zero_crossing_rate),
        )
        return {
            name: FeatureExtractor.normalize_feature(
                extract(signal, frame_size=FRAME_SIZE, hop_length=HOP_LENGTH)
            )
            for name, extract in extractors
        }

In [10]:
# Extract features for the clips listed in the metadata table.
# load_from_df only processes the first 11 rows by default, so this is a
# small sample rather than the full dataset.
data_loader = AudioDataLoader()
new_data_list = data_loader.load_from_df(sound_db)
new_data_list

[{'class': 'dog_bark',
  'fold': 5,
  'amplitude_envelope': 2.5011067,
  'root_mean_square': 0.85973305,
  'zero_crossing_rate': 0.701484631675251},
 {'class': 'children_playing',
  'fold': 5,
  'amplitude_envelope': 0.20763576,
  'root_mean_square': 0.07694757,
  'zero_crossing_rate': 2.7154218977469546},
 {'class': 'children_playing',
  'fold': 5,
  'amplitude_envelope': 0.14198396,
  'root_mean_square': 0.056997657,
  'zero_crossing_rate': 1.999477318175537},
 {'class': 'children_playing',
  'fold': 5,
  'amplitude_envelope': 0.2545674,
  'root_mean_square': 0.09267064,
  'zero_crossing_rate': 2.646289055292365},
 {'class': 'children_playing',
  'fold': 5,
  'amplitude_envelope': 0.15470709,
  'root_mean_square': 0.060585294,
  'zero_crossing_rate': 2.3008832147927967},
 {'class': 'children_playing',
  'fold': 5,
  'amplitude_envelope': 0.17995471,
  'root_mean_square': 0.0806928,
  'zero_crossing_rate': 1.5342353213377127},
 {'class': 'children_playing',
  'fold': 5,
  'amplitude_e

In [11]:
# One row per processed clip; columns follow the key order of the feature records.
features_data_frame = pd.DataFrame(new_data_list)
# A bare last-line expression renders the frame as a rich table, whereas
# print() falls back to plain text that wraps wide frames across several blocks.
features_data_frame

               class  fold  amplitude_envelope  root_mean_square  \
0           dog_bark     5            2.501107          0.859733   
1   children_playing     5            0.207636          0.076948   
2   children_playing     5            0.141984          0.056998   
3   children_playing     5            0.254567          0.092671   
4   children_playing     5            0.154707          0.060585   
5   children_playing     5            0.179955          0.080693   
6   children_playing     5            0.123882          0.053665   
7   children_playing     5            0.137885          0.059891   
8   children_playing     5            0.149603          0.065169   
9           car_horn    10            0.889928          0.321656   
10          car_horn    10            0.838890          0.310045   

    zero_crossing_rate  
0             0.701485  
1             2.715422  
2             1.999477  
3             2.646289  
4             2.300883  
5             1.534235  
6       

In [12]:
# Preview the first rows of the extracted features.
features_data_frame.head()

Unnamed: 0,class,fold,amplitude_envelope,root_mean_square,zero_crossing_rate
0,dog_bark,5,2.501107,0.859733,0.701485
1,children_playing,5,0.207636,0.076948,2.715422
2,children_playing,5,0.141984,0.056998,1.999477
3,children_playing,5,0.254567,0.092671,2.646289
4,children_playing,5,0.154707,0.060585,2.300883


In [13]:
# Column dtypes and non-null counts of the extracted features.
features_data_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   class               11 non-null     object 
 1   fold                11 non-null     int64  
 2   amplitude_envelope  11 non-null     float32
 3   root_mean_square    11 non-null     float32
 4   zero_crossing_rate  11 non-null     float64
dtypes: float32(2), float64(1), int64(1), object(1)
memory usage: 480.0+ bytes


In [14]:
# Summary statistics of the numeric feature columns.
features_data_frame.describe()

Unnamed: 0,fold,amplitude_envelope,root_mean_square,zero_crossing_rate
count,11.0,11.0,11.0,11.0
mean,5.909091,0.507286,0.185277,1.682394
std,2.0226,0.718446,0.244904,0.656144
min,5.0,0.123882,0.053665,0.701485
25%,5.0,0.145793,0.060238,1.376374
50%,5.0,0.179955,0.076948,1.46118
75%,5.0,0.546729,0.201358,2.15018
max,10.0,2.501107,0.859733,2.715422


In [15]:
# Persist the extracted features next to the notebook; index=False leaves the row index out of the file.
features_data_frame.to_csv('features_data.csv', index=False)