In [38]:
import pandas as pd
import librosa 
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from FeatureExtractor import FeatureExtractor

In [39]:
%matplotlib inline

In [40]:
FRAME_SIZE = 512
HOP_LENGTH = 256

In [41]:
sound_db = pd.read_csv('./db/UrbanSound8K.csv')

In [42]:
sound_db.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8732 entries, 0 to 8731
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   slice_file_name  8732 non-null   object 
 1   fsID             8732 non-null   int64  
 2   start            8732 non-null   float64
 3   end              8732 non-null   float64
 4   salience         8732 non-null   int64  
 5   fold             8732 non-null   int64  
 6   classID          8732 non-null   int64  
 7   class            8732 non-null   object 
dtypes: float64(2), int64(4), object(2)
memory usage: 545.9+ KB


In [43]:
sound_db.describe()

Unnamed: 0,fsID,start,end,salience,fold,classID
count,8732.0,8732.0,8732.0,8732.0,8732.0,8732.0
mean,116033.493816,38.645409,42.253312,1.347,5.385937,4.592877
std,57991.017218,74.292126,74.369669,0.476043,2.84682,2.894544
min,344.0,0.0,0.105962,1.0,1.0,0.0
25%,69942.25,3.0,6.839398,1.0,3.0,2.0
50%,118279.0,10.376492,14.0,1.0,5.0,4.0
75%,166942.0,35.131372,38.866979,2.0,8.0,7.0
max,209992.0,600.125356,604.125356,2.0,10.0,9.0


In [44]:
sound_db.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [45]:
class FeatureExtractor:
    @staticmethod
    def amplitude_envelope(signal: np.ndarray, frame_size: int, hop_length: int) -> np.array:
        ae = []
        for i in range(0, len(signal), hop_length):
            ae.append(max(signal[i:i + frame_size]))
        return np.array(ae)

    @staticmethod
    def root_mean_square(signal: np.ndarray, frame_size: int, hop_size: int) -> np.array:
        return librosa.feature.rms(y=signal, frame_length=frame_size, hop_length=hop_size)

    @staticmethod
    def normalize_feature(feature: np.ndarray) -> float:
        return np.sqrt(np.sum(feature ** 2))

In [46]:
class AudioDataLoader:
    def load_from_df(self, df: pd.DataFrame):
        entries = []
        for i in range(len(df)):
            data = dict()
            file_name, fold, class_ = df.iloc[i][["slice_file_name", "fold", "class"]]
            
            y, sr = librosa.load(f'./db/fold{fold}/{file_name}')
            data['class'] = class_
            data['fold'] = fold
            data['amplitude_envelope'] = FeatureExtractor.normalize_feature(FeatureExtractor.amplitude_envelope(y, frame_size=FRAME_SIZE, hop_length=HOP_LENGTH))
                
            entries.append(data)
        
        return entries

In [47]:
data_loader = AudioDataLoader()
new_data_list = data_loader.load_from_df(sound_db)
new_data_list

[{'class': 'dog_bark', 'fold': 5, 'amplitude_envelope': 2.5011067},
 {'class': 'children_playing', 'fold': 5, 'amplitude_envelope': 0.20763576},
 {'class': 'children_playing', 'fold': 5, 'amplitude_envelope': 0.14198396},
 {'class': 'children_playing', 'fold': 5, 'amplitude_envelope': 0.2545674},
 {'class': 'children_playing', 'fold': 5, 'amplitude_envelope': 0.15470709},
 {'class': 'children_playing', 'fold': 5, 'amplitude_envelope': 0.17995471},
 {'class': 'children_playing', 'fold': 5, 'amplitude_envelope': 0.12388224},
 {'class': 'children_playing', 'fold': 5, 'amplitude_envelope': 0.13788542},
 {'class': 'children_playing', 'fold': 5, 'amplitude_envelope': 0.14960274},
 {'class': 'car_horn', 'fold': 10, 'amplitude_envelope': 0.889928},
 {'class': 'car_horn', 'fold': 10, 'amplitude_envelope': 0.8388904},
 {'class': 'car_horn', 'fold': 10, 'amplitude_envelope': 0.6344568},
 {'class': 'car_horn', 'fold': 10, 'amplitude_envelope': 0.9530522},
 {'class': 'car_horn', 'fold': 10, 'amplit

In [48]:
features_data_frame = pd.DataFrame(new_data_list)
print(features_data_frame)

                 class  fold  amplitude_envelope
0             dog_bark     5            2.501107
1     children_playing     5            0.207636
2     children_playing     5            0.141984
3     children_playing     5            0.254567
4     children_playing     5            0.154707
...                ...   ...                 ...
8727          car_horn     7            0.242281
8728          car_horn     7            0.654602
8729          car_horn     7            0.835565
8730          car_horn     7            0.381664
8731          car_horn     7            0.457979

[8732 rows x 3 columns]


In [49]:
features_data_frame.head()

Unnamed: 0,class,fold,amplitude_envelope
0,dog_bark,5,2.501107
1,children_playing,5,0.207636
2,children_playing,5,0.141984
3,children_playing,5,0.254567
4,children_playing,5,0.154707


In [50]:
features_data_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8732 entries, 0 to 8731
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   class               8732 non-null   object 
 1   fold                8732 non-null   int64  
 2   amplitude_envelope  8732 non-null   float32
dtypes: float32(1), int64(1), object(1)
memory usage: 170.7+ KB


In [51]:
features_data_frame.describe()

Unnamed: 0,fold,amplitude_envelope
count,8732.0,8732.0
mean,5.385937,3.245677
std,2.84682,2.799315
min,1.0,0.008834
25%,3.0,1.179502
50%,5.0,2.509311
75%,8.0,4.473047
max,10.0,18.870834


In [56]:
features_data_frame.to_csv('features_data.csv', index=False)