# Musical Instrument's Sound Classifier 

### Dataset : Kaggle's [Musical Instrument's Sound Dataset](https://www.kaggle.com/datasets/soumendraprasad/musical-instruments-sound-dataset) 7.2GB

In [39]:
import os
import torch 
import torchaudio
import pandas as pd
import numpy as np
from torch.utils.data import Dataset
from torch import nn

In [40]:
# NOTE(review): the Dataset class below is disabled by being wrapped in a bare
# string literal, so this cell only evaluates (and echoes) a string; no class
# named Musical_Dataset is defined. Inside the disabled code, `transform` is
# stored but never applied and the sample rate returned by torchaudio.load is
# discarded.
'''
class Musical_Dataset(Dataset):
    
    def __init__(self, annotations_file, audio_dir, transform=None):
        self.annotations_file = annotations_file
        self.audio_dir = audio_dir
        self.transform  = transform
        
    def __len__(self):
        return len(self.annotations_file)

    def __getitem__(self, idx):
        
        label = self.annotations_file.iloc[idx, 1]
        
        audio_path = os.path.join(self.audio_dir, self.annotations_file.iloc[idx, 0])
        audio, sr = torchaudio.load(audio_path)
        
        return audio, label
'''

'\nclass Musical_Dataset(Dataset):\n    \n    def __init__(self, annotations_file, audio_dir, transform=None):\n        self.annotations_file = annotations_file\n        self.audio_dir = audio_dir\n        self.transform  = transform\n        \n    def __len__(self):\n        return len(self.annotations_file)\n\n    def __getitem__(self, idx):\n        \n        label = self.annotations_file.iloc[idx, 1]\n        \n        audio_path = os.path.join(self.audio_dir, self.annotations_file.iloc[idx, 0])\n        audio, sr = torchaudio.load(audio_path)\n        \n        return audio, label\n'

In [41]:
# Location of the original (un-chunked) dataset on disk.
root_path = "/Users/akhilsharma/Datasets/Musical_Instrument"
# The training audio lives in a doubly nested 'Train_submission' folder.
audio_dir = os.path.join(root_path, 'Train_submission', 'Train_submission')
# Build the CSV path with os.path.join, the same way as audio_dir, instead of
# concatenating strings with a hard-coded separator.
labels_csv = pd.read_csv(os.path.join(root_path, "Metadata_Train.csv"))

labels_csv

Unnamed: 0,FileName,Class
0,1-E1-Major 00.wav,Sound_Guitar
1,1-E1-Major 01.wav,Sound_Guitar
2,1-E1-Major 02.wav,Sound_Guitar
3,1-E1-Major 03.wav,Sound_Guitar
4,1-E1-Major 04.wav,Sound_Guitar
...,...,...
2624,strange-piano-73881.wav,Sound_Piano
2625,the-last-piano-112677.wav,Sound_Piano
2626,this-is-war-version-e-95411.wav,Sound_Piano
2627,tired-ghosts-piano-65013.wav,Sound_Piano


In [42]:
# mds = Musical_Dataset(annotations_file=labels_csv, 
#                       audio_dir=audio_dir)

## DIVIDING THE AUDIO IN CHUNKS 
We will split each audio file into fixed-length chunks and save them to a new location. This also reduces the size of the original dataset, so the model can be trained on a smaller amount of data.

- Resampling all audio to a sample rate of `8000` Hz
- Converting all multi-channel (e.g. stereo) audio into `single-channel` (mono) audio
- Dividing all audio into `1-SECOND` chunks

In [43]:
# Target format of every chunk produced by this notebook.
REQUIRED_SAMPLE_RATE = 8000
REQUIRED_DURATION_SECOND = 1

# Where the chunked audio and its metadata CSV are written.
AUDIO_CHUNKS_DESTINATION_PATH = "/Users/akhilsharma/Datasets/Musical_Instrument/New_Train/audio"
METADATA_CHUNKS_CSV_DESTINATION_PATH = "/Users/akhilsharma/Datasets/Musical_Instrument/New_Train/metadata.csv"

# Peek at the metadata left by a previous run, if any. On a first run the CSV
# has not been created yet, so fall back to an empty frame with the expected
# columns instead of raising FileNotFoundError.
if os.path.exists(METADATA_CHUNKS_CSV_DESTINATION_PATH):
    target_csv = pd.read_csv(METADATA_CHUNKS_CSV_DESTINATION_PATH)
else:
    target_csv = pd.DataFrame({'FileName': [], 'Class': []})
target_csv.head()

Unnamed: 0,FileName,Class


In [44]:
## Create an empty CSV file (header row only) for the chunk metadata.
# Running this cell resets the metadata file: any rows from a previous run are
# discarded.

# Make sure the destination folders exist so that neither this write nor the
# later chunk writes fail on a fresh machine.
os.makedirs(AUDIO_CHUNKS_DESTINATION_PATH, exist_ok=True)
os.makedirs(os.path.dirname(METADATA_CHUNKS_CSV_DESTINATION_PATH), exist_ok=True)

# No need to read the old file first: it is overwritten unconditionally, and
# reading it would raise when it does not exist yet.
new_data = pd.DataFrame({
    'FileName': [],
    'Class': []
})
new_data.to_csv(METADATA_CHUNKS_CSV_DESTINATION_PATH, index=False)

In [45]:
def divide_single_audio_and_save(signal, sr, audio_name, label):
    """
    Split one audio signal into fixed-length chunks and save each chunk.

    The signal is first resampled to REQUIRED_SAMPLE_RATE and merged to a
    single channel, then cut along axis 1 into consecutive windows of
    REQUIRED_SAMPLE_RATE * REQUIRED_DURATION_SECOND samples. Every full window
    is handed to `save`, numbered from 0 in order of appearance.

    Args:
        signal: audio tensor indexed as (channels, samples).
        sr: sample rate of `signal` before resampling.
        audio_name: original file name, passed on to `save` for naming chunks.
        label: class label recorded for every chunk of this file.

    Returns:
        The resampled, single-channel signal that was chunked.
    """
    signal = resample_to_8K(signal, sr)
    signal = merge_if_required(signal)

    chunk_len = REQUIRED_SAMPLE_RATE * REQUIRED_DURATION_SECOND
    cnt = 0

    for start in range(0, signal.shape[1], chunk_len):
        audio_part = signal[:, start:start + chunk_len]

        # The trailing remainder (shorter than one full chunk) is dropped so
        # that every saved chunk has exactly the same length.
        if audio_part.shape[1] != chunk_len:
            continue

        save(signal=audio_part,
             audio_name=audio_name,
             index=cnt,
             label=label)
        cnt += 1

    # Return the processed local signal. The previous `return audio` referred
    # to a name that is not defined in this function and only resolved when a
    # notebook-level global called `audio` happened to exist.
    return signal


def save(signal, index, audio_name, label):
    """
    Write one audio chunk to disk and record it in the metadata CSV.

    The chunk is stored under AUDIO_CHUNKS_DESTINATION_PATH as
    "<index>_<audio_name>" at REQUIRED_SAMPLE_RATE, and the same file name is
    registered together with `label` via `save_metadata`.
    """
    # Prefix the original file name with the chunk index.
    chunk_name = "_".join([str(index), audio_name])

    torchaudio.save(
        os.path.join(AUDIO_CHUNKS_DESTINATION_PATH, chunk_name),
        signal,
        sample_rate=REQUIRED_SAMPLE_RATE,
    )

    # Keep the CSV in step with the file that was just written.
    save_metadata(filename=chunk_name, label=label)
    
def save_metadata(filename, label):
    """
    Append one (FileName, Class) row to the chunk-metadata CSV.

    The row is appended to METADATA_CHUNKS_CSV_DESTINATION_PATH instead of
    re-reading and rewriting the whole file on every call, so the cost of each
    call no longer grows with the number of rows already stored.

    Args:
        filename: name of the saved chunk file.
        label: class label of the chunk.
    """
    csv_path = METADATA_CHUNKS_CSV_DESTINATION_PATH

    new_row = pd.DataFrame({
        'FileName': [filename],
        'Class': [label]
    })

    # Write the header only when starting a new (missing or empty) file;
    # otherwise just add the data row after the existing content.
    needs_header = (not os.path.exists(csv_path)) or os.path.getsize(csv_path) == 0
    new_row.to_csv(csv_path, mode='a', header=needs_header, index=False)


def resample_to_8K(audio, sr):
    """
    Convert `audio` from its original sample rate `sr` to
    REQUIRED_SAMPLE_RATE (8000) and return the resampled signal.
    """
    return torchaudio.functional.resample(
        audio,
        orig_freq=sr,
        new_freq=REQUIRED_SAMPLE_RATE,
    )

def merge_if_required(audio):
    """
    Collapse a multi-channel signal to one channel by averaging over axis 0.

    A signal whose first dimension is already 1 (or 0) is returned untouched.
    """
    has_several_channels = audio.shape[0] > 1
    if not has_several_channels:
        return audio
    # keepdim=True keeps the leading channel axis, so the result is (1, samples).
    return torch.mean(audio, dim=0, keepdim=True)

Iterate over every audio file: **divide each file into chunks and save the chunks, together with their metadata, in the new directory.**

In [46]:
from tqdm import tqdm

TOTAL_DATASET = len(labels_csv)
# TOTAL_DATASET = 2

# Walk the original metadata row by row; each source file is loaded, split
# into chunks and written (with its metadata) to the new directory.
for idx in tqdm(range(TOTAL_DATASET)):

    # Column 0 is the file name (e.g. file_name.wav), column 1 the class
    # label (e.g. "Sound_Guitar").
    audio_name, label = labels_csv.iloc[idx, [0, 1]]

    audio_path = os.path.join(audio_dir, audio_name)
    audio, sr = torchaudio.load(audio_path)

    divide_single_audio_and_save(audio, sr, audio_name, label)

100%|██████████| 2629/2629 [25:28<00:00,  1.72it/s]


In [82]:
# Load the chunk-level metadata CSV (one row per saved chunk) for inspection.
new_metadata_df = pd.read_csv(METADATA_CHUNKS_CSV_DESTINATION_PATH)
new_metadata_df

Unnamed: 0,FileName,Class
0,0_1-E1-Major 00.wav,Sound_Guitar
1,1_1-E1-Major 00.wav,Sound_Guitar
2,0_1-E1-Major 01.wav,Sound_Guitar
3,1_1-E1-Major 01.wav,Sound_Guitar
4,0_1-E1-Major 02.wav,Sound_Guitar
...,...,...
45826,9_toy-piano-27311.wav,Sound_Piano
45827,10_toy-piano-27311.wav,Sound_Piano
45828,11_toy-piano-27311.wav,Sound_Piano
45829,12_toy-piano-27311.wav,Sound_Piano


In [83]:
# List the chunk files actually present on disk, to compare their number with
# the number of metadata rows.
files = os.listdir(AUDIO_CHUNKS_DESTINATION_PATH)
print("Total Audio files available : ", len(files))
files[:10]

Total Audio files available :  36129


['31_classical_1_60BPM.wav',
 '29_ROOM_room7_MUS_scale_DEV_stereomic.wav',
 '2_WaveDrum02_45SD (6).wav',
 '14_ROOM_room3_MUS_pachelbel_DEV_lg.wav',
 '12_ROOM_room5_MUS_scale_DEV_iphone.wav',
 '9_ROOM_room5_MUS_bartok_DEV_lg.wav',
 '11_WaveDrum02_39KD (116).wav',
 '1_WaveDrum02_39KD (6).wav',
 '13_ROOM_room4_MUS_swing_DEV_iphone.wav',
 '10_dr-tribal-percussion-triplet-loop-high-passed-106bpm-25935.wav']

In [84]:
# Summarise column dtypes and non-null counts of the chunk metadata.
new_metadata_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45831 entries, 0 to 45830
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   FileName  45831 non-null  object
 1   Class     45831 non-null  object
dtypes: object(2)
memory usage: 716.2+ KB


In [85]:
# Distinct class labels present in the chunk metadata.
new_metadata_df["Class"].unique()

array(['Sound_Guitar', 'Sound_Drum', 'Sound_Violin', 'Sound_Piano'],
      dtype=object)

In [93]:
# Count how many chunk rows belong to each class.
# The counts are taken directly from the metadata frame: the name `ls` that
# was iterated here before is not defined anywhere in the notebook, so the
# cell failed on a fresh kernel.
class_counts = new_metadata_df["Class"].value_counts()

# Keep the fixed key order and report 0 for a class with no rows.
mp = {
    name: int(class_counts.get(name, 0))
    for name in ("Sound_Guitar", 'Sound_Drum', 'Sound_Violin', 'Sound_Piano')
}
mp

{'Sound_Guitar': 9722,
 'Sound_Drum': 9692,
 'Sound_Violin': 9692,
 'Sound_Piano': 16725}

In [89]:
# Check for missing values: number of null entries in each metadata column.
print(new_metadata_df.isnull().sum())

FileName    0
Class       0
dtype: int64


In [92]:
# Re-list the chunk directory and report on this fresh listing. The count and
# preview previously used `files`, a variable left over from an earlier cell,
# so they did not reflect the listing made here.
files_in_dir = os.listdir(AUDIO_CHUNKS_DESTINATION_PATH)

print("Total Audio files available : ", len(files_in_dir))
files_in_dir[:5]

Total Audio files available :  36129


['31_classical_1_60BPM.wav',
 '29_ROOM_room7_MUS_scale_DEV_stereomic.wav',
 '2_WaveDrum02_45SD (6).wav',
 '14_ROOM_room3_MUS_pachelbel_DEV_lg.wav',
 '12_ROOM_room5_MUS_scale_DEV_iphone.wav']

In [108]:
# Metadata rows whose class is "Sound_Guitar".
Sound_Guitar_DF = new_metadata_df.loc[new_metadata_df["Class"] == "Sound_Guitar"]

In [None]:
# Metadata rows whose class is "Sound_Drum".
Sound_Drum_DF = new_metadata_df.loc[new_metadata_df["Class"] == "Sound_Drum"]

In [104]:
# Metadata rows whose class is "Sound_Violin".
Sound_Violin_DF = new_metadata_df.loc[new_metadata_df["Class"] == "Sound_Violin"]

In [105]:
# Metadata rows whose class is "Sound_Piano".
Sound_Piano_DF = new_metadata_df.loc[new_metadata_df["Class"] == "Sound_Piano"]