In [None]:
!pip install opendatasets librosa soundfile



## Download the Dataset and Import Libraries

In [None]:
import opendatasets as od

# Kaggle dataset holding the instrument stems. The download prompts for Kaggle
# credentials and writes into ./music-instrument-stems (see the cell output).
DATASET_URL = "https://www.kaggle.com/datasets/franciscoaliss/music-instrument-stems"
od.download(DATASET_URL)

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: bondantm
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/franciscoaliss/music-instrument-stems
Downloading music-instrument-stems.zip to ./music-instrument-stems


100%|██████████| 912M/912M [00:04<00:00, 219MB/s]





In [None]:
import os
import librosa
import soundfile as sf

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
import seaborn as sns
from sklearn.metrics import confusion_matrix


import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

In [None]:
# Both instrument folders sit under the same (doubly nested) dataset directory.
stems_root = '/content/music-instrument-stems/mega_augmented_ds/mega_augmented_ds'
acoustic_guitar_dir = f'{stems_root}/Acoustic Guitar'
piano_dir = f'{stems_root}/Piano'

# Parallel accumulators filled by load_audio_data: entry i of `labels` is the
# class name of the waveform stored at entry i of `audio_data`.
audio_data, labels = [], []

def load_audio_data(directory, label, sr=22050):
    """Load every ``.wav`` file in ``directory`` and record it under ``label``.

    Each successfully decoded waveform is appended to the module-level
    ``audio_data`` list and ``label`` is appended to the module-level
    ``labels`` list, so the two lists stay index-aligned.

    Args:
        directory: Path of the folder to scan (not recursive).
        label: Class name stored for every file loaded from ``directory``.
        sr: Sample rate passed to ``librosa.load``. Defaults to 22050, the
            rate the feature-extraction step assumes.

    Returns:
        None. Results are accumulated in ``audio_data`` and ``labels``.

    Raises:
        OSError: If ``directory`` cannot be listed. Errors raised while
            decoding an individual file are printed and that file is skipped.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and therefore every seeded step downstream) reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.wav'):
            continue
        filepath = os.path.join(directory, filename)
        try:
            # The returned sample rate equals `sr`, so it is not kept.
            data, _ = librosa.load(filepath, sr=sr)
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print(f"Error loading {filepath}: {e}")
            continue
        audio_data.append(data)
        labels.append(label)

# Fill audio_data / labels one instrument folder at a time: guitar first, then piano.
for instrument_dir, instrument_label in (
    (acoustic_guitar_dir, 'Acoustic Guitar'),
    (piano_dir, 'Piano'),
):
    load_audio_data(instrument_dir, instrument_label)

# Report how many clips were loaded and which class names are present.
print(f"Loaded {len(audio_data)} audio files.")
print(f"Labels: {np.unique(labels)}")

Loaded 2128 audio files.
Labels: ['Acoustic Guitar' 'Piano']


# Task
Perform feature extraction, undersampling (optional), scaling, and data splitting on the audio data loaded from "/content/music-instrument-stems/mega_augmented_ds/mega_augmented_ds/Acoustic Guitar" and "/content/music-instrument-stems/mega_augmented_ds/mega_augmented_ds/Piano".

## Feature extraction

### Subtask:
Extract relevant features from the audio data, such as Mel-Frequency Cepstral Coefficients (MFCCs).


**Reasoning**:
Extract MFCC features from the loaded audio data and store the mean of the MFCCs for each audio file in a list, then convert the list to a NumPy array.



In [None]:
# One row per clip: compute the MFCC matrix (sr=22050, librosa's default sample
# rate), then average each coefficient across the time axis so every clip is
# summarised by a single fixed-length vector.
features = np.array([
    librosa.feature.mfcc(y=clip, sr=22050).T.mean(axis=0)
    for clip in audio_data
])

print(f"Extracted features shape: {features.shape}")

Extracted features shape: (2128, 20)


## Undersampling (optional)

### Subtask:
If the dataset is imbalanced, consider applying undersampling to balance the classes.


**Reasoning**:
Check the distribution of the labels to determine if undersampling is needed.



In [None]:
# Count how many clips belong to each instrument class.
unique_labels, counts = np.unique(labels, return_counts=True)
print("Label distribution before undersampling:")
for class_name, class_count in zip(unique_labels, counts):
    print(f"{class_name}: {class_count}")

# Size of the largest class relative to the smallest. A ratio above 1.5 is the
# (example) cut-off this notebook uses to call the dataset imbalanced.
imbalance_ratio = counts.max() / counts.min()
if not (imbalance_ratio > 1.5):
    print("\nDataset is balanced or the imbalance is not significant enough for undersampling.")
else:
    print("\nDataset is imbalanced, applying undersampling.")
    # Imported on this branch only, so imblearn is needed only when resampling runs.
    from imblearn.under_sampling import RandomUnderSampler
    rus = RandomUnderSampler(random_state=42)
    features_resampled, labels_resampled = rus.fit_resample(features, labels)
    # Rebind the working names so the following cells operate on the resampled data.
    features, labels = features_resampled, labels_resampled
    print("Label distribution after undersampling:")
    unique_labels, counts = np.unique(labels, return_counts=True)
    for class_name, class_count in zip(unique_labels, counts):
        print(f"{class_name}: {class_count}")


Label distribution before undersampling:
Acoustic Guitar: 836
Piano: 1292

Dataset is imbalanced, applying undersampling.
Label distribution after undersampling:
Acoustic Guitar: 836
Piano: 836


## Scaling

### Subtask:
Standardize the extracted features so that each feature has zero mean and unit variance.


**Reasoning**:
Import StandardScaler, instantiate it, fit it to the features, and transform the features.



In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column. Note that the scaler's statistics are
# estimated from all rows of `features`, i.e. before any train/test split.
scaler = StandardScaler()
features_scaled = scaler.fit(features).transform(features)

print(f"Scaled features shape: {features_scaled.shape}")

Scaled features shape: (1672, 20)


## Data splitting

### Subtask:
Split the data into training and testing sets.


**Reasoning**:
Split the scaled features and labels into training and testing sets.



**Reasoning**:
Convert the labels list to a NumPy array before printing the shape.



In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# `labels` may still be a plain Python list; an array is needed for `.shape` below.
y_labels_array = np.array(labels)

# Split the *unscaled* features first. Fitting a scaler on all rows and only
# then splitting lets the test rows influence the mean/std applied to the
# training rows (data leakage). Same seed and test_size as before, so the
# train/test membership is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, y_labels_array, test_size=0.2, random_state=42
)

# Estimate the scaling statistics from the training rows only, then apply that
# same transform to both partitions. `scaler` is rebound so that any later
# `scaler.transform(...)` call matches the scaling used for X_train.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Sanity check: every row ended up in exactly one partition.
assert len(X_train) + len(X_test) == len(features)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: (1337, 20)
Shape of X_test: (335, 20)
Shape of y_train: (1337,)
Shape of y_test: (335,)
