**Musical Instrument Chord Classification (Audio)**

https://www.kaggle.com/datasets/deepcontractor/musical-instrument-chord-classification

In [1]:
!unzip /content/archive.zip -d /content/

Archive:  /content/archive.zip
  inflating: /content/Audio_Files/Major/Major_0.wav  
  inflating: /content/Audio_Files/Major/Major_1.wav  
  inflating: /content/Audio_Files/Major/Major_10.wav  
  inflating: /content/Audio_Files/Major/Major_100.wav  
  inflating: /content/Audio_Files/Major/Major_101.wav  
  inflating: /content/Audio_Files/Major/Major_102.wav  
  inflating: /content/Audio_Files/Major/Major_103.wav  
  inflating: /content/Audio_Files/Major/Major_104.wav  
  inflating: /content/Audio_Files/Major/Major_105.wav  
  inflating: /content/Audio_Files/Major/Major_106.wav  
  inflating: /content/Audio_Files/Major/Major_107.wav  
  inflating: /content/Audio_Files/Major/Major_108.wav  
  inflating: /content/Audio_Files/Major/Major_109.wav  
  inflating: /content/Audio_Files/Major/Major_11.wav  
  inflating: /content/Audio_Files/Major/Major_110.wav  
  inflating: /content/Audio_Files/Major/Major_111.wav  
  inflating: /content/Audio_Files/Major/Major_112.wav  
  inflating: /content/A

### Kaggle code

In [2]:
import os
import IPython
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.io import wavfile
from scipy.fft import fft, fftfreq
from scipy.signal import spectrogram, find_peaks
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [3]:
def find_harmonics(path, print_peaks=False):
    fs, X = wavfile.read(path)
    N = len(X)
    X_F = fft(X)
    X_F_onesided = 2.0/N * np.abs(X_F[0:N//2])
    freqs = fftfreq(N, 1/fs)[:N//2]
    freqs_50_index = np.abs(freqs - 50).argmin()

    h = X_F_onesided.max()*5/100
    peaks, _ = find_peaks(X_F_onesided, distance=10, height = h)
    peaks = peaks[peaks>freqs_50_index]
    harmonics = np.round(freqs[peaks],2)

    if print_peaks:
        i = peaks.max() + 100
        plt.plot(freqs[:i], X_F_onesided[:i])
        plt.plot(freqs[peaks], X_F_onesided[peaks], "x")
        plt.xlabel('Frequency [Hz]')
        plt.show()
    return harmonics

In [4]:
find_harmonics("/content/Audio_Files/Major/Major_102.wav")

array([ 155.56,  196.  ,  278.22,  311.11,  349.78,  392.  ,  466.67,
        494.22,  556.44,  588.44,  622.67,  699.11,  778.67,  784.89,
        834.67,  935.11,  988.44, 1049.78, 1091.11, 1112.89, 1178.67,
       1248.  , 1376.  , 1391.56, 1399.11, 1481.78, 1573.78, 1670.22,
       1949.33, 1976.44, 2449.33, 2471.56])

In [5]:
path = "/content/Audio_Files"
data = []
max_harm_length = 0 # i will keep track of max harmonic length for naming columns

for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        foldername = os.path.basename(dirname)
        full_path = os.path.join(dirname, filename)
        freq_peaks = find_harmonics(full_path)

        max_harm_length = max(max_harm_length, len(freq_peaks))

        cur_data = [foldername, filename]
        cur_data.extend([freq_peaks.min(), freq_peaks.max(), len(freq_peaks)])
        cur_data.extend(freq_peaks)

        data.append(cur_data)

In [6]:
cols = ["Chord Type", "File Name", "Min Harmonic", "Max Harmonic", "# of Harmonics"]
for i in range(max_harm_length):
    cols.append("Harmonic {}".format(i+1))

# Creating DataFrame
df = pd.DataFrame(data, columns=cols)
df.head()

Unnamed: 0,Chord Type,File Name,Min Harmonic,Max Harmonic,# of Harmonics,Harmonic 1,Harmonic 2,Harmonic 3,Harmonic 4,Harmonic 5,...,Harmonic 29,Harmonic 30,Harmonic 31,Harmonic 32,Harmonic 33,Harmonic 34,Harmonic 35,Harmonic 36,Harmonic 37,Harmonic 38
0,Minor,Minor_300.wav,185.0,2200.45,22,185.0,262.27,329.55,370.45,440.0,...,,,,,,,,,,
1,Minor,Minor_198.wav,110.0,2102.27,27,110.0,155.45,220.0,262.27,311.36,...,,,,,,,,,,
2,Minor,Minor_145.wav,97.73,876.82,12,97.73,175.0,195.91,233.18,294.55,...,,,,,,,,,,
3,Minor,Minor_322.wav,104.55,1980.45,24,104.55,175.0,209.09,247.27,294.55,...,,,,,,,,,,
4,Minor,Minor_31.wav,130.91,1052.73,15,130.91,155.91,261.82,311.36,316.36,...,,,,,,,,,,


In [7]:
df_original = df.copy()
df = df_original.copy()

for i in range(1,21):
    curr_interval = "Interval {}".format(i)
    curr_harm = "Harmonic {}".format(i+1)
    prev_harm = "Harmonic {}".format(i)
    df[curr_interval] = df[curr_harm].div(df[prev_harm], axis=0)

df.head()

Unnamed: 0,Chord Type,File Name,Min Harmonic,Max Harmonic,# of Harmonics,Harmonic 1,Harmonic 2,Harmonic 3,Harmonic 4,Harmonic 5,...,Interval 11,Interval 12,Interval 13,Interval 14,Interval 15,Interval 16,Interval 17,Interval 18,Interval 19,Interval 20
0,Minor,Minor_300.wav,185.0,2200.45,22,185.0,262.27,329.55,370.45,440.0,...,1.054807,1.067165,1.060634,1.060626,1.178848,1.005891,1.193523,1.116856,1.044688,1.143669
1,Minor,Minor_198.wav,110.0,2102.27,27,110.0,155.45,220.0,262.27,311.36,...,1.064956,1.164495,1.008248,1.011083,1.121254,1.059214,1.122513,1.039836,1.014166,1.127717
2,Minor,Minor_145.wav,97.73,876.82,12,97.73,175.0,195.91,233.18,294.55,...,1.250974,,,,,,,,,
3,Minor,Minor_322.wav,104.55,1980.45,24,104.55,175.0,209.09,247.27,294.55,...,1.188273,1.059743,1.180138,1.010395,1.118704,1.062008,1.12197,1.048961,1.192211,1.006782
4,Minor,Minor_31.wav,130.91,1052.73,15,130.91,155.91,261.82,311.36,316.36,...,1.19141,1.008144,1.167439,1.145405,,,,,,


In [8]:
for i in range(2,14):
    curr_interval = "Interval {}_1".format(i)
    curr_harm = "Harmonic {}".format(i)
    df[curr_interval] = df[curr_harm].div(df["Harmonic 1"], axis=0)

In [9]:
df["Chord Type"] = df["Chord Type"].replace("Major", 1)
df["Chord Type"] = df["Chord Type"].replace("Minor", 0)

columns = ["Interval 4_1", "Interval 5_1", "Interval 6_1"]
train_X, val_X, train_y, val_y = train_test_split(df[columns], df["Chord Type"], test_size=0.40, random_state=0)

train_X.head()

  df["Chord Type"] = df["Chord Type"].replace("Minor", 0)


Unnamed: 0,Interval 4_1,Interval 5_1,Interval 6_1
585,1.890964,2.003173,2.522391
403,1.886064,2.11405,2.37572
532,1.789367,2.004342,2.530587
104,2.0,2.246505,2.378428
303,2.0,2.386936,2.835853


In [10]:
lr = LogisticRegression(random_state=0)
knn = KNeighborsClassifier()
svc = SVC(random_state=0)
gnb = GaussianNB()
dtc = DecisionTreeClassifier(random_state=0)
rfc = RandomForestClassifier(random_state=0)

score_lr = cross_val_score(lr, train_X, train_y, cv=10).mean()
score_knn = cross_val_score(knn, train_X, train_y, cv=10).mean()
score_svc = cross_val_score(svc, train_X, train_y, cv=10).mean()
score_gnb = cross_val_score(gnb, train_X, train_y, cv=10).mean()
score_dtc = cross_val_score(dtc, train_X, train_y, cv=10).mean()
score_rfc = cross_val_score(rfc, train_X, train_y, cv=10).mean()

In [11]:
print("Cross Val Score for Logistic Regression: {:.2f}".format(score_lr))
print("Cross Val Score for KNeighbors Classifier: {:.2f}".format(score_knn))
print("Cross Val Score for SVC: {:.2f}".format(score_svc))
print("Cross Val Score for Gaussian NB: {:.2f}".format(score_gnb))
print("Cross Val Score for Decision Tree Classifier: {:.2f}".format(score_dtc))
print("Cross Val Score for Random Forest Classifier: {:.2f}".format(score_rfc))

Cross Val Score for Logistic Regression: 0.59
Cross Val Score for KNeighbors Classifier: 0.85
Cross Val Score for SVC: 0.63
Cross Val Score for Gaussian NB: 0.59
Cross Val Score for Decision Tree Classifier: 0.89
Cross Val Score for Random Forest Classifier: 0.89


In [12]:
total_acc = []
for i in range(200):
    classifier = RandomForestClassifier(random_state=i)

    classifier.fit(train_X, train_y)
    pred_y = classifier.predict(val_X)
    acc = accuracy_score(val_y, pred_y)
    total_acc.append(acc)
print("Accuracy Score: {:.4f}".format(np.mean(total_acc)))

Accuracy Score: 0.9038


In [13]:
columns = ["Interval 1", "Interval 2", "Interval 3", "Interval 4"]
columns.extend(["Interval 4_1", "Interval 5_1", "Interval 6_1"])
train_X, val_X, train_y, val_y = train_test_split(df[columns], df["Chord Type"], test_size=0.40, random_state=0)

total_acc2 = []
for i in range(200):
    classifier = RandomForestClassifier(random_state=i)

    classifier.fit(train_X, train_y)
    pred_y = classifier.predict(val_X)
    acc = accuracy_score(val_y, pred_y)
    total_acc.append(acc)
print("Accuracy Score: {:.4f}".format(np.mean(total_acc)))

Accuracy Score: 0.9195


In [14]:
cm = confusion_matrix(val_y, pred_y)
acc = accuracy_score(val_y, pred_y)

print("Confusion Matrix:")
print(cm)
print("Accuracy Score: {:.2f}".format(acc))

Confusion Matrix:
[[133  12]
 [  7 192]]
Accuracy Score: 0.94


### Basic instruments code

In [15]:
import librosa
import numpy as np
from tqdm import tqdm

SAMPLE_RATE = 22050
DURATION = 4
MAX_LEN = SAMPLE_RATE * DURATION

def extract_features(audio_path):
    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)

    y, _ = librosa.effects.trim(y)
    y = librosa.util.normalize(y)
    y = librosa.effects.harmonic(y)

    if len(y) < MAX_LEN:
        y = np.pad(y, (0, MAX_LEN - len(y)), mode='constant')
    else:
        y = y[:MAX_LEN]

    # Tonnetz — "гармонические координаты"
    y_harmonic = librosa.effects.harmonic(y)
    tonnetz = librosa.feature.tonnetz(y=y_harmonic, sr=sr)
    tonnetz_mean = tonnetz.mean(axis=1)
    tonnetz_std = tonnetz.std(axis=1)

    features = np.concatenate([
        tonnetz_mean, tonnetz_std,
    ])
    return features

In [16]:
import os
from glob import glob

base_dir = '/content/Audio_Files'
classes = ['Major', 'Minor']
paths, labels = [], []

for label in classes:
    folder = os.path.join(base_dir, label)
    files = glob(os.path.join(folder, '*.wav'))
    paths.extend(files)
    labels.extend([label] * len(files))

X = []
for path in tqdm(paths):
    features = extract_features(path)
    X.append(features)

X = np.array(X)
y = np.array(labels)

100%|██████████| 859/859 [07:15<00:00,  1.97it/s]


In [17]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

le = LabelEncoder()
y_encoded = le.fit_transform(y)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
import xgboost as xgb

models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Random Forest': RandomForestClassifier(n_estimators=200),
    'SVM (RBF)': SVC(kernel='rbf', C=5, gamma='scale'),
    'KNN (k=5)': KNeighborsClassifier(n_neighbors=1),
    'Naive Bayes': GaussianNB(priors=[0.575, 0.425]),
    'Decision Tree': DecisionTreeClassifier(),
    'XGBoost': xgb.XGBClassifier(eval_metric='mlogloss')
}

print("🔍 Cross-Validation Accuracy (5-fold):\n")

for name, model in models.items():
    scores = cross_val_score(model, X_scaled, y_encoded, cv=5, scoring='accuracy')
    print(f"{name:20s}: {scores.mean():.4f} ± {scores.std():.4f}")

🔍 Cross-Validation Accuracy (5-fold):

Logistic Regression : 0.6915 ± 0.0286
Random Forest       : 0.8731 ± 0.0086
SVM (RBF)           : 0.8522 ± 0.0224
KNN (k=5)           : 0.8440 ± 0.0254
Naive Bayes         : 0.7230 ± 0.0313
Decision Tree       : 0.8417 ± 0.0109
XGBoost             : 0.8580 ± 0.0178
