In [186]:
from tqdm import tqdm
import torch
import torchaudio
import os
import matplotlib.pyplot as plt
from torch import nn, optim
from torch.utils.data import DataLoader
import pandas as pd

In [187]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [188]:
TARGET_LENGTH = 256
TRAINING_DATA_PATH = 'data/Training'
TESTING_DATA_PATH = 'data/Test'
BATCH_SIZE = 32
INPUT_SIZE = TARGET_LENGTH

In [189]:

def get_file_list(dir, item_list = []):
    for item in os.listdir(dir):
        path = os.path.join(dir, item)
        if os.path.isdir(path):
            items = get_file_list(path, item_list)
            for item in items:
                item_list.append(item)
        else:
            item_list.append(path)
    return list(set(item_list))

In [190]:
train_list = get_file_list(TRAINING_DATA_PATH)
test_list = get_file_list(TESTING_DATA_PATH)
class_list = os.listdir(TRAINING_DATA_PATH)

In [191]:
def getDataset(item_list):
    dataset = []
    for item in tqdm(item_list):
        # Load an audio file
        waveform, sample_rate = torchaudio.load(item, normalize=True)

        # Extract pitch using Harmonic Product Spectrum (HPS)
        pitch = torchaudio.functional.detect_pitch_frequency(waveform, sample_rate, frame_time=0.02)
        label = os.path.basename(os.path.dirname(item))
        if pitch.shape[0] > 1:
            pitch = torch.mean(pitch, dim=0, keepdim=True)
        pitch = pitch[0]
        dataset.append((
            pitch.max().numpy(),
            pitch.min().numpy(),
            pitch.mean().numpy(),
            pitch.std().numpy(),
            pitch.median().numpy(), 
            len(pitch),
            pitch.max().numpy() - pitch.min().numpy(),
            label,
            class_list.index(label)
        ))
    df = pd.DataFrame(dataset, columns=['pitch_max', 'pitch_min', 'pitch_mean', 'pitch_std', 'pitch_median','pitch_length', 'pitch_range', 'label', 'label_class'])
    return df

In [192]:
train_data = getDataset(train_list)
test_data = getDataset(test_list)

100%|██████████| 1440/1440 [01:54<00:00, 12.62it/s]
100%|██████████| 1760/1760 [02:05<00:00, 13.99it/s]


In [193]:
all_data = pd.concat([train_data, test_data])

In [194]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [195]:
X, y = all_data[['pitch_max', 'pitch_min', 'pitch_mean', 'pitch_std', 'pitch_median',
         'pitch_length', 'pitch_range']], all_data['label_class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=all_data['label_class'])

In [196]:
logs = []

In [197]:
pipeline = Pipeline([
    ("prep", StandardScaler()),
    ("algo", KNeighborsClassifier())
])

In [198]:
parameter = {
    "algo__n_neighbors": range(1, 51),
    "algo__weights": ['uniform','distance'],
    "algo__p": [1,2]
}

model = GridSearchCV(pipeline, parameter, cv=3, n_jobs=-1, verbose=1)
model.fit(X_train, y_train)
best_params_ = model.best_params_
best_score_ = model.best_score_
score_test = model.score(X_test, y_test)
score_train = model.score(X_train, y_train)
logs.append([
    best_params_,
    best_score_,
    score_test,
    score_train,
    "KNeighborsClassifier"
])

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [199]:
import pickle

In [200]:
pickle.dump(model, open('models/knnc.pkl', 'wb'))

In [201]:
from sklearn.ensemble import RandomForestClassifier

In [202]:
pipeline = Pipeline([
    ("prep", StandardScaler()),
    ("algo", RandomForestClassifier())
])

In [203]:
parameter = {
    "algo__max_samples": range(1, 20),
    "algo__max_depth": range(1,20)
}

model = GridSearchCV(pipeline, parameter, cv=3, n_jobs=-1, verbose=1)
model.fit(X_train, y_train)
best_params_ = model.best_params_
best_score_ = model.best_score_
score_test = model.score(X_test, y_test)
score_train = model.score(X_train, y_train)
logs.append([
    best_params_,
    best_score_,
    score_test,
    score_train,
    "RandomForestClassifier"
])

Fitting 3 folds for each of 361 candidates, totalling 1083 fits


In [204]:
pickle.dump(model, open('models/rfc.pkl', 'wb'))

In [205]:
from sklearn.svm import SVC

In [206]:
pipeline = Pipeline([
    ("prep", StandardScaler()),
    ("algo", SVC())
])

In [207]:
parameter = {
    "algo__tol": [0.0001, 0.001, 0.01, 0.1],
    "algo__C": [1.0, 2.0]
}

model = GridSearchCV(pipeline, parameter, cv=3, n_jobs=-1, verbose=1)
model.fit(X_train, y_train)
best_params_ = model.best_params_
best_score_ = model.best_score_
score_test = model.score(X_test, y_test)
score_train = model.score(X_train, y_train)
logs.append([
    best_params_,
    best_score_,
    score_test,
    score_train,
    "SVC"
])

Fitting 3 folds for each of 8 candidates, totalling 24 fits


In [208]:
pickle.dump(model, open('models/svc.pkl', 'wb'))

In [209]:
from sklearn.linear_model import LogisticRegression

In [210]:
pipeline = Pipeline([
    ("prep", StandardScaler()),
    ("algo", LogisticRegression(multi_class='multinomial'))
])

In [211]:
parameter = {
    "algo__tol": [0.0001, 0.001, 0.01, 0.1],
    "algo__C": [1.0, 2.0]
}

model = GridSearchCV(pipeline, parameter, cv=3, n_jobs=-1, verbose=1)
model.fit(X_train, y_train)
best_params_ = model.best_params_
best_score_ = model.best_score_
score_test = model.score(X_test, y_test)
score_train = model.score(X_train, y_train)
logs.append([
    best_params_,
    best_score_,
    score_test,
    score_train,
    "LogisticRegression"
])

Fitting 3 folds for each of 8 candidates, totalling 24 fits


In [212]:
pickle.dump(model, open('models/lr.pkl', 'wb'))

In [213]:
from sklearn.neural_network import MLPClassifier

In [214]:
pipeline = Pipeline([
    ("prep", StandardScaler()),
    ("algo", MLPClassifier())
])

In [215]:
parameter = {
    "algo__tol": [0.0001, 0.001, 0.01, 0.1],
    "algo__max_iter": [1000, 2000]
}

model = GridSearchCV(pipeline, parameter, cv=3, n_jobs=-1, verbose=1)
model.fit(X_train, y_train)
best_params_ = model.best_params_
best_score_ = model.best_score_
score_test = model.score(X_test, y_test)
score_train = model.score(X_train, y_train)
logs.append([
    best_params_,
    best_score_,
    score_test,
    score_train,
    "MLPClassifier"
])

Fitting 3 folds for each of 8 candidates, totalling 24 fits


In [216]:
pickle.dump(model, open('models/mlp.pkl', 'wb'))

In [217]:
from sklearn.ensemble import GradientBoostingClassifier

In [218]:
pipeline = Pipeline([
    ("prep", StandardScaler()),
    ("algo", GradientBoostingClassifier())
])

In [219]:
parameter = {
    "algo__subsample": [.1,.2,.3,.4,.5, .6, .7, .8, .9, 1.0],
    "algo__max_depth": range(1,8)
}

model = GridSearchCV(pipeline, parameter, cv=3, n_jobs=-1, verbose=1)
model.fit(X_train, y_train)
best_params_ = model.best_params_
best_score_ = model.best_score_
score_test = model.score(X_test, y_test)
score_train = model.score(X_train, y_train)
logs.append([
    best_params_,
    best_score_,
    score_test,
    score_train,
    "GradientBoostingClassifier"
])

Fitting 3 folds for each of 70 candidates, totalling 210 fits


In [220]:
pickle.dump(model, open('models/gbc.pkl', 'wb'))

In [221]:
log_df = pd.DataFrame(logs, columns=['best_params', 'best_score', 'score_test', 'score_train', 'model'])
log_df.to_csv('logs.csv', index=False)
log_df

Unnamed: 0,best_params,best_score,score_test,score_train,model
0,"{'algo__n_neighbors': 1, 'algo__p': 1, 'algo__...",0.67773,0.817187,0.970313,KNeighborsClassifier
1,"{'algo__max_depth': 11, 'algo__max_samples': 19}",0.44531,0.439063,0.471484,RandomForestClassifier
2,"{'algo__C': 2.0, 'algo__tol': 0.01}",0.203904,0.21875,0.276953,SVC
3,"{'algo__C': 1.0, 'algo__tol': 0.1}",0.179291,0.175,0.194531,LogisticRegression
4,"{'algo__max_iter': 2000, 'algo__tol': 0.0001}",0.355855,0.371875,0.439063,MLPClassifier
5,"{'algo__max_depth': 6, 'algo__subsample': 0.6}",0.822655,0.885938,0.974609,GradientBoostingClassifier
