In [1]:
from img2vec_pytorch import Img2Vec
import os
import warnings
from PIL import Image
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

<h2>Prepare Data</h2>

In [2]:
img2vec = Img2Vec()
scaler = StandardScaler()  
data_dir = "./Spectrograms"
train_dir = os.path.join(data_dir, 'training')
val_dir = os.path.join(data_dir, 'testing')

data = {}
for j, dir_ in enumerate([train_dir, val_dir]):
    features = []
    labels = []
    for category in os.listdir(dir_):
        for img_path in os.listdir(os.path.join(dir_, category)):
            img_path_ = os.path.join(dir_, category, img_path)
            img = Image.open(img_path_).convert('RGB')
            img_features = img2vec.get_vec(img)
            features.append(img_features)
            labels.append(category)
    data[['training_data', 'validation_data'][j]] = features
    data[['training_labels', 'validation_labels'][j]] = labels
scaler.fit(data['training_data'])
data['training_data'] = scaler.transform(data['training_data'])



<h2>Train Model</h2>

In [3]:
warnings.filterwarnings('ignore')
#vvv look at hyper-parameters here when we get to tuning vvv
#https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html#sklearn.neural_network.MLPClassifier
parameters = {
    'hidden_layer_sizes': [(10,), (20,), (10,5), (20,5), (2,5)],
    'activation': ['relu'],
    'solver': ['adam', 'lbfgs'],
    'alpha': [0.001, 0.01, 0.0001, 0.0005],
    'learning_rate': ['constant', 'invscaling', 'adaptive'],
    'max_iter': [200, 300],
    'batch_size': [1, 10, 20]
}
model = MLPClassifier()
clf = GridSearchCV(model, parameters, cv = 10, scoring = "accuracy")
clf.fit(data['training_data'], data['training_labels'])
print(clf.best_params_)
print(clf.best_score_)
#{'activation': ['relu'], 'alpha': [0.001], 'batch_size': [1], 'hidden_layer_sizes': [(10,)], 'learning_rate': ['invscaling'], 'max_iter': [200], 'solver': ['adam']}
#0.875

{'activation': 'relu', 'alpha': 0.001, 'batch_size': 1, 'hidden_layer_sizes': (20,), 'learning_rate': 'invscaling', 'max_iter': 200, 'solver': 'adam'}
0.8761904761904763


<h2>Test Performance</h2>

In [4]:
y_pred = clf.predict(scaler.transform(data['validation_data']))
score = accuracy_score(y_pred, data['validation_labels'])
print(score)

0.8235294117647058


<h2>Save Model</h2>

In [159]:
#will do once model is trained properly