In [8]:
# Import pertinent ML functions
from sklearn.model_selection import train_test_split, KFold, RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
from sklearn.metrics import accuracy_score

# Import the Models
from sklearn.linear_model import LogisticRegression

# Import other important libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import loguniform
import pickle

gtzan_feature_file = '../Data/gtzan_features.csv'

def load_gtzan_set(filename):
  df = pd.read_csv(filename)
  X, y = np.split(df.to_numpy(),[-1],axis=1)
  X = X.astype(np.float64)
  y = y.T[0]
  return X, y, df.columns.values

log_reg_hp = {
  'penalty': ['l2', 'l1'],
  'C': loguniform(0.01, 100),
  'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
  'multi_class': ['ovr', 'multinomial'],
}

scaling_to_test = [None, 'MinMax', 'MaxAbs']

def full_data_experiment(scaling, model, hp_space):
  # Configure the outer cross-validation procedure
  outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
  outer_results = {'scaling': scaling, 'acc': [], 'best_params': []}

  iter = 1

  for train_ix, test_ix in outer_cv.split(gtzan_X):
    print(iter)
    # Split data
    X_train, X_test = gtzan_X[train_ix, :], gtzan_X[test_ix, :]
    y_train, y_test = gtzan_y[train_ix], gtzan_y[test_ix]

    # Scale the input data if it applies
    if scaling is not None:
      scaler = MinMaxScaler() if scaling == 'MinMax' else MaxAbsScaler()
      X_train = scaler.fit_transform(X_train)
      X_test = scaler.transform(X_test)

    # Configure the inner cross-validation procedure
    cv_inner = KFold(n_splits=4, shuffle=True, random_state=1)

    # Define the Random Search & refit best model on whole training set
    search = RandomizedSearchCV(model, hp_space, scoring='accuracy', n_iter=100, cv=cv_inner, verbose=True, refit=True, n_jobs=-1)
 
    # Execute the Random Search with the PCA-transformed train set
    result = search.fit(X_train, y_train)

    # Get the best performing model fit on the whole training set
    best_model = result.best_estimator_

    # Evaluate model on the PCA-transformed test set
    preds = best_model.predict(X_test)
 
    # Evaluate the model
    acc = accuracy_score(y_test, preds)
 
    # Store the results
    outer_results['acc'].append(acc)
    outer_results['best_params'].append(result.best_params_)

    iter += 1

  return outer_results

In [12]:
gtzan_X, gtzan_y, gtzan_feature_list = load_gtzan_set(gtzan_feature_file)

In [13]:
log_reg_results = []
base_log_reg_model = LogisticRegression(fit_intercept=True)

In [15]:
results = full_data_experiment(scaling=None, model=base_log_reg_model, hp_space=log_reg_hp)
log_reg_results.append(results)

1
Fitting 4 folds for each of 100 candidates, totalling 400 fits


 0.70037037 0.1837037  0.18296296 0.15       0.16148148 0.72777778
 0.21777778        nan 0.69703704 0.47148148        nan 0.16074074
 0.46703704 0.52296296        nan 0.18407407        nan 0.51740741
        nan        nan 0.69444444        nan        nan 0.15037037
 0.52481481 0.68481481        nan        nan        nan        nan
 0.4662963  0.72925926 0.68703704 0.7               nan 0.16074074
 0.18407407 0.68481481        nan 0.18407407        nan 0.16111111
        nan        nan        nan        nan 0.15037037        nan
 0.47259259        nan 0.21740741        nan 0.14962963        nan
 0.51888889 0.72592593        nan        nan        nan        nan
 0.52185185 0.18407407        nan        nan        nan        nan
 0.15037037 0.69518519 0.47074074 0.69777778 0.69888889 0.16074074
        nan 0.72407407 0.15074074 0.51703704 0.15074074 0.15
 0.16074074        nan        nan 0.69888889 0.68518519        nan
        nan 0.16074074        nan 0.15       0.69555556 0.4737037
  

2
Fitting 4 folds for each of 100 candidates, totalling 400 fits


KeyboardInterrupt: 

In [None]:
print('Done')