In [None]:
# Import pertinent ML functions
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
from sklearn.metrics import accuracy_score

# Import the Models
from sklearn.ensemble import GradientBoostingClassifier

# Import other important libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import loguniform
import pickle

# Location of the feature CSV handed to load_gtzan_set(). The path is relative,
# so it is resolved against the working directory the notebook is run from.
gtzan_feature_file = '../Data/gtzan_features.csv'

def load_gtzan_set(filename):
  """Read a feature CSV and separate it into model inputs and labels.

  The last column of the file is treated as the label column; every column
  before it is treated as a numeric feature.

  Args:
    filename: Path of the CSV file, forwarded to ``pd.read_csv``.

  Returns:
    A tuple ``(X, y, columns)``:
      * ``X`` -- 2-D float64 array holding all columns except the last.
      * ``y`` -- 1-D array holding the last column.
      * ``columns`` -- array of every column name in the file (the label
        column's name included).
  """
  frame = pd.read_csv(filename)
  table = frame.to_numpy()

  # Everything left of the final column is a feature; cast to float64.
  features = table[:, :-1].astype(np.float64)
  # The final column, flattened to one dimension, holds the labels.
  labels = table[:, -1]

  return features, labels, frame.columns.values

# Hyper-parameter grid searched for the gradient boosting classifier:
# 3 x 3 x 3 = 27 candidate combinations.
grad_boost_hp = dict([
  ('learning_rate', [0.001, 0.01, 0.1]),
  ('max_depth', [3, 5, 7]),
  ('max_features', ['sqrt', 'log2', None]),
])

# Input scaling options to compare; None means the features are used as-is.
scaling_to_test = [
  None,
  'MinMax',
  'MaxAbs',
]

def full_data_experiment(scaling, model, hp_space, X=None, y=None):
  """Run a nested cross-validation experiment for one scaling option.

  The outer loop is a seeded, shuffled 10-fold split used to estimate accuracy.
  Inside each outer fold, a grid search with a seeded, shuffled 4-fold split
  picks the best hyper-parameters on the training part, refits on that whole
  training part, and the refit model is scored on the held-out part.

  Args:
    scaling: ``None`` for no scaling, ``'MinMax'`` for ``MinMaxScaler`` or
      ``'MaxAbs'`` for ``MaxAbsScaler``. The scaler is fit on the training part
      of each outer fold only and then applied to the held-out part.
    model: Estimator passed to ``GridSearchCV``.
    hp_space: Parameter grid passed to ``GridSearchCV``.
    X: Feature matrix indexable as ``X[rows, :]``. Defaults to the
      notebook-level ``gtzan_X``.
    y: Label vector indexable as ``y[rows]``. Defaults to the notebook-level
      ``gtzan_y``.

  Returns:
    A dict with keys ``'scaling'`` (the ``scaling`` argument), ``'acc'`` (list
    of held-out accuracies, one per outer fold) and ``'best_params'`` (list of
    the selected hyper-parameter dicts, one per outer fold).

  Raises:
    ValueError: If ``scaling`` is not one of the supported options.
  """
  # Reject unknown names up front; otherwise a typo would silently be treated
  # as 'MaxAbs' and only be noticed after hours of fitting (if at all).
  if scaling not in (None, 'MinMax', 'MaxAbs'):
    raise ValueError(
      "Unsupported scaling %r; expected None, 'MinMax' or 'MaxAbs'" % (scaling,))

  # Fall back to the notebook-level data set when none is passed explicitly.
  if X is None:
    X = gtzan_X
  if y is None:
    y = gtzan_y

  # Configure the outer cross-validation procedure
  outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
  outer_results = {'scaling': scaling, 'acc': [], 'best_params': []}

  for fold_number, (train_ix, test_ix) in enumerate(outer_cv.split(X), start=1):
    # Progress indicator: 1-based index of the current outer fold
    print(fold_number)

    # Split data
    X_train, X_test = X[train_ix, :], X[test_ix, :]
    y_train, y_test = y[train_ix], y[test_ix]

    # Scale the input data if it applies. The scaler is fit on the training
    # part only so no information from the held-out part leaks into it.
    if scaling is not None:
      # Compare by value (==): identity (`is`) on strings is not guaranteed.
      scaler = MinMaxScaler() if scaling == 'MinMax' else MaxAbsScaler()
      X_train = scaler.fit_transform(X_train)
      X_test = scaler.transform(X_test)

    # Configure the inner cross-validation procedure
    cv_inner = KFold(n_splits=4, shuffle=True, random_state=1)

    # Define the grid search & refit the best model on the whole training set
    search = GridSearchCV(model, hp_space, scoring='accuracy', cv=cv_inner, verbose=True, refit=True, n_jobs=-1)

    # Execute the grid search on the (optionally scaled) training set
    result = search.fit(X_train, y_train)

    # Get the best performing model fit on the whole training set
    best_model = result.best_estimator_

    # Predict on the (optionally scaled) held-out set
    preds = best_model.predict(X_test)

    # Evaluate the model
    acc = accuracy_score(y_test, preds)

    # Store the results
    outer_results['acc'].append(acc)
    outer_results['best_params'].append(result.best_params_)

  return outer_results



In [None]:
# Load the data set once at notebook level. gtzan_X and gtzan_y are read by
# full_data_experiment(); gtzan_feature_list holds every CSV column name,
# including the name of the final (label) column.
gtzan_X, gtzan_y, gtzan_feature_list = load_gtzan_set(gtzan_feature_file)

In [None]:
# Accumulates one result dict per scaling option (filled by the experiment loop).
gb_results = []

# Seed the estimator so runs are repeatable: both CV splitters are seeded, but
# the grid includes max_features='sqrt'/'log2', which makes tree construction
# random unless random_state is fixed. GridSearchCV clones this estimator, so
# every candidate fit inherits the same seed.
base_gb_model = GradientBoostingClassifier(random_state=42)

In [None]:
# Run the nested cross-validation experiment once per scaling option and
# collect each outcome in gb_results.
for scaling in scaling_to_test:
  # Announce which scaling option the following fold output belongs to
  print(scaling)
  results = full_data_experiment(
    scaling=scaling,
    model=base_gb_model,
    hp_space=grad_boost_hp,
  )
  gb_results.append(results)

None
1
Fitting 4 folds for each of 27 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed: 16.0min
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed: 40.3min finished


2
Fitting 4 folds for each of 27 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
