In [2]:
import torch
from kymatio.torch import Scattering1D
import os
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

from VoicesDataset import VoicesDataset # A class for data loading.
from utils import * 

import pickle

import decimal
from sklearn import svm
from sklearn import model_selection
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [3]:
def prepare_data(data, labels, T=2 ** 14):
    """Centre-crop raw signals to a fixed length for the scattering transform.

    Both inputs are transposed first; the crop is then taken along axis 1 of
    the transposed data, so the sample (time) axis is expected to be axis 0
    of the incoming ``data`` array.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array of signals. After transposition its shape is
        ``(n_signals, L)``.
    labels : numpy.ndarray
        Labels matching ``data``; returned transposed and otherwise untouched.
    T : int, optional
        Number of samples to keep per signal (default ``2 ** 14``).

    Returns
    -------
    (torch.Tensor, numpy.ndarray)
        A contiguous ``float32`` tensor of shape ``(n_signals, T)`` holding
        the central ``T`` samples of every signal, and the transposed labels.

    Raises
    ------
    ValueError
        If the signals are shorter than ``T``. Previously a negative start
        index silently produced a wrong (tail-only or full-length) window.
    """
    labels = labels.transpose()
    signals = torch.tensor(data.transpose())
    L = signals.shape[1]
    if L < T:
        raise ValueError(
            "signals have {} samples but a window of {} is required".format(L, T)
        )
    # Integer arithmetic keeps the window exactly T samples wide and centred.
    start = (L - T) // 2
    signals = signals[:, start:start + T].contiguous()
    return signals.float(), labels

In [4]:
def runHyperparametersOpt(order_coeffs, type_feats,
                          ds_path='/content/gdrive/MyDrive/scatter_signal_typing/'):
    """Grid-search J and G for one feature configuration.

    For every (J, G) pair the feature vectors of both datasets ("saar" and
    "meei") are computed with ``get_feature_vector``, each dataset is split
    with a fixed seed, the two "hp" partitions are concatenated and scored
    with ``hp_fun``. The configuration with the highest accuracy is kept.

    Parameters
    ----------
    order_coeffs : list of str
        Scattering orders to use; forwarded to ``get_feature_vector``.
    type_feats : list of str
        Feature types to compute; forwarded to ``get_feature_vector``.
    ds_path : str, optional
        Root folder handed to ``VoicesDataset``. Defaults to the Google Drive
        location that was previously hard-coded.

    Returns
    -------
    dict
        Keys: ``'accuracy'`` (the ``(accuracy, std)`` pair of the best grid
        point), ``'Jmax'``, ``'Gmax'``, ``'meei_val_set'`` and
        ``'saar_val_set'`` (each a ``(labels, features)`` tuple for the
        held-out partition at the best grid point).

    Raises
    ------
    RuntimeError
        If no grid point yields a comparable accuracy (e.g. all NaN).
    """
    # Load and crop both datasets.
    ds = VoicesDataset("saar", p=ds_path)
    data_saar, labels_saar = ds[:]
    data_saar, labels_saar = prepare_data(data_saar, labels_saar)
    ds = VoicesDataset("meei", p=ds_path)
    data_meei, labels_meei = ds[:]
    data_meei, labels_meei = prepare_data(data_meei, labels_meei)

    # Hyperparameter grid. NOTE(review): the meaning of J and G is defined by
    # get_feature_vector (not visible here) -- confirm before changing ranges.
    J = np.arange(3, 15)
    G = np.arange(0, 1.1, .1)

    acc = np.zeros((len(J), len(G)))
    std = np.zeros((len(J), len(G)))

    if torch.cuda.is_available():
        print('Using cuda...')
    else:
        print('Not using cuda')

    max_acc_results = dict()
    # Start below any reachable score so the first grid point is always
    # recorded; starting at 0 left the result dict empty (and std undefined)
    # whenever no accuracy was strictly positive.
    max_acc = -np.inf
    for j in range(len(J)):
        for g in range(len(G)):
            X_meei = get_feature_vector(data_meei, J[j], G[g], order_coeffs, type_feats)
            X_saar = get_feature_vector(data_saar, J[j], G[g], order_coeffs, type_feats)

            # Partition data into hyperparameter-determination and validation sets.
            # NOTE(review): train_test_split returns (train, test), so with
            # test_size=0.2 the "val" partitions receive 80% of the samples and
            # the "hp" partitions 20% -- confirm this is the intended ratio.
            X_val_meei, X_hp_meei, y_val_meei, y_hp_meei = model_selection.train_test_split(
                X_meei, labels_meei, test_size=0.2, shuffle=True, random_state=3)
            X_val_saar, X_hp_saar, y_val_saar, y_hp_saar = model_selection.train_test_split(
                X_saar, labels_saar, test_size=0.2, shuffle=True, random_state=3)

            X_hp = np.concatenate((X_hp_meei, X_hp_saar), axis=0)
            y_hp = np.concatenate((y_hp_meei, y_hp_saar), axis=0)
            results = hp_fun(X_hp, y_hp)
            acc[j, g], std[j, g] = results['accuracy']

            # Keep the data and hyperparameters of the best result for later.
            # Strict '>' means the first grid point wins ties.
            if acc[j, g] > max_acc:
                max_acc = acc[j, g]
                max_acc_results['accuracy'] = (acc[j, g], std[j, g])
                max_acc_results['Jmax'] = J[j]
                max_acc_results['Gmax'] = G[g]
                max_acc_results['meei_val_set'] = (y_val_meei, X_val_meei)
                max_acc_results['saar_val_set'] = (y_val_saar, X_val_saar)

    if 'accuracy' not in max_acc_results:
        raise RuntimeError('hyperparameter search produced no valid accuracy')

    std_max_acc = max_acc_results['accuracy'][1]
    print('J max = {}, g max = {}'.format(max_acc_results['Jmax'], max_acc_results['Gmax']))
    print(max_acc)
    print(std_max_acc)
    return max_acc_results


In [5]:
# Feature configurations to sweep: every combination of scattering orders
# (rows) with every set of feature types (columns).
st_coeffs = [
    ['order0', 'order1'],
    ['order0', 'order2'],
    ['order0', 'order1', 'order2'],
]
type_feats = [
    ['tv'],
    ['mean'],
    ['tv', 'mean'],
]

# orders_dict: "+"-joined orders -> {"+"-joined feature types -> [accuracy, std]}
# list_of_results: full result dicts, appended in row-major (orders, features) order
# max_acc: best accuracy per (orders, features) cell
orders_dict = {}
list_of_results = []
max_acc = np.zeros((len(st_coeffs), len(type_feats)))

for i, orders in enumerate(st_coeffs):
    arch_dict = {}
    for j, tf in enumerate(type_feats):
        print(orders + tf)
        max_acc_results = runHyperparametersOpt(orders, tf)
        list_of_results.append(max_acc_results)
        best_acc, std_max_acc = max_acc_results['accuracy']
        max_acc[i, j] = best_acc
        arch_dict['+'.join(tf)] = [max_acc[i, j], std_max_acc]
    orders_dict['+'.join(orders)] = arch_dict


['order0', 'order1', 'tv']
Using cuda...
J max = 6, g max = 0.0
0.7432941176470589
0.05014678952642696
['order0', 'order1', 'mean']
Using cuda...
J max = 12, g max = 0.2
0.6050980392156864
0.10175968101236642
['order0', 'order1', 'tv', 'mean']
Using cuda...
J max = 6, g max = 0.0
0.7114509803921569
0.07089623436878696
['order0', 'order2', 'tv']
Using cuda...
J max = 7, g max = 0.5
0.8065882352941177
0.04450550208113645
['order0', 'order2', 'mean']
Using cuda...
J max = 14, g max = 0.0
0.7669803921568628
0.06526621319763813
['order0', 'order2', 'tv', 'mean']
Using cuda...
J max = 7, g max = 0.6000000000000001
0.8145882352941175
0.049761229192975626
['order0', 'order1', 'order2', 'tv']
Using cuda...
J max = 7, g max = 0.4
0.7985882352941177
0.05426668877118256
['order0', 'order1', 'order2', 'mean']
Using cuda...
J max = 14, g max = 0.1
0.7551372549019607
0.05866205287509281
['order0', 'order1', 'order2', 'tv', 'mean']
Using cuda...
J max = 8, g max = 0.1
0.7988235294117647
0.075285947269

In [6]:
# Display the sweep results as a table via dic2df (helper pulled in by
# `from utils import *`). Each orders_dict entry stores [accuracy, std] per
# feature type, so presumably sub-row 0 is the accuracy and sub-row 1 its
# standard deviation -- TODO confirm against dic2df.
dic2df(orders_dict)

Unnamed: 0,Unnamed: 1,tv,mean,tv+mean
order0+order1,0,0.743294,0.605098,0.711451
order0+order1,1,0.050147,0.10176,0.070896
order0+order2,0,0.806588,0.76698,0.814588
order0+order2,1,0.044506,0.065266,0.049761
order0+order1+order2,0,0.798588,0.755137,0.798824
order0+order1+order2,1,0.054267,0.058662,0.075286


In [9]:
file_path = '/content/gdrive/MyDrive/scatter_signal_typing/'

# Flat (row-major) index of the configuration to keep; it addresses both
# list_of_results and the flattened max_acc grid.
# NOTE(review): this is pinned to 6 by hand instead of the commented-out
# argmax, so the selected entry is not necessarily the best-scoring one.
ind = 6  # np.argmax(max_acc, axis=None)
print(ind)
row, col = np.unravel_index(ind, max_acc.shape)
print(max_acc[row, col])

# Persist the chosen result dictionary to disk for the later validation step.
results_file = file_path + 'results_hp.pkl'
with open(results_file, 'wb') as pkl_file:
    pickle.dump(list_of_results[ind], pkl_file)

6
0.7985882352941177
