In [None]:
import sys
import os
import numpy as np
from pathlib import Path
os.chdir(Path(os.getcwd()).resolve().parents[1])
import setup
from methods import grid_selection_amanda_dynamic
from sklearn.model_selection import ParameterGrid



def writeResults(datasetID, bestScore, bestParams, clfName, distanceMetric, beta=None):
    """Append one grid-search result line to the results file of this run.

    The target file lives under ``results/batch/dynamic/`` (relative to the
    current working directory). Its name is built from the distance metric
    and the classifier name; for the 'BBD' metric it additionally carries
    either the beta value formatted with three decimals, or the suffix
    ``ALLBETAS`` when ``beta`` is a list.

    Args:
        datasetID: Label written at the start of the result line.
        bestScore: Best score found by the grid search.
        bestParams: Parameter combination that achieved ``bestScore``.
        clfName: Classifier name, used in the file name.
        distanceMetric: Distance metric name, used in the file name.
        beta: A number or a list; only consulted when ``distanceMetric`` is
            'BBD'. A non-list value must be numeric because it is formatted
            with ``{:.3f}``.
    """
    resultsDir = "results/batch/dynamic"

    if distanceMetric == 'BBD' and isinstance(beta, list):
        fileName = "gridsearch_amanda_dynamic_{}_EVL-{}-ALLBETAS.txt".format(distanceMetric, clfName)
    elif distanceMetric == 'BBD':
        fileName = "gridsearch_amanda_dynamic_{}{:.3f}_EVL-{}.txt".format(distanceMetric, beta, clfName)
    else:
        fileName = "gridsearch_amanda_dynamic_{}_EVL-{}.txt".format(distanceMetric, clfName)

    # Create the output directory on demand so a finished (and expensive)
    # search is not lost just because the folder does not exist yet.
    os.makedirs(resultsDir, exist_ok=True)

    # The context manager closes the file even if the write fails.
    with open(resultsDir + "/" + fileName, "a") as resultsFile:
        resultsFile.write("{}: {} using {} \n".format(datasetID, bestScore, bestParams))


def main(distanceMetric, beta=None):
    """Grid-search the AMANDA dynamic method over every dataset and log the best setting.

    For each classifier name and each dataset loader, the first half of the
    dataset is used for the search. Every parameter combination is run
    through ``grid_selection_amanda_dynamic.run``; the combination with the
    highest mean of ``predict()`` is printed and appended to the results
    file through ``writeResults``.

    Args:
        distanceMetric: Name of the distance metric forwarded to the grid
            search (the caller in this file uses 'BBD', 'Hellinger' and
            'Hellinger2').
        beta: Beta value forwarded to the grid search. Required when
            ``distanceMetric`` is 'BBD'; either a number or a list.

    Raises:
        Exception: If ``distanceMetric`` is 'BBD' and ``beta`` is None.
    """
    sep = '\\' if sys.platform.startswith('win') else '/'
    path = os.getcwd()+sep+'data'+sep

    #loading datasets
    datasets = [setup.loadCDT, setup.loadCHT, setup.load2CDT, setup.load2CHT, setup.load4CR, setup.load4CRE_V1,
                setup.load4CRE_V2, setup.load5CVT, setup.loadCSurr, setup.load4CE1CF, setup.loadUG_2C_2D, setup.loadMG_2C_2D,
                setup.loadFG_2C_2D, setup.loadUG_2C_3D, setup.loadUG_2C_5D, setup.loadGEARS_2C_2D, setup.loadCheckerBoard,
                setup.loadElecData, setup.loadNOAADataset, setup.loadKeystroke]

    #arrClfName = ['SGD', 'NB', 'RF', 'LP', 'KNN']
    arrClfName = ['LP']
    if (distanceMetric == 'BBD') and (beta is None):
        raise Exception(("AMANDA-DCP with BBD must have a beta value. beta = {}").format(str(beta)))

    # isinstance (not "beta is list") so an actual list of betas is detected,
    # matching the check writeResults uses to pick the ALLBETAS file.
    allBetas = isinstance(beta, list)

    # Only a single numeric BBD beta is embedded in the banner text.
    if distanceMetric == 'BBD' and not allBetas:
        metricLabel = "{}{:.3f}".format(distanceMetric, beta)
    else:
        metricLabel = "{}".format(distanceMetric)

    poolSize = None
    isBatchMode = True
    lastIndex = len(datasets) - 1

    for clfName in arrClfName:
        print("**************** BEGIN of {}-{} results ****************".format(clfName, metricLabel))

        #testing grid search
        for i, loadDataset in enumerate(datasets):
            # The last two loaders (loadNOAADataset, loadKeystroke) are split
            # into fewer batches than the rest.
            if i == lastIndex - 1:
                batches = 24
            elif i == lastIndex:
                batches = 4
            else:
                batches = 47

            finalScore = 0
            best_grid = {}
            dataValues, dataLabels, description = loadDataset(path, sep)

            # Only the first half of the stream is made available to the search.
            availableQty = int(0.5*len(dataLabels))
            availableLabels = dataLabels[:availableQty]
            availableData = dataValues[:availableQty]

            # 10% of the available instances are the initial labeled set; the
            # remainder is divided evenly among the batches.
            initialLabeledData = int(0.1*len(availableLabels))
            sizeOfBatch = int((len(availableLabels)-initialLabeledData)/batches)

            print("{}: {} batches of {} instances".format(description, batches, sizeOfBatch))

            tuned_params = [{"sizeOfBatch":[sizeOfBatch],
                             "batches":[batches], "poolSize":[poolSize], "isBatchMode":[isBatchMode],
                             "initialLabeledData":[initialLabeledData], "clfName":[clfName],
                             "distanceMetric":[distanceMetric], "beta":[beta]}]
            if clfName == 'LP' or clfName == 'KNN':
                tuned_params[0].update({"K":[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]})

            for g in ParameterGrid(tuned_params):
                gs = grid_selection_amanda_dynamic.run(**g)

                try:
                    gs.fit(availableData, availableLabels)
                    averageAccuracy = np.mean(gs.predict())
                    print(averageAccuracy, g)
                    if finalScore < averageAccuracy:
                        finalScore = averageAccuracy
                        best_grid = g
                except Exception as err:
                    # Best effort: one failing combination must not abort the
                    # whole search, but report what went wrong.
                    print("An error occured in ", description, g, "->", repr(err))

            print(finalScore)
            print(best_grid)
            print("=======================================================================================================")

            writeResults(description, finalScore, best_grid, clfName, distanceMetric, beta)

        print("******** END of {}-{} results ********".format(clfName, metricLabel))
    
if __name__ == "__main__":
    betas = [-50, -1, -0.5, -0.1, 1.001, 1.1, 2, 3, 5, 10, 50]

    # Grid search for each beta value - BBD
    for beta in betas:
        main('BBD', beta)
        print('\n\n\n')

    # Grid search for the Hellinger variants. main() only requires a beta for
    # 'BBD', so none is passed here instead of reusing whatever value the loop
    # above happened to leave behind.
    for distanceMetric in ('Hellinger', 'Hellinger2'):
        main(distanceMetric)
        print('\n\n\n')


  from numpy.core.umath_tests import inner1d
  dataValues = pd.DataFrame.as_matrix(dataValues)


**************** BEGIN of LP-BBD-50.000 results ****************
One Class Diagonal Translation. 2 Dimensional data.: 47 batches of 153 instances
99.36127659574468 {'K': 2, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
99.36127659574468 {'K': 3, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
99.34723404255321 {'K': 4, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
99.34723404255321 {'K': 5, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
99.37510638297874 {'K': 6, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800

  dataValues = pd.DataFrame.as_matrix(dataValues)


69.33617021276596 {'K': 2, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
74.48170212765959 {'K': 3, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
77.59659574468085 {'K': 4, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
77.72191489361701 {'K': 5, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
96.0640425531915 {'K': 6, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
96.21702127659574 {'K': 7, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetri

  dataValues = pd.DataFrame.as_matrix(dataValues)


53.21829787234042 {'K': 2, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
53.12106382978723 {'K': 3, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
53.162978723404265 {'K': 4, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
53.148936170212764 {'K': 5, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
53.16276595744681 {'K': 6, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
53.12106382978723 {'K': 7, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMe

  dataValues = pd.DataFrame.as_matrix(dataValues)


53.06553191489362 {'K': 2, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
52.56531914893618 {'K': 3, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
52.565319148936176 {'K': 4, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
52.57936170212767 {'K': 5, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
52.551276595744675 {'K': 6, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 800, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 153}
52.565531914893626 {'K': 7, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceM

  dataValues = pd.DataFrame.as_matrix(dataValues)


Four Classes Rotating Separated. Bidimensional.: 47 batches of 1382 instances
29.121702127659574 {'K': 2, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 7220, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1382}
31.351063829787233 {'K': 3, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 7220, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1382}
29.099999999999998 {'K': 4, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 7220, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1382}
28.524680851063827 {'K': 5, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 7220, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1382}
29.081489361702126 {'K': 6, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 7220, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch'

  dataValues = pd.DataFrame.as_matrix(dataValues)


Four Classes Rotating with Expansion V1. Bidimensional.: 47 batches of 1196 instances
23.86851063829788 {'K': 2, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 6250, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1196}
22.470638297872338 {'K': 3, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 6250, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1196}
22.42191489361702 {'K': 4, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 6250, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1196}
22.240851063829787 {'K': 5, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 6250, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1196}
22.306382978723406 {'K': 6, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 6250, 'isBatchMode': True, 'poolSize': None, 'sizeOf

  dataValues = pd.DataFrame.as_matrix(dataValues)


Four Classes Rotating with Expansion V2. Bidimensional.: 47 batches of 1752 instances
19.580425531914894 {'K': 2, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 9150, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1752}
19.138085106382977 {'K': 3, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 9150, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1752}
19.560851063829787 {'K': 4, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 9150, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1752}
19.21744680851064 {'K': 5, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 9150, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1752}
19.48978723404255 {'K': 6, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 9150, 'isBatchMode': True, 'poolSize': None, 'sizeOf

  dataValues = pd.DataFrame.as_matrix(dataValues)


23.2731914893617 {'K': 2, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 1200, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 229}
25.522553191489354 {'K': 3, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 1200, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 229}
22.883191489361703 {'K': 4, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 1200, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 229}
23.506382978723398 {'K': 5, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 1200, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 229}
23.54361702127659 {'K': 6, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 1200, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 229}
23.441276595744675 {'K': 7, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'dist

  dataValues = pd.DataFrame.as_matrix(dataValues)


72.43659574468086 {'K': 2, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 2764, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 529}
82.65659574468086 {'K': 3, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 2764, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 529}
71.88510638297873 {'K': 4, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 2764, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 529}
72.87489361702127 {'K': 5, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 2764, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 529}
73.71170212765956 {'K': 6, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 2764, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 529}
84.71574468085105 {'K': 7, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanc

  dataValues = pd.DataFrame.as_matrix(dataValues)


Four Classes Expanding and One Class Fixed. Bidimensional.: 47 batches of 1658 instances
97.79340425531917 {'K': 2, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 8662, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1658}
97.77553191489365 {'K': 3, 'batches': 47, 'beta': -50, 'clfName': 'LP', 'distanceMetric': 'BBD', 'initialLabeledData': 8662, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 1658}
