# Entorno de pruebas automatizadas

## Configuración del entorno

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from packages.importer import *
from packages.c45 import *
from packages.binaryTree import *
import packages.classifier as cl
import statistics as stats
import time

## Importar datos

In [2]:
def import_problem(path, sep, lineTerminator, testPer):
    """Load a problem by delegating to ``import_csv``.

    Every argument is forwarded unchanged, in the same order, and the
    result of ``import_csv`` is returned as-is.

    Args:
        path: First argument for ``import_csv`` (a CSV file path in the
            calls shown in this notebook).
        sep: Second argument for ``import_csv``.
        lineTerminator: Third argument for ``import_csv``.
        testPer: Fourth argument for ``import_csv``; presumably the share
            of rows reserved for testing -- confirm against the importer.

    Returns:
        Whatever ``import_csv`` returns.
    """
    return import_csv(path, sep, lineTerminator, testPer)

## Construir árbol de decisión

In [3]:
def build_tree(problem, threshold, gainFunc):
    """Build a decision tree for ``problem`` via ``decisionTree``.

    Args:
        problem: Object exposing ``data``, ``attributes``, ``classes`` and
            ``classcolumn`` attributes.
        threshold: Forwarded to ``decisionTree`` as the ``threshold``
            keyword argument.
        gainFunc: Forwarded to ``decisionTree`` as the ``gainFunc``
            keyword argument.

    Returns:
        Whatever ``decisionTree`` returns when started from a fresh
        ``BinaryTree()``.
    """
    # Only these four fields are needed to build the tree;
    # ``problem.testData`` is not read here.
    return decisionTree(
        problem.data,
        problem.attributes,
        problem.classes,
        problem.classcolumn,
        BinaryTree(),
        threshold=threshold,
        gainFunc=gainFunc,
    )

## Calcular precisión

In [4]:
def calc_accuracy(tree, attributes, testData):
    """Evaluate ``tree`` on ``testData`` through ``cl.Classifier``.

    Args:
        tree: Passed to ``Classifier.classifyDataFrame`` as its first
            argument.
        attributes: Used to construct the ``cl.Classifier`` instance.
        testData: Passed to ``Classifier.classifyDataFrame`` as its second
            argument.

    Returns:
        The value returned by ``classifyDataFrame`` (callers in this
        notebook treat it as a numeric accuracy).
    """
    return cl.Classifier(attributes).classifyDataFrame(tree, testData)

## Ejecutar pruebas

In [7]:
def test_exe(path, sep, lineTerminator, testPer, threshold, gainFunc, n):
    """Run the import/build/evaluate pipeline ``n`` times and summarise it.

    On every run the problem is re-imported with ``import_problem``, a tree
    is built with ``build_tree`` and scored with ``calc_accuracy``; the
    accuracy of each run is printed as it completes.

    Args:
        path: Forwarded to ``import_problem``.
        sep: Forwarded to ``import_problem``.
        lineTerminator: Forwarded to ``import_problem``.
        testPer: Forwarded to ``import_problem``.
        threshold: Forwarded to ``build_tree``.
        gainFunc: Forwarded to ``build_tree``.
        n: Number of runs to execute; must be at least 1.

    Returns:
        A dict with the keys ``'results'`` (list of per-run accuracies),
        ``'max'``, ``'min'``, ``'mean'``, ``'stDev'`` (population standard
        deviation) and ``'time'`` (elapsed seconds for all runs).

    Raises:
        ValueError: If ``n`` is less than 1, because no statistics can be
            computed from zero runs.
    """
    # Fail early with a clear message instead of letting max() raise an
    # opaque "empty sequence" ValueError after the (empty) loop.
    if n < 1:
        raise ValueError(f'n must be at least 1 to compute statistics, got {n!r}')

    accuracy_array = []
    # perf_counter() is monotonic, so the elapsed time cannot be distorted
    # by system clock adjustments during a long experiment.
    start = time.perf_counter()
    for run in range(1, n + 1):
        problem = import_problem(path, sep, lineTerminator, testPer)
        tree = build_tree(problem, threshold, gainFunc)
        accuracy = calc_accuracy(tree, problem.attributes, problem.testData)
        accuracy_array.append(accuracy)
        print(f'Corrida {run} | Precisión: {accuracy}')
    totalTime = time.perf_counter() - start

    return {
        'results': accuracy_array,
        'max': max(accuracy_array),
        'min': min(accuracy_array),
        'mean': stats.mean(accuracy_array),
        'stDev': stats.pstdev(accuracy_array),
        'time': totalTime,
    }

## Ejecuciones paralelas

### Prueba 1

In [11]:
# Settings for test run 1: dataset location and CSV format ...
path, sep, lineTerminator = 'datasets/test_2.csv', ',', '\n'
# ... values forwarded to the importer and the tree builder ...
testPer, threshold, gainFunc = 0.1, 0.1, 'gain'
# ... and the number of repetitions.
n = 5

# Execute the experiment and keep the summary dictionary.
dic = test_exe(
    path=path,
    sep=sep,
    lineTerminator=lineTerminator,
    testPer=testPer,
    threshold=threshold,
    gainFunc=gainFunc,
    n=n,
)

# Report every summary entry, one per line.
for key, value in dic.items():
    print(key, ': ', value)

Corrida 1 | Precisión: 0.94
Corrida 2 | Precisión: 0.98
Corrida 3 | Precisión: 0.97
Corrida 4 | Precisión: 0.99
Corrida 5 | Precisión: 0.96
results :  [0.94, 0.98, 0.97, 0.99, 0.96]
max :  0.99
min :  0.94
mean :  0.968
stDev :  0.017204650534085267
time :  79.98787903785706
