In [1]:
import pandas as pd
import numpy as np
import Orange
import pickle
from sklearn.model_selection import StratifiedKFold #Ayudara a dividir los dataframes k veces para la validacion cruzada

### Cargue de los datasets

In [2]:
tic_tac_toe_data =Orange.data.Table("Datasets/Tic-Tac-Toe Endgame Data Set/Tic-Tac-Toe Endgame Data Set.xlsx")
credit_approval_data = Orange.data.Table("Datasets/Credit Approval Data Set/Credit Approval Data Set.xlsx")
wine_data = Orange.data.Table("Datasets/Wine Dataset/Wine Dataset.xlsx")

### Importando modelos arbol J48 creados en orange

In [3]:
tic_tac_toe_model_url="Datasets/Tic-Tac-Toe Endgame Data Set/Tree.pkcls"
credit_approval_model_url="Datasets/Credit Approval Data Set/Tree.pkcls"
wine_model_url="Datasets/Wine Dataset/Tree.pkcls"

with open(tic_tac_toe_model_url, "rb") as model1:
    tic_tac_toe_tree = pickle.load(model1)
    
with open(credit_approval_model_url, "rb") as model2:
    credit_approval_tree = pickle.load(model2)
    
with open(wine_model_url, "rb") as model3:
    wine_tree = pickle.load(model3)

In [4]:
tic_tac_toe_tree

TreeModel(data=<?>, root=<Orange.tree.DiscreteNode at 0x19e4dc9f1f0>)

In [5]:
credit_approval_tree

TreeModel(data=<?>, root=<Orange.tree.DiscreteNode at 0x19e4e5bdd30>)

In [6]:
wine_tree

TreeModel(data=<?>, root=<Orange.tree.NumericNode at 0x19e4e5c7f10>)

### Se crean las funciones para hacer prueba por reescritura, leave one out y cross validation

Funcion para reescritura

In [7]:
def reescritura(datos, arbol, class_column_name):
    original_target = datos.get_column_view(class_column_name)[0].astype('int').tolist()
    predicciones = arbol(datos).tolist()
    #print('________________________________')

    #print('Original: '+str(original_target))
    #print('Predicciones: '+str(predicciones))
    
    #print('_______________________________')
    good_predictions=0
    for t,p in zip(original_target, predicciones):
        if t == p:
           good_predictions=good_predictions+1
    
    exactitud = good_predictions/len(original_target)
    return  exactitud

Funciones para validacion cruzada

In [8]:
def promedio(lst):
    return sum(lst) / len(lst)

In [9]:
def sub_arbol(data, class_column_name, train_index, test_index):
    train = data[min(train_index):max(train_index)]
    test = data[min(test_index):max(test_index)]
    tr = Orange.classification.TreeLearner()
    arbol = tr(train)
    exactitud = reescritura(test, arbol, class_column_name)
    return exactitud

In [10]:
def cross_validation(data, class_column_name, ndivisiones=5):
    X=pd.DataFrame(np.zeros((len(data), 1)))
    y=pd.DataFrame(np.zeros((len(data), 1)))
    skf = StratifiedKFold(n_splits=ndivisiones)
    lista_exactitud = []
    for k, (train_index, test_index) in enumerate(skf.split(X, y)):
        train_index = train_index.tolist()
        test_index=test_index.tolist()
        #print(len(train_index))
        #print(len(test_index))
        exactitud = sub_arbol(data, class_column_name, train_index, test_index)
        lista_exactitud.append(exactitud)
    return lista_exactitud

Funcion para leave one out

In [11]:
#def leave_one_out(datos):

#### Analisis para Tic-Tac-Toe Endgame Data Set (Datos Simbolicos)

Reescritura

In [12]:
exactitud1 = reescritura(tic_tac_toe_data, tic_tac_toe_tree, 'Class')

In [13]:
print(f'En reescritura la exactitud es {exactitud1}')

En reescritura la exactitud es 0.941544885177453


Validacion cruzada

In [14]:
lista_exactitud_tic_tac_toe=cross_validation(tic_tac_toe_data, 'Class', 5)

In [15]:
print(lista_exactitud_tic_tac_toe)
print('En validacion cruzada la exactitud es',promedio(lista_exactitud_tic_tac_toe))

[0.4397905759162304, 1.0, 1.0, 1.0, 0.3157894736842105]
En validacion cruzada la exactitud es 0.7511160099200882


#### Analisis para Credit Approval Data Set (Dataset con missing values)

Reescritura

In [16]:
exactitud2 = reescritura(credit_approval_data, credit_approval_tree, 'Class')

In [17]:
print(f'En reescritura la exactitud es {exactitud2}')

En reescritura la exactitud es 0.936231884057971


Validacion Cruzada

In [18]:
lista_exactitud_credit_approval=cross_validation(credit_approval_data, 'Class', 5)

In [19]:
print(lista_exactitud_credit_approval)
print('En validacion cruzada la exactitud es',promedio(lista_exactitud_credit_approval))

[0.6423357664233577, 0.9708029197080292, 0.9708029197080292, 0.9781021897810219, 0.8467153284671532]
En validacion cruzada la exactitud es 0.8817518248175183


#### Analisis para wine dataset

Reescritura

In [20]:
exactitud3 = reescritura(wine_data,wine_tree,'target')

In [21]:
print(f'En reescritura la exactitud es {exactitud3}')

En reescritura la exactitud es 0.9887640449438202


Validacion Cruzada

In [22]:
lista_exactitud_wine=cross_validation(wine_data, 'target', 5)

In [23]:
print(lista_exactitud_wine)
print('En validacion cruzada la exactitud es',promedio(lista_exactitud_wine))

[0.9142857142857143, 0.9428571428571428, 1.0, 1.0, 0.7058823529411765]
En validacion cruzada la exactitud es 0.9126050420168067
