In [2]:
import pandas as pd
import numpy as np
import random 
import matplotlib.pyplot as plt

# Base names (without the ".csv" extension, which LeerDatos appends) of the
# datasets used by the experiments below; later cells index this list by position.
dataset = ["titanic_train","iris"]

In [3]:
def LeerDatos(filename : str, separa : str, header = True):
    """Read ``<filename>.csv`` and return its contents as a NumPy array.

    Parameters
    ----------
    filename : str
        Path of the file WITHOUT the ``.csv`` extension (it is appended here).
    separa : str
        Field separator passed to ``pandas.read_csv``.
    header : bool, default True
        When truthy the first row is used as column names and is not part of
        the returned data; otherwise every row is returned as data.

    Returns
    -------
    numpy.ndarray
        The table converted with ``DataFrame.to_numpy()``.
    """
    # pandas expects the row index of the header, or None when there is none.
    fila_cabecera = 0 if header else None
    tabla = pd.read_csv(filename + ".csv", sep=separa, header=fila_cabecera)
    return tabla.to_numpy()

In [4]:
def Normalizar_Datos(data : np.array):
    """Standardize every column of ``data`` to zero mean and unit variance.

    The array is modified IN PLACE and the same object is returned, so the
    caller's array holds the normalized values afterwards.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array (rows = samples, columns = features). Because results are
        written back into ``data``, an integer array would truncate them;
        pass a float (or object) array.

    Returns
    -------
    numpy.ndarray
        ``data`` itself, with each column replaced by ``(x - mean) / std``.
        A constant column (std == 0) becomes all zeros instead of NaN/inf.
    """
    for i in range(data.shape[1]):
        columna = data[:, i]
        media = np.mean(columna)
        desvi = np.std(columna)
        if desvi == 0:
            # Constant column: the centred values are already all zero, so
            # divide by 1 rather than produce 0/0 = NaN.
            desvi = 1
        data[:, i] = (columna - media) / desvi
    return data

In [5]:
def Crear_k_folds(data : np.array , k:int, clases: []):
    """Split ``data`` into ``k`` stratified folds.

    The label is read from the last column. For every class in ``clases`` the
    rows carrying that label are split into ``k`` nearly equal parts
    (``np.array_split``), and fold ``i`` is the vertical stack of the i-th
    part of every class, in the order given by ``clases``.

    Rows are selected by label, so ``data`` does not have to be sorted by
    class; rows whose label is not listed in ``clases`` are left out.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array whose last column is the class label.
    k : int
        Number of folds.
    clases : list
        Label values to stratify on.

    Returns
    -------
    list of numpy.ndarray
        ``k`` arrays with the same number of columns as ``data``. When a
        class has fewer than ``k`` rows some folds get no row of that class.
    """
    num_columnas = data.shape[1]
    etiquetas = data[:, -1]

    # One list of k chunks per class; boolean masking keeps the original
    # row order inside each class.
    partes_por_clase = [
        np.array_split(data[etiquetas == clase], k) for clase in clases
    ]

    folds = []
    for i in range(k):
        fold = np.empty((0, num_columnas))
        for partes in partes_por_clase:
            fold = np.vstack((fold, partes[i]))
        folds.append(fold)

    return folds

In [6]:
def Sigmoidal(X:np.array, theta:np.array):
    """Apply the logistic function to the linear combination ``X . theta``.

    Parameters
    ----------
    X : numpy.ndarray
        Input activations; ``X.dot(theta)`` must be defined.
    theta : numpy.ndarray
        Weights.

    Returns
    -------
    numpy.ndarray
        ``1 / (1 + exp(-X.dot(theta)))``, element-wise.
    """
    z = X.dot(theta)
    denominador = np.exp(-z) + 1
    return 1 / denominador

In [7]:
def Calcular_Funcion_Costo(X: np.array, y:np.array):
    """Mean cross-entropy cost of the network output.

    J = -1/m * SUM_samples SUM_outputs [ y*log(h) + (1-y)*log(1-h) ]

    Parameters
    ----------
    X : numpy.ndarray
        Network outputs ``h`` (one row per sample, one column per output
        unit), expected to lie in [0, 1].
    y : numpy.ndarray
        Targets with the same shape as ``X``.

    Returns
    -------
    float
        The cost summed over ALL samples and outputs, divided by the number
        of samples ``m``.
    """
    m = X.shape[0]  # number of samples
    # Keep h strictly inside (0, 1) so saturated outputs give a large but
    # finite cost instead of log(0) -> -inf / NaN.
    eps = np.finfo(float).eps
    h = np.clip(X, eps, 1 - eps)
    return -np.sum(y * np.log(h) + (1 - y) * np.log(1 - h)) / m

In [8]:
def ds(D : np.array):
    """Sigmoid derivative expressed through the activation: ``D * (1 - D)``.

    ``D`` is expected to already be the sigmoid output, not the
    pre-activation value.
    """
    complemento = 1 - D
    return D * complemento

In [9]:
def GenerarW( num_capas : int, dim_capas = []):
    """Create random initial weight matrices for the network.

    Parameters
    ----------
    num_capas : int
        Number of hidden layers; ``num_capas + 1`` matrices are created.
    dim_capas : list of int
        Layer sizes ``[input, hidden..., output]``; must hold at least
        ``num_capas + 2`` entries.

    Returns
    -------
    dict
        ``W[i]`` drawn from a standard normal (``np.random.randn``).
        ``W[0]`` has shape ``(dim_capas[0], dim_capas[1])``; every later
        ``W[i]`` has one extra row, ``(dim_capas[i] + 1, dim_capas[i+1])``,
        for the bias column that Forward prepends to hidden activations.
    """
    W = {}
    for capa in range(num_capas + 1):
        # Only layers fed by a hidden layer receive the extra bias row.
        filas = dim_capas[capa] if capa == 0 else dim_capas[capa] + 1
        W[capa] = np.random.randn(filas, dim_capas[capa + 1])
    return W


In [10]:
def Forward (X: np.array, W : {}): 
    """Run a forward pass and return the activations of every layer.

    Parameters
    ----------
    X : numpy.ndarray
        Input batch, one row per sample.
    W : dict
        Weight matrices keyed ``0 .. len(W)-1``.

    Returns
    -------
    dict
        ``A[0]`` is ``X``. For each hidden layer ``A[i+1]`` is the sigmoid
        output with a column of ones prepended (bias unit). The last entry,
        ``A[len(W)]``, is the network output without a bias column.
    """
    ultima = len(W) - 1
    A = {0: X}
    salida = X
    for capa in range(len(W)):
        salida = Sigmoidal(salida, W[capa])
        if capa != ultima:
            # Hidden layer: prepend the bias unit for the next layer.
            unos = np.ones((salida.shape[0], 1))
            salida = np.hstack((unos, salida))
        A[capa + 1] = salida
    return A

In [11]:
def Backward (X: np.array, y: np.array, W:{}, A:{}, tasa_apren:float):
    """Update the weights ``W`` in place with one back-propagation step.

    Gradient of the cross-entropy cost with sigmoid units:
        dJ/dW[i] = A[i].T . delta[i+1] / m
        delta[i] = g'(z[i]) * (delta[i+1] . W[i].T),   g'(z) = a * (1 - a)

    Every delta is propagated through the weights used in the forward pass
    (i.e. BEFORE ``W[i]`` is updated), so all layers receive the gradient of
    the same cost value.

    Parameters
    ----------
    X : numpy.ndarray
        Input batch; only its number of rows ``m`` is used here.
    y : numpy.ndarray
        Targets, same shape as the network output.
    W : dict
        Weight matrices keyed ``0 .. len(W)-1``; modified in place.
    A : dict
        Activations as returned by Forward for the same ``X`` and ``W``
        (hidden activations carry a leading bias column of ones).
    tasa_apren : float
        Learning rate.

    Returns
    -------
    None
    """
    m = X.shape[0]
    ultima = len(W) - 1
    delta_t = A[len(A)-1] - y  # output error; the sigmoid derivative cancels with the cost
    for i in range(ultima, -1, -1):
        if i != ultima:
            # delta_t was computed on activations that include the bias
            # column; the bias unit has no incoming weights, so drop it.
            delta_t = delta_t[:, 1:]
        grad = A[i].T.dot(delta_t) / m
        if i > 0:
            # Propagate the error with the pre-update weights.
            delta_prev = (A[i] * (1 - A[i])) * delta_t.dot(W[i].T)
        W[i] -= tasa_apren * grad
        if i > 0:
            delta_t = delta_prev

In [12]:
def Gradiente_Descendiente(X: np.array, y:np.array, W:{},
                          num_itera:int, tasa_apren:float):
    """Train the network with batch gradient descent.

    Each iteration runs Forward, records the cost of the current output and
    then calls Backward, which updates ``W`` in place.

    Parameters
    ----------
    X, y : numpy.ndarray
        Training inputs and targets.
    W : dict
        Initial weights; modified in place.
    num_itera : int
        Number of iterations.
    tasa_apren : float
        Learning rate forwarded to Backward.

    Returns
    -------
    tuple
        ``(salida, costos, W)`` where ``salida`` is the output activation of
        the LAST forward pass (computed before the final weight update),
        ``costos`` is an array with one cost per iteration and ``W`` is the
        same dict that was passed in.
    """
    capa_salida = len(W)
    historial = np.empty(num_itera, dtype=float)
    activaciones = {}

    paso = 0
    while paso < num_itera:
        activaciones = Forward(X, W)
        historial[paso] = Calcular_Funcion_Costo(activaciones[capa_salida], y)
        Backward(X, y, W, activaciones, tasa_apren)
        paso += 1

    return activaciones[capa_salida], historial, W

In [13]:
def TransformacionOneShot(y: np.array, clases:[]):
    """One-hot encode the labels in ``y``.

    Parameters
    ----------
    y : iterable
        Labels, one per sample.
    clases : list
        Known label values; the position of a label in this list is the
        index of its 1 in the encoded row.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(y), len(clases))``.

    Raises
    ------
    ValueError
        If a label is not present in ``clases``.
    """
    num_clases = len(clases)
    posiciones = np.array([clases.index(etiqueta) for etiqueta in y], dtype=int)
    # Allocate the whole matrix once; stacking row by row copies the array on
    # every iteration and is quadratic in the number of samples.
    vec_clases = np.zeros((len(posiciones), num_clases), dtype=int)
    vec_clases[np.arange(len(posiciones)), posiciones] = 1
    return vec_clases

def OneShot_Salida(y:np.array):
    """Turn each row of scores into a one-hot row marking its maximum.

    Parameters
    ----------
    y : numpy.ndarray
        2-D array of scores, one row per sample.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``y`` holding 1 at the position of
        each row's maximum (first one on ties, as ``np.argmax`` does) and 0
        elsewhere.
    """
    ganadoras = np.argmax(y, axis = 1)
    y_cat = np.zeros_like(y)
    y_cat[np.arange(len(ganadoras)), ganadoras] = 1
    return y_cat

In [14]:
def CalculoParametros(folds:[], k:int, iteraciones:int, alpha:float,
 num_clases:int, num_capa_hidden:int, num_neurona: int, clases:[]):
    """Train one network per cross-validation split.

    For each ``test_i`` in ``0 .. k-1`` the remaining folds are stacked into
    a training set, the features (all columns but the last) are cast to
    float64 and normalized, fresh random weights are generated and the
    network is trained with Gradiente_Descendiente.

    Parameters
    ----------
    folds : list of numpy.ndarray
        Folds as produced by Crear_k_folds (label in the last column).
    k : int
        Number of folds to iterate over.
    iteraciones : int
        Gradient-descent iterations per split.
    alpha : float
        Learning rate.
    num_clases : int
        Size of the output layer.
    num_capa_hidden : int
        Number of hidden layers.
    num_neurona : int
        Units in every hidden layer.
    clases : list
        Label values used for the one-hot encoding.

    Returns
    -------
    tuple
        ``(theta, arr_costo, arr_test, W)``: ``arr_costo`` has one cost
        history per split and ``arr_test`` the held-out fold of each split.
        NOTE(review): ``theta`` (training output) and ``W`` (weights) are
        those of the LAST split only; the models of earlier splits are
        discarded. Confirm callers do not evaluate ``W`` on folds it was
        trained on.
    """
    arr_costo = []
    arr_test = []
    for test_i in range(k):
        # Start from an empty (0, n_columns) array so that a single fold
        # (k == 1) still yields a well-formed, empty training set.
        vacio = np.zeros( (0, np.size(folds[0][0])) )
        restantes = [folds[j] for j in range(k) if j != test_i]
        train = np.vstack([vacio] + restantes)

        X_train = Normalizar_Datos(train[:, :-1].astype('float64'))

        # Layer sizes: input, hidden..., output
        D_in = np.size(X_train[0])
        array_capas = [D_in] + [num_neurona] * num_capa_hidden + [num_clases]

        W = GenerarW(num_capa_hidden, array_capas)
        y_train = TransformacionOneShot(train[:, -1], clases)

        theta, costo, W = Gradiente_Descendiente(X_train, y_train, W, iteraciones, alpha)
        arr_costo.append(costo)
        arr_test.append(folds[test_i])

    return theta, arr_costo, arr_test, W

In [15]:
def Calcular_Accuracy(X:np.array, y:np.array, theta:np.array):
    """Fraction of samples whose predicted one-hot row equals the target row.

    Parameters
    ----------
    X : numpy.ndarray
        Input features.
    y : numpy.ndarray
        One-hot targets, one row per sample.
    theta : dict
        Network weights, passed to Forward as its ``W`` argument.

    Returns
    -------
    float
        Number of exactly matching rows divided by the number of rows in ``y``.
    """
    activaciones = Forward(X, theta)
    prediccion = OneShot_Salida(activaciones[len(activaciones) - 1])

    # A sample is a hit when target and prediction agree in every position.
    diferencias = y - prediccion
    aciertos = sum(1 for fila in diferencias if not np.any(fila))
    return aciertos / np.size(y[:, 0])
    
def PromedioAccuracy(test:np.array, theta, k, clases):
    """Average the accuracy of one model over ``k`` test folds.

    Parameters
    ----------
    test : sequence of numpy.ndarray
        Test folds with the label in the last column.
    theta : dict
        Network weights forwarded to Calcular_Accuracy.
    k : int
        Number of folds to evaluate (``test[0] .. test[k-1]``).
    clases : list
        Label values used for the one-hot encoding.

    Returns
    -------
    float
        Mean of the per-fold accuracies. Each fold's features are cast to
        float64 and normalized with that fold's own mean and standard
        deviation before evaluation.
    """
    accu = np.zeros(k)
    for indice in range(k):
        fold = test[indice]
        X_test = Normalizar_Datos(fold[:, :-1].astype('float64'))
        y_test = TransformacionOneShot(fold[:, -1], clases)
        accu[indice] = Calcular_Accuracy(X_test, y_test, theta)
    return accu.mean()

In [16]:
#EXPERIMENTO 1 

In [17]:
#1.1 TITANIC 

In [18]:
#Load the Titanic CSV and keep only the columns used below
#(named further down as ids, survived, sex, age, fare, embarked)
titanic = LeerDatos(dataset[0],',')
titanic_train = np.c_[titanic[:,0:2],titanic[:,4:6],titanic[:,-3:-2],titanic[:,-1:]]

#drop the rows that contain null values
titanic_train = pd.DataFrame(titanic_train).dropna()
#sort by column 1 (the label, see `survived` below) so rows of the same class are contiguous
titanic_train = titanic_train.sort_values(titanic_train.columns[1])

#split into single-column arrays so each feature can be encoded/normalized on its own
ids  = titanic_train.to_numpy()[:,0:1]
survived  = titanic_train.to_numpy()[:,1:2]
sex  = titanic_train.to_numpy()[:,2:3]
age  = titanic_train.to_numpy()[:,3:4]
fare = titanic_train.to_numpy()[:,4:5]
embarked = titanic_train.to_numpy()[:,5:]

#replace female/male with 1/0
sex = pd.DataFrame(sex).replace({"male": 0, "female": 1})
#replace Q/S/C with 1/2/3
embarked = pd.DataFrame(embarked).replace({"Q": 1, "S": 2, "C": 3})

#Normalizar_Datos writes into its argument and returns it, so age/fare are normalized too
age_nor = Normalizar_Datos(age)
fare_nor = Normalizar_Datos(fare)

#join the columns again, label last: ids, sex, age, fare, embarked, survived
#NOTE(review): every column except the last is later used as a model input, so
#`ids` ends up being a feature -- confirm this is intended
titanic_train_ = np.array(np.c_[ids,sex,age_nor,fare_nor,embarked,survived])
#print(titanic_train_)

In [19]:
# Grid search on the Titanic data: for every (hidden layers, neurons) pair,
# train with each learning rate / iteration count and print a table of the
# mean accuracy (rows = learning rate, columns = iterations).
# NOTE(review): no random seed is set in this cell and GenerarW draws the
# initial weights with np.random.randn, so the printed numbers presumably
# change between runs -- confirm whether a seed is set elsewhere.
clases = [0,1]
num_clases = len(clases);k = 3

#create the folds
fold_titanic = Crear_k_folds(titanic_train_,k,clases)

# hyper-parameter grid
alpha = [0.5, 0.75,0.9,1.0]
iteraciones = [500,750,1000]

numero_capas = [1,2,3]
num_neurona = [16,32,64]


# Reused for every (nc, nn) configuration; fully overwritten before each print.
Matriz_accurracy_prom = np.empty( (len(alpha),len(iteraciones)))

for nc in numero_capas:
    for nn in num_neurona:
        for tasa in range(0,len(alpha)):
            for it in range(0, len(iteraciones)):
                # NOTE(review): W is the model of the last split only
                # (see CalculoParametros), yet it is scored on all k folds in
                # `test`, including folds it was trained on -- verify.
                theta, dummy, test, W = CalculoParametros(fold_titanic, k, iteraciones[it], alpha[tasa], num_clases, nc, nn, clases)
                Matriz_accurracy_prom[tasa,it] = PromedioAccuracy(test, W, k, clases)
        print("Con",nc,"capas y ",nn,"neuronas")
        print(pd.DataFrame(Matriz_accurracy_prom, index = alpha, columns = iteraciones))

Con 1 capas y  16 neuronas
          500       750       1000
0.50  0.775260  0.785088  0.793527
0.75  0.776673  0.776649  0.803342
0.90  0.792126  0.786494  0.797699
1.00  0.773848  0.799117  0.789307
Con 1 capas y  32 neuronas
          500       750       1000
0.50  0.782287  0.790720  0.799135
0.75  0.800518  0.801942  0.807520
0.90  0.793497  0.804713  0.796275
1.00  0.789313  0.808927  0.800488
Con 1 capas y  64 neuronas
          500       750       1000
0.50  0.796340  0.810351  0.808956
0.75  0.800524  0.803301  0.803295
0.90  0.794904  0.797687  0.803283
1.00  0.804713  0.778020  0.806102
Con 2 capas y  16 neuronas
          500       750       1000
0.50  0.797693  0.787865  0.796263
0.75  0.785082  0.801895  0.807491
0.90  0.794850  0.797681  0.800470
1.00  0.800476  0.815918  0.806079
Con 2 capas y  32 neuronas
          500       750       1000
0.50  0.800488  0.804678  0.807491
0.75  0.806102  0.815936  0.810310
0.90  0.762567  0.811699  0.776584
1.00  0.748496  0.775195 

In [None]:
#1.2 ESPECIES DE FLORES(IRIS Setosa -Iris versicolor- Iris virginica) 
#Dataset contiene 5 columnas 1-Longitud del sépalo en centímetros. 2-Ancho del sépalo en centímetros.
#3-Longitud del pétalo en centímetros. 4-Ancho del pétalo en centímetros.
#5-Clase.
# url https://unipython.com/clasificacion-multiclase-de-especies-de-flores/

In [21]:
# Load the iris CSV (first row is treated as a header, the LeerDatos default)
especie_flores = LeerDatos(dataset[1], separa = ',')
#drop the rows with null values and sort by class (last column)
especie_flores = pd.DataFrame(especie_flores).dropna()
especie_flores = especie_flores.sort_values(especie_flores.columns[-1])


#split into single-column arrays and normalize each feature on its own
lng_sep  = Normalizar_Datos(especie_flores.to_numpy()[:,0:1])
anch_sep  = Normalizar_Datos(especie_flores.to_numpy()[:,1:2])
lng_pet  = Normalizar_Datos(especie_flores.to_numpy()[:,2:3])
anch_pet = Normalizar_Datos(especie_flores.to_numpy()[:,3:4])
clase  = especie_flores.to_numpy()[:,4:]

# encode the species names as the integer labels 1/2/3
clase = pd.DataFrame(clase).replace({"Iris-setosa": 1, "Iris-versicolor": 2, "Iris-virginica": 3})


#join the columns again, label last: sepal length, sepal width, petal length, petal width, class
especie_flores = np.array(np.c_[lng_sep,anch_sep,lng_pet,anch_pet,clase])
#print(iris)

In [22]:
# Grid search on the iris data: for every (hidden layers, neurons) pair,
# train with each learning rate / iteration count and print a table of the
# mean accuracy (rows = learning rate, columns = iterations).
# NOTE(review): this cell rebinds the notebook-level names used by the
# Titanic experiment (clases, alpha, iteraciones, ...), and no random seed is
# set here -- confirm whether a seed is set elsewhere.
clases = [1,2,3]
num_clases = len(clases);k = 3

#create the folds
fold_iris = Crear_k_folds(especie_flores,k,clases)

# hyper-parameter grid
alpha = [0.5, 0.75,0.9,1.0]
iteraciones = [200,300,350]

numero_capas = [1,2,3]
num_neurona = [8,10,12]


# Reused for every (nc, nn) configuration; fully overwritten before each print.
Matriz_accurracy_prom = np.empty( (len(alpha),len(iteraciones)))

for nc in numero_capas:
    for nn in num_neurona:
        for tasa in range(0,len(alpha)):
            for it in range(0, len(iteraciones)):
                # NOTE(review): W is the model of the last split only
                # (see CalculoParametros), yet it is scored on all k folds in
                # `test`, including folds it was trained on -- verify.
                theta, dummy, test, W = CalculoParametros(fold_iris, k, iteraciones[it], alpha[tasa], num_clases, nc, nn, clases)
                Matriz_accurracy_prom[tasa,it] = PromedioAccuracy(test, W, k, clases)
        print ("Con",nc,"capa y ",nn,"neuronas")
        print(pd.DataFrame(Matriz_accurracy_prom, index = alpha, columns = iteraciones))

Con 1 capa y  8 neuronas
           200       300       350
0.50  0.906748  0.933284  0.926618
0.75  0.926618  0.933284  0.946356
0.90  0.933284  0.933284  0.940229
1.00  0.919542  0.959837  0.940229
Con 1 capa y  10 neuronas
           200       300       350
0.50  0.899804  0.926618  0.926618
0.75  0.926618  0.933284  0.933284
0.90  0.913415  0.926618  0.940229
1.00  0.926618  0.953431  0.940229
Con 1 capa y  12 neuronas
           200       300       350
0.50  0.919951  0.939820  0.933284
0.75  0.926618  0.933284  0.946765
0.90  0.933284  0.933284  0.940229
1.00  0.933284  0.946765  0.959837
Con 2 capa y  8 neuronas
           200       300       350
0.50  0.959967  0.979984  0.979984
0.75  0.979984  0.973039  0.979984
0.90  0.979984  0.979984  0.979984
1.00  0.973039  0.979984  0.979984
Con 2 capa y  10 neuronas
           200       300       350
0.50  0.953170  0.966503  0.979984
0.75  0.979984  0.979984  0.979984
0.90  0.979984  0.973448  0.979984
1.00  0.979984  0.979984  0.9799