In [72]:
#numpy y pandas
import numpy as np
import pandas as pd

#Conjunto de datos
from sklearn.datasets import load_breast_cancer

#Algoritmo K-Medias
from sklearn.cluster import KMeans

#Validacion Cruzada
from sklearn.model_selection import StratifiedKFold

#Normalizacion de datos
from sklearn.preprocessing import MinMaxScaler

#Moda
from scipy.stats import mode

#Tasa
from sklearn.metrics import accuracy_score

In [73]:
#Cargamos los datos
datos = load_breast_cancer()
datos

#Creamos un DataFrame para visualizar los datos
df = pd.DataFrame(data=datos.data, columns=datos.feature_names)
df

df.insert(loc=len(df.columns), column='Target', value=datos.target)
df

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,0
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,0
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,0
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,0
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,0
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,0
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,0


#### Escalado de los datos

In [74]:
X = MinMaxScaler().fit_transform(datos.data)
y = datos.target
X.shape, y.shape

((569, 30), (569,))

Las redes de funcion de base radial estan formadas por 3 capas: una de entrada, una única capa oculta y una capa de salida. El primer parámetro que hay que decidir es el número de neuronas de la capa oculta. Para entrenar la capa oculta utilizaremos el algoritmo K-Medias y para ello necesitamos el numero de clusters que coincidira con el numero de neuronas.

##### Validacion Cruzada (K=10)

In [75]:
K = 10 #10 particiones
kfold = StratifiedKFold(n_splits=K)

min_clusters = 2
max_clusters = 30

#Tabla de resultados
tabla = pd.DataFrame(columns=['n_clusters', 'Tasa_acierto'])

for c in range(min_clusters, max_clusters+1):
    
    #Creacion del agrupamiento de K-Medias
    kmedias = KMeans(n_clusters=c, random_state=0)
    tasa = 0.0
    
    #Validacion Cruzada
    for train_index, test_index in kfold.split(X, y):
        
        x_train, x_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        
        #Entrenamiento
        kmedias.fit(X[train_index], y[train_index])
        
        moda = -1 * np.ones(c, dtype=int)
        
        for i in range(c):
            moda[i] = mode(y_train[np.argwhere(kmedias.labels_ == i)])[0].astype(int)
        
        #Prediccion
        y_predict = moda[kmedias.predict(x_test)]
        
        #Suma de las 10 tasas de acierto
        tasa += accuracy_score(y_test, y_predict)
        
    #Realizamos la media de la tasa de aciertos
    tasa /= 10
    tabla.loc[c] = [c, tasa]
    
tabla    

Unnamed: 0,n_clusters,Tasa_acierto
2,2.0,0.926222
3,3.0,0.905138
4,4.0,0.880576
5,5.0,0.924467
6,6.0,0.88584
7,7.0,0.88938
8,8.0,0.910432
9,9.0,0.915695
10,10.0,0.924436
11,11.0,0.931454


In [76]:
#Cluster optimo y maxima tasa de acierto
cluster_optimo = tabla.Tasa_acierto.argmax()
print("Cluster optimo = ", cluster_optimo + min_clusters, " con tasa de acierto = ", tabla.Tasa_acierto.iloc[cluster_optimo])

Cluster optimo =  28  con tasa de acierto =  0.9578320802005011


#### RBF con SVM en la capa de salida

Una vez encontrado el numero de neuronas optimo, procedemos a realizar el entrenamiento de la RBF mediante validacion cruzada.
Para el entrenamiento de la capa de salida, utilizaremos el algoritmo SVM con función núcleo polinómica.
Finalmente, para encontrar el grado optimo utilizaremos el metodo de ensayo y error.

In [77]:
#Distancias euclideas
from sklearn.metrics.pairwise import euclidean_distances

#SVM
from sklearn.svm import SVC

In [78]:
min_degree = 1
max_degree = 5 #Para exponentes mayores que 5 el sistema empieza a sobrecargarse

tablaFinal = pd.DataFrame(columns=['Grado','Tasa_de_Acierto'])

for c in range(min_degree, max_degree+1):
    
    tasa = 0.0
        
    for train_index, test_index in kfold.split(X,y):
        x_train, x_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        
        #Agrupamiento de K-Means ////Sumarle al n_clusters = cluster_optimo + min_clusters
        kmedias = KMeans(n_clusters=cluster_optimo, random_state=0)
        #Entrenamiento
        kmedias.fit(x_train)
        #Centros de cada cluster
        centros = kmedias.cluster_centers_

        #Matriz de distancias --- Sigma
        distancias = euclidean_distances(centros)
        sigma = np.sum(distancias, axis=0) / (centros.shape[0]-1)
        
        #Salida capa oculta -- Datos entrenamiento
        x_rbf_train = np.zeros((x_train.shape[0], centros.shape[0]), dtype=float)
        for i in range(x_train.shape[0]):
            for j in range(centros.shape[0]):
                x_rbf_train[i][j] = np.exp(-np.sum((x_train[i] - centros[j])**2) / (2.0*(sigma[j]**2)))
        
        #Salida capa oculta -- Datos test
        x_rbf_test = np.zeros((x_test.shape[0], centros.shape[0]), dtype=float)
        for i in range(x_test.shape[0]):
            for j in range(centros.shape[0]):
                x_rbf_test[i][j] = np.exp(-np.sum((x_test[i] - centros[j])**2) / (2.0*(sigma[j]**2)))
                
            
        #SVM --  Máquina de Vectores Soporte de función núcleo polinómica
        svm = SVC(kernel='poly', degree=c)
        svm.fit(x_rbf_train, y_train) #Entrenamiento
        #Prediccion
        y_predict = svm.predict(x_rbf_test)
        
        tasa += accuracy_score(y_test, y_predict)
    
    #Media de tasas
    tasa /= 10
    tablaFinal.loc[c] = [c, tasa]
    
#Tabla con resultados finales
tablaFinal
        

Unnamed: 0,Grado,Tasa_de_Acierto
1,1.0,0.942011
2,2.0,0.936748
3,3.0,0.927976
4,4.0,0.917419
5,5.0,0.917419


#### Grado Optimo y Max Tasa de acierto

In [79]:
grado_optimo = tablaFinal.Tasa_de_Acierto.argmax()
print("Grado optimo = ", grado_optimo + min_degree, " con tasa de acierto = ", tablaFinal.Tasa_de_Acierto.iloc[grado_optimo])


Grado optimo =  1  con tasa de acierto =  0.9420112781954886
