In [12]:
# Paso 1. Librerias y carga de datos

from sklearn.model_selection import train_test_split # Para separar los datos en entrenamiento y prueba
from sklearn.preprocessing import StandardScaler # Para escalar los datos
from sklearn.datasets import load_breast_cancer # Para los datos que voy a usar
from sklearn.neural_network import MLPClassifier # Libreria Red Neuronal
from sklearn.metrics import classification_report, confusion_matrix # para evaluación

In [2]:
cancer = load_breast_cancer()

In [3]:
cancer.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [4]:
# Paso 2. Análisis exploratorio de mis datos
print(cancer['DESCR'])

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

:Number of Instances: 569

:Number of Attributes: 30 numeric, predictive attributes and the class

:Attribute Information:
    - radius (mean of distances from center to points on the perimeter)
    - texture (standard deviation of gray-scale values)
    - perimeter
    - area
    - smoothness (local variation in radius lengths)
    - compactness (perimeter^2 / area - 1.0)
    - concavity (severity of concave portions of the contour)
    - concave points (number of concave portions of the contour)
    - symmetry
    - fractal dimension ("coastline approximation" - 1)

    The mean, standard error, and "worst" or largest (mean of the three
    worst/largest values) of these features were computed for each image,
    resulting in 30 features.  For instance, field 0 is Mean Radius, field
    10 is Radius SE, field 20 is Worst Radius.

    - 

In [5]:
cancer['data'].shape

(569, 30)

In [6]:
cancer['target']

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [7]:
# Paso 3. División de dataset y entrenamiento del modelo

X = cancer['data']
y = cancer['target']

In [8]:
# Separar en datos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X,y)

In [9]:
# Creando un escalador para normalizr los datos, ya que las redes neuronales son muy sensibles a las escalas
scaler = StandardScaler()

In [10]:
# Usando matriz de entrenamiento para escalar
scaler.fit(X_train)

In [11]:
# Aplicar la transformación (escalación) a mis matrices de entrenamiento y prueba
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
# Modelo vacio
mlp = MLPClassifier(hidden_layer_sizes = (30,30,30))

In [14]:
# Entrenar el modelo con los datos de entrenamiento
mlp.fit(X_train, y_train)

In [15]:
# Para ver como quedaron los parametros

mlp.get_params()

{'activation': 'relu',
 'alpha': 0.0001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (30, 30, 30),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 200,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [16]:
# Paso 4. Predicciones

predictions = mlp.predict(X_test)

In [17]:
# Paso 5. Evaluación

print(confusion_matrix(y_test, predictions))

[[52  1]
 [ 3 87]]


In [18]:
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.95      0.98      0.96        53
           1       0.99      0.97      0.98        90

    accuracy                           0.97       143
   macro avg       0.97      0.97      0.97       143
weighted avg       0.97      0.97      0.97       143



In [20]:
len(mlp.coefs_)

4

In [21]:
len(mlp.coefs_[0])

30

In [23]:
mlp.coefs_[0][25]

array([-0.02634381,  0.05517396,  0.25797666, -0.03366445,  0.36609903,
       -0.04926053, -0.0045399 , -0.29108736,  0.18973636, -0.09792686,
       -0.16237463,  0.23076933, -0.14956234, -0.12499331, -0.22927803,
       -0.02116007, -0.24913856, -0.25125275, -0.06664379,  0.29840983,
       -0.09377574,  0.22892971,  0.34508768, -0.08866781,  0.11232771,
       -0.14118469, -0.23735256,  0.0711737 ,  0.2913568 , -0.24223307])

In [24]:
len(mlp.intercepts_)

4

In [25]:
len(mlp.intercepts_[0])

30

In [27]:
mlp.intercepts_[0][25]

-0.009689792659113277