In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn import svm

In [2]:
headers = [
    "age","sex","cp","trestbps","chol","fbs","restecg",
    "thalach","exang","oldpeak","slope","ca","thal",
    "num - the predicted attribute"
]

data = pd.read_csv("processed.cleveland.csv", header=None, names = headers)
# Eliminamos las filas que contienen datos malos o desconocidos
data = data[ (data["ca"] != '?') & (data["thal"] != '?')]
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num - the predicted attribute
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
297,57.0,0.0,4.0,140.0,241.0,0.0,0.0,123.0,1.0,0.2,2.0,0.0,7.0,1
298,45.0,1.0,1.0,110.0,264.0,0.0,0.0,132.0,0.0,1.2,2.0,0.0,7.0,1
299,68.0,1.0,4.0,144.0,193.0,1.0,0.0,141.0,0.0,3.4,2.0,2.0,7.0,2
300,57.0,1.0,4.0,130.0,131.0,0.0,0.0,115.0,1.0,1.2,2.0,1.0,7.0,3


In [3]:
# Separar los inputs del output
X = data.iloc[0:,:13]

# Como son valores del 0 al 4 convertimos la data para tener mejores resultados
d = data.iloc[0:,13]

y = []
for i in d.values:
    aux = [0 for _ in range(5)]
    aux[i] = 1
    y.append(aux)
y = np.array(y)

# Obetener datos de entrenamiento y datos de prueba
X_train, X_test, y_train, y_test = train_test_split(X.values, y, random_state=1, test_size=0.25)

## Clasificacion sin preprocesar la data

In [4]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.01,
    momentum = 0.9,
    max_iter=3000).fit(X.values, y)

clf.score(X_test,y_test)

0.38666666666666666

In [5]:
clf = MLPClassifier(
    hidden_layer_sizes=(20,20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.01,
    momentum = 0.9,
    max_iter=3000).fit(X.values, y)

clf.score(X_test,y_test)

0.3466666666666667

In [6]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.001,
    momentum = 0.9,
    max_iter=3000).fit(X.values, y)

clf.score(X_test,y_test)

0.36

In [7]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.001,
    momentum = 0.1,
    max_iter=3000).fit(X.values, y)

clf.score(X_test,y_test)

0.5466666666666666

In [8]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.001,
    momentum = 0.09,
    max_iter=3000).fit(X.values, y)

clf.score(X_test,y_test)

0.5466666666666666

In [9]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='relu',
    solver='sgd',
    learning_rate_init=0.01,
    momentum = 0.9,
    max_iter=3000).fit(X.values, y)

clf.score(X_test,y_test)

0.5466666666666666

In [10]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='logistic',
    solver='adam',
    learning_rate_init=0.01,
    momentum = 0.9,
    max_iter=3000).fit(X.values, y)

clf.score(X_test,y_test)

0.49333333333333335

In [11]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    momentum = 0.9,
    max_iter=3000).fit(X.values, y)

clf.score(X_test,y_test)

0.5066666666666667

In [12]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    momentum = 0.1,
    max_iter=3000).fit(X.values, y)

clf.score(X_test,y_test)

0.52

### Clasificacion con procesamiento escalando los datos entre 0 y 1 dividiendo entre el maximo de cada campo.

In [13]:
max_scaler = preprocessing.MaxAbsScaler()
x_scaled = max_scaler.fit_transform(X.values)
Xmax = pd.DataFrame(x_scaled)

# Obetener datos de entrenamiento y datos de prueba
X_train, X_test, y_train, y_test = train_test_split(Xmax, y, random_state=1, test_size=0.25)

In [18]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.01,
    momentum = 0.9,
    max_iter=3000).fit(Xmax, y)

clf.score(X_test,y_test)

0.5466666666666666

In [19]:
clf = MLPClassifier(
    hidden_layer_sizes=(20,20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.01,
    momentum = 0.9,
    max_iter=3000).fit(Xmax, y)

clf.score(X_test,y_test)

0.5466666666666666

In [20]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.001,
    momentum = 0.9,
    max_iter=3000).fit(Xmax, y)

clf.score(X_test,y_test)

0.5466666666666666

In [21]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.001,
    momentum = 0.1,
    max_iter=3000).fit(Xmax, y)

clf.score(X_test,y_test)

0.5466666666666666

In [22]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.0001,
    momentum = 0.1,
    max_iter=3000).fit(Xmax, y)

clf.score(X_test,y_test)



0.5466666666666666

In [23]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='tanh',
    solver='sgd',
    learning_rate_init=0.001,
    momentum = 0.1,
    max_iter=3000).fit(Xmax, y)

clf.score(X_test,y_test)

0.4266666666666667

In [24]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='logistic',
    solver='adam',
    learning_rate_init=0.001,
    momentum = 0.1,
    max_iter=3000).fit(Xmax, y)

clf.score(X_test,y_test)

0.5333333333333333

In [25]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='tanh',
    solver='adam',
    learning_rate_init=0.001,
    momentum = 0.1,
    max_iter=3000).fit(Xmax, y)

clf.score(X_test,y_test)



0.6

### Clasificacion con procesamiento escalando los datos entre 0 y 1 usando la tecnica del MinMax de cada campo.

In [26]:
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(X.values)
Xminmax = pd.DataFrame(x_scaled)

# Obetener datos de entrenamiento y datos de prueba
X_train, X_test, y_train, y_test = train_test_split(Xminmax, y, random_state=1, test_size=0.25)

In [27]:
clf = MLPClassifier(
    hidden_layer_sizes=(20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.01,
    momentum = 0.9,
    max_iter=3000).fit(Xminmax, y)

clf.score(X_test,y_test)

0.5066666666666667

In [28]:
clf = MLPClassifier(
    hidden_layer_sizes=(20,20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.01,
    momentum = 0.9,
    max_iter=3000).fit(Xminmax, y)

clf.score(X_test,y_test)

0.5466666666666666

In [29]:
clf = MLPClassifier(
    hidden_layer_sizes=(20,20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.001,
    momentum = 0.9,
    max_iter=3000).fit(Xminmax, y)

clf.score(X_test,y_test)

0.5466666666666666

In [30]:
clf = MLPClassifier(
    hidden_layer_sizes=(20,20),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.01,
    momentum = 0.1,
    max_iter=3000).fit(Xminmax, y)

clf.score(X_test,y_test)

0.5466666666666666

In [31]:
clf = MLPClassifier(
    hidden_layer_sizes=(20,20),
    activation='tanh',
    solver='sgd',
    learning_rate_init=0.01,
    momentum = 0.9,
    max_iter=3000).fit(Xminmax, y)

clf.score(X_test,y_test)



0.9333333333333333

In [32]:
clf = MLPClassifier(
    hidden_layer_sizes=(20,20),
    activation='tanh',
    solver='adam',
    learning_rate_init=0.01,
    momentum = 0.9,
    max_iter=3000).fit(Xminmax, y)

clf.score(X_test,y_test)

0.9866666666666667

In [33]:
clf = MLPClassifier(
    hidden_layer_sizes=(20,20),
    activation='logistic',
    solver='adam',
    learning_rate_init=0.01,
    momentum = 0.9,
    max_iter=3000).fit(Xminmax, y)

clf.score(X_test,y_test)



1.0