In [58]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler


<h1>Carga de datos</h1>

In [59]:
# Load scikit-learn's bundled breast-cancer dataset object.
data = load_breast_cancer()


In [60]:
# Keep the feature names at hand and print them for inspection.
column_names = data.feature_names
print(column_names)

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


In [61]:
# Working frame: one column per feature, with the target appended as "Diagnosis".
df = pd.DataFrame(data=data.data, columns=data.feature_names).assign(
    Diagnosis=data.target
)

In [62]:
# Separate the target column from the feature columns.
y = df["Diagnosis"]
X = df.drop(columns=["Diagnosis"])

In [63]:
# Empty results table: one column per model, filled in by the sections below.
dataframe_resultados = pd.DataFrame(
    columns=[
        "Arbol de decision",
        "MLP",
        "Regresion logistica",
        "SGD Clasificador",
        "SVM",
        "Bosque aleatorio",
        "Kneightbours",
        "Red neuronal",
    ]
)

In [64]:
# Standardize the features with StandardScaler; X is rebound to the scaled result.
# NOTE(review): the scaler is fit on every row before the train/test split made
# later in the notebook, so test rows contribute to the scaling statistics.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [65]:
# Show the dtype of every column in the working frame.
print(df.dtypes)

mean radius                float64
mean texture               float64
mean perimeter             float64
mean area                  float64
mean smoothness            float64
mean compactness           float64
mean concavity             float64
mean concave points        float64
mean symmetry              float64
mean fractal dimension     float64
radius error               float64
texture error              float64
perimeter error            float64
area error                 float64
smoothness error           float64
compactness error          float64
concavity error            float64
concave points error       float64
symmetry error             float64
fractal dimension error    float64
worst radius               float64
worst texture              float64
worst perimeter            float64
worst area                 float64
worst smoothness           float64
worst compactness          float64
worst concavity            float64
worst concave points       float64
worst symmetry      

In [66]:
# Count the rows for each Diagnosis value to see the class balance.
element_counts = df.groupby('Diagnosis').size()
print(element_counts)

Diagnosis
0    212
1    357
dtype: int64


In [67]:
# Hold-out split (80 % train / 20 % test, fixed seed) on the raw, unscaled
# features. The scaler is then fit on the training rows only and applied to
# both parts, so test-set statistics never influence the preprocessing.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns="Diagnosis"), y, test_size=0.2, random_state=42
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

<h1>Hold-out con árbol de decisión</h1>

In [68]:
# Decision tree classifier with a fixed seed.
model = DecisionTreeClassifier(random_state=42)


In [69]:
# Fit the current model on the training split.
model.fit(X_train, y_train)

In [70]:
# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

In [71]:
# Hold-out accuracy of the decision tree on the test split.
accuracy = accuracy_score(y_test, y_pred)
print(f"Precisión del modelo: {accuracy:.2f}")
# Assign a one-element list rather than writing a single cell: on the still-empty
# results frame this creates row 0 and stores the column as float64 from the start.
dataframe_resultados["Arbol de decision"] = [float(accuracy)]

Precisión del modelo: 0.95


<h1>Hold out con MLP</h1>

In [72]:
from sklearn.neural_network import MLPClassifier

In [73]:
# Multi-layer perceptron with two hidden layers (100 and 50 units), at most
# 500 iterations and a fixed seed; it is fitted on the training split.
model = MLPClassifier(
    hidden_layer_sizes=(100, 50),
    max_iter=500,
    random_state=42,
)
model.fit(X_train, y_train)

In [74]:
# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

In [75]:
# Hold-out accuracy of the MLP on the test split.
accuracy = accuracy_score(y_test, y_pred)
print(f"Precisión del modelo: {accuracy:.2f}")
# A one-element list (instead of a bare scalar) also records the value when the
# results frame has no rows yet; a scalar would leave the column empty then.
dataframe_resultados["MLP"] = [accuracy]

Precisión del modelo: 0.97


<h1>Hold-out con regresión logística</h1>

In [76]:
from sklearn.linear_model import LogisticRegression


In [77]:
# Logistic regression with the library's default settings.
algoritmo = LogisticRegression()

In [78]:
# Fit the logistic regression on the training split.
algoritmo.fit(X_train, y_train)


In [79]:

# Predict labels for the held-out test split.
y_pred = algoritmo.predict(X_test)

In [80]:
# Hold-out accuracy of the logistic regression on the test split.
accuracy = accuracy_score(y_test, y_pred)
print(f"Precisión del modelo: {accuracy:.2f}")
# A one-element list (instead of a bare scalar) also records the value when the
# results frame has no rows yet; a scalar would leave the column empty then.
dataframe_resultados["Regresion logistica"] = [accuracy]

Precisión del modelo: 0.97


<h1>Hold-out con clasificador SGD</h1>

In [81]:
from sklearn.linear_model import SGDClassifier

In [82]:
# Linear classifier trained with stochastic gradient descent on the logistic
# loss. The loss is spelled 'log_loss': the old alias 'log' was deprecated in
# scikit-learn 1.1 and removed in 1.3.
model = SGDClassifier(loss='log_loss', max_iter=1000, random_state=42)

In [83]:
# Fit the current model on the training split.
model.fit(X_train, y_train)



In [84]:
# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

In [85]:
# Hold-out accuracy of the SGD classifier on the test split.
accuracy = accuracy_score(y_test, y_pred)
print(f"Precisión del modelo: {accuracy:.2f}")
# A one-element list (instead of a bare scalar) also records the value when the
# results frame has no rows yet; a scalar would leave the column empty then.
dataframe_resultados["SGD Clasificador"] = [accuracy]


Precisión del modelo: 0.94


<h1>Hold out con SVM</h1>

In [86]:
from sklearn.svm import SVC

In [87]:
# Support vector classifier with a linear kernel and a fixed seed.
svm = SVC(kernel='linear', random_state=42)

In [88]:

# Fit the SVM on the training split.
svm.fit(X_train, y_train)

In [89]:
# Predict labels for the held-out test split.
y_pred = svm.predict(X_test)

In [90]:
# Hold-out accuracy of the SVM on the test split.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión en el conjunto de prueba: {accuracy:.2f}')
# A one-element list (instead of a bare scalar) also records the value when the
# results frame has no rows yet; a scalar would leave the column empty then.
dataframe_resultados["SVM"] = [accuracy]

Precisión en el conjunto de prueba: 0.96


<h1>Hold out con bosque aleatorio</h1>

In [91]:
from sklearn.ensemble import RandomForestClassifier

In [92]:
# Random forest of 100 trees with a fixed seed.
model = RandomForestClassifier(n_estimators=100, random_state=42)


In [93]:
# Fit the current model on the training split.
model.fit(X_train, y_train)

In [94]:
# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

In [95]:
# Hold-out accuracy of the random forest on the test split.
accuracy = accuracy_score(y_test, y_pred)
print(f"Precisión del modelo: {accuracy:.2f}")
# A one-element list (instead of a bare scalar) also records the value when the
# results frame has no rows yet; a scalar would leave the column empty then.
dataframe_resultados["Bosque aleatorio"] = [accuracy]

Precisión del modelo: 0.96


<h1>Hold-out con K-Neighbors (KNN)</h1>

In [96]:
from sklearn.neighbors import KNeighborsClassifier

In [97]:
# k-nearest-neighbours classifier configured with 10 neighbours.
knn_classifier = KNeighborsClassifier(n_neighbors=10)

In [98]:
# Fit the KNN classifier on the training split.
knn_classifier.fit(X_train, y_train)

In [99]:
# Predict labels for the held-out test split.
y_pred = knn_classifier.predict(X_test)

In [100]:
# Hold-out accuracy of the KNN classifier on the test split.
accuracy = accuracy_score(y_test, y_pred)
print(f"Precisión del modelo: {accuracy:.2f}")
# A one-element list (instead of a bare scalar) also records the value when the
# results frame has no rows yet; a scalar would leave the column empty then.
dataframe_resultados["Kneightbours"] = [accuracy]



Precisión del modelo: 0.96


<h1>Hold out con una red neuronal</h1>

In [101]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import accuracy_score

In [102]:
# Seed Python, NumPy and TensorFlow before the network is built so that weight
# initialisation and batch shuffling repeat across runs, in line with the
# fixed random_state=42 used for the other models in this notebook.
# NOTE(review): some GPU kernels may still be non-deterministic.
keras.utils.set_random_seed(42)

# Feed-forward binary classifier: two ReLU hidden layers (64 and 32 units)
# followed by a single sigmoid output unit.
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

In [103]:
# Compile with the Adam optimizer and binary cross-entropy loss, tracking accuracy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [104]:
# Train for 50 epochs in batches of 16; verbose=1 prints progress for each epoch.
model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x2107a7da8d0>

In [105]:
# Threshold the sigmoid outputs at 0.5 to obtain 0/1 labels. The network
# returns one column per sample, so flatten to 1-D to match the shape of
# y_test and of the predictions produced by the other models.
y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()



In [106]:
# Hold-out accuracy of the neural network on the test split.
accuracy = accuracy_score(y_test, y_pred)
print(f"Precisión del modelo: {accuracy:.2f}")
# A one-element list (instead of a bare scalar) also records the value when the
# results frame has no rows yet; a scalar would leave the column empty then.
dataframe_resultados["Red neuronal"] = [accuracy]

Precisión del modelo: 0.96


<h1>Resultado final</h1>

In [107]:
# Show the accuracy collected for every model.
print(dataframe_resultados)

  Arbol de decision       MLP  Regresion logistica  SGD Clasificador      SVM   
0          0.947368  0.973684             0.973684          0.938596  0.95614  \

   Bosque aleatorio  Kneightbours  Red neuronal  
0          0.964912       0.95614      0.964912  


In [108]:
# Average accuracy across all models, taken from the first results row.
primera_fila = dataframe_resultados.iloc[0]
media = primera_fila.mean()
print(media)


0.9594298245614035


In [109]:
# Cast the decision-tree column to float so it matches the other columns.
dataframe_resultados = dataframe_resultados.astype({'Arbol de decision': float})

In [110]:
# Confirm the dtype of every results column.
print(dataframe_resultados.dtypes)

Arbol de decision      float64
MLP                    float64
Regresion logistica    float64
SGD Clasificador       float64
SVM                    float64
Bosque aleatorio       float64
Kneightbours           float64
Red neuronal           float64
dtype: object


In [111]:
# Per row: the best accuracy and the name of the model that achieved it.
max_values = dataframe_resultados.max(axis="columns")
max_columns = dataframe_resultados.idxmax(axis="columns")
# Row label 0 holds the single hold-out run recorded above.
print(max_columns.loc[0])

MLP
