# Preparação dos dados

In [2]:
from sklearn.metrics import classification_report
from sklearn import datasets

cancer = datasets.load_breast_cancer()

In [3]:
print("Features: ", cancer.feature_names)
print("Labels: ", cancer.target_names)

Features:  ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Labels:  ['malignant' 'benign']


In [4]:
cancer.data.shape

(569, 30)

In [5]:
cancer.target.shape

(569,)

In [6]:
print(cancer.data[:5])

[[1.799e+01 1.038e+01 1.228e+02 1.001e+03 1.184e-01 2.776e-01 3.001e-01
  1.471e-01 2.419e-01 7.871e-02 1.095e+00 9.053e-01 8.589e+00 1.534e+02
  6.399e-03 4.904e-02 5.373e-02 1.587e-02 3.003e-02 6.193e-03 2.538e+01
  1.733e+01 1.846e+02 2.019e+03 1.622e-01 6.656e-01 7.119e-01 2.654e-01
  4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 1.326e+03 8.474e-02 7.864e-02 8.690e-02
  7.017e-02 1.812e-01 5.667e-02 5.435e-01 7.339e-01 3.398e+00 7.408e+01
  5.225e-03 1.308e-02 1.860e-02 1.340e-02 1.389e-02 3.532e-03 2.499e+01
  2.341e+01 1.588e+02 1.956e+03 1.238e-01 1.866e-01 2.416e-01 1.860e-01
  2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 1.203e+03 1.096e-01 1.599e-01 1.974e-01
  1.279e-01 2.069e-01 5.999e-02 7.456e-01 7.869e-01 4.585e+00 9.403e+01
  6.150e-03 4.006e-02 3.832e-02 2.058e-02 2.250e-02 4.571e-03 2.357e+01
  2.553e+01 1.525e+02 1.709e+03 1.444e-01 4.245e-01 4.504e-01 2.430e-01
  3.613e-01 8.758e-02]
 [1.142e+01 2.038e+01 7.758e+01 3.861e+02 1.425e-01 2.839e-01 2.414

In [7]:
print(cancer.target[:5])

[0 0 0 0 0]


In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.3, random_state=109)

# SVM

In [9]:
from sklearn import svm

def model_svm(x_train, y_train, x_test, y_test):
  C = 1.0

  models = (svm.LinearSVC(C=C, max_iter=10000),
            svm.SVC(kernel='linear', C=C),
            svm.SVC(kernel='rbf', gamma=0.7, C=C))
  
  for clf in models:
    clf.fit(x_train, y_train)
    prediction = clf.predict(x_test) 
    print(classification_report(y_test, prediction))

model_svm(X_train, y_train, X_test, y_test)



              precision    recall  f1-score   support

           0       0.95      0.94      0.94        63
           1       0.96      0.97      0.97       108

    accuracy                           0.96       171
   macro avg       0.96      0.95      0.96       171
weighted avg       0.96      0.96      0.96       171

              precision    recall  f1-score   support

           0       0.94      0.97      0.95        63
           1       0.98      0.96      0.97       108

    accuracy                           0.96       171
   macro avg       0.96      0.97      0.96       171
weighted avg       0.97      0.96      0.97       171

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        63
           1       0.63      1.00      0.77       108

    accuracy                           0.63       171
   macro avg       0.32      0.50      0.39       171
weighted avg       0.40      0.63      0.49       171



  _warn_prf(average, modifier, msg_start, len(result))


# KNN

In [8]:
from sklearn.neighbors import KNeighborsClassifier

def model_knn(x_train, y_train, x_test, y_test):

  knn = KNeighborsClassifier(algorithm='auto', n_neighbors=8, p=1, weights='distance')

  knn.fit(x_train, y_train) 
  y_predict = knn.predict(x_test)

  print(classification_report(y_test, y_predict))

model_knn(X_train, y_train, X_test, y_test)

              precision    recall  f1-score   support

           0       0.98      0.95      0.97        63
           1       0.97      0.99      0.98       108

    accuracy                           0.98       171
   macro avg       0.98      0.97      0.97       171
weighted avg       0.98      0.98      0.98       171



 # Regressão Linear

In [21]:
from sklearn.linear_model import LinearRegression

def model_linear_regression(x_train, y_train, x_test, y_test):

  lr = LinearRegression().fit(x_train, y_train)

  prediction = lr.predict(x_test)
  prediction = prediction.astype(int)
  print(classification_report(y_test, prediction))

model_linear_regression(X_train, y_train, X_test, y_test)

              precision    recall  f1-score   support

           0       0.46      1.00      0.63        63
           1       1.00      0.32      0.49       108

    accuracy                           0.57       171
   macro avg       0.73      0.66      0.56       171
weighted avg       0.80      0.57      0.54       171



# Redes Neurais

In [49]:
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def model_mlp(x_train, y_train, x_test, y_test):

  input_shape = (x_train.shape[1],)

  # Create the model
  model = Sequential()
  model.add(Dense(2048, input_shape=input_shape, activation='linear'))
  model.add(Dense(1, activation='sigmoid'))

  model.compile(loss = 'binary_crossentropy',  optimizer = 'adam', metrics=['accuracy'])
  
  model.summary()

  model.fit(x_train, y_train,
            epochs = 20,
            batch_size = int(len(x_train) * 0.05),
            verbose = True)
  
  y_predict = model.predict(x_test)
  y_predict = y_predict.astype(int)

  print(classification_report(y_test, y_predict))

model_mlp(X_train, y_train, X_test, y_test)

Model: "sequential_32"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_73 (Dense)             (None, 2048)              63488     
_________________________________________________________________
dense_74 (Dense)             (None, 1)                 2049      
Total params: 65,537
Trainable params: 65,537
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
              precision    recall  f1-score   support

           0       1.00      0.83      0.90        63
           1       0.91      1.00      0.95       108

    accuracy                           0.94       171
   macro avg       0.95      0.91      0.93       171
weighted

# Comparação dos resultados

|   Modelo  | LinearSVC | SVC - Linear | SVC - RBF |  KNN | Regressão Linear | Rede Neural |
|:---------:|:---------:|:------------:|:---------:|:----:|:----------------:|:-----------:|
| Precision |    0.96   |     0.96     |    0.32   | **0.98** |       0.73       |     0.95    |
|   Recall  |    0.95   |     **0.97**     |    0.50   | **0.97** |       0.66       |     0.91    |
|  F1 Score |    0.96   |     0.96     |    0.39   | **0.97** |       0.56       |     0.93    |