In [None]:
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset object shipped with scikit-learn.
breast_cancer = load_breast_cancer()
# Last expression of the cell: displays the keys available on the loaded object.
breast_cancer.keys()

In [None]:
# Pull the two arrays used by the rest of the notebook out of the dataset object.
X = breast_cancer.data
y = breast_cancer.target
# Cell output: the dimensions of both arrays.
X.shape, y.shape

In [None]:
import matplotlib.pyplot as plt

# One figure per column for the first three columns of X, with the value of y
# on the vertical axis.
for i in range(3):
    fig, ax = plt.subplots()
    ax.scatter(X[:, i], y)
    ax.set_xlabel(f'{i}: {breast_cancer.feature_names[i]}')
    ax.set_ylabel('Classificação')
    plt.show()


In [None]:
# Slicing with 27:28 (rather than indexing with 27) keeps column 27 as a
# two-dimensional, single-column array.
X27 = X[:, 27:28]

# Scatter of that single column against y.
fig, ax = plt.subplots()
ax.scatter(X27, y)
ax.set_xlabel(f'27: {breast_cancer.feature_names[27]}')
ax.set_ylabel('Classificação')
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Linear regression adapted to produce binary class labels
class ClassificadorLinear():
    """Binary classifier that thresholds the output of a ``LinearRegression``.

    Parameters
    ----------
    limiar : float, default 0.5
        Cut-off applied to the regression output: values strictly greater
        than it are mapped to 1, everything else to 0.

    Attributes
    ----------
    lrmodel : LinearRegression
        The wrapped regression model; its fitted state is what ``predict`` uses.
    """

    def __init__(self, limiar=0.5):
        self.lrmodel = LinearRegression()
        self.limiar = limiar

    def fit(self, X, y):
        """Fit the wrapped regression on ``X`` and ``y``.

        Returns
        -------
        self
            The classifier itself, so calls can be chained
            (``ClassificadorLinear().fit(X, y).predict(X)``).
        """
        self.lrmodel.fit(X, y)
        return self

    def predict(self, X):
        """Return integer 0/1 predictions for ``X``.

        The continuous regression output is compared against ``self.limiar``
        and the resulting booleans are cast to ``int``.
        """
        yPred = self.lrmodel.predict(X)
        return (yPred > self.limiar).astype(int)

# Fit the thresholded linear model on column 27 alone and score it on the
# same data it was fitted on.
modelo = ClassificadorLinear()
modelo.fit(X27, y)
yPred = modelo.predict(X27)
erro = mean_squared_error(y, yPred)

# Draw y against the column, then overlay the predictions as semi-transparent
# red dots; the legend carries the error value.
fig, ax = plt.subplots()
ax.scatter(X27, y)
ax.plot(X27, yPred, 'ro', label=f'erro: {erro}', alpha=0.5)
ax.set_xlabel(f'27: {breast_cancer.feature_names[27]}')
ax.set_ylabel('Classificação')
ax.legend()
plt.show()

In [None]:
# Fraction of samples whose prediction equals the true label
acuracia = (y == yPred).sum() / len(y)
# Assuming y holds 0/1 labels (yPred is 0/1 by construction), the squared error
# of a sample is 1 exactly when it is misclassified, so accuracy equals 1 - MSE.
acuracia, 1 - erro

In [None]:
from sklearn.metrics import accuracy_score
# Same labels and predictions as above, scored with scikit-learn's accuracy_score.
print(accuracy_score(y, yPred))

In [None]:
# Multivariate refit: a new classifier trained on every column of X.
# This rebinds `modelo`, replacing the model fitted on column 27 only.
modelo = ClassificadorLinear()
modelo.fit(X, y)

In [None]:
# Análise da função de erro
import numpy as np

# Half-width of the sweep window around each fitted coefficient: delta[i] is
# subtracted from and added to coef_[i] by plot(i).
# NOTE(review): the list has 31 entries; confirm this against the number of
# fitted coefficients, since entries at indices >= len(coef_) are never read.
delta = [0.125, 0.1, 0.02, 0.005, 25, 25, 50, 50, 20, 50, 10, 4, 1, 0.05, 600,
200, 100, 200, 200, 1000, 0.1, 0.1, 0.02, 0.002, 25, 10, 10, 15, 10, 30, 1]

def plot(i, abaixo=None, acima=None, pontos=101):
    """Plot the error obtained while sweeping coefficient ``i`` around its fitted value.

    The coefficient ``modelo.lrmodel.coef_[i]`` is temporarily overwritten with
    each value of an evenly spaced grid, the model is re-evaluated on ``X`` and
    the mean squared error against ``y`` is recorded. The fitted value is
    always written back afterwards, even if evaluation fails or is interrupted.

    Parameters
    ----------
    i : int
        Index of the coefficient to sweep.
    abaixo : float, optional
        Distance below the fitted value where the grid starts.
        Defaults to ``delta[i]``.
    acima : float, optional
        Distance above the fitted value where the grid ends.
        Defaults to ``delta[i]``.
    pontos : int, default 101
        Number of grid points.

    Notes
    -----
    Reads the notebook-level names ``modelo``, ``X``, ``y`` and ``delta``.
    """
    coef = modelo.lrmodel.coef_[i]
    # delta is only consulted for the side(s) the caller did not specify.
    abaixo = delta[i] if abaixo is None else abaixo
    acima = delta[i] if acima is None else acima
    ws = np.linspace(coef - abaixo, coef + acima, pontos)

    erros = []
    try:
        for w in ws:
            modelo.lrmodel.coef_[i] = w
            erros.append(mean_squared_error(y, modelo.predict(X)))
    finally:
        # The sweep edits the shared model in place; restore the fitted value
        # unconditionally so a failure cannot leave the model perturbed.
        modelo.lrmodel.coef_[i] = coef

    plt.ylabel('Erro')
    plt.xlabel(f'W[{i}]')
    plt.plot(ws, erros)
    plt.show()

# One error curve per fitted coefficient, each in its own figure.
for i in range(modelo.lrmodel.coef_.shape[0]):
    plot(i)

In [None]:
# Zoomed-in error curve for the coefficient at index 13
i = 13
erros = []
coef = modelo.lrmodel.coef_[i]
# Asymmetric window around the fitted value: 0.007 below it, 0.002 above it.
ws = np.linspace(coef-0.007, coef+0.002, 101)
try:
    for w in ws:
        modelo.lrmodel.coef_[i] = w
        # NOTE: yPred and erro are notebook-level names, so after this cell they
        # hold the values for the last grid point, not for the fitted model.
        yPred = modelo.predict(X)
        erro = mean_squared_error(y, yPred)
        erros.append(erro)
finally:
    # The sweep edits the shared model in place; write the fitted value back
    # even if evaluation fails or the cell is interrupted.
    modelo.lrmodel.coef_[i] = coef
plt.ylabel('Erro')
plt.xlabel(f'W[{i}]')
plt.plot(ws, erros)
plt.show()