# Predict breast cancer

In [5]:
import numpy as np
import pandas as pd 

import seaborn as sns
import matplotlib.pyplot as plt

## Classe de regressão logística
*Faz o trabalho pesado em regressão logística.*

In [6]:
class LogisticRegression:
    """Binary logistic-regression classifier trained with batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default 0.001
        Step size applied to every gradient update.
    n_iters : int, default 1000
        Number of full-batch gradient-descent iterations run by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Coefficient vector of shape (n_features,); ``None`` until ``fit`` runs.
    bias : float or None
        Intercept term; ``None`` until ``fit`` runs.
    """

    def __init__(self, learning_rate=0.001, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Fit weights and bias on ``X`` (n_samples, n_features) and labels ``y``.

        ``y`` may be 1-D or a column vector; it is flattened so that the
        prediction error stays 1-D instead of broadcasting to an
        (n_samples, n_samples) matrix.

        Raises
        ------
        ValueError
            If ``X`` has no rows or ``y`` does not hold one label per row.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        n_samples, n_features = X.shape

        if n_samples == 0:
            raise ValueError("X must contain at least one sample.")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"y has {y.shape[0]} labels but X has {n_samples} samples."
            )

        # init parameters
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # gradient descent
        for _ in range(self.n_iters):
            # linear combination of weights and x plus bias, squashed to (0, 1)
            y_predicted = self._sigmoid(np.dot(X, self.weights) + self.bias)
            error = y_predicted - y

            # gradients of the mean log-loss w.r.t. weights and bias
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # update parameters
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return an integer array of 0/1 labels: 1 where the probability exceeds 0.5."""
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        probabilities = self._sigmoid(linear_model)
        return (probabilities > 0.5).astype(int)

    def plot(self, X, y, legend):
        """Scatter the two features of ``X`` and draw the decision boundary.

        Points are coloured by ``y`` and styled by the model's predictions.
        ``legend`` supplies the x- and y-axis labels (two entries).

        Raises
        ------
        ValueError
            If ``X`` does not have exactly 2 columns.
        """
        X = np.asarray(X)
        # only plots if X refers to exactly 2 variables
        if X.shape[1] != 2:
            raise ValueError("Can plot only for X's that refers to exactly 2 vars.")

        predictions = self.predict(X)

        sns.set_style('white')
        sns.scatterplot(
            x=X[:, 0],
            y=X[:, 1],
            hue=np.asarray(y).reshape(-1),
            style=predictions.reshape(-1),
        )

        ax = plt.gca()
        # freeze the limits so the boundary line does not rescale the scatter
        ax.autoscale(False)

        w0, w1 = self.weights
        if w1 != 0:
            # boundary: w0*x + w1*y + bias = 0  ->  y = intercept + slope*x
            slope = -(w0 / w1)
            intercept = -(self.bias / w1)
            x_vals = np.array(ax.get_xlim())
            y_vals = intercept + (slope * x_vals)
            plt.plot(x_vals, y_vals, c="k")
        elif w0 != 0:
            # second weight is zero: the boundary is the vertical line x = -bias/w0
            plt.axvline(x=-(self.bias / w0), color="k")
        # both weights zero: there is no boundary to draw

        plt.xlabel(legend[0])
        plt.ylabel(legend[1])

    def _sigmoid(self, x):
        """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

        Only exp(-|x|) is ever computed, which lies in (0, 1], so very
        large positive or negative inputs cannot overflow.
        """
        x = np.asarray(x, dtype=float)
        z = np.exp(-np.abs(x))
        return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

## Importando a base
*Importa a base, seleciona as variáveis, limpa, normaliza.*


In [18]:
from sklearn.model_selection import train_test_split
from sklearn import datasets

# Load the breast-cancer data. With as_frame=True the returned Bunch exposes
# a pandas DataFrame (`.frame`) holding the feature columns plus a 'target'
# column. `df` stays bound to the Bunch itself because a later cell reads
# `df.feature_names`.
df = datasets.load_breast_cancer(as_frame=True)

# ======================================================== #
# ==================== Data selection ==================== #
# ======================================================== #
# 'X' must list columns that exist in this dataset. It defaults to every
# feature, in dataset order, so it lines up with `df.feature_names`.
# NOTE(review): 'normalizada' is True so the features stay raw, as `bc.data`
# supplied them in the original cell -- confirm this is the intended input.
# Set it to False to divide each feature by its column mean.
dados = {
    'X' : list(df.feature_names),
    'y' : 'target',
    'normalizada' : True
}
# ======================================================== #


# Keep only the selected columns and drop rows with missing values.
model_df = df.frame[ dados['X'] + [dados['y']] ].dropna()

if not dados['normalizada']:
    # scale every feature by its own mean (vectorised over all columns)
    model_df[dados['X']] = model_df[dados['X']] / model_df[dados['X']].mean()

X = model_df[ dados['X'] ].to_numpy()
# Selecting a single column yields a 1-D label vector directly.
y = model_df[ dados['y'] ].to_numpy()

# Fixed seed so the displayed preview is the same on every run.
model_df.sample(5, random_state=0)

## Aplicando a regressão logística
*Aplica a função e avalia a precisão da previsão.*

In [19]:
# Train on the full feature matrix, then label the same samples
# (this measures fit on the training data, not on held-out data).
regressor = LogisticRegression(
    n_iters=1_000,
    learning_rate=1e-4,
)
regressor.fit(X, y)
predictions = regressor.predict(X)

def accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``.

    Both inputs are converted to flat NumPy arrays first, so plain lists and
    column vectors are compared element by element instead of being compared
    as whole objects or broadcast against each other.

    Raises
    ------
    ValueError
        If the inputs are empty or do not hold the same number of labels.
    """
    true_labels = np.asarray(y_true).ravel()
    predicted_labels = np.asarray(y_pred).ravel()

    if true_labels.size != predicted_labels.size:
        raise ValueError(
            f"y_true has {true_labels.size} labels but y_pred has "
            f"{predicted_labels.size}."
        )
    if true_labels.size == 0:
        raise ValueError("Cannot compute accuracy of empty inputs.")

    return np.mean(true_labels == predicted_labels)

# Report the share of samples whose predicted label matches the true label.
print("A precisão do modelo é: {}".format(accuracy(y, predictions)))

A precisão do modelo é: 0.9086115992970123


## Plotando os resultados visualmente
*Disponível apenas quando o modelo usa exatamente 2 variáveis numéricas.*

In [20]:
# plot() raises ValueError when X does not have exactly 2 feature columns.
# Catch only that case so any other failure surfaces instead of being
# silently replaced by the fallback message.
try:
    regressor.plot(X, y, dados['X'])
except ValueError:
    print("Sem visualização disponível.")

Sem visualização disponível.


## Mostrando os pesos
*Cria uma tabela mostrando os pesos de avaliação das variáveis numéricas.*

In [23]:
# Scale the learned weights so their absolute values sum to 1.
raw_weights = pd.DataFrame(regressor.weights)
abs_total = raw_weights.abs().sum()
norma_pesos = (raw_weights / abs_total)[0].values.tolist()

# One row per feature, labelled with the dataset's feature names.
dfpesos = pd.DataFrame(
    {'Pesos': norma_pesos},
    index=df.feature_names,
)

dfpesos

Unnamed: 0,Pesos
mean radius,0.038284
mean texture,0.054792
mean perimeter,0.223411
mean area,0.101491
mean smoothness,0.000358
mean compactness,-0.000134
mean concavity,-0.000659
mean concave points,-0.000282
mean symmetry,0.000681
mean fractal dimension,0.000283
