In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score

import pandas as pd
import numpy as np

In [4]:
cancer_data = load_breast_cancer()

In [5]:
df = pd.DataFrame(data=cancer_data.data, columns=cancer_data.feature_names)
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [6]:
cancer_data.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [7]:
cancer_data.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [8]:
df['target'] = cancer_data.target

In [9]:
df.target.value_counts()

1    357
0    212
Name: target, dtype: int64

In [10]:
lr = LogisticRegression()

In [11]:
trainX,testX,trainY,testY = train_test_split(cancer_data.data, cancer_data.target)

In [None]:
lr.fit(trainX,trainY)

In [13]:
lr.coef_

array([[ 1.92815758e+00,  2.13074538e-01, -1.15102003e-01,
         1.52496777e-04, -8.12940852e-02, -3.65928734e-01,
        -5.05247622e-01, -2.06508014e-01, -1.19032970e-01,
        -2.52075001e-02,  7.93950122e-02,  1.13003402e+00,
         3.33962915e-01, -1.13994953e-01, -7.58094825e-03,
        -7.94438314e-02, -1.08238658e-01, -2.69434175e-02,
        -3.34150639e-02, -7.62561226e-03,  1.99897424e+00,
        -4.03294190e-01, -1.73663553e-01, -2.66187901e-02,
        -1.46595021e-01, -1.15420823e+00, -1.37408648e+00,
        -3.92722957e-01, -3.98028355e-01, -1.16589292e-01]])

In [25]:
lr.intercept_

array([0.23928864])

In [24]:
pd.DataFrame({'features':cancer_data.feature_names, 'weights': lr.coef_.flatten()})

Unnamed: 0,features,weights
0,mean radius,1.195204
1,mean texture,0.408101
2,mean perimeter,0.427945
3,mean area,-0.024553
4,mean smoothness,-0.041764
5,mean compactness,-0.202502
6,mean concavity,-0.285056
7,mean concave points,-0.115887
8,mean symmetry,-0.054523
9,mean fractal dimension,-0.010852


In [11]:
pred = lr.predict(testX)

In [28]:
pd.DataFrame({'target':testY,'pred':pred})

Unnamed: 0,target,pred
0,1,1
1,1,1
2,1,1
3,1,1
4,0,0
...,...,...
138,0,0
139,0,0
140,1,1
141,1,1


In [31]:
pred_sigmoid = lr.predict_proba(testX)[:,1]
pd.DataFrame({'target':testY,'pred':pred_sigmoid})

Unnamed: 0,target,pred
0,1,0.999821
1,1,0.962946
2,1,0.998955
3,1,0.996431
4,0,0.001079
...,...,...
138,0,0.008639
139,0,0.000016
140,1,0.999288
141,1,0.907092


In [32]:
accuracy_score(y_pred=pred,y_true=testY)

0.9440559440559441

In [33]:
confusion_matrix(y_pred=pred,y_true=testY)

array([[47,  3],
       [ 5, 88]])

In [15]:
import numpy as np
def sigmoid(x):
    return 1.0/(1+np.exp(-x))

In [16]:
sigmoid(0.5)

0.6224593312018546

In [14]:
def softmax(x):
    return np.exp(x)/np.sum(np.exp(x))

In [15]:
softmax(np.array([-0.3,-0.4]))

array([0.52497919, 0.47502081])