In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier

## Загрузим готовый датасет Breast Cancer из библиотеки Sklearn и разделим данные на train/test

In [2]:
# Load the Breast Cancer dataset as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)

# Hold out 15% of the samples for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    shuffle=True,
    random_state=0,
)

# Standardize the data: the scaler is fitted on the training part only
# and the same fitted scaler is then applied to both parts.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Логистическая регрессия при помощи класса SGDClassifier без регуляризции

In [12]:
# Logistic regression trained with SGD and no regularization term.
# random_state pins the stochastic training so the accuracy and weights
# printed below are reproducible on every run.
# NOTE(review): newer scikit-learn releases spell these options
# loss='log_loss' and penalty=None — update them when upgrading.
log_regr = SGDClassifier(loss='log', penalty='none', random_state=0)
log_regr.fit(X_train, y_train)

# Predict on both splits so train and test accuracy can be compared.
y_train_pred = log_regr.predict(X_train)
y_test_pred = log_regr.predict(X_test)

print("Train accuracy: ", accuracy_score(y_train,y_train_pred))
print("Test accuracy: ", accuracy_score(y_test,y_test_pred))

# Learned coefficients of the fitted model.
print("Weights:\n", log_regr.coef_)

Train accuracy:  0.9855072463768116
Test accuracy:  0.9767441860465116
Weights:
 [[ 10.20712073   8.95340795  11.29235593   1.27580353 -14.83434428
   47.61296673 -19.76841317 -18.53293534  18.6009443    6.96687455
  -48.03775949  13.5234203  -24.32877841 -48.2574854  -20.26202862
   -7.06322515  22.98762279 -17.97388348   5.06754774  56.36120389
  -26.24376563 -44.43656363 -19.06519628 -33.66196518  -0.51913422
   -0.87187305 -40.00442683 -28.1889278  -21.63179411 -33.29478491]]


## Ridge (L2) Regression

In [9]:
# Logistic regression trained with SGD and an L2 penalty on the weights.
# random_state pins the stochastic training so the accuracy and weights
# printed below are reproducible on every run.
# NOTE(review): newer scikit-learn releases spell the loss as
# loss='log_loss' — update it when upgrading.
l2_regr = SGDClassifier(loss='log', penalty='l2', random_state=0)
l2_regr.fit(X_train, y_train)

# Predict on both splits so train and test accuracy can be compared.
y_train_pred = l2_regr.predict(X_train)
y_test_pred = l2_regr.predict(X_test)

print("Train accuracy: ", accuracy_score(y_train,y_train_pred))
print("Test accuracy: ", accuracy_score(y_test,y_test_pred))

# Learned coefficients of the fitted model.
print("Weights:\n", l2_regr.coef_)

Train accuracy:  0.9710144927536232
Test accuracy:  0.9651162790697675
Weights:
 [[-0.10127783  1.41553919 -0.25770555 -0.8575228  -3.7076381   6.40737434
  -5.95500384 -2.68721969  0.56348117 -1.00740511 -6.47083069  3.04150005
   0.29469494 -7.95054702 -5.32356981 -6.23943089  6.5160247  -4.52789456
   1.17338667  6.7137584  -5.64797438 -8.29592967 -3.89301757 -6.21792999
   1.95827474  2.27485214 -4.16384192 -2.70012369 -5.22840436 -4.98825651]]


## Lasso (L1) Regression

In [10]:
# Logistic regression trained with SGD and an L1 penalty on the weights.
# random_state pins the stochastic training so the accuracy and weights
# printed below are reproducible on every run.
# NOTE(review): newer scikit-learn releases spell the loss as
# loss='log_loss' — update it when upgrading.
l1_regr = SGDClassifier(loss='log', penalty='l1', random_state=0)
l1_regr.fit(X_train, y_train)

# Predict on both splits so train and test accuracy can be compared.
y_train_pred = l1_regr.predict(X_train)
y_test_pred = l1_regr.predict(X_test)

print("Train accuracy: ", accuracy_score(y_train,y_train_pred))
print("Test accuracy: ", accuracy_score(y_test,y_test_pred))

# Learned coefficients of the fitted model.
print("Weights:\n", l1_regr.coef_)

Train accuracy:  0.9772256728778468
Test accuracy:  0.9767441860465116
Weights:
 [[  3.66955265   8.57642098   5.64785026   0.67948295 -10.6129727
   31.82832578 -11.89978457 -22.67939807   7.41824761   1.01920679
  -39.02620545  12.04097489  -7.52987554 -31.37398718  -9.56137386
    0.          12.02618676 -12.72498356   4.81901328  20.30293437
  -17.26567868 -34.30721459  -8.76044688 -17.95370176   3.38342163
    1.06965294 -17.90649628 -17.53204094 -16.32889517 -23.2018923 ]]


## Elastic Net (L1+L2) Regression

In [11]:
# Logistic regression trained with SGD and an elastic-net penalty
# (a mix of L1 and L2; the mixing ratio is left at the estimator's default).
# random_state pins the stochastic training so the accuracy and weights
# printed below are reproducible on every run.
# NOTE(review): newer scikit-learn releases spell the loss as
# loss='log_loss' — update it when upgrading.
elnet_regr = SGDClassifier(loss='log', penalty='elasticnet', random_state=0)
elnet_regr.fit(X_train, y_train)

# Predict on both splits so train and test accuracy can be compared.
y_train_pred = elnet_regr.predict(X_train)
y_test_pred = elnet_regr.predict(X_test)

print("Train accuracy: ", accuracy_score(y_train,y_train_pred))
print("Test accuracy: ", accuracy_score(y_test,y_test_pred))

# Learned coefficients of the fitted model.
print("Weights:\n", elnet_regr.coef_)

Train accuracy:  0.9834368530020704
Test accuracy:  0.9883720930232558
Weights:
 [[ 0.65932487  1.54335721  0.73675036  0.         -3.40895205  7.2407822
  -5.59826303 -2.06118327  0.65428027 -0.29701998 -7.82872853  3.76994261
   0.         -9.27892925 -2.56332695 -3.78734593  5.90569538 -4.11615785
   1.28678221  6.99551621 -5.28213541 -7.8081117  -2.98098677 -6.47469341
   1.12660205  3.75008475 -4.29632965 -3.66658408 -3.88359172 -5.46727081]]
