# Logistic Regression with scikit-learn

In [1]:
from sklearn import datasets
import numpy as np

# Load the bundled iris dataset and keep only feature columns 2 and 3
# (petal length and petal width according to the scikit-learn iris
# documentation; check `iris.feature_names` to confirm).
iris = datasets.load_iris()
feature_columns = [2, 3]
X = iris.data[:, feature_columns]
y = iris.target

# Show which integer class labels occur in the target vector.
class_labels = np.unique(y)
print('Class labels:', class_labels)

Class labels: [0 1 2]


## Train-Test Split and Feature Standardization

In [2]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing. `stratify=y` asks for the same
# class proportions in both splits, and the fixed `random_state` makes the
# shuffle reproducible across runs.
split_options = dict(test_size=0.3, random_state=1, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [3]:
from sklearn.preprocessing import StandardScaler
# Estimate the scaling parameters from the training split only, then apply
# that same fitted transformation to both the training and the test split.
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = (sc.transform(split) for split in (X_train, X_test))

## Logistic Regression Implementation & Regularization (Parameter `C`)

In [4]:
from sklearn.linear_model import LogisticRegression

# One-vs-rest logistic regression trained on the standardized features.
# In scikit-learn, `C` is the inverse of the regularization strength, so
# C=100.0 corresponds to comparatively weak regularization.
# NOTE(review): recent scikit-learn releases deprecate `multi_class`; confirm
# against the installed version before upgrading.
# NOTE(review): per the scikit-learn docs `random_state` is only used by the
# 'sag', 'saga' and 'liblinear' solvers, so it likely has no effect here.
lr = LogisticRegression(
    solver='lbfgs',
    multi_class='ovr',
    C=100.0,
    random_state=1,
)
lr.fit(X_train_std, y_train)

In [5]:
# Class-membership probabilities for the first three test samples
# (one row per sample, one column per class).
first_three = X_test_std[:3, :]
lr.predict_proba(first_three)

array([[3.81527885e-09, 1.44792866e-01, 8.55207131e-01],
       [8.34020679e-01, 1.65979321e-01, 3.25737138e-13],
       [8.48831425e-01, 1.51168575e-01, 2.62277619e-14]])

In [8]:
# Pick, for each of the first three test samples, the column index with
# the highest predicted probability.
np.argmax(lr.predict_proba(X_test_std[:3, :]), axis=1)

array([2, 0, 0])

In [10]:
# Predicted class labels for the same first three test samples, obtained
# directly from the estimator instead of via the probabilities.
lr.predict(X_test_std[:3])

array([2, 0, 0])

In [11]:
# Selecting a single row yields a one-dimensional array, but `predict`
# needs a two-dimensional (n_samples, n_features) input, so the sample is
# reshaped into a single-row matrix first.
single_sample = X_test_std[0, :]
lr.predict(single_sample.reshape(1, -1))

array([2])

## Accuracy

In [12]:
from sklearn.metrics import accuracy_score
# Fraction of correctly classified test samples. For classifiers,
# `lr.score(X, y)` is the accuracy of `lr.predict(X)` against `y`, so the
# imported `accuracy_score` gives the same number.
y_pred = lr.predict(X_test_std)
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))

Accuracy: 0.978
