In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
import warnings

# Suppress every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

# load_iris() returns a Bunch: a dict subclass whose entries can also be
# read with attribute syntax (iris.data, iris.target, ...).
iris = load_iris()
print(iris.keys())

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])


**Select the last column of every row - petal width. Note we use `iris.data[:, 3:]` instead of the conceptually equivalent `iris.data[:, 3]` in order to get a shape of (150, 1) as opposed to (150,) - estimators in Scikit-Learn expect a two-dimensional ndarray for X (shape (n_samples, n_features))**

In [2]:
# Slice (rather than index) the column axis so the result keeps two
# dimensions; this slice object is exactly what `3:` expands to.
petal_width_cols = slice(3, None)
X = iris.data[:, petal_width_cols]

**Target is an ndarray of shape (150,) containing multiclass labels: 0, 1 and 2. Transform to binary labels: True if Iris-Virginica (label 2), False otherwise**

In [3]:
# Boolean target: True where the class label equals 2, False elsewhere.
y = np.equal(iris.target, 2)

# fit() returns the estimator itself, so construction and training can be chained.
log_reg = LogisticRegression().fit(X, y)

# Leave the fitted estimator as the cell's last expression so its repr is displayed.
log_reg

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

**Let's predict on a set of 20 instances with petal width a float between 0 and 3. The petal width of Iris-Virginica flowers ranges from 1.4 cm to 2.5 cm while the other iris flowers generally have a smaller petal width, ranging from 0.1 cm to 1.8 cm.**

In [5]:
# Draw the 20 synthetic petal widths from a locally seeded generator so that
# Restart Kernel -> Run All reproduces the same inputs, and therefore the
# same probabilities and labels, on every run.
rng = np.random.RandomState(42)
X_new = rng.uniform(0, 3, (20, 1))  # 20 rows, 1 feature column - same layout the model was fit on

# predict_proba() output is printed as a two-column array, predict() as an array of booleans.
print(f"Trained with a single feature. Logistic Regression Predicted Probabilities:{log_reg.predict_proba(X_new)}")
print(f"Trained with a single feature. Logistic Regression Predicted Binary Labels:{log_reg.predict(X_new)}")

Trained with a single feature. Logistic Regression Predicted Probabilities:[[0.94631257 0.05368743]
 [0.92256675 0.07743325]
 [0.84414501 0.15585499]
 [0.44196909 0.55803091]
 [0.25000296 0.74999704]
 [0.4075295  0.5924705 ]
 [0.11297979 0.88702021]
 [0.80045058 0.19954942]
 [0.9703262  0.0296738 ]
 [0.95531229 0.04468771]
 [0.92659159 0.07340841]
 [0.88395096 0.11604904]
 [0.69497341 0.30502659]
 [0.84266839 0.15733161]
 [0.94560846 0.05439154]
 [0.97088198 0.02911802]
 [0.07968944 0.92031056]
 [0.03067714 0.96932286]
 [0.07608923 0.92391077]
 [0.07001053 0.92998947]]
Trained with a single feature. Logistic Regression Predicted Binary Labels:[False False False  True  True  True  True False False False False False
 False False False False  True  True  True  True]


**Train with all the features (sepal length, sepal width, petal length, petal width)**

In [6]:
# Refit the binary classifier on the full feature matrix. `y` is the boolean
# target built in the earlier single-feature cell and is reused unchanged.
X = iris.data
log_reg = LogisticRegression()
log_reg.fit(X, y)

# Seed the generator locally so the synthetic instances (and the printed
# predictions) are identical on every Restart Kernel -> Run All.
# Note: the same 0-5 range is applied to all four feature columns.
rng = np.random.RandomState(42)
X_new = rng.uniform(0, 5, (20, 4))  # 20 rows, 4 feature columns

print(f"Trained with all features. Logistic Regression Predicted Binary Labels:{log_reg.predict(X_new)}")

Trained with all features. Logistic Regression Predicted Binary Labels:[False  True  True False  True False False  True  True  True  True False
 False False False  True  True  True  True  True]


**Softmax Regression (Multinomial Logistic Regression) (Generalize Logistic Regression to multiclass classification)**

In [8]:
# Use every feature column and the original multiclass labels.
X, y = iris.data, iris.target

# Multinomial (softmax) logistic regression; fit() returns the estimator,
# so it is trained in the same statement that creates it.
# NOTE(review): newer scikit-learn releases deprecate the `multi_class`
# argument - confirm against the installed version before upgrading.
softmax_reg = LogisticRegression(
    multi_class="multinomial",
    solver="lbfgs",
    C=100,
).fit(X, y)

# predict() yields a class index, which is mapped to its name via iris.target_names.
print(f"Softmax prediction for a flower with sepal and petal length = 5 and width = 2: {iris.target_names[softmax_reg.predict([[5, 2, 5, 2]])[0]]}")

Softmax prediction for a flower with sepal and petal length = 5 and width = 2: virginica
