### Logistic Regression using sklearn

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris

%matplotlib notebook

In [2]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated element-wise.

    Parameters
    ----------
    z : real scalar or array-like
        Net input value(s). Lists and tuples are converted with
        ``np.asarray``.

    Returns
    -------
    numpy scalar or numpy.ndarray
        Sigmoid of each input value, with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) is always <= 1, so it cannot overflow. Each half of the real
    # line then uses the algebraically equivalent form that only needs this
    # bounded term; the naive formula overflows in exp(-z) for very negative
    # z and emits a RuntimeWarning.
    decay = np.exp(-np.abs(z))
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a NumPy scalar
    # and leaves arrays with one or more dimensions as arrays.
    return result[()]

In [3]:
# Evaluate the sigmoid on an evenly spaced grid of net-input values.
z = np.arange(-7, 7, 0.1)
phi_z = sigmoid(z)

# Open a dedicated figure for this cell: pyplot functions act on the current
# figure, and with the interactive `%matplotlib notebook` backend a figure
# from an earlier cell can still be the current one.
fig, ax = plt.subplots()
_ = ax.plot(z, phi_z, color='blue')
_ = ax.axvline(0.0, color='k')  # vertical reference line at z = 0
_ = ax.axhspan(0.0, 1.0, facecolor='1.0', alpha=1, ls='dotted')  # band from 0 to 1 on the y-axis
_ = ax.axhline(y=0.5, ls='dotted', color='k')  # sigmoid(0) = 0.5
_ = ax.set_xlabel('z')
_ = ax.set_ylabel('sigmoid(z)')
_ = ax.set_title('S shaped Sigmoid function', fontsize=10)

<IPython.core.display.Javascript object>

In [4]:
# Load the iris data set and keep only feature columns 2 and 3, which the
# plots in this notebook label as petal length and petal width.
iris = load_iris()
PETAL_FEATURES = [2, 3]
X, y = iris.data[:, PETAL_FEATURES], iris.target

In [5]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [6]:
# Fit the scaler on the training split only, then apply that same
# transformation to both splits.
sc = StandardScaler().fit(X_train)

X_train_std, X_test_std = (sc.transform(split) for split in (X_train, X_test))

In [30]:
# Classifier used for the decision-region plot and the accuracy check.
# A large C means weak regularization (see the comparison of C values later
# in this notebook).
lr = LogisticRegression(random_state=0, C=1000)

In [31]:
# Train on the standardized training split; the fitted estimator is the
# cell's displayed value.
lr.fit(X=X_train_std, y=y_train)

LogisticRegression(C=1000, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=0, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [32]:
# Stack training rows first, then test rows, into one feature matrix.
X_combined_std = np.concatenate((X_train_std, X_test_std), axis=0)

In [33]:
# Labels in the same order as the combined features: training first, then test.
y_combined_std = np.concatenate((y_train, y_test))

In [34]:
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

def plot_decision_region(X, y, classifier, test_idx=None, resolution=0.2, margin=1.0):
    """Draw a classifier's decision regions and the samples with pyplot.

    All drawing goes through ``plt.*`` calls, i.e. onto pyplot's current
    figure/axes; the caller is responsible for creating a figure and for
    axis labels, title and legend.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 2)
        Feature matrix; column 0 is plotted on the x-axis, column 1 on the
        y-axis.
    y : ndarray of shape (n_samples,)
        Class label of each row of ``X``. Five colour/marker pairs are
        defined, so at most five distinct labels can be drawn.
    classifier : object
        Fitted model whose ``predict`` accepts an (n_points, 2) array and
        returns one label per row.
    test_idx : sequence of int or ndarray, optional
        Row indices of ``X`` to highlight as test samples. ``None`` or an
        empty sequence skips the highlight.
    resolution : float, default 0.2
        Step of the grid on which ``classifier`` is evaluated.
    margin : float, default 1.0
        Padding added around the data range on both axes, so that the
        outermost samples do not sit on (or beyond) the axes border.
    """
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    markers = ('s', 'x', 'o', '^', 'v')

    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # --- Decision surface -------------------------------------------------
    x1_min, x1_max = X[:, 0].min() - margin, X[:, 0].max() + margin
    x2_min, x2_max = X[:, 1].min() - margin, X[:, 1].max() + margin

    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

    # Predict a label for every grid node, then fold the flat prediction
    # vector back into the grid's 2-D shape for contourf.
    grid_points = np.column_stack((xx1.ravel(), xx2.ravel()))
    Z = classifier.predict(grid_points).reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # --- Sample points, one scatter call per class -------------------------
    for idx, c1 in enumerate(classes):
        members = (y == c1)
        # `color=` (not `c=`): a single RGBA tuple passed as `c` can be
        # mistaken for an array of values to colour-map.
        plt.scatter(x=X[members, 0], y=X[members, 1],
                    marker=markers[idx], color=cmap(idx), label=c1)

    # --- Highlight test points ---------------------------------------------
    # Explicit None/length check: `if test_idx:` raises for NumPy index arrays.
    if test_idx is not None and len(test_idx) > 0:
        X_test = X[test_idx, :]
        plt.scatter(X_test[:, 0], X_test[:, 1], c='gray', alpha=0.8,
                    linewidth=1, marker='o', s=55, label='test set')

In [35]:
# Open a dedicated figure for this cell: pyplot functions act on the current
# figure, and with the interactive `%matplotlib notebook` backend a figure
# from an earlier cell can still be the current one.
fig, ax = plt.subplots()

# In the combined arrays the test rows come after the training rows, so their
# indices are derived from the array lengths instead of being hard-coded.
plot_decision_region(X=X_combined_std, y=y_combined_std, classifier=lr,
                     test_idx=range(len(y_train), len(y_combined_std)),
                     resolution=0.02)
_ = ax.set_xlabel('Petal Length')
_ = ax.set_ylabel('Petal Width')
_ = ax.set_title('Logistic Regression training')
_ = ax.legend()

<IPython.core.display.Javascript object>

In [36]:
from sklearn.metrics import accuracy_score

In [14]:
# Predicted class labels for the standardized test split.
y_pred = lr.predict(X=X_test_std)

In [15]:
# accuracy_score is documented as (y_true, y_pred); pass the arguments in
# that order. The score is scaled by 100, so the printed value is a percentage.
accuracy_pct = accuracy_score(y_test, y_pred) * 100
print("Accuracy: %.3f" % accuracy_pct)

Accuracy: 97.778


#### Comparing Regularization Parameters

In [16]:
# Fit one model per regularization setting C = 10**-5 ... 10**4 and record the
# weight vector stored at index 1 of `coef_` for each of them.
weights, params = [], []
for c in range(-5, 5):
    C_value = 10**c
    # A separate name is used for the sweep models so that the classifier
    # bound to `lr` by the earlier cells is not silently replaced by the last
    # model of this loop.
    lr_c = LogisticRegression(C=C_value, random_state=0)
    lr_c.fit(X_train_std, y_train)
    weights.append(lr_c.coef_[1])
    params.append(C_value)

# One row per C value, one column per feature.
weights = np.array(weights)

# Open a dedicated figure for this cell: pyplot functions act on the current
# figure, and with the interactive `%matplotlib notebook` backend a figure
# from an earlier cell can still be the current one.
fig, ax = plt.subplots()
_ = ax.plot(params, weights[:, 0], label='Petal Length')
_ = ax.plot(params, weights[:, 1], label='Petal Width', linestyle='--')
_ = ax.legend()
_ = ax.set_xlabel('C')
_ = ax.set_ylabel('Weight coefficient')
_ = ax.set_xscale('log')  # C spans ten orders of magnitude
_ = ax.set_title('Decreasing C - Increasing Regularization Strength', fontsize=10)

<IPython.core.display.Javascript object>

In [17]:
# Display the collected weights: one row per C value in sweep order
# (10**-5 ... 10**4); column 0 is the petal-length weight and column 1 the
# petal-width weight, both taken from `coef_[1]` of each fitted model.
weights

array([[  6.55979672e-05,   3.27209902e-05],
       [  6.53690140e-04,   3.24948619e-04],
       [  6.31936684e-03,   3.03439293e-03],
       [  4.88199280e-02,   1.62092216e-02],
       [  2.38824476e-01,  -6.64831864e-02],
       [  1.06011115e+00,  -8.41544819e-01],
       [  2.19190886e+00,  -1.98265142e+00],
       [  2.50582212e+00,  -2.30336729e+00],
       [  2.54373335e+00,  -2.34219790e+00],
       [  2.54760621e+00,  -2.34616582e+00]])