# Logistic Regression
<img src='images/LogisticRegression.jpg' width=400><br>
Example of 2D logistic Regression.<br>
Unlike linear regression, we find a line that `separates` the two classes <br>
This we learn values of `a, b, c` such that there's a line $ax_1 + bx_2 + c = 0$<br>
Points that have $x_1, x_2$ such that $ax_1 + bx_2 + c < 0$ will be classified as `class 0` and rest as `class 1` 

In [1]:
from sklearn import datasets
import numpy as np
import pandas as pd
np.set_printoptions(precision=3)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

Data description : [iris](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html#sklearn.datasets.load_iris)

In [2]:
X, y = datasets.load_iris(return_X_y=True)
# a hack that works only here, done so that we have only two classes
X, y = X[:99], y[:99]

print(f"One example x={X[0]}, y={y[0]}")
print("Five examples")
print("X:", X[47:52])
print("y:", y[47:52])

# normalise data
X = (X - X.mean())/X.std()

One example x=[5.1 3.5 1.4 0.2], y=0
Five examples
X: [[4.6 3.2 1.4 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.3 1.4 0.2]
 [7.  3.2 4.7 1.4]
 [6.4 3.2 4.5 1.5]]
y: [0 0 0 1 1]


In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

print('coefficients', model.coef_)
print('intercepts', model.intercept_)

coefficients [[ 0.702 -1.014  2.947  1.193]]
intercepts [1.098]


# Example predictions

In [4]:
X_example = X_train[10:20]
y_example = y_train[10:20]

# manually predict first example
w0, w1, w2, w3, w4 = model.intercept_[0], *model.coef_[0]
x1, x2, x3, x4 = X_example[0]
output = w0 + w1*x1 + w2*x2 + w3*x3 + w4*x4
if output <= 0:
    print("Predicted class:", 0)
else:
    print("Predicted class:", 1)

# def sigmoid(x):
#     return 1 / (1 + np.exp(-x))
# predicted = model.intercept_[0]
# for i, x_i in enumerate(X_example[0]):
#     predicted += model.coef_[0][i] * x_i
# predicted = int(round(sigmoid(predicted)))
# print("Predicted class:", predicted)

print("Actual Class:", y_example[0])

Predicted class: 0
Actual Class: 0


In [5]:
y_example_pred = model.predict(X_example)

pd.DataFrame({
    'actual': y_example,
    'predicted':  y_example_pred,
    'is_correct':  y_example_pred == y_example,
})

Unnamed: 0,actual,predicted,is_correct
0,0,0,True
1,1,1,True
2,0,0,True
3,0,0,True
4,1,1,True
5,0,0,True
6,0,0,True
7,1,1,True
8,0,0,True
9,0,0,True


# Mean Squared Error

In [6]:
acc = model.score(X_train, y_train)
print("Train Accuracy", acc * 100)

acc = model.score(X_test, y_test)
print("Test Accuracy", acc * 100)

Train Accuracy 100.0
Test Accuracy 100.0
