In [12]:
import matplotlib.pyplot as plt
import math
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [42]:
#toy dataset: a single feature (the integers 0..9) with a binary label
#scikit-learn takes the features as a 2d array of shape (n_samples, n_features),
#so the range is turned into one column; the labels stay a flat 1d array
x = np.arange(10)[:, np.newaxis]
y = np.array([0] * 4 + [1] * 6) #four 0s followed by six 1s

In [43]:
#check the layout: x should be a single column with one row per sample
print(x)

[[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]


In [44]:
#Shortcut using sklearn LogisticRegression()
#be careful about choice of solver: not every solver supports every penalty (e.g. L1 needs 'liblinear' or 'saga')
#fit() takes the features x as a 2d array (n_samples, n_features) and the labels y as a 1d array, so reshape x but leave y flat
seed = 0
logistic_model = LogisticRegression(solver = 'liblinear', random_state = seed)

logistic_model.fit(x, y)


LogisticRegression(random_state=0, solver='liblinear')

In [45]:
#learned class labels and the fitted weight for the single feature
print(logistic_model.classes_, logistic_model.coef_)

[0 1] [[0.51491375]]


In [46]:
#evaluate performance in sample: one row per sample, columns are P(y = 0) and P(y = 1), in the order given by classes_
logistic_model.predict_proba(x)

array([[0.74002157, 0.25997843],
       [0.62975524, 0.37024476],
       [0.5040632 , 0.4959368 ],
       [0.37785549, 0.62214451],
       [0.26628093, 0.73371907],
       [0.17821501, 0.82178499],
       [0.11472079, 0.88527921],
       [0.07186982, 0.92813018],
       [0.04422513, 0.95577487],
       [0.02690569, 0.97309431]])

In [47]:
#hard class predictions for the training inputs; x = 3 is predicted as 1 although its label is 0
logistic_model.predict(x)

array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1])

In [48]:
#score returns the mean accuracy: the number of correct predictions as a fraction of the total (9 of 10 here)
logistic_model.score(x, y)

0.9

In [49]:
#rows are the true labels and columns the predicted labels, so the off-diagonal 1 in the first row is a 0 that was predicted as 1
confusion_matrix(y, logistic_model.predict(x))

array([[3, 1],
       [0, 6]], dtype=int64)

In [50]:
#per-class precision, recall and f1-score, plus overall accuracy, for the in-sample predictions
print(classification_report(y, logistic_model.predict(x)))

              precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       0.86      1.00      0.92         6

    accuracy                           0.90        10
   macro avg       0.93      0.88      0.89        10
weighted avg       0.91      0.90      0.90        10



In [55]:
#improve the in-sample fit by weakening regularization:
#C is the inverse of the regularization strength, so C = 10.0 penalizes the weights less than the default C = 1.0

seed = 0
logistic_model2 = LogisticRegression(solver = 'liblinear', random_state = seed, C = 10.0)

logistic_model2.fit(x, y)

#uncomment to compare the two models' predictions side by side
#print(logistic_model.predict(x))
#print(logistic_model2.predict(x))

confusion_matrix(y, logistic_model2.predict(x)) #No misclassifications here!

array([[4, 0],
       [0, 6]], dtype=int64)

In [38]:
#example 2: the same workflow on labels that no single threshold on x can separate

# Step 1: Import packages, functions, and classes
# (these repeat the imports from the first cell, which makes this cell runnable on its own)
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Step 2: Get data
# NOTE: this rebinds x and y from the first example, so re-running the earlier
# cells after this one would evaluate them against these labels instead
x = np.arange(10).reshape(-1, 1)
y = np.array([0, 1, 0, 0, 1, 1, 1, 1, 1, 1])

# Step 3: Create a model and train it
model = LogisticRegression(solver='liblinear', C=10.0, random_state=0)
model.fit(x, y)

# Step 4: Evaluate the model (all in sample, on the training data)
p_pred = model.predict_proba(x)  # per-class probabilities, one row per sample
y_pred = model.predict(x)  # predicted class labels
score_ = model.score(x, y)  # mean accuracy
conf_m = confusion_matrix(y, y_pred)  # rows: true labels, columns: predicted labels
report = classification_report(y, y_pred)  # text table of precision/recall/f1 per class

#in-sample accuracy cannot reach 100% here: the label 1 at x = 1 sits between 0s at x = 0 and x = 2,
#so no single threshold on x separates the classes (the data are not linearly separable)

In [39]:
#show the probabilities, predicted labels, accuracy, confusion matrix and classification report
#NOTE(review): a single print() separates the five objects only by spaces, so they run together in the output;
#passing sep='\n' (or printing each one separately) would put every object on its own line
print(p_pred, y_pred, score_, conf_m, report)

[[0.81999686 0.18000314]
 [0.69272057 0.30727943]
 [0.52732579 0.47267421]
 [0.35570732 0.64429268]
 [0.21458576 0.78541424]
 [0.11910229 0.88089771]
 [0.06271329 0.93728671]
 [0.03205032 0.96794968]
 [0.0161218  0.9838782 ]
 [0.00804372 0.99195628]] [0 0 0 1 1 1 1 1 1 1] 0.8 [[2 1]
 [1 6]]               precision    recall  f1-score   support

           0       0.67      0.67      0.67         3
           1       0.86      0.86      0.86         7

    accuracy                           0.80        10
   macro avg       0.76      0.76      0.76        10
weighted avg       0.80      0.80      0.80        10

