# Introduction to Logistic Regression in Python
See documentation at https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html. 

In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score
from collections import Counter
from sklearn.datasets import make_classification, load_iris

In [2]:
# Build a synthetic 3-class problem with 10 features (6 informative,
# 4 redundant). The fixed random_state makes the generated samples identical
# on every run.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=6,
    n_redundant=4,
    n_classes=3,
    random_state=11,
)
# Alternative dataset: uncomment to use iris instead of the synthetic data.
# X, y = load_iris(return_X_y=True)

# Report the feature-matrix shape, the label-vector shape and the per-class
# sample counts, one per line.
print(X.shape, y.shape, Counter(y), sep="\n")

(1000, 10)
(1000,)
Counter({0: 336, 1: 333, 2: 331})


In [3]:
# Multinomial (softmax) logistic regression fitted with the L-BFGS solver.
# `multi_class` is deliberately not passed: the parameter is deprecated since
# scikit-learn 1.5 (it emits a FutureWarning and is scheduled for removal),
# and 'lbfgs' already fits a multinomial model whenever the target has three
# or more classes.
model = LogisticRegression(solver='lbfgs')

# Available solvers: 'lbfgs' (default), 'liblinear', 'newton-cg',
# 'newton-cholesky', 'sag', 'saga'.

In [4]:
# Hold out 20% of the samples for evaluation. A fixed random_state pins the
# shuffle, so the split -- and every metric computed from it below -- is
# reproducible under Restart Kernel -> Run All.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.8, random_state=11)

In [5]:
# Fit the classifier on the training portion only; the test split stays
# unseen until evaluation.
model.fit(Xtrain,ytrain)

In [6]:
# Predicted class label for every sample in the held-out test set.
yhat = model.predict(Xtest)

In [7]:
# Inspect the predicted label of the first test sample.
yhat[0]

1

In [8]:
# Per-class probability estimates for every test sample (one row per sample).
p_hat = model.predict_proba(Xtest)

In [9]:
# Probability vector of the first test sample: one entry per class.
p_hat[0]

array([0.11044282, 0.81371348, 0.0758437 ])

In [10]:
# Score the fitted model on the held-out test data. For a classifier this is
# the mean accuracy, i.e. the fraction of test samples labelled correctly.
score = model.score(Xtest, ytest)
print(score)

0.77


In [11]:
# Compute accuracy explicitly from the true and predicted test labels.
acc = accuracy_score(ytest, yhat)
print('Accuracy: ', acc) # same value as model.score(Xtest, ytest)

Accuracy:  0.77


In [12]:
# Per-class precision, recall, F1 and support on the test set, followed by
# overall accuracy and the macro / weighted averages.
print(classification_report(ytest, yhat))

              precision    recall  f1-score   support

           0       0.77      0.82      0.79        65
           1       0.79      0.72      0.76        75
           2       0.75      0.78      0.76        60

    accuracy                           0.77       200
   macro avg       0.77      0.77      0.77       200
weighted avg       0.77      0.77      0.77       200



In [13]:
# Confusion matrix on the test set: rows are the true classes, columns the
# predicted classes, so each row sums to that class's support.
print(confusion_matrix(ytest, yhat))

[[53  9  3]
 [ 8 54 13]
 [ 8  5 47]]
