# Introduction to Logistic Regression in python

In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score
from collections import Counter
from sklearn.datasets import make_classification, load_iris

In [2]:
X,y = make_classification(n_samples = 1000, n_features = 10, n_informative= 6, n_redundant= 4, n_classes = 3, random_state= 11)
#X,y = load_iris(return_X_y=True)
print(X.shape)
print(y.shape)
print(Counter(y))

(1000, 10)
(1000,)
Counter({0: 336, 1: 333, 2: 331})


In [4]:
model = LogisticRegression(multi_class='multinomial', solver = 'lbfgs')

#solver{‘lbfgs’, ‘liblinear’, ‘newton-cg’, ‘newton-cholesky’, ‘sag’, ‘saga’}, default=’lbfgs’

In [5]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.8)

In [6]:
model.fit(Xtrain,ytrain)

LogisticRegression(multi_class='multinomial')

In [7]:
yhat = model.predict(Xtest)

In [8]:
yhat[0]

0

In [9]:
p_hat = model.predict_proba(Xtest)

In [10]:
p_hat[0]

array([0.93431719, 0.05248916, 0.01319366])

In [11]:
acc = accuracy_score(ytest, yhat)
print('Accuracy: ', acc)

Accuracy:  0.755


In [12]:
print(classification_report(ytest, yhat))

              precision    recall  f1-score   support

           0       0.81      0.85      0.83        71
           1       0.64      0.70      0.67        56
           2       0.80      0.71      0.75        73

    accuracy                           0.76       200
   macro avg       0.75      0.75      0.75       200
weighted avg       0.76      0.76      0.76       200



In [13]:
print(confusion_matrix(ytest, yhat))

[[60  9  2]
 [ 6 39 11]
 [ 8 13 52]]
