### Logistic Regression

In [9]:
# import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn import preprocessing

np.random.seed(42)

#### Australian

In [2]:
names = pd.read_csv('australian.names')
data = pd.read_csv('australian.data', names=names.columns)
data

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,class
0,1,22.08,11.460,2,4,4,1.585,0,0,0,1,2,100,1213,0
1,0,22.67,7.000,2,8,4,0.165,0,0,0,0,2,160,1,0
2,0,29.58,1.750,1,4,4,1.250,0,0,0,1,2,280,1,0
3,0,21.67,11.500,1,5,3,0.000,1,1,11,1,2,0,1,1
4,1,20.17,8.170,2,6,4,1.960,1,1,14,0,2,60,159,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,1,31.57,10.500,2,14,4,6.500,1,0,0,0,2,0,1,1
686,1,20.67,0.415,2,8,4,0.125,0,0,0,0,2,0,45,0
687,0,18.83,9.540,2,6,4,0.085,1,0,0,0,2,100,1,1
688,0,27.42,14.500,2,14,8,3.085,1,1,1,0,2,120,12,1


In [3]:
X = data.copy().drop(columns=['class'])
Y = data['class']

In [4]:
Y.value_counts()

0    383
1    307
Name: class, dtype: int64

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [6]:
log_reg_model = LogisticRegression(max_iter=2500, random_state=42)
log_reg_model.fit(X_train, y_train)
y_pred = log_reg_model.predict(X_test)
y_true = y_test
print("Accuracy: ", accuracy_score(y_true, y_pred))
print('F1 score: ', f1_score(y_true, y_pred, average='weighted'))
print(classification_report(y_true, y_pred))

Accuracy:  0.855072463768116
F1 score:  0.8543701843698405
              precision    recall  f1-score   support

           0       0.87      0.90      0.88       126
           1       0.83      0.79      0.81        81

    accuracy                           0.86       207
   macro avg       0.85      0.84      0.85       207
weighted avg       0.85      0.86      0.85       207



#### German

In [7]:
names = pd.read_csv('german.names')
data = pd.read_csv('german.data', names=names.columns)
data

Unnamed: 0,Status of existing checking account,Duration in month,Credit history,Purpose,Credit amount,Savings account/bonds,Present employment since,Installment rate in percentage of disposable income,Personal status and sex,Other debtors / guarantors,...,Property,Age in years,Other installment plans,Housing,Number of existing credits at this bank,Job,Number of people being liable to provide maintenance for,Telephone,foreign worker,class
0,A11,6,A34,A43,1169,A65,A75,4,A93,A101,...,A121,67,A143,A152,2,A173,1,A192,A201,1
1,A12,48,A32,A43,5951,A61,A73,2,A92,A101,...,A121,22,A143,A152,1,A173,1,A191,A201,2
2,A14,12,A34,A46,2096,A61,A74,2,A93,A101,...,A121,49,A143,A152,1,A172,2,A191,A201,1
3,A11,42,A32,A42,7882,A61,A74,2,A93,A103,...,A122,45,A143,A153,1,A173,2,A191,A201,1
4,A11,24,A33,A40,4870,A61,A73,3,A93,A101,...,A124,53,A143,A153,2,A173,2,A191,A201,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,A14,12,A32,A42,1736,A61,A74,3,A92,A101,...,A121,31,A143,A152,1,A172,1,A191,A201,1
996,A11,30,A32,A41,3857,A61,A73,4,A91,A101,...,A122,40,A143,A152,1,A174,1,A192,A201,1
997,A14,12,A32,A43,804,A61,A75,4,A93,A101,...,A123,38,A143,A152,1,A173,1,A191,A201,1
998,A11,45,A32,A43,1845,A61,A73,4,A93,A101,...,A124,23,A143,A153,1,A173,1,A192,A201,2


In [8]:
X = data.copy().drop(columns=['class'])
Y = data['class']

In [10]:
Y.value_counts()

1    700
2    300
Name: class, dtype: int64

In [11]:
# label encode string catgorical data for input into classifier
label_encoder = preprocessing.LabelEncoder()
labels_to_encode = ['Status of existing checking account', 'Credit history', 'Purpose', 'Savings account/bonds', 'Present employment since', 'Personal status and sex', 'Other debtors / guarantors', 'Property', 'Other installment plans', 'Housing', 'Job', 'Telephone', 'foreign worker']
for i in labels_to_encode:
    X[i] = label_encoder.fit_transform(X[i])
X.head()

Unnamed: 0,Status of existing checking account,Duration in month,Credit history,Purpose,Credit amount,Savings account/bonds,Present employment since,Installment rate in percentage of disposable income,Personal status and sex,Other debtors / guarantors,Present residence since,Property,Age in years,Other installment plans,Housing,Number of existing credits at this bank,Job,Number of people being liable to provide maintenance for,Telephone,foreign worker
0,0,6,4,4,1169,4,4,4,2,0,4,0,67,2,1,2,2,1,1,0
1,1,48,2,4,5951,0,2,2,1,0,2,0,22,2,1,1,2,1,0,0
2,3,12,4,7,2096,0,3,2,2,0,3,0,49,2,1,1,1,2,0,0
3,0,42,2,3,7882,0,3,2,2,2,4,1,45,2,2,1,2,2,0,0
4,0,24,3,0,4870,0,2,3,2,0,4,3,53,2,2,2,2,2,0,0


In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [13]:
log_reg_model = LogisticRegression(max_iter=2500, random_state=42)
log_reg_model.fit(X_train, y_train)
y_pred = log_reg_model.predict(X_test)
y_true = y_test
print("Accuracy: ", accuracy_score(y_true, y_pred))
print('F1 score: ', f1_score(y_true, y_pred, average='weighted'))
print(classification_report(y_true, y_pred))

Accuracy:  0.7566666666666667
F1 score:  0.7386227969662247
              precision    recall  f1-score   support

           1       0.78      0.90      0.84       209
           2       0.66      0.42      0.51        91

    accuracy                           0.76       300
   macro avg       0.72      0.66      0.67       300
weighted avg       0.74      0.76      0.74       300



#### Iris

In [14]:
names = pd.read_csv('iris.names')
data = pd.read_csv('iris.data', names=names.columns)
data

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [15]:
X = data.copy().drop(columns=['class'])
Y = data['class']

In [16]:
Y.value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: class, dtype: int64

In [17]:
# encode categorical class labels to int
Y = data['class']
label_encoder = preprocessing.LabelEncoder()
Y = label_encoder.fit_transform(Y.values.reshape(-1,1))
Y

  y = column_or_1d(y, warn=True)


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [19]:
log_reg_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=2500, random_state=42)
log_reg_model.fit(X_train, y_train)
y_pred = log_reg_model.predict(X_test)
y_true = y_test
print("Accuracy: ", accuracy_score(y_true, y_pred))
print('F1 score: ', f1_score(y_true, y_pred, average='weighted'))
print(classification_report(y_true, y_pred))

Accuracy:  1.0
F1 score:  1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



#### Messidor

In [20]:
names = pd.read_csv('messidor_features.names')
data = pd.read_csv('messidor_features.data', names=names.columns)
data

Unnamed: 0,quality assessment,pre-screening,MA detection 1,MA detection 2,MA detection 3,MA detection 4,MA detection 5,MA detection 6,exudates info 1,exudates info 2,exudates info 3,exudates info 4,exudates info 5,exudates info 6,exudates info 7,exudates info 8,distance,diameter,result of the AM/FM-based classification,class
0,1,1,22,22,22,19,18,14,49.895756,17.775994,5.270920,0.771761,0.018632,0.006864,0.003923,0.003923,0.486903,0.100025,1,0
1,1,1,24,24,22,18,16,13,57.709936,23.799994,3.325423,0.234185,0.003903,0.003903,0.003903,0.003903,0.520908,0.144414,0,0
2,1,1,62,60,59,54,47,33,55.831441,27.993933,12.687485,4.852282,1.393889,0.373252,0.041817,0.007744,0.530904,0.128548,0,1
3,1,1,55,53,53,50,43,31,40.467228,18.445954,9.118901,3.079428,0.840261,0.272434,0.007653,0.001531,0.483284,0.114790,0,0
4,1,1,44,44,44,41,39,27,18.026254,8.570709,0.410381,0.000000,0.000000,0.000000,0.000000,0.000000,0.475935,0.123572,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1146,1,1,34,34,34,33,31,24,6.071765,0.937472,0.031145,0.003115,0.000000,0.000000,0.000000,0.000000,0.537470,0.116795,0,0
1147,1,1,49,49,49,49,45,37,63.197145,27.377668,8.067688,0.979548,0.001552,0.000000,0.000000,0.000000,0.516733,0.124190,0,0
1148,1,0,49,48,48,45,43,33,30.461898,13.966980,1.763305,0.137858,0.011221,0.000000,0.000000,0.000000,0.560632,0.129843,0,0
1149,1,1,39,36,29,23,13,7,40.525739,12.604947,4.740919,1.077570,0.563518,0.326860,0.239568,0.174584,0.485972,0.106690,1,1


In [21]:
X = data.copy().drop(columns=['class'])
Y = data['class']

In [22]:
Y.value_counts()

1    611
0    540
Name: class, dtype: int64

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [24]:
log_reg_model = LogisticRegression(max_iter=2500, random_state=42)
log_reg_model.fit(X_train, y_train)
y_pred = log_reg_model.predict(X_test)
y_true = y_test
print("Accuracy: ", accuracy_score(y_true, y_pred))
print('F1 score: ', f1_score(y_true, y_pred, average='weighted'))
print(classification_report(y_true, y_pred))

Accuracy:  0.7514450867052023
F1 score:  0.7515862733373199
              precision    recall  f1-score   support

           0       0.68      0.83      0.75       155
           1       0.83      0.69      0.75       191

    accuracy                           0.75       346
   macro avg       0.76      0.76      0.75       346
weighted avg       0.77      0.75      0.75       346



#### Monks

In [25]:
names = pd.read_csv('monks.names')
data = pd.read_csv('monks.data', names=names.columns)
data

Unnamed: 0,a1,a2,a3,a4,a5,a6,class
0,1,1,1,1,1,1,1
1,1,1,1,1,1,2,1
2,1,1,1,1,2,1,1
3,1,1,1,1,2,2,1
4,1,1,1,1,3,1,1
...,...,...,...,...,...,...,...
427,3,3,2,3,2,2,1
428,3,3,2,3,3,1,1
429,3,3,2,3,3,2,1
430,3,3,2,3,4,1,1


In [26]:
X = data.copy().drop(columns=['class'])
Y = data['class']

In [27]:
Y.value_counts()

1    216
0    216
Name: class, dtype: int64

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [29]:
log_reg_model = LogisticRegression(max_iter=2500, random_state=42)
log_reg_model.fit(X_train, y_train)
y_pred = log_reg_model.predict(X_test)
y_true = y_test
print("Accuracy: ", accuracy_score(y_true, y_pred))
print('F1 score: ', f1_score(y_true, y_pred, average='weighted'))
print(classification_report(y_true, y_pred))

Accuracy:  0.7153846153846154
F1 score:  0.7155025103897819
              precision    recall  f1-score   support

           0       0.68      0.75      0.71        61
           1       0.76      0.68      0.72        69

    accuracy                           0.72       130
   macro avg       0.72      0.72      0.72       130
weighted avg       0.72      0.72      0.72       130



#### Seeds

In [30]:
names = pd.read_csv('seeds.names')
data = pd.read_csv('seeds.data', names=names.columns)
data

Unnamed: 0,area,perimeter,compactness,length of kernel,width of kernel,asymmetry coefficient,length of kernel groove,class
0,15.26,14.84,0.8710,5.763,3.312,2.221,5.220,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.9050,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1
...,...,...,...,...,...,...,...,...
205,12.19,13.20,0.8783,5.137,2.981,3.631,4.870,3
206,11.23,12.88,0.8511,5.140,2.795,4.325,5.003,3
207,13.20,13.66,0.8883,5.236,3.232,8.315,5.056,3
208,11.84,13.21,0.8521,5.175,2.836,3.598,5.044,3


In [31]:
X = data.copy().drop(columns=['class'])
Y = data['class']

In [32]:
Y.value_counts()

1    70
2    70
3    70
Name: class, dtype: int64

In [33]:
Y = data['class']
label_encoder = preprocessing.LabelEncoder()
Y = label_encoder.fit_transform(Y.values.reshape(-1,1))
Y

  y = column_or_1d(y, warn=True)


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [35]:
log_reg_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=2500, random_state=42)
log_reg_model.fit(X_train, y_train)
y_pred = log_reg_model.predict(X_test)
y_true = y_test
print("Accuracy: ", accuracy_score(y_true, y_pred))
print('F1 score: ', f1_score(y_true, y_pred, average='weighted'))
print(classification_report(y_true, y_pred))

Accuracy:  0.9206349206349206
F1 score:  0.920804108502942
              precision    recall  f1-score   support

           0       0.89      0.85      0.87        20
           1       1.00      0.95      0.98        21
           2       0.88      0.95      0.91        22

    accuracy                           0.92        63
   macro avg       0.92      0.92      0.92        63
weighted avg       0.92      0.92      0.92        63



#### Tic-Tac-Toe

In [36]:
names = pd.read_csv('tic-tac-toe.names')
data = pd.read_csv('tic-tac-toe.data', names=names.columns)
data

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,class
0,x,x,x,x,o,o,x,o,o,positive
1,x,x,x,x,o,o,o,x,o,positive
2,x,x,x,x,o,o,o,o,x,positive
3,x,x,x,x,o,o,o,b,b,positive
4,x,x,x,x,o,o,b,o,b,positive
...,...,...,...,...,...,...,...,...,...,...
953,o,x,x,x,o,o,o,x,x,negative
954,o,x,o,x,x,o,x,o,x,negative
955,o,x,o,x,o,x,x,o,x,negative
956,o,x,o,o,x,x,x,o,x,negative


In [37]:
X = data.copy().drop(columns=['class'])
Y = data['class']

In [38]:
Y.value_counts()

positive    626
negative    332
Name: class, dtype: int64

In [39]:
# encode string categorical variables to int for input into classifier
label_encoder = preprocessing.LabelEncoder()
for i in X.columns:
    X[i] = label_encoder.fit_transform(X[i])
X.head()

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square
0,2,2,2,2,1,1,2,1,1
1,2,2,2,2,1,1,1,2,1
2,2,2,2,2,1,1,1,1,2
3,2,2,2,2,1,1,1,0,0
4,2,2,2,2,1,1,0,1,0


In [40]:
# encode string labels to int
label_encoder = preprocessing.LabelEncoder()
Y = label_encoder.fit_transform(Y.values.reshape(-1,1))
Y

  y = column_or_1d(y, warn=True)


array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [41]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [42]:
log_reg_model = LogisticRegression(max_iter=2500, random_state=42)
log_reg_model.fit(X_train, y_train)
y_pred = log_reg_model.predict(X_test)
y_true = y_test
print("Accuracy: ", accuracy_score(y_true, y_pred))
print('F1 score: ', f1_score(y_true, y_pred, average='weighted'))
print(classification_report(y_true, y_pred))

Accuracy:  0.6909722222222222
F1 score:  0.63899330304239
              precision    recall  f1-score   support

           0       0.59      0.21      0.31        95
           1       0.70      0.93      0.80       193

    accuracy                           0.69       288
   macro avg       0.65      0.57      0.56       288
weighted avg       0.67      0.69      0.64       288



#### Zoo

In [43]:
names = pd.read_csv('zoo.names')
data = pd.read_csv('zoo.data', names=names.columns)
data

Unnamed: 0,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,enomous,fins,legs,tail,domestic,catsize,class
0,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,1,0,0,1,0,0,0,1,1,1,0,0,2,1,0,1,1
97,1,0,1,0,1,0,0,0,0,1,1,0,6,0,0,0,6
98,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1
99,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,7


In [44]:
X = data.copy().drop(columns=['class'])
Y = data['class']

In [45]:
Y.value_counts()

1    41
2    20
4    13
7    10
6     8
3     5
5     4
Name: class, dtype: int64

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [47]:
log_reg_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=2500, random_state=42)
log_reg_model.fit(X_train, y_train)
y_pred = log_reg_model.predict(X_test)
y_true = y_test
print("Accuracy: ", accuracy_score(y_true, y_pred))
print('F1 score: ', f1_score(y_true, y_pred, average='weighted'))
print(classification_report(y_true, y_pred))

Accuracy:  0.9354838709677419
F1 score:  0.9208211143695015
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        15
           2       1.00      1.00      1.00         3
           3       0.00      0.00      0.00         1
           4       0.67      1.00      0.80         2
           5       1.00      1.00      1.00         2
           6       0.83      1.00      0.91         5
           7       1.00      0.67      0.80         3

    accuracy                           0.94        31
   macro avg       0.79      0.81      0.79        31
weighted avg       0.92      0.94      0.92        31



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
