In [1]:
# Import the modules needed to reach code that lives in another folder of the project.
# NOTE: add_syspath() must run before any project-local import further down.
import sys
import os
import mynbconfig
mynbconfig.add_syspath()    # Add the project path to Python's sys.path so scripts in other folders can be imported

import numpy as np
np.random.seed(2042)        # Seed NumPy's global random generator so repeated runs draw the same numbers

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Global figure styling: font sizes for axis labels and for the tick labels
# on both axes, applied in a single rcParams update.
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

In [2]:
from just4funml.algorithms.supervised import LogisticRegression
import just4funml.utils.preprocessing as preprocessing

In [3]:
# Load the iris dataset that ships with scikit-learn. The returned object is
# indexed by the 'data' and 'target' keys in the next cell.
from sklearn import datasets
iris = datasets.load_iris()

In [4]:
# Keep only feature columns 2 and 3 (petal length and petal width in
# scikit-learn's iris feature order) as the model inputs.
X = iris['data'][:, [2, 3]]
# Turn the task into a binary one: True for class 0, False for every other class.
y = (iris['target'] == 0)

In [5]:
# Hold out 20% of the samples for validation and 20% for testing; the outputs
# further down show the resulting 90 / 30 / 30 train / validation / test sizes.
(X_train, y_train,
 X_valid, y_valid,
 X_test, y_test) = preprocessing.train_validation_test_split(
    X, y, validation_ratio=0.2, test_ratio=0.2)

In [31]:
# Reference model: scikit-learn's logistic regression fitted on the training split,
# used as a baseline to compare the project's own implementation against.
#
# The class is imported under an alias on purpose. The notebook already imports a
# `LogisticRegression` from just4funml, and importing scikit-learn's class under the
# same name would silently rebind it, so any later cell that instantiates
# `LogisticRegression(...)` on a fresh top-to-bottom run would get the scikit-learn
# model instead of the project one.
from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression

logistic_reg = SklearnLogisticRegression()
logistic_reg.fit(X_train, y_train)

LogisticRegression()

In [32]:
# Fitted feature weights of the scikit-learn model (one row, one weight per input column).
logistic_reg.coef_

array([[-2.25836296, -0.93073208]])

In [33]:
# Fitted bias (intercept) term of the scikit-learn model.
logistic_reg.intercept_

array([6.71121169])

In [34]:
# Predicted boolean labels of the scikit-learn model for the validation split.
logistic_reg.predict(X_valid)

array([ True, False, False, False, False, False, False,  True, False,
       False, False,  True,  True,  True, False,  True, False, False,
        True, False,  True, False,  True,  True,  True, False, False,
        True, False, False])

In [36]:
# Per-sample class probabilities on the validation split. Judging by the predicted
# labels shown above, the second column is the probability of the True class.
logistic_reg.predict_proba(X_valid)

array([[3.34604805e-02, 9.66539520e-01],
       [9.99658981e-01, 3.41018946e-04],
       [9.99842167e-01, 1.57833262e-04],
       [9.77202169e-01, 2.27978315e-02],
       [9.94546987e-01, 5.45301258e-03],
       [9.96830051e-01, 3.16994861e-03],
       [9.90635060e-01, 9.36493964e-03],
       [5.15789679e-02, 9.48421032e-01],
       [9.99233539e-01, 7.66461397e-04],
       [9.99854209e-01, 1.45790616e-04],
       [8.93194100e-01, 1.06805900e-01],
       [7.58790855e-02, 9.24120914e-01],
       [5.15789679e-02, 9.48421032e-01],
       [2.68782073e-02, 9.73121793e-01],
       [9.95029205e-01, 4.97079487e-03],
       [3.66048144e-02, 9.63395186e-01],
       [9.32695185e-01, 6.73048153e-02],
       [9.97600481e-01, 2.39951908e-03],
       [5.15789679e-02, 9.48421032e-01],
       [9.67686678e-01, 3.23133223e-02],
       [6.14831230e-02, 9.38516877e-01],
       [9.93174785e-01, 6.82521499e-03],
       [4.54577914e-02, 9.54542209e-01],
       [4.15858177e-02, 9.58414182e-01],
       [5.157896

In [39]:
# Show the boolean training labels as 0.0 / 1.0 floats. The converted array is
# only displayed; `y_train` itself is not reassigned.
y_train.astype(float)

array([0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0.,
       0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1.,
       0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0.,
       0., 1., 0., 0., 1.])

In [6]:
# Fit the project's own logistic regression on the training split.
# This cell expects `LogisticRegression` to be the just4funml class imported near
# the top of the notebook (the `theta` attribute read in the next cell belongs to it).
my_logistic_reg = LogisticRegression(max_iter=1000)
my_logistic_reg.fit(X_train, y_train)

In [7]:
# Learned parameter vector of the project model, displayed as a 3x1 column.
# Presumably the bias followed by the two feature weights; confirm against the implementation.
my_logistic_reg.theta

array([[ 4.58455379],
       [-1.43957649],
       [-1.61915358]])

In [8]:
# Predicted boolean labels of the project model for every training sample.
my_logistic_reg.predict(X_train)

array([False,  True, False, False,  True,  True,  True, False, False,
       False, False, False, False,  True, False, False,  True, False,
        True,  True, False,  True, False, False,  True, False, False,
        True,  True,  True, False,  True, False, False,  True, False,
       False,  True, False, False, False, False, False, False,  True,
       False, False, False, False, False,  True, False, False, False,
       False,  True,  True, False, False,  True, False, False, False,
        True, False, False,  True, False, False, False, False,  True,
       False, False, False,  True, False, False, False, False, False,
       False, False,  True,  True, False, False, False, False, False])

In [11]:
# Training accuracy: fraction of training samples whose prediction matches the label.
(my_logistic_reg.predict(X_train) == y_train).mean()

1.0

In [12]:
# Validation accuracy: fraction of validation samples whose prediction matches the label.
(my_logistic_reg.predict(X_valid) == y_valid).mean()

1.0

In [13]:
# Test accuracy: fraction of held-out test samples whose prediction matches the label.
(my_logistic_reg.predict(X_test) == y_test).mean()

1.0

In [14]:
# Predicted labels on the test split, shown for a side-by-side look against `y_test`.
my_logistic_reg.predict(X_test)

array([ True, False, False,  True,  True, False,  True, False, False,
       False,  True, False,  True, False,  True,  True, False, False,
       False,  True, False,  True, False, False, False,  True, False,
       False,  True,  True])

In [15]:
# Ground-truth test labels, for visual comparison with the test-split predictions.
y_test

array([ True, False, False,  True,  True, False,  True, False, False,
       False,  True, False,  True, False,  True,  True, False, False,
       False,  True, False,  True, False, False, False,  True, False,
       False,  True,  True])