In [1]:
import numpy as np

from dojo.linear import (
    LinearRegression,
    LogisticRegression,
)
from dojo.metrics.classification import accuracy_score
from dojo.split import cross_validate
from dojo.tree import ClassificationTree

***

# Linear Regression

In [2]:
# Create an unfitted linear regression model. With verbose=True the recorded
# output of fit() further down includes a "Fitting..." progress banner.
linear_reg = LinearRegression(verbose=True)

In [3]:
# Echo the model so its repr shows the initial state (intercept=0, empty coefs).
linear_reg

LinearRegression(
    intercept=0,
    coefs=[],
    verbose=True,
)

### Data

In [4]:
# Synthetic, noise-free regression problem: 100,000 samples with 100
# standard-normal features and a target that is an exact linear combination of
# those features. A seeded Generator makes the data identical on every
# Restart & Run All, unlike the unseeded global RNG.
rng = np.random.default_rng(seed=42)
X = rng.standard_normal((100_000, 100))

# Ground-truth coefficients drawn uniformly from [0, 1). They are kept in a
# variable so the fitted coefficients can be compared against them.
true_coefs = rng.random(100)
y = X @ true_coefs

### Fitting the model

In [5]:
# Fit on the full synthetic dataset. %time reports the CPU and wall-clock time
# of the call; fit() returns the model, so its fitted repr is displayed too.
%time linear_reg.fit(X, y)

-----------------------------------------
Fitting...
The model has been fitted successfully!
-----------------------------------------
CPU times: user 387 ms, sys: 98.1 ms, total: 485 ms
Wall time: 273 ms


LinearRegression(
    intercept=8.124937907801749e-10,
    coefs=[0.48811474 0.5585973  0.61258    0.6961532  0.20657249 0.81228274
 0.7664448  0.14069556 0.94138753 0.998522   0.0777411  0.52178556
 0.7376799  0.6412946  0.4367812  0.04775922 0.15701298 0.49065667
 0.4086668  0.00886894 0.8917983  0.07885423 0.79797304 0.5774516
 0.5180287  0.41912508 0.4140055  0.10689278 0.4020511  0.8432907
 0.9906553  0.2415499  0.34419456 0.67658323 0.6455074  0.00170051
 0.6354273  0.15019806 0.3696529  0.11998124 0.5615434  0.7987413
 0.5866404  0.09025629 0.1963129  0.988427   0.46538684 0.34982556
 0.05871565 0.59167993 0.6370515  0.16695961 0.9016945  0.97035044
 0.11353493 0.9726331  0.9488629  0.64547884 0.22426493 0.04358616
 0.72221667 0.99278724 0.74433357 0.5255904  0.2605811  0.33193058
 0.08361445 0.89047194 0.67052585 0.55504876 0.79679656 0.12961787
 0.34876794 0.9029016  0.21416764 0.289517   0.2566668  0.6209782
 0.28463542 0.870004   0.4237214  0.18922856 0.602431   0.50586885
 

### Evaluating the model

In [6]:
# Report the mean squared error on the same X, y the model was fitted on, so
# this is a training error rather than a held-out estimate.
linear_reg.evaluate(X, y)

Mean Squared Error: 2.517990841612872e-13


In [7]:
# Silence the fitting banner, then run cross-validation with cv=10.
# NOTE(review): this mutates linear_reg in place, so re-running the fit cell
# afterwards will no longer print its banner.
# NOTE(review): in the recorded output every entry of train_scores/test_scores
# is None while the MSE values are only printed. Presumably evaluate() prints
# its score instead of returning it; confirm in dojo.
linear_reg.verbose = False
cross_validate(linear_reg, X, y, cv=10)

Mean Squared Error: 2.725433829985635e-13
Mean Squared Error: 2.7158439941388226e-13
Mean Squared Error: 2.719855014730826e-13
Mean Squared Error: 2.7527333745780425e-13
Mean Squared Error: 2.721956179826678e-13
Mean Squared Error: 2.7392781376534004e-13
Mean Squared Error: 2.7283308527516665e-13
Mean Squared Error: 2.6801445507373956e-13
Mean Squared Error: 2.7247278116745535e-13
Mean Squared Error: 2.7221997989101966e-13
Mean Squared Error: 2.721341594481009e-13
Mean Squared Error: 2.7069275709834526e-13
Mean Squared Error: 2.7235517999393347e-13
Mean Squared Error: 2.7463029628194137e-13
Mean Squared Error: 2.732704230440912e-13
Mean Squared Error: 2.6570874389619803e-13
Mean Squared Error: 2.71863120273295e-13
Mean Squared Error: 2.7792956824868045e-13
Mean Squared Error: 2.7198657944895415e-13
Mean Squared Error: 2.7526363410856904e-13


{'train_scores': array([None, None, None, None, None, None, None, None, None, None],
       dtype=object),
 'test_scores': array([None, None, None, None, None, None, None, None, None, None],
       dtype=object)}

***

# Logistic Regression

In [8]:
# Create an unfitted logistic regression model. Only verbose is set here; the
# repr in the next cell shows the remaining settings (C=1.0, lr=0.01).
log_reg = LogisticRegression(verbose=True)

In [9]:
# Echo the model so its repr shows the initial state and hyper-parameters.
log_reg

LogisticRegression(
    intercept=0,
    coefs=[],
    C=1.0,
    lr=0.01,
    verbose=True,
)

### Data

In [10]:
# Truth table of a two-input logical AND gate: one row per input combination.
# Note that this rebinds X and y, replacing the regression data defined earlier.
X = np.array([
    [1, 1],
    [1, 0],
    [0, 1],
    [0, 0],
])

# The label is 1 only when both inputs are non-zero, computed column-wise.
y = np.logical_and(X[:, 0], X[:, 1]).astype(int)

### Fitting the model

In [11]:
# Fit on the four-row AND dataset, timing the call with %time. The recorded
# output logs the loss every 10th iteration (the model was built with
# verbose=True).
%time log_reg.fit(X, y)

--------------------------
10th iteration
Loss: 2.413937281697809e-05
--------------------------
20th iteration
Loss: 2.2960647674596935e-05
--------------------------
30th iteration
Loss: 2.1839480813912182e-05
--------------------------
40th iteration
Loss: 2.0773058258316546e-05
--------------------------
50th iteration
Loss: 1.9758713369344427e-05
--------------------------
60th iteration
Loss: 1.8793886828905038e-05
--------------------------
70th iteration
Loss: 1.7876183027953907e-05
--------------------------
80th iteration
Loss: 1.7003284573990918e-05
--------------------------
90th iteration
Loss: 1.6173012317710622e-05
--------------------------
100th iteration
Loss: 1.538327260230954e-05
--------------------------
110th iteration
Loss: 1.463210637620005e-05
--------------------------
120th iteration
Loss: 1.391761916191836e-05
--------------------------
130th iteration
Loss: 1.3238021983386083e-05
--------------------------
140th iteration
Loss: 1.2591605900058589e-05
-----

LogisticRegression(
    intercept=-0.002499999999999999,
    coefs=[0.00125287 0.00125287],
    C=1.0,
    lr=0.01,
    verbose=True,
)

### Evaluating the model

In [12]:
# Accuracy on the same four samples used for fitting (training accuracy).
log_reg.evaluate(X, y)

Accuracy score: 1.0


In [14]:
# Cross-validate a decision tree on the four-row AND dataset.
# The dataset has only four samples, so more than four folds cannot be formed;
# use one fold per sample (leave-one-out) by deriving the fold count from X.
# NOTE(review): this section is about logistic regression, yet the estimator
# here is a ClassificationTree rather than log_reg. Confirm that is intended.
cross_validate(ClassificationTree(), X, y, cv=len(X))

Accuracy score: 0.6666666666666666
Accuracy score: 1.0
Accuracy score: 1.0
Accuracy score: 0.0
Accuracy score: 1.0
Accuracy score: 0.0
Accuracy score: 1.0
Accuracy score: 0.0


{'train_scores': array([None, None, None, None], dtype=object),
 'test_scores': array([None, None, None, None], dtype=object)}