### Install package(s)

In [1]:
!pip install olpy

Collecting olpy
  Downloading olpy-1.0.0.dev3-py3-none-any.whl (710 kB)
[K     |████████████████████████████████| 710 kB 408 kB/s eta 0:00:01
[?25hCollecting numpy>=1.20.1
  Downloading numpy-1.20.2-cp38-cp38-manylinux2010_x86_64.whl (15.4 MB)
[K     |████████████████████████████████| 15.4 MB 449 kB/s eta 0:00:01
[?25hCollecting pandas>=1.1.3
  Downloading pandas-1.2.4-cp38-cp38-manylinux1_x86_64.whl (9.7 MB)
[K     |████████████████████████████████| 9.7 MB 2.3 MB/s eta 0:00:01
[?25hCollecting scikit-learn>=0.24.1
  Downloading scikit_learn-0.24.1-cp38-cp38-manylinux2010_x86_64.whl (24.9 MB)
[K     |████████████████████████████████| 24.9 MB 1.1 MB/s eta 0:00:01
[?25hCollecting pytz>=2017.3
  Using cached pytz-2021.1-py2.py3-none-any.whl (510 kB)
Collecting python-dateutil>=2.7.3
  Using cached python_dateutil-2.8.1-py2.py3-none-any.whl (227 kB)
Collecting six>=1.5
  Downloading six-1.15.0-py2.py3-none-any.whl (10 kB)
Collecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-2

### Import the packages needed

In [91]:
from olpy.classifiers import *
from sklearn.datasets import make_hastie_10_2
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

### Load the dummy data

In [26]:
X, y = make_hastie_10_2(n_samples=200, random_state=32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

### Getting preliminary results

In [92]:
models = [
    LogisticRegression(random_state=32),
    LinearSVC(random_state=32),
    CW(random_state=32),
    SCW(random_state=32),
    ALMA(random_state=32),
    IELLIP(random_state=32),
    aROMMA(random_state=32),
    OGD(random_state=32),
    PA(random_state=32),
    PA_I(random_state=32),
    PA_II(random_state=32),
    Perceptron(random_state=32),
    CW(random_state=32),
    AROW(random_state=32),
    SCW(random_state=32),
    SCW2(random_state=32),
    ROMMA(random_state=32),
]

In [93]:
for model in models:
    model = model.fit(X_train, y_train)
    print(type(model).__name__, model.score(X_test, y_test))

LogisticRegression 0.45
LinearSVC 0.45
CW 0.45
SCW 0.425
ALMA 0.45
IELLIP 0.425
aROMMA 0.425
OGD 0.425
PA 0.325
PA_I 0.325
PA_II 0.325
Perceptron 0.425
CW 0.45
AROW 0.4
SCW 0.425
SCW2 0.425
ROMMA 0.45


### Trying to improve the performance for the top 3 models

In [94]:
from sklearn.model_selection import GridSearchCV

# The models are AROW, PA_I and PA_II
params_grid_pa = {
    'num_iterations': range(10),
    'C': [pow(10, i) for i in range(-3, 4)],
}
params_grid_arow = {
    'num_iterations': range(10),
    'r': [pow(10, i) for i in range(-3, 4)],
}

params_grid_svm = {
    'C': [pow(10, i) for i in range(-3, 4)],
    'loss': ['squared_hinge', 'hinge'],
}

m_ref = GridSearchCV(LinearSVC(random_state=32, max_iter=100000), params_grid_svm)
m_1 = GridSearchCV(PA_I(random_state=32), params_grid_pa)
m_2 = GridSearchCV(PA_II(random_state=32), params_grid_pa)
m_3 = GridSearchCV(AROW(random_state=32), params_grid_arow)

In [95]:
# Fit the hyper-parameter tuning
m_ref.fit(X_train, y_train)
m_1.fit(X_train, y_train)
m_2.fit(X_train, y_train)
m_3.fit(X_train, y_train)



GridSearchCV(estimator=<olpy.classifiers.arow.AROW object at 0x7f4e9eecb280>,
             param_grid={'num_iterations': range(0, 10),
                         'r': [0.001, 0.01, 0.1, 1, 10, 100, 1000]})

In [96]:
print("SVM:", m_ref.score(X_test, y_test))
print("Passive Agressive I:", m_1.score(X_test, y_test))
print("Passive Agressive II:", m_2.score(X_test, y_test))
print("AROW:", m_3.score(X_test, y_test))

SVM: 0.45
Passive Agressive I: 0.6
Passive Agressive II: 0.475
AROW: 0.625


In [97]:
print(m_1.best_params_)
print(m_2.best_params_)
print(m_3.best_params_)

{'C': 0.001, 'num_iterations': 8}
{'C': 10, 'num_iterations': 8}
{'num_iterations': 0, 'r': 0.001}


In [98]:
# We can create new data and learn from it
X_new, y_new = make_hastie_10_2(n_samples=100, random_state=32)

m1 = m_1.best_estimator_
m2 = m_2.best_estimator_
m3 = m_3.best_estimator_

m1.partial_fit(X_new, y_new)
m2.partial_fit(X_new, y_new)
m3.partial_fit(X_new, y_new)

<olpy.classifiers.arow.AROW at 0x7f4e9f6c7910>

> It is far from being perfect

In [99]:
print("SVM:", m_ref.score(X_test, y_test))
print("Passive Agressive I:", m1.score(X_test, y_test))
print("Passive Agressive II:", m2.score(X_test, y_test))
print("AROW:", m3.score(X_test, y_test))

SVM: 0.45
Passive Agressive I: 0.15
Passive Agressive II: 0.15
AROW: 0.175
