# Basic classification examples

* Using the ```test_then_train_evaluation``` and ```test_then_train_RIVER``` helper functions.
* These are just examples of how to run several algorithms. They are somewhat repetitive, but having a worked example for each algorithm may be useful.
* We show how to use AdaptiveRandomForest through its wrapper (```from ensembles import AdaptiveRandomForest```) and how to create MOA learners and wrap them with MOAClassifier (```from MOALearners import MOAClassifier```).
* Examples from MOA and River use the same CSV file.

**Notebook last updated: 20/10/2023**

In [1]:
# Start the JVM first; the log printed by this cell reports the MOA jar being added to the class path.
# NOTE(review): presumably this has to run before the `moa.*` imports further down — confirm.
from prepare_jpype import start_jpype
start_jpype()

# Evaluation helpers used in this notebook: the first is called with the MOA-side stream and learners,
# the second with the River models.
from evaluation import test_then_train_evaluation
from benchmarking import test_then_train_RIVER

MOA jar path location (config.ini): /Users/gomeshe/Dropbox/ciencia_computacao/dev/main-projects/MOABridge/jar/moa.jar
JVM Location (system): 
/Users/gomeshe/Library/Java/JavaVirtualMachines/openjdk-20.0.1/Contents/Home
JVM args: ['-Xmx8g', '-Xss10M']
Sucessfully started the JVM and added MOA jar to the class path


In [2]:
## Datasets paths
# Single CSV file read by both the River examples and the MOA examples in this notebook.
csv_elec_tiny_path = './data/electricity_tiny.csv'

# Alternative file (the name suggests a nominal class attribute); uncomment to switch datasets.
# csv_elec_tiny_path = './data/electricity_tiny_nominal_class.csv'

## Examples using River

In [3]:
import pandas as pd
# Read the CSV once and convert it to a NumPy array; every River example below reuses this array.
river_elec_tiny = pd.read_csv(csv_elec_tiny_path).to_numpy()

In [4]:
from river.forest import ARFClassifier

# River ARF classifier configured with 5 models and max_features=0.60.
river_arf5 = ARFClassifier(max_features=0.60, n_models=5)

# Evaluate on the shared array; the helper's four return values are unpacked here
# (`df` is kept but not used in this cell).
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_arf5,
    dataset=river_elec_tiny,
)
# Output columns: model, accuracy, wallclock, cpu_time.
print(f"{river_arf5}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

ARFClassifier, 0.7241, 0.0097, 0.0100


In [5]:
from river.tree import HoeffdingTreeClassifier

# River Hoeffding tree built with its default constructor arguments.
river_ht = HoeffdingTreeClassifier()

# Same evaluation call as the other River cells; `df` is returned but not used here.
acc, wallclock, cpu_time, df = test_then_train_RIVER(dataset=river_elec_tiny, model=river_ht)
# Output columns: model, accuracy, wallclock, cpu_time (4 decimal places each).
print(f"{river_ht}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

HoeffdingTreeClassifier, 0.7931, 0.0022, 0.0029


In [6]:
from river.neighbors import KNNClassifier

# River k-nearest-neighbours classifier built with its default constructor arguments.
river_knn = KNNClassifier()

# Same evaluation call as the other River cells; `df` is returned but not used here.
acc, wallclock, cpu_time, df = test_then_train_RIVER(dataset=river_elec_tiny, model=river_knn)
# Output columns: model, accuracy, wallclock, cpu_time (4 decimal places each).
print(f"{river_knn}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

KNNClassifier, 0.6897, 0.0011, 0.0011


In [7]:
from river.naive_bayes import GaussianNB

# River Gaussian naive Bayes classifier built with its default constructor arguments.
river_gnb = GaussianNB()

# Same evaluation call as the other River cells; `df` is returned but not used here.
acc, wallclock, cpu_time, df = test_then_train_RIVER(dataset=river_elec_tiny, model=river_gnb)
# Output columns: model, accuracy, wallclock, cpu_time (4 decimal places each).
print(f"{river_gnb}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

GaussianNB, 0.7931, 0.0019, 0.0026


In [8]:
from river.ensemble.streaming_random_patches import SRPClassifier

# River SRP classifier configured with 5 models and subspace_size=0.6.
river_srp = SRPClassifier(subspace_size=0.6, n_models=5)

# Evaluate on the shared array; the helper's four return values are unpacked here
# (`df` is kept but not used in this cell).
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_srp,
    dataset=river_elec_tiny,
)
# Output columns: model, accuracy, wallclock, cpu_time.
print(f"{river_srp}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

SRPClassifier(HoeffdingTreeClassifier), 0.8276, 0.0232, 0.0237


In [9]:
from river.tree import ExtremelyFastDecisionTreeClassifier

# River Extremely Fast Decision Tree built with its default constructor arguments.
river_EFDT = ExtremelyFastDecisionTreeClassifier()

# Same evaluation call as the other River cells; `df` is returned but not used here.
acc, wallclock, cpu_time, df = test_then_train_RIVER(dataset=river_elec_tiny, model=river_EFDT)
# Output columns: model, accuracy, wallclock, cpu_time (4 decimal places each).
print(f"{river_EFDT}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

ExtremelyFastDecisionTreeClassifier, 0.7931, 0.0021, 0.0027


## Examples using MOA

In [10]:
from stream import stream_from_file
# test_then_train_evaluation is already imported in the first cell, so it is not imported again here.

# Build the stream used by all MOA examples from the same CSV the River cells read.
# NOTE(review): class_index=-1 presumably selects the last column as the class — confirm against stream_from_file.
elec_stream = stream_from_file(path_to_csv_or_arff=csv_elec_tiny_path, class_index=-1)

  return NumpyStream(X=X, y=y.astype(int), dataset_name="Elec", feature_names=header[:-1], target_name=header[-1], enforce_regression=enforce_regression)


In [11]:
# Show the class labels recorded in the stream's schema.
# NOTE(review): the recorded output lists a single label (['0']), and every MOA run in this notebook
# reports 100% accuracy with NaN kappa statistics — this looks like the class attribute is not being
# parsed as intended; confirm how stream_from_file infers the class values for this CSV.
elec_stream.get_schema().label_values

['0']

In [12]:
from ensembles import AdaptiveRandomForest

# Wrapper around MOA's Adaptive Random Forest, built from the stream's schema.
# NOTE(review): the variable is named `arf10`, but ensemble_size=5 is what is actually passed
# (matching n_models=5 in the River ARF example) — consider renaming it to avoid confusion.
arf10 = AdaptiveRandomForest(schema=elec_stream.get_schema(), ensemble_size=5)
# The result is indexed by key; 'wallclock', 'cpu_time' and 'cumulative' are the entries read below.
results = test_then_train_evaluation(stream=elec_stream, learner=arf10)

print(f"{arf10} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

AdaptiveRandomForest wallclock: 0.04669809341430664 cpu_time: 0.13924000000000003 metrics: {'classified instances': 29.0, 'classifications correct (percent)': 100.0, 'Kappa Statistic (percent)': nan, 'Kappa Temporal Statistic (percent)': nan, 'Kappa M Statistic (percent)': nan}


In [13]:
from moa.classifiers.trees import HoeffdingTree
# MOAClassifier is imported here and reused by the MOA cells that follow.
from MOALearners import MOAClassifier

# Wrap a MOA HoeffdingTree (default options) so it can be passed to the evaluation helper.
ht_moa = MOAClassifier(moa_learner=HoeffdingTree())

# Evaluate on the shared stream; 'wallclock', 'cpu_time' and 'cumulative' are the entries read below.
results = test_then_train_evaluation(stream=elec_stream, learner=ht_moa)
print(f"{ht_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

HoeffdingTree wallclock: 0.0033457279205322266 cpu_time: 0.008777000000000257 metrics: {'classified instances': 29.0, 'classifications correct (percent)': 100.0, 'Kappa Statistic (percent)': nan, 'Kappa Temporal Statistic (percent)': nan, 'Kappa M Statistic (percent)': nan}


In [14]:
from moa.classifiers.lazy import kNN

# Wrap a MOA kNN learner (default options); MOAClassifier comes from an earlier cell.
knn_moa = MOAClassifier(moa_learner=kNN())

# Evaluate on the shared stream; 'wallclock', 'cpu_time' and 'cumulative' are the entries read below.
results = test_then_train_evaluation(stream=elec_stream, learner=knn_moa)
print(f"{knn_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

kNN wallclock: 0.004328250885009766 cpu_time: 0.009232000000000795 metrics: {'classified instances': 29.0, 'classifications correct (percent)': 100.0, 'Kappa Statistic (percent)': nan, 'Kappa Temporal Statistic (percent)': nan, 'Kappa M Statistic (percent)': nan}


In [15]:
from moa.classifiers.trees import EFDT

# Wrap a MOA EFDT learner (default options); MOAClassifier comes from an earlier cell.
EFDT_moa = MOAClassifier(moa_learner=EFDT())

# Evaluate on the shared stream; 'wallclock', 'cpu_time' and 'cumulative' are the entries read below.
results = test_then_train_evaluation(stream=elec_stream, learner=EFDT_moa)
print(f"{EFDT_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

EFDT wallclock: 0.0025169849395751953 cpu_time: 0.004997999999999614 metrics: {'classified instances': 29.0, 'classifications correct (percent)': 100.0, 'Kappa Statistic (percent)': nan, 'Kappa Temporal Statistic (percent)': nan, 'Kappa M Statistic (percent)': nan}


In [16]:
from moa.classifiers.bayes import NaiveBayes

# Wrap a MOA NaiveBayes learner (default options); MOAClassifier comes from an earlier cell.
NaiveBayes_moa = MOAClassifier(moa_learner=NaiveBayes())

# Evaluate on the shared stream; 'wallclock', 'cpu_time' and 'cumulative' are the entries read below.
results = test_then_train_evaluation(stream=elec_stream, learner=NaiveBayes_moa)
print(f"{NaiveBayes_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

NaiveBayes wallclock: 0.0011181831359863281 cpu_time: 0.0023569999999999425 metrics: {'classified instances': 29.0, 'classifications correct (percent)': 100.0, 'Kappa Statistic (percent)': nan, 'Kappa Temporal Statistic (percent)': nan, 'Kappa M Statistic (percent)': nan}


In [17]:
from moa.classifiers.meta import StreamingRandomPatches

# Wrap MOA's StreamingRandomPatches, configured through a MOA command-line option string.
# NOTE(review): "-s 5" presumably sets the ensemble size to 5 (mirroring n_models=5 in the River SRP
# example); check MOA's StreamingRandomPatches options for what "-u" toggles.
SRP_moa = MOAClassifier(moa_learner=StreamingRandomPatches(), CLI="-s 5 -u")

# Evaluate on the shared stream; 'wallclock', 'cpu_time' and 'cumulative' are the entries read below.
results = test_then_train_evaluation(stream=elec_stream, learner=SRP_moa)
print(f"{SRP_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

StreamingRandomPatches wallclock: 0.010870933532714844 cpu_time: 0.03796500000000069 metrics: {'classified instances': 29.0, 'classifications correct (percent)': 100.0, 'Kappa Statistic (percent)': nan, 'Kappa Temporal Statistic (percent)': nan, 'Kappa M Statistic (percent)': nan}
