# Basic classification examples

* Using the ```test_then_train_evaluation``` and ```test_then_train_RIVER``` helper functions.
* These are just examples of how to run several algorithms. They are somewhat repetitive, but may be useful as a reference.
* We show how to use AdaptiveRandomForest through its wrapper (```from ensembles import AdaptiveRandomForest```) and how to create other MOA learners and wrap them with ```MOAClassifier``` (```from MOALearners import MOAClassifier```).
* Examples from MOA and River use the same CSV file.

**Notebook last update: 27/09/2023**

In [1]:
# Start the JVM first: the MOA examples further down import Java classes
# (e.g. `moa.classifiers.*`), which presumably only resolve once the JVM is
# running with the MOA jar on its class path -- TODO confirm in prepare_jpype.
from prepare_jpype import start_jpype
start_jpype()

# Project helpers (named for test-then-train evaluation):
# `test_then_train_evaluation` is called with the MOA learners and
# `test_then_train_RIVER` with the River models in this notebook.
from evaluation import test_then_train_evaluation
from benchmarking import test_then_train_RIVER

MOA jar path location (config.ini): /Users/gomeshe/Dropbox/ciencia_computacao/dev/Using-MOA-API/moa.jar
JVM Location (system): 
/Users/gomeshe/Library/Java/JavaVirtualMachines/openjdk-20.0.1/Contents/Home
Sucessfully started the JVM and added MOA jar to the class path


In [2]:
## Datasets paths
import os

# Location of the electricity_tiny CSV shared by the River and MOA examples.
# Set the ELEC_TINY_CSV environment variable to point at your own copy of the
# file; when it is unset, the original local path is used, so existing runs
# behave exactly as before.
csv_elec_tiny_path = os.environ.get(
    'ELEC_TINY_CSV',
    '/Users/gomeshe/Desktop/data/electricity_tiny.csv',
)

## Examples using River

In [3]:
import pandas as pd

# Load the CSV once and convert it to a NumPy array; every River example
# in this notebook is evaluated on this same array.
river_elec_tiny = (
    pd.read_csv(csv_elec_tiny_path)
    .to_numpy()
)

In [4]:
from river.forest import ARFClassifier

# River's Adaptive Random Forest with 5 ensemble members (n_models) and
# max_features set to 0.60.
river_arf5 = ARFClassifier(n_models=5, max_features=0.60)

# Evaluate on the shared electricity array with the test-then-train helper.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_arf5,
    dataset=river_elec_tiny,
)
# Report the model followed by acc, wallclock and cpu_time, four decimals each.
print(
    f"{river_arf5}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}"
)

ARFClassifier, 0.8410, 0.3540, 0.3544


In [5]:
from river.tree import HoeffdingTreeClassifier

# River Hoeffding tree built with its constructor defaults.
river_ht = HoeffdingTreeClassifier()

# Evaluate on the shared electricity array with the test-then-train helper.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_ht,
    dataset=river_elec_tiny,
)
# Report the model followed by acc, wallclock and cpu_time, four decimals each.
print(
    f"{river_ht}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}"
)

HoeffdingTreeClassifier, 0.8230, 0.0397, 0.0404


In [6]:
from river.neighbors import KNNClassifier

# River k-nearest-neighbours classifier built with its constructor defaults.
river_knn = KNNClassifier()

# Evaluate on the shared electricity array with the test-then-train helper.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_knn,
    dataset=river_elec_tiny,
)
# Report the model followed by acc, wallclock and cpu_time, four decimals each.
print(
    f"{river_knn}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}"
)

KNNClassifier, 0.7560, 1.9672, 1.9677


In [7]:
from river.naive_bayes import GaussianNB

# River Gaussian naive Bayes built with its constructor defaults.
river_gnb = GaussianNB()

# Evaluate on the shared electricity array with the test-then-train helper.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_gnb,
    dataset=river_elec_tiny,
)
# Report the model followed by acc, wallclock and cpu_time, four decimals each.
print(
    f"{river_gnb}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}"
)

GaussianNB, 0.8490, 0.0322, 0.0359


In [8]:
from river.ensemble.streaming_random_patches import SRPClassifier

# River Streaming Random Patches with 5 ensemble members (n_models) and
# subspace_size set to 0.6.
river_srp = SRPClassifier(n_models=5, subspace_size=0.6)

# Evaluate on the shared electricity array with the test-then-train helper.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_srp,
    dataset=river_elec_tiny,
)
# Report the model followed by acc, wallclock and cpu_time, four decimals each.
print(
    f"{river_srp}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}"
)

SRPClassifier(HoeffdingTreeClassifier), 0.8900, 0.7733, 0.7740


In [9]:
from river.tree import ExtremelyFastDecisionTreeClassifier

# River Extremely Fast Decision Tree built with its constructor defaults.
river_EFDT = ExtremelyFastDecisionTreeClassifier()

# Evaluate on the shared electricity array with the test-then-train helper.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_EFDT,
    dataset=river_elec_tiny,
)
# Report the model followed by acc, wallclock and cpu_time, four decimals each.
print(
    f"{river_EFDT}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}"
)

ExtremelyFastDecisionTreeClassifier, 0.8230, 0.2309, 0.2314


## Examples using MOA

In [10]:
from stream import stream_from_file

# Build the stream the MOA learners are evaluated on, from the same CSV file
# the River examples read. The loader's parameter name indicates that ARFF
# files are accepted as well.
# class_index=-1 presumably selects the last column as the class label -- TODO confirm.
elec_stream = stream_from_file(
    class_index=-1,
    path_to_csv_or_arff=csv_elec_tiny_path,
)

In [11]:
from ensembles import AdaptiveRandomForest

# Adaptive Random Forest through the project's wrapper class. The variable is
# named after its ensemble size (5 members) so the name agrees with
# `ensemble_size` and with the River counterpart `river_arf5`.
arf5 = AdaptiveRandomForest(schema=elec_stream.get_schema(), ensemble_size=5)
results = test_then_train_evaluation(stream=elec_stream, learner=arf5)

# `results` is indexed by 'wallclock', 'cpu_time' and 'cumulative' (the metrics).
print(f"{arf5} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

AdaptiveRandomForest wallclock: 0.46613097190856934 cpu_time: 0.9091550000000002 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 89.45, 'Kappa Statistic (percent)': 78.10294728102947, 'Kappa Temporal Statistic (percent)': 24.9110320284697, 'Kappa M Statistic (percent)': 73.2233502538071}


In [12]:
from moa.classifiers.trees import HoeffdingTree
from MOALearners import MOAClassifier

# Wrap a default-configured MOA HoeffdingTree in the project's MOAClassifier.
ht_moa = MOAClassifier(
    moa_learner=HoeffdingTree(),
)

# Evaluate on the shared MOA stream with the test-then-train helper.
results = test_then_train_evaluation(
    learner=ht_moa,
    stream=elec_stream,
)
# `results` is indexed by 'wallclock', 'cpu_time' and 'cumulative' (the metrics).
print(
    f"{ht_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}"
)

HoeffdingTree wallclock: 0.04388594627380371 cpu_time: 0.08845100000000095 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 82.65, 'Kappa Statistic (percent)': 64.11315010011087, 'Kappa Temporal Statistic (percent)': -23.487544483985793, 'Kappa M Statistic (percent)': 55.964467005076145}


In [13]:
from moa.classifiers.lazy import kNN

# Wrap a default-configured MOA kNN learner in the project's MOAClassifier.
knn_moa = MOAClassifier(
    moa_learner=kNN(),
)

# Evaluate on the shared MOA stream with the test-then-train helper.
results = test_then_train_evaluation(
    learner=knn_moa,
    stream=elec_stream,
)
# `results` is indexed by 'wallclock', 'cpu_time' and 'cumulative' (the metrics).
print(
    f"{knn_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}"
)

kNN wallclock: 0.16757774353027344 cpu_time: 0.3459479999999999 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 80.55, 'Kappa Statistic (percent)': 59.525712097754244, 'Kappa Temporal Statistic (percent)': -38.43416370106766, 'Kappa M Statistic (percent)': 50.63451776649747}


In [14]:
from moa.classifiers.trees import EFDT

# Wrap a default-configured MOA EFDT learner in the project's MOAClassifier.
EFDT_moa = MOAClassifier(
    moa_learner=EFDT(),
)

# Evaluate on the shared MOA stream with the test-then-train helper.
results = test_then_train_evaluation(
    learner=EFDT_moa,
    stream=elec_stream,
)
# `results` is indexed by 'wallclock', 'cpu_time' and 'cumulative' (the metrics).
print(
    f"{EFDT_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}"
)

EFDT wallclock: 0.045812129974365234 cpu_time: 0.09315900000000177 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 82.69999999999999, 'Kappa Statistic (percent)': 64.22426628726465, 'Kappa Temporal Statistic (percent)': -23.131672597864835, 'Kappa M Statistic (percent)': 56.09137055837563}


In [15]:
from moa.classifiers.bayes import NaiveBayes

# Wrap a default-configured MOA NaiveBayes learner in the project's MOAClassifier.
NaiveBayes_moa = MOAClassifier(
    moa_learner=NaiveBayes(),
)

# Evaluate on the shared MOA stream with the test-then-train helper.
results = test_then_train_evaluation(
    learner=NaiveBayes_moa,
    stream=elec_stream,
)
# `results` is indexed by 'wallclock', 'cpu_time' and 'cumulative' (the metrics).
print(
    f"{NaiveBayes_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}"
)

NaiveBayes wallclock: 0.03906869888305664 cpu_time: 0.07884200000000163 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 84.0, 'Kappa Statistic (percent)': 66.56748353448563, 'Kappa Temporal Statistic (percent)': -13.879003558718917, 'Kappa M Statistic (percent)': 59.39086294416243}


In [16]:
from moa.classifiers.meta import StreamingRandomPatches

# Wrap MOA's StreamingRandomPatches in the project's MOAClassifier and configure
# it through a MOA command-line option string.
# NOTE(review): "-s 5" presumably sets the ensemble size to 5 (matching the River
# SRP example); confirm the meaning of "-u" against MOA's StreamingRandomPatches options.
SRP_moa = MOAClassifier(
    moa_learner=StreamingRandomPatches(),
    CLI="-s 5 -u",
)

# Evaluate on the shared MOA stream with the test-then-train helper.
results = test_then_train_evaluation(
    learner=SRP_moa,
    stream=elec_stream,
)
# `results` is indexed by 'wallclock', 'cpu_time' and 'cumulative' (the metrics).
print(
    f"{SRP_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}"
)

StreamingRandomPatches wallclock: 0.2898409366607666 cpu_time: 0.5924649999999989 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 88.75, 'Kappa Statistic (percent)': 76.35402128768642, 'Kappa Temporal Statistic (percent)': 19.928825622775744, 'Kappa M Statistic (percent)': 71.4467005076142}
