# Basic classification examples

* Using the ```test_then_train_evaluation``` and ```test_then_train_RIVER``` helper functions.
* These are just examples of how to run several algorithms. The cells are repetitive, but they are meant to serve as ready-to-copy templates.
* We show how to use AdaptiveRandomForest through its wrapper (```from capymoa.learner.classifier import AdaptiveRandomForest```) and how to create MOA learners and wrap them using a MOAClassifier (```from capymoa.learner import MOAClassifier```).
* Examples from MOA and River use the same CSV file.

**Notebook last update: 08/12/2023**

In [1]:
from capymoa.evaluation import test_then_train_evaluation
from benchmarking import test_then_train_RIVER

capymoa_root: /home/antonlee/github.com/tachyonicClock/MOABridge/src/capymoa
MOA jar path location (config.ini): jar/moa.jar
JVM Location (system): 
JAVA_HOME: /usr/lib/jvm/java-17-openjdk
JVM args: ['-Xmx8g', '-Xss10M']
Sucessfully started the JVM and added MOA jar to the class path


In [2]:
# Dataset location shared by the River and MOA examples below.
csv_elec_tiny_path = "../data/electricity_tiny.csv"

# Alternative file (nominal-class variant, judging by its name). Note that this
# path starts with './data' rather than '../data' — adjust it before enabling.
# csv_elec_tiny_path = './data/electricity_tiny_nominal_class.csv'

## Examples using River

In [3]:
import pandas as pd

# Load the CSV once and convert it to a NumPy array. Every River example below
# consumes this array; the MOA examples build their own stream from the same file.
river_elec_tiny = (
    pd.read_csv(csv_elec_tiny_path)
    .to_numpy()
)

In [4]:
from river.forest import ARFClassifier

# River Adaptive Random Forest with 5 ensemble members.
river_arf5 = ARFClassifier(n_models=5, max_features=0.60)

# Run the test-then-train helper on the shared array. The four returned values
# are unpacked as acc, wallclock, cpu_time and df; df is not used in this cell.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_arf5,
    dataset=river_elec_tiny,
)
print(f"{river_arf5}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

ARFClassifier, 0.8900, 0.6701, 0.6694


In [5]:
from river.tree import HoeffdingTreeClassifier

# River Hoeffding tree with its default hyper-parameters.
river_ht = HoeffdingTreeClassifier()

# Run the test-then-train helper on the shared array; df is not used in this cell.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_ht,
    dataset=river_elec_tiny,
)
print(f"{river_ht}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

HoeffdingTreeClassifier, 0.8230, 0.0781, 0.0781


In [6]:
from river.neighbors import KNNClassifier

# River k-nearest-neighbours classifier with its default hyper-parameters.
river_knn = KNNClassifier()

# Run the test-then-train helper on the shared array; df is not used in this cell.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_knn,
    dataset=river_elec_tiny,
)
print(f"{river_knn}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

KNNClassifier, 0.7560, 2.8551, 2.8530


In [7]:
from river.naive_bayes import GaussianNB

# River Gaussian naive Bayes with its default hyper-parameters.
river_gnb = GaussianNB()

# Run the test-then-train helper on the shared array; df is not used in this cell.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_gnb,
    dataset=river_elec_tiny,
)
print(f"{river_gnb}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

GaussianNB, 0.8490, 0.0612, 0.0613


In [8]:
from river.ensemble.streaming_random_patches import SRPClassifier

# River Streaming Random Patches with 5 ensemble members.
river_srp = SRPClassifier(n_models=5, subspace_size=0.6)

# Run the test-then-train helper on the shared array; df is not used in this cell.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_srp,
    dataset=river_elec_tiny,
)
print(f"{river_srp}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

SRPClassifier(HoeffdingTreeClassifier), 0.8950, 1.6984, 1.6968


In [9]:
from river.tree import ExtremelyFastDecisionTreeClassifier

# River Extremely Fast Decision Tree with its default hyper-parameters.
river_EFDT = ExtremelyFastDecisionTreeClassifier()

# Run the test-then-train helper on the shared array; df is not used in this cell.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_EFDT,
    dataset=river_elec_tiny,
)
print(f"{river_EFDT}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

ExtremelyFastDecisionTreeClassifier, 0.8230, 0.4167, 0.4165


## Examples using MOA

In [10]:
from capymoa.stream.stream import stream_from_file

# Build the stream consumed by the MOA examples from the same CSV file that the
# River examples read.
# NOTE(review): class_index=-1 presumably selects the last column as the class
# label — confirm against stream_from_file.
elec_stream = stream_from_file(
    path_to_csv_or_arff=csv_elec_tiny_path,
    class_index=-1,
)

In [11]:
# Display the class label values recorded in the stream's schema.
elec_stream.get_schema().label_values

['0', '1']

In [12]:
from capymoa.learner.classifier import AdaptiveRandomForest

# Adaptive Random Forest through its capymoa wrapper, built from the stream's schema.
# NOTE(review): the variable is called arf10 but ensemble_size is 5 (matching the
# River ARF example, which uses n_models=5); the name is kept so other cells
# that may reference it keep working.
arf10 = AdaptiveRandomForest(
    schema=elec_stream.get_schema(),
    ensemble_size=5,
)

# Evaluate on the MOA stream; the result is indexed by 'wallclock', 'cpu_time'
# and 'cumulative' below.
results = test_then_train_evaluation(
    learner=arf10,
    stream=elec_stream,
)

print(f"{arf10} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

AdaptiveRandomForest wallclock: 0.5703258514404297 cpu_time: 2.069129 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 89.45, 'Kappa Statistic (percent)': 78.10294728102947, 'Kappa Temporal Statistic (percent)': 24.9110320284697, 'Kappa M Statistic (percent)': 73.2233502538071}


In [13]:
from moa.classifiers.trees import HoeffdingTree
from capymoa.learner import MOAClassifier

# Wrap a MOA Hoeffding tree (default options) so the evaluation helper can drive it.
ht_moa = MOAClassifier(
    moa_learner=HoeffdingTree(),
)

# NOTE(review): the same elec_stream object is passed to every MOA evaluation in
# this notebook; presumably the helper rewinds it before each run — confirm.
results = test_then_train_evaluation(
    learner=ht_moa,
    stream=elec_stream,
)
print(f"{ht_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

HoeffdingTree wallclock: 0.05756115913391113 cpu_time: 0.22951699999999775 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 82.65, 'Kappa Statistic (percent)': 64.11315010011087, 'Kappa Temporal Statistic (percent)': -23.487544483985793, 'Kappa M Statistic (percent)': 55.964467005076145}


In [14]:
from moa.classifiers.lazy import kNN

# Wrap MOA's kNN (default options); MOAClassifier was imported in an earlier cell.
knn_moa = MOAClassifier(
    moa_learner=kNN(),
)

# Evaluate on the MOA stream and report timings plus the cumulative metrics.
results = test_then_train_evaluation(
    learner=knn_moa,
    stream=elec_stream,
)
print(f"{knn_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

kNN wallclock: 0.2357478141784668 cpu_time: 0.9588379999999983 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 80.55, 'Kappa Statistic (percent)': 59.525712097754244, 'Kappa Temporal Statistic (percent)': -38.43416370106766, 'Kappa M Statistic (percent)': 50.63451776649747}


In [15]:
from moa.classifiers.trees import EFDT

# Wrap MOA's EFDT (default options); MOAClassifier was imported in an earlier cell.
EFDT_moa = MOAClassifier(
    moa_learner=EFDT(),
)

# Evaluate on the MOA stream and report timings plus the cumulative metrics.
results = test_then_train_evaluation(
    learner=EFDT_moa,
    stream=elec_stream,
)
print(f"{EFDT_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

EFDT wallclock: 0.055425405502319336 cpu_time: 0.23116999999999877 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 82.69999999999999, 'Kappa Statistic (percent)': 64.22426628726465, 'Kappa Temporal Statistic (percent)': -23.131672597864835, 'Kappa M Statistic (percent)': 56.09137055837563}


In [16]:
from moa.classifiers.bayes import NaiveBayes

# Wrap MOA's NaiveBayes (default options); MOAClassifier was imported in an earlier cell.
NaiveBayes_moa = MOAClassifier(
    moa_learner=NaiveBayes(),
)

# Evaluate on the MOA stream and report timings plus the cumulative metrics.
results = test_then_train_evaluation(
    learner=NaiveBayes_moa,
    stream=elec_stream,
)
print(f"{NaiveBayes_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

NaiveBayes wallclock: 0.041938066482543945 cpu_time: 0.16296099999999925 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 84.0, 'Kappa Statistic (percent)': 66.56748353448563, 'Kappa Temporal Statistic (percent)': -13.879003558718917, 'Kappa M Statistic (percent)': 59.39086294416243}


In [17]:
from moa.classifiers.meta import StreamingRandomPatches

# Wrap MOA's StreamingRandomPatches, configured through a MOA option string.
# NOTE(review): "-s 5" presumably sets the ensemble size to 5 (mirroring the
# River SRP example's n_models=5); check the meaning of "-u" against the MOA
# StreamingRandomPatches options before relying on it.
SRP_moa = MOAClassifier(
    moa_learner=StreamingRandomPatches(),
    CLI="-s 5 -u",
)

# Evaluate on the MOA stream and report timings plus the cumulative metrics.
results = test_then_train_evaluation(
    learner=SRP_moa,
    stream=elec_stream,
)
print(f"{SRP_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

StreamingRandomPatches wallclock: 0.565244197845459 cpu_time: 1.9068369999999994 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 88.75, 'Kappa Statistic (percent)': 76.35402128768642, 'Kappa Temporal Statistic (percent)': 19.928825622775744, 'Kappa M Statistic (percent)': 71.4467005076142}
