# Basic classification examples

* Using the ```test_then_train_evaluation``` and ```test_then_train_RIVER``` helper functions.
* These are just examples of how to run several algorithms; they are somewhat repetitive, but having a worked example for each algorithm may be useful.
* We show how to use AdaptiveRandomForest through its wrapper (```from capymoa.learner.classifier import AdaptiveRandomForest```) and how to create MOA learners and wrap them in a MOAClassifier (```from capymoa.learner import MOAClassifier```).
* Examples from MOA and River use the same CSV file.

**Notebook last update: 08/12/2023**

In [1]:
from capymoa.evaluation import test_then_train_evaluation
from benchmarking import test_then_train_RIVER

capymoa_root: /home/antonlee/github.com/tachyonicClock/MOABridge/src/capymoa
MOA jar path location (config.ini): /home/antonlee/github.com/tachyonicClock/MOABridge/src/capymoa/jar/moa.jar
JVM Location (system): 
JAVA_HOME: /usr/lib/jvm/java-17-openjdk
JVM args: ['-Xmx8g', '-Xss10M']


Sucessfully started the JVM and added MOA jar to the class path


In [2]:
## Dataset paths
# Relative path to the small Electricity CSV; both the River and the MOA
# examples in this notebook load this same file.
csv_elec_tiny_path = "../data/electricity_tiny.csv"

# Alternative file kept for reference (note: it uses a different base directory).
# csv_elec_tiny_path = './data/electricity_tiny_nominal_class.csv'

## Examples using River

In [3]:
import pandas as pd
# Load the CSV once and convert it to a NumPy array; every River example
# below receives this same array as its dataset.
elec_tiny_frame = pd.read_csv(csv_elec_tiny_path)
river_elec_tiny = elec_tiny_frame.to_numpy()

In [4]:
from river.forest import ARFClassifier

# River Adaptive Random Forest with 5 ensemble members. max_features=0.60 is
# passed as a float (presumably a fraction of the features — confirm against
# the River documentation).
# NOTE(review): no seed is passed, so repeated runs may not reproduce the
# exact same numbers — confirm and consider fixing a seed.
river_arf5 = ARFClassifier(max_features=0.60, n_models=5)

# Run the River test-then-train helper on the shared array. Four values are
# unpacked; the last one (bound to df) is not used in this cell.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_arf5,
    dataset=river_elec_tiny,
)
print(f"{river_arf5}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

ARFClassifier, 0.8860, 1.0561, 0.7037


In [5]:
from river.tree import HoeffdingTreeClassifier

# Single Hoeffding tree built with River's default constructor arguments.
river_ht = HoeffdingTreeClassifier()

# Evaluate with the River test-then-train helper; the fourth returned value
# (df) is unpacked but not used here.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_ht,
    dataset=river_elec_tiny,
)
print(f"{river_ht}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

HoeffdingTreeClassifier, 0.8230, 0.2658, 0.0893


In [6]:
from river.neighbors import KNNClassifier

# k-nearest-neighbours classifier with River's default constructor arguments.
river_knn = KNNClassifier()

# Same evaluation helper and dataset as the other River cells, so the printed
# accuracy / wall-clock / CPU-time figures are directly comparable.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_knn,
    dataset=river_elec_tiny,
)
print(f"{river_knn}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

KNNClassifier, 0.7570, 7.1267, 3.9199


In [7]:
from river.naive_bayes import GaussianNB

# Gaussian naive Bayes with River's default constructor arguments.
river_gnb = GaussianNB()

# Evaluate on the shared array; prints the model followed by accuracy,
# wall-clock time and CPU time, each formatted to four decimals.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_gnb,
    dataset=river_elec_tiny,
)
print(f"{river_gnb}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

GaussianNB, 0.8490, 0.0912, 0.0757


In [8]:
from river.ensemble.streaming_random_patches import SRPClassifier

# Streaming Random Patches ensemble with 5 members. subspace_size=0.6 is passed
# as a float (presumably a fraction of the features — confirm against the
# River documentation).
river_srp = SRPClassifier(subspace_size=0.6, n_models=5)

# Evaluate with the River test-then-train helper on the shared array; the
# fourth returned value (df) is unpacked but not used here.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_srp,
    dataset=river_elec_tiny,
)
print(f"{river_srp}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

SRPClassifier(HoeffdingTreeClassifier), 0.9070, 2.1189, 1.9090


In [9]:
from river.tree import ExtremelyFastDecisionTreeClassifier

# Extremely Fast Decision Tree with River's default constructor arguments.
river_EFDT = ExtremelyFastDecisionTreeClassifier()

# Same helper, same dataset and same print format as the other River cells.
acc, wallclock, cpu_time, df = test_then_train_RIVER(
    model=river_EFDT,
    dataset=river_elec_tiny,
)
print(f"{river_EFDT}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}")

ExtremelyFastDecisionTreeClassifier, 0.8230, 0.5219, 0.4909


## Examples using MOA

In [10]:
from capymoa.stream.stream import stream_from_file

# Build the stream for the MOA examples from the same CSV the River examples
# read. class_index=-1 presumably selects the last column as the class label —
# confirm against stream_from_file's documentation.
elec_stream = stream_from_file(
    class_index=-1,
    path_to_csv_or_arff=csv_elec_tiny_path,
)

In [11]:
# Display the class label values recorded in the stream's schema.
elec_schema = elec_stream.get_schema()
elec_schema.label_values

['0', '1']

In [12]:
from capymoa.learner.classifier import AdaptiveRandomForest

# Adaptive Random Forest through its CapyMOA wrapper, with 5 ensemble members
# (the same ensemble size as the River ARF example earlier in this notebook).
# The variable is named arf5 so that the name matches ensemble_size.
arf5 = AdaptiveRandomForest(schema=elec_stream.get_schema(), ensemble_size=5)

# The evaluation result is indexed by 'wallclock', 'cpu_time' and 'cumulative'
# when building the summary line below.
results = test_then_train_evaluation(stream=elec_stream, learner=arf5)

print(f"{arf5} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

AdaptiveRandomForest wallclock: 2.0976288318634033 cpu_time: 3.406157696000001 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 89.45, 'Kappa Statistic (percent)': 78.10294728102947, 'Kappa Temporal Statistic (percent)': 24.9110320284697, 'Kappa M Statistic (percent)': 73.2233502538071, 'F1 Score (percent)': 89.07449732081565, 'F1 Score for class 0 (percent)': 91.14561477129669, 'F1 Score for class 1 (percent)': 86.95114409400124, 'Precision (percent)': 88.81882656350741, 'Precision for class 0 (percent)': 92.42553191489363, 'Precision for class 1 (percent)': 85.21212121212122, 'Recall (percent)': 89.33164425714095, 'Recall for class 0 (percent)': 89.90066225165563, 'Recall for class 1 (percent)': 88.76262626262627}


In [13]:
from moa.classifiers.trees import HoeffdingTree
from capymoa.learner import MOAClassifier

# Wrap a raw MOA HoeffdingTree (constructed with no arguments) in MOAClassifier
# so it can be passed to the evaluation helper. The schema is read through
# get_schema(), the same accessor the earlier MOA cells in this notebook use.
ht_moa = MOAClassifier(schema=elec_stream.get_schema(), moa_learner=HoeffdingTree())

# The evaluation result is indexed by 'wallclock', 'cpu_time' and 'cumulative'
# when building the summary line below.
results = test_then_train_evaluation(stream=elec_stream, learner=ht_moa)
print(f"{ht_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

HoeffdingTree wallclock: 0.08550405502319336 cpu_time: 0.2671951210000003 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 82.65, 'Kappa Statistic (percent)': 64.11315010011087, 'Kappa Temporal Statistic (percent)': -23.487544483985793, 'Kappa M Statistic (percent)': 55.964467005076145, 'F1 Score (percent)': 82.09811936776865, 'F1 Score for class 0 (percent)': 85.34009294465568, 'F1 Score for class 1 (percent)': 78.75076546233926, 'Precision (percent)': 81.80034450954582, 'Precision for class 0 (percent)': 87.14408973252804, 'Precision for class 1 (percent)': 76.45659928656362, 'Recall (percent)': 82.39807010502375, 'Recall for class 0 (percent)': 83.6092715231788, 'Recall for class 1 (percent)': 81.18686868686868}


In [14]:
from moa.classifiers.lazy import kNN

# Wrap MOA's kNN (constructed with no arguments) in MOAClassifier. The schema is
# read through get_schema(), the same accessor the earlier MOA cells use.
knn_moa = MOAClassifier(schema=elec_stream.get_schema(), moa_learner=kNN())

# Evaluate on the same stream object as the other MOA examples and print the
# timing figures plus the cumulative metrics.
results = test_then_train_evaluation(stream=elec_stream, learner=knn_moa)
print(f"{knn_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

kNN wallclock: 0.47820544242858887 cpu_time: 1.0959418200000002 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 80.55, 'Kappa Statistic (percent)': 59.525712097754244, 'Kappa Temporal Statistic (percent)': -38.43416370106766, 'Kappa M Statistic (percent)': 50.63451776649747, 'F1 Score (percent)': 79.77003356968547, 'F1 Score for class 0 (percent)': 83.75782881002087, 'F1 Score for class 1 (percent)': 75.7632398753894, 'Precision (percent)': 79.64174207875188, 'Precision for class 0 (percent)': 84.49873631002527, 'Precision for class 1 (percent)': 74.78474784747847, 'Recall (percent)': 79.89873904609004, 'Recall for class 0 (percent)': 83.02980132450331, 'Recall for class 1 (percent)': 76.76767676767676}


In [15]:
from moa.classifiers.trees import EFDT

# Wrap MOA's EFDT (constructed with no arguments) in MOAClassifier. The schema
# is read through get_schema(), the same accessor the earlier MOA cells use.
EFDT_moa = MOAClassifier(schema=elec_stream.get_schema(), moa_learner=EFDT())

# Evaluate on the same stream object as the other MOA examples and print the
# timing figures plus the cumulative metrics.
results = test_then_train_evaluation(stream=elec_stream, learner=EFDT_moa)
print(f"{EFDT_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

EFDT wallclock: 0.10801196098327637 cpu_time: 0.2733989769999994 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 82.69999999999999, 'Kappa Statistic (percent)': 64.22426628726465, 'Kappa Temporal Statistic (percent)': -23.131672597864835, 'Kappa M Statistic (percent)': 56.09137055837563, 'F1 Score (percent)': 82.15544727171476, 'F1 Score for class 0 (percent)': 85.37616229923923, 'F1 Score for class 1 (percent)': 78.82496940024481, 'Precision (percent)': 81.85195213304944, 'Precision for class 0 (percent)': 87.21934369602764, 'Precision for class 1 (percent)': 76.48456057007125, 'Recall (percent)': 82.46120141815506, 'Recall for class 0 (percent)': 83.6092715231788, 'Recall for class 1 (percent)': 81.31313131313132}


In [16]:
from moa.classifiers.bayes import NaiveBayes

# Wrap MOA's NaiveBayes (constructed with no arguments) in MOAClassifier. The
# schema is read through get_schema(), the same accessor the earlier MOA cells
# use.
NaiveBayes_moa = MOAClassifier(schema=elec_stream.get_schema(), moa_learner=NaiveBayes())

# Evaluate on the same stream object as the other MOA examples and print the
# timing figures plus the cumulative metrics.
results = test_then_train_evaluation(stream=elec_stream, learner=NaiveBayes_moa)
print(f"{NaiveBayes_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

NaiveBayes wallclock: 0.07117223739624023 cpu_time: 0.19391226799999828 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 84.0, 'Kappa Statistic (percent)': 66.56748353448563, 'Kappa Temporal Statistic (percent)': -13.879003558718917, 'Kappa M Statistic (percent)': 59.39086294416243, 'F1 Score (percent)': 83.28381505321872, 'F1 Score for class 0 (percent)': 86.74399337199668, 'F1 Score for class 1 (percent)': 79.82345523329128, 'Precision (percent)': 83.26942115618381, 'Precision for class 0 (percent)': 86.81592039800995, 'Precision for class 1 (percent)': 79.72292191435768, 'Recall (percent)': 83.29821392735299, 'Recall for class 0 (percent)': 86.67218543046357, 'Recall for class 1 (percent)': 79.92424242424242}


In [17]:
from moa.classifiers.meta import StreamingRandomPatches

# Wrap MOA's StreamingRandomPatches in MOAClassifier, configuring it through a
# MOA command-line option string. The schema is read through get_schema(), the
# same accessor the earlier MOA cells use.
# NOTE(review): "-s 5 -u" presumably sets the ensemble size to 5 and disables
# drift detection — confirm against the MOA StreamingRandomPatches options.
SRP_moa = MOAClassifier(schema=elec_stream.get_schema(), moa_learner=StreamingRandomPatches(), CLI="-s 5 -u")

# Evaluate on the same stream object as the other MOA examples and print the
# timing figures plus the cumulative metrics.
results = test_then_train_evaluation(stream=elec_stream, learner=SRP_moa)
print(f"{SRP_moa} wallclock: {results['wallclock']} cpu_time: {results['cpu_time']} metrics: {results['cumulative']}")

StreamingRandomPatches wallclock: 0.6675782203674316 cpu_time: 1.8824357859999985 metrics: {'classified instances': 2000.0, 'classifications correct (percent)': 88.75, 'Kappa Statistic (percent)': 76.35402128768642, 'Kappa Temporal Statistic (percent)': 19.928825622775744, 'Kappa M Statistic (percent)': 71.4467005076142, 'F1 Score (percent)': 88.19027989119654, 'F1 Score for class 0 (percent)': 90.78246620237607, 'F1 Score for class 1 (percent)': 85.56767158434894, 'Precision (percent)': 88.41215762532106, 'Precision for class 0 (percent)': 89.86212489862126, 'Precision for class 1 (percent)': 86.96219035202087, 'Recall (percent)': 87.96951301090374, 'Recall for class 0 (percent)': 91.72185430463577, 'Recall for class 1 (percent)': 84.21717171717171}
