# Using jpype directly

* This notebook does not use the prepare_jpype.py script
* It serves as an example of how to use jpype directly (it is easy, but it is good to have this example around).

**Notebook last update: 17/02/2024**

In [1]:
# Use this to check that the JAVA_HOME environment variable is correctly set
import subprocess
import os

def describe_java_home(environ=os.environ):
    """Return a one-line report on the JAVA_HOME environment variable.

    Args:
        environ: Mapping in which JAVA_HOME is looked up; defaults to the
            environment of the current process.

    Returns:
        ``"JAVA_HOME:  <value>"`` when the variable holds a non-empty value,
        otherwise a message stating that it is not set.
    """
    java_home = environ.get('JAVA_HOME')
    if java_home:
        return f"JAVA_HOME:  {java_home}"
    # Report the problem instead of raising KeyError: this cell exists to
    # diagnose the environment, so a missing variable is an expected outcome.
    return ("JAVA_HOME is not set; point it at your JDK installation, "
            "otherwise jpype may be unable to locate a JVM.")


print(describe_java_home())

JAVA_HOME:  /usr/lib/jvm/java-17-openjdk


In [2]:
import jpype

import jpype.imports
from jpype.types import *

# JVM options: -Xmx8g caps the Java heap at 8 GB and -Xss10M sets the thread
# stack size to 10 MB.
jvm_args = ["-Xmx8g", "-Xss10M"]

# Start the JVM with the specified parameters, but only once per kernel:
# jpype.startJVM raises OSError if a JVM is already running, so the guard
# keeps this cell safe to re-run.
if not jpype.isJVMStarted():
    jpype.startJVM(jpype.getDefaultJVMPath(), *jvm_args)

In [3]:
import os
# Register the MOA jar on the JVM class path. The location is resolved
# relative to the current working directory of the notebook.
moa_jar_path = os.getcwd() + '/../src/capymoa/jar/moa.jar'
jpype.addClassPath(moa_jar_path)

## Basic classification using ARF

In [4]:
%%time
import pandas as pd

from moa.classifiers.meta import AdaptiveRandomForest
from moa.core import Example
from moa.evaluation import BasicClassificationPerformanceEvaluator
from moa.streams.generators import RandomTreeGenerator

# Run configuration. instancesProcessed starts at 1 and acts as the 1-based
# index of the instance currently being handled by the evaluation loop.
sampleFrequency = 100
maxInstancesToProcess = 1000
instancesProcessed = 1

# Stream: a RandomTreeGenerator configured through a CLI-style option string.
rtg = RandomTreeGenerator()
rtg_options = rtg.getOptions()
rtg_options.setViaCLIString("-c 3 -u 10 -o 0")
rtg.prepareForUse()

# Learner: an AdaptiveRandomForest. One parameter is set through the CLI
# option string, the random seed through its setter method.
learner = AdaptiveRandomForest()
learner_options = learner.getOptions()
learner_options.setViaCLIString("-s 10")
learner.setRandomSeed(5)
learner.prepareForUse()
# Hand the learner the stream header.
learner.setModelContext(rtg.getHeader())

# Evaluator: recall-per-class reporting is switched on directly through the
# option attribute rather than through a CLI string.
evaluator = BasicClassificationPerformanceEvaluator()
evaluator.recallPerClassOption.set()
evaluator.prepareForUse()

# Accumulators for the sampled results:
#   data               - one row of measurement values per sampling point
#   performance_names  - column labels, captured once from the evaluator
#   performance_values - the most recently sampled row
data = []
performance_names = []
performance_values = []

# Test-then-train loop: every instance is first used to score the learner
# and only afterwards to train it.
while rtg.hasMoreInstances() and instancesProcessed <= maxInstancesToProcess:
    trainInst = rtg.nextInstance()
    testInst = trainInst

    prediction = learner.getVotesForInstance(testInst)

    evaluator.addResult(testInst, prediction)
    learner.trainOnInstance(trainInst)

    if instancesProcessed == 1:
        # Capture the measurement names once, after the first result has been
        # added (presumably the evaluator needs a result before it can report
        # its complete set of measurements - TODO confirm).
        # str() converts each Java string into a Python str so the names can
        # be used as DataFrame column labels.
        performance_measurements = evaluator.getPerformanceMeasurements()
        performance_names = [str(measurement.getName()) for measurement in performance_measurements]

    if instancesProcessed % sampleFrequency == 0:
        # Snapshot the current value of every measurement.
        performance_values = [measurement.getValue() for measurement in evaluator.getPerformanceMeasurements()]
        data.append(performance_values)

    instancesProcessed += 1

# Build a DataFrame with one row per sampling point.
results_df = pd.DataFrame(data, columns=performance_names)

# Display the DataFrame (last expression of the cell).
results_df

CPU times: user 5.18 s, sys: 341 ms, total: 5.52 s
Wall time: 2.49 s


Unnamed: 0,classified instances,classifications correct (percent),Kappa Statistic (percent),Kappa Temporal Statistic (percent),Kappa M Statistic (percent),Recall for class 0 (percent),Recall for class 1 (percent),Recall for class 2 (percent)
0,100.0,75.0,51.399689,56.896552,34.210526,87.5,70.27027,0.0
1,200.0,76.5,55.126981,58.40708,45.348837,88.888889,73.684211,6.25
2,300.0,77.666667,58.234634,62.146893,49.242424,87.654321,79.279279,11.111111
3,400.0,79.25,61.918745,64.529915,55.135135,88.516746,83.006536,13.157895
4,500.0,80.2,64.225057,66.889632,57.51073,87.739464,84.736842,22.44898
5,600.0,80.833333,65.742713,68.144044,59.363958,88.102894,86.283186,25.396825
6,700.0,81.285714,66.78908,68.357488,61.127596,88.235294,87.640449,26.315789
7,800.0,81.5,67.593518,68.240343,61.558442,88.508557,88.135593,31.25
8,900.0,81.888889,68.292324,68.471954,62.268519,88.095238,89.425982,31.775701
9,1000.0,82.5,69.382685,69.298246,63.389121,88.75969,89.010989,35.833333
