# Using sklearn with CapyMOA

* Demonstrate how someone can directly use sklearn learners in CapyMOA.
* Ideally, one should be free to use learners from other libraries as well

**Accessing the input data through ```x```**

* Accessing the input data as a double array from an ```Instance``` through its ```x``` attribute (e.g. ```instance.x```)
* Instances are represented internally as MOA Instances.

## Reading data and accessing x

In [1]:
from capymoa.stream import stream_from_file

DATA_PATH = "../data/"

# Build a stream backed by the electricity CSV file
elec_stream = stream_from_file(path_to_csv_or_arff=DATA_PATH+"electricity.csv")

# restart() is called before iterating (presumably rewinds the stream to its
# first instance -- TODO confirm against the CapyMOA docs)
elec_stream.restart()

# Only the first MAX_PRINTED instances are echoed; the stream is still read to
# the end, so after the loop `instance` holds the last instance of the stream.
MAX_PRINTED = 20
seen = 0
while elec_stream.has_more_instances():
    instance = elec_stream.next_instance()
    if seen < MAX_PRINTED:
        print(f'x: {instance.x}, y: {instance.y_index}')
    seen += 1

capymoa_root: /Users/ng98/Desktop/CODE/CapyMOA_Latest/src/capymoa
MOA jar path location (config.ini): /Users/ng98/Desktop/CODE/CapyMOA_Latest/src/capymoa/jar/moa.jar
JVM Location (system): 
JAVA_HOME: /Users/ng98/Library/Java/JavaVirtualMachines/openjdk-14.0.1/Contents/Home
JVM args: ['-Xmx8g', '-Xss10M']
Sucessfully started the JVM and added MOA jar to the class path
x: [0.       0.056443 0.439155 0.003467 0.422915 0.414912], y: 1
x: [0.021277 0.051699 0.415055 0.003467 0.422915 0.414912], y: 1
x: [0.042553 0.051489 0.385004 0.003467 0.422915 0.414912], y: 1
x: [0.06383  0.045485 0.314639 0.003467 0.422915 0.414912], y: 1
x: [0.085106 0.042482 0.251116 0.003467 0.422915 0.414912], y: 0
x: [0.106383 0.041161 0.207528 0.003467 0.422915 0.414912], y: 0
x: [0.12766  0.041161 0.171824 0.003467 0.422915 0.414912], y: 0
x: [0.148936 0.041161 0.152782 0.003467 0.422915 0.414912], y: 0
x: [0.170213 0.041161 0.13493  0.003467 0.422915 0.414912], y: 0
x: [0.191489 0.041161 0.140583 0.003467 0.42

In [2]:
# Look at the underlying MOA (Java) representation of the most recently read
# instance: for every input attribute, print the attribute and then its value.
moa_instance = instance.java_instance.getData()

num_inputs = moa_instance.numInputAttributes()
for attr_idx in range(num_inputs):
    print(moa_instance.attribute(attr_idx))
    print(moa_instance.value(attr_idx))

@attribute attrib_0 numeric
1.0
@attribute attrib_1 numeric
0.050679
@attribute attrib_2 numeric
0.288753
@attribute attrib_3 numeric
0.003542
@attribute attrib_4 numeric
0.355256
@attribute attrib_5 numeric
0.23114


## Using scikit-learn

* Example showing how a model from scikit-learn can be used with our ```Instance``` representation

In [3]:

from sklearn import linear_model
from capymoa.evaluation import ClassificationEvaluator
from capymoa.datasets import ElectricityTiny

# Stream over the tiny version of the electricity dataset (chosen to speed up
# the process)
elec_stream = ElectricityTiny()

# The scikit-learn model that is trained incrementally below
sklearn_SGD = linear_model.SGDClassifier()

# Evaluator that accumulates the classification metrics
ob_evaluator = ClassificationEvaluator(schema=elec_stream.get_schema())

# Note: elec_stream.schema.get_label_indexes() --> the class labels

# Number of partial_fit calls made so far
partial_fit_count = 0
while elec_stream.has_more_instances():
    instance = elec_stream.next_instance()

    # scikit-learn does not allow invoking predict on a model that was never
    # fit, so -1 is reported until the first partial_fit has happened.
    if partial_fit_count == 0:
        prediction = -1
    else:
        prediction = sklearn_SGD.predict([instance.x])[0]
    ob_evaluator.update(instance.y_index, prediction)

    # Train on the instance only after it has been used for evaluation
    sklearn_SGD.partial_fit(
        [instance.x],
        [instance.y_index],
        classes=elec_stream.schema.get_label_indexes(),
    )
    partial_fit_count += 1

ob_evaluator.accuracy()

84.7

### Example using a MOA learner



In [4]:
from moa.classifiers.trees import HoeffdingAdaptiveTree
from capymoa.evaluation import ClassificationEvaluator
from capymoa.base import MOAClassifier

# Creating a stream from the tiny version of the electricity dataset
elec_stream = ElectricityTiny()

# Creating a learner: a MOA HoeffdingAdaptiveTree wrapped in MOAClassifier
moa_HAT = MOAClassifier(schema=elec_stream.get_schema(), moa_learner=HoeffdingAdaptiveTree())

# Creating the evaluator
hat_evaluator = ClassificationEvaluator(schema=elec_stream.get_schema())

# Test-then-train loop: each instance is first used to evaluate the current
# model and only afterwards used to train it. Unlike the raw scikit-learn
# example, no fit counter is kept here: predict() is called on every instance,
# including the first one.
while elec_stream.has_more_instances():
    instance = elec_stream.next_instance()

    prediction = moa_HAT.predict(instance)
    hat_evaluator.update(instance.y_index, prediction)
    moa_HAT.train(instance)

hat_evaluator.accuracy()

82.75

### Using SKClassifier


In [5]:
from sklearn import linear_model
from capymoa.base import SKClassifier
from capymoa.evaluation import ClassificationEvaluator

# Creating a stream from the ElectricityTiny dataset
elec_stream = ElectricityTiny()

# Creating a learner: the scikit-learn SGDClassifier wrapped in SKClassifier,
# so it is driven with predict(instance) / train(instance) in the loop below
sklearn_SGD = SKClassifier(
    schema=elec_stream.get_schema(),
    sklearner=linear_model.SGDClassifier(),
)

# Creating the evaluator
sklearn_SGD_evaluator = ClassificationEvaluator(
    schema=elec_stream.get_schema()
)

# Test-then-train: evaluate on each instance first, then learn from it
while elec_stream.has_more_instances():
    instance = elec_stream.next_instance()

    prediction = sklearn_SGD.predict(instance)
    sklearn_SGD_evaluator.update(instance.y_index, prediction)
    sklearn_SGD.train(instance)

sklearn_SGD_evaluator.accuracy()

84.7

### Using prequential evaluation + SKClassifier

In [6]:
from capymoa.evaluation import prequential_evaluation

# Opening the electricity CSV file as a stream
elec_stream = stream_from_file(path_to_csv_or_arff=DATA_PATH+"electricity.csv")

# Creating a learner: SGDClassifier wrapped in SKClassifier
sklearn_SGD = SKClassifier(
    schema=elec_stream.get_schema(),
    sklearner=linear_model.SGDClassifier(),
)

# prequential_evaluation drives the whole evaluation in a single call, so no
# hand-written loop is needed here
results_sklearn_SGD = prequential_evaluation(
    stream=elec_stream,
    learner=sklearn_SGD,
    window_size=4500,
)

# Accuracy from the 'cumulative' entry of the returned results
results_sklearn_SGD['cumulative'].accuracy()

83.88064971751412