In [6]:
import os
from river import datasets
from river import metrics
from river import tree
from river import ensemble
from river import evaluate
from river import compose
from river import naive_bayes
from river import anomaly
from river import compose
from river import datasets
from river import metrics
from river import preprocessing
from confluent_kafka import Producer,Consumer
import certifi
import time
import json
import pandas
import mlflow

In [7]:
from itertools import islice
# Open the MaliciousURL stream and keep a lazy iterator over its first 10
# (features, label) pairs for a quick look at the data.
dataset = datasets.MaliciousURL()
data = islice(dataset, 10)



In [7]:
# Connection details come from the environment so no secret is stored in the
# notebook; a missing variable raises KeyError right here.
user = os.environ['kafka_username']
password = os.environ['kafka_password']
bsts = os.environ['kafka_bootstrap_servers']

# Topic that serialized model versions are published to.
topic = 'model_updates'

conf = {
    # Where to connect.
    'bootstrap.servers': bsts,
    # SASL/PLAIN authentication over TLS, verified against certifi's CA bundle.
    'sasl.mechanism': 'PLAIN',
    'security.protocol': 'SASL_SSL',
    'ssl.ca.location': certifi.where(),
    'sasl.username': user,
    'sasl.password': password,
    # Raised message size limit and a short batching delay.
    'message.max.bytes': 8000000,
    'linger.ms': 100,
    'client.id': 'model-icde-2023',
}
producer = Producer(conf)

In [9]:
# Module-level metric objects that generate_run reads when logging to MLflow.
# NOTE(review): MicroRecall is river's micro-averaged (multiclass) recall;
# confirm that it, rather than metrics.Recall, is intended for this target.
auc, f1, recall = metrics.ROCAUC(), metrics.F1(), metrics.MicroRecall()


In [10]:
import codecs
import pickle
import sys
def generate_run(exp_id, run_name, model_artifact, max_size=2000, warmup=1000):
    """Train ``model_artifact`` on MaliciousURL inside an MLflow run and log it.

    The first ``warmup`` samples are used for training only. Each later sample
    (up to ``max_size`` samples in total) is first scored against the
    module-level ``auc``, ``f1`` and ``recall`` metrics and then learned from,
    so the logged metrics are computed on samples the model had not yet been
    trained on.

    Args:
        exp_id: MLflow experiment id the run is created under.
        run_name: Name given to the MLflow run.
        model_artifact: Classifier exposing ``learn_one``, ``predict_one`` and
            ``predict_proba_one``; it is trained in place.
        max_size: Total number of samples taken from the dataset.
        warmup: Number of leading samples used for training only.

    Returns:
        The same ``model_artifact`` instance after training.

    Side effects:
        Updates the module-level metric objects, writes ``model.pkl`` to the
        current working directory and logs it as an artifact of the run.
    """
    with mlflow.start_run(experiment_id=exp_id, run_name=run_name):
        stream = datasets.MaliciousURL().take(max_size)

        for seen, (x, y) in enumerate(stream):
            if seen >= warmup:
                # Test-then-train: score the sample before learning from it.
                auc.update(y, model_artifact.predict_proba_one(x))
                y_pred = model_artifact.predict_one(x)
                # A model that has seen no data yet may have no prediction.
                if y_pred is not None:
                    f1.update(y, y_pred)
                    recall.update(y, y_pred)
            # learn_one trains in place. Its return value is deliberately not
            # rebound: depending on the river release it is the model itself
            # or None, and rebinding to None would lose the model.
            model_artifact.learn_one(x, y)

        # Track parameters and the metric each name actually refers to.
        mlflow.log_param("leaf_prediction", "mc")
        mlflow.log_metric("ROCAUC", auc.get())
        mlflow.log_metric("f1", f1.get())
        mlflow.log_metric("recall", recall.get())

        # Persist the trained model and attach it to the run.
        with open('model.pkl', 'wb') as handle:
            pickle.dump(model_artifact, handle, protocol=pickle.HIGHEST_PROTOCOL)
        mlflow.log_artifact('model.pkl')
        return model_artifact

In [11]:
EXPERIMENT_NAME = 'AdaptiveRandomForestExperiment'

# Reuse the experiment when it already exists; otherwise create it and keep
# the id that create_experiment returns.
exp = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
EXPERIMENT_ID = exp.experiment_id if exp else mlflow.create_experiment(EXPERIMENT_NAME)

In [None]:
# Build the forest (leaf_prediction="mc") and train it inside a named MLflow run.
model_artifact = ensemble.AdaptiveRandomForestClassifier(leaf_prediction="mc")
out = generate_run(
    exp_id=EXPERIMENT_ID,
    run_name='03-27-wadkars-1',
    model_artifact=model_artifact,
)
print(out)

In [4]:
from mlflow import MlflowClient
from mlflow.exceptions import MlflowException

REGISTERED_MODEL_NAME = "online-learning-adaptive-random-forest"

client = MlflowClient()
try:
    my_model = client.get_registered_model(REGISTERED_MODEL_NAME)
except MlflowException as exc:
    # A missing model is reported by raising, not by returning a falsy value,
    # so "create if absent" has to be driven by the exception. Any other
    # failure (auth, connectivity, ...) is re-raised unchanged.
    if exc.error_code != "RESOURCE_DOES_NOT_EXIST":
        raise
    client.create_registered_model(REGISTERED_MODEL_NAME)
    # Fetch again so my_model reflects what the registry actually stored.
    my_model = client.get_registered_model(REGISTERED_MODEL_NAME)
print(my_model)

<RegisteredModel: creation_timestamp=1679948352237, description='', last_updated_timestamp=1679948352237, latest_versions=[], name='online-learning-adaptive-random-forest', tags={'mlflow.domino.project_id': 'icde23',
 'mlflow.domino.run_id': '6421bafc3cd3e2523924abd0',
 'mlflow.domino.user': 'integration-test',
 'mlflow.source.type': 'NOTEBOOK',
 'mlflow.user': 'integration-test'}>


In [8]:
import codecs
import pickle
import sys

# Serialize the trained model, then base64-encode the bytes into text so the
# model can be embedded in a JSON message.
pkd = pickle.dumps(model_artifact)
pickled = str(codecs.encode(pkd, "base64"), "utf-8")

# Round-trip check: decode the text and unpickle it back into a model.
model_instance = pickle.loads(codecs.decode(bytes(pickled, "utf-8"), "base64"))
print(model_instance)


In [None]:
v = 1
model_json = {'m': pickled, 'v': v}
msg = json.dumps(model_json)

# Report the encoded payload size: this, not the in-memory size of the Python
# string, is what has to fit under the producer's 'message.max.bytes' setting.
print(f"model v{v} payload: {len(msg.encode('utf-8'))} bytes")

# Delivery results are only reported through a callback; collect any error so
# a failed publish is not silently ignored.
delivery_errors = []


def _on_delivery(err, _record):
    """Record the delivery error, if any, for the produced message."""
    if err is not None:
        delivery_errors.append(err)


producer.produce(topic, value=msg, key=str(v), on_delivery=_on_delivery)
producer.flush()  # blocks until the message is delivered or has failed

if delivery_errors:
    raise RuntimeError(f"publishing model v{v} failed: {delivery_errors[0]}")


In [None]:
# Publish the serialized model as version 2; the version doubles as the key.
v = 2
model_json = dict(m=pickled, v=v)
producer.produce(topic, key=str(v), value=json.dumps(model_json))
producer.flush()


In [None]:
# Publish the serialized model as version 3; the version doubles as the key.
v = 3
model_json = dict(m=pickled, v=v)
producer.produce(topic, key=str(v), value=json.dumps(model_json))
producer.flush()

In [9]:
# Publish the serialized model as version 4; the version doubles as the key.
v = 4
model_json = dict(m=pickled, v=v)
producer.produce(topic, key=str(v), value=json.dumps(model_json))
producer.flush()

## Test the model's `init` and `predict` functions

```
import os
import model
model.init()
#model_test.init_get_latest_model()
x = {"2": 1.0, "4": 0.0788382, "5": 0.131034, "6": 0.117647, "10": 1.0, "11": 0.142857, "16": 0.4, "17": 0.830283, "18": 0.83965, "19": 0.583194, "20": 1.0, "21": 0.285713, "22": 0.00595238, "23": 0.00595238, "36": 1.0, "37": 1.0, "44": 1.0, "45": 1.0, "54": 1.0, "56": 1.0, "62": 1.0, "64": 1.0, "66": 1.0, "68": 1.0, "70": 1.0, "72": 1.0, "74": 1.0, "75": 0.25, "76": 1.0, "77": 0.166667, "79": 0.0769231, "81": 0.05, "82": 1.0, "84": 1.0, "86": 1.0, "88": 1.0, "90": 1.0, "92": 1.0, "94": 1.0, "96": 1.0, "102": 1.0, "104": 1.0, "106": 1.0, "108": 1.0, "110": 1.0, "112": 1.0, "131": 1.0, "133": 1.0, "139": 1.0, "141": 1.0, "143": 1.0, "145": 1.0, "147": 1.0, "149": 1.0, "253": 1.0, "260": 1.0, "277": 1.0, "304": 1.0, "305": 1.0, "425": 1.0, "521": 1.0, "673": 1.0, "674": 1.0, "675": 1.0, "676": 1.0, "731": 1.0, "732": 1.0, "733": 1.0, "1365": 1.0, "1488": 1.0, "2098": 1.0, "6027": 1.0, "6028": 1.0, "6029": 1.0, "6030": 1.0, "6031": 1.0, "9989": 1.0, "18073": 1.0, "18074": 1.0, "18077": 1.0, "18078": 1.0, "18079": 1.0, "18080": 1.0, "155153": 1.0, "155154": 1.0, "155155": 1.0, "155156": 1.0, "155157": 1.0, "155158": 1.0, "155159": 1.0, "155160": 1.0, "155161": 1.0, "155163": 1.0, "155164": 1.0, "155165": 1.0, "155166": 1.0, "155167": 1.0, "155168": 1.0, "155169": 1.0, "155170": 1.0, "155171": 1.0, "155172": 1.0, "155173": 1.0, "155174": 1.0, "155175": 1.0, "155176": 1.0, "155177": 1.0, "155178": 1.0, "155179": 1.0, "155180": 1.0, "155181": 1.0, "155182": 1.0, "155183": 1.0, "155184": 1.0, "155185": 1.0, "155186": 1.0, "155187": 1.0, "155188": 1.0, "155189": 1.0, "155190": 1.0, "155191": 1.0, "155192": 1.0, "155193": 1.0, "155194": 1.0, "155195": 1.0, "155196": 1.0, "155197": 1.0, "155198": 1.0, "155199": 1.0, "155200": 1.0, "155201": 1.0, "155202": 1.0, "155203": 1.0, "155204": 1.0, "155205": 1.0, "155206": 1.0, "155207": 1.0, "155208": 1.0, "155209": 1.0, "155210": 1.0, "155211": 1.0, "155212": 1.0, "155213": 1.0, "500481": 1.0}
model.predict(x)
```