In [107]:
!pip install river



In [2]:
import numpy as np
from river import ensemble
from river import evaluate
from river import metrics
from river import preprocessing
from river import stream
from river import datasets
import bentoml

In [3]:
# Online regressor trained one sample at a time further down.
# seed=42 is passed explicitly, presumably so repeated runs give the same model.
model = ensemble.AdaptiveRandomForestRegressor(seed=42)

In [4]:
# Load River's bundled TrumpApproval dataset; the bare name on the last line
# makes the notebook display the dataset summary.
dataset = datasets.TrumpApproval()
dataset

Donald Trump approval ratings.

This dataset was obtained by reshaping the data used by FiveThirtyEight for analyzing Donald
Trump's approval ratings. It contains 5 features, which are approval ratings collected by
5 polling agencies. The target is the approval rating from FiveThirtyEight's model. The goal of
this task is to see if we can reproduce FiveThirtyEight's model.

    Name  TrumpApproval                                                                                
    Task  Regression                                                                                   
 Samples  1,001                                                                                        
Features  6                                                                                            
  Sparse  False                                                                                        
    Path  /opt/anaconda3/envs/icos_v1/lib/python3.10/site-packages/river/datasets/trump_approval.csv.gz

In [86]:
dataset.take(100)

<itertools.islice at 0x7fc7db78a2f0>

In [99]:
# Empty container for the (features, target) pairs collected from the dataset.
data = []

In [100]:
# Materialise the first 100 (features, target) pairs in a single step.
# Rebuilding the list here, rather than appending to an existing one, keeps the
# cell idempotent: re-running it cannot duplicate samples.
data = list(dataset.take(100))

In [101]:
data[0]

({'ordinal_date': 736389,
  'gallup': 43.843213,
  'ipsos': 46.19925042857143,
  'morning_consult': 48.318749,
  'rasmussen': 44.104692,
  'you_gov': 43.636914000000004},
 43.75505)

In [87]:
from sklearn.model_selection import train_test_split

In [None]:
# 80/20 hold-out over the (features, target) pairs collected in `data`.
# - Each element already bundles features and target, so one sequence is split
#   and the halves are named `train` / `test`, which the later cells iterate over.
# - shuffle=False keeps the stream order, so the model is scored on the last 20%
#   of the observations rather than on randomly interleaved ones.
# - No `stratify`: the task is regression, so the target is continuous and there
#   are no classes to stratify on.
train, test = train_test_split(data, test_size=0.2, shuffle=False)

In [7]:
len(train), len(test)

(80, 20)

In [8]:
# Fresh, independent accumulators for predictions and ground-truth targets.
y_pred, y_test = [], []

In [55]:
# Train incrementally, one (features, target) pair at a time.
# Unpacking into dedicated names avoids rebinding the notebook-level `data`
# list to a single sample, which would break any earlier cell on re-run.
for features, target in train:
    model.learn_one(features, target)
    

In [10]:
# Sanity check: predict for one hand-written feature dict.
model.predict_one(
    {
        'ordinal_date': 736489,
        'gallup': 37.843213,
        'ipsos': 38.07067899999999,
        'morning_consult': 42.318749,
        'rasmussen': 40.104692,
        'you_gov': 38.636914000000004,
    }
)

41.660705523772066

In [11]:
# Score the hold-out set.
# Both lists are rebuilt from scratch, so re-running the cell cannot append
# duplicate entries, and the comprehension variables do not leak, so the
# notebook-level `data` list is left untouched.
y_pred = [model.predict_one(features) for features, _ in test]
y_test = [target for _, target in test]

In [12]:
y_pred

[41.598845875053485,
 41.598845875053485,
 41.598845875053485,
 41.660705523772066,
 41.67339860124695,
 41.67339860124695,
 41.67339860124695,
 41.67339860124695,
 41.67339860124695,
 41.67339860124695,
 41.660705523772066,
 41.660705523772066,
 41.660705523772066,
 41.660705523772066,
 41.660705523772066,
 41.660705523772066,
 41.67339860124695,
 41.67339860124695,
 41.660705523772066,
 41.660705523772066]

In [13]:
y_test

[38.80148,
 38.83015,
 38.78198,
 39.14797,
 38.99393,
 39.05955,
 39.28042,
 39.27729,
 39.3673,
 39.3634,
 39.21078,
 39.14719,
 39.113640000000004,
 39.10624,
 38.9981,
 38.890209999999996,
 38.998259999999995,
 38.60328,
 38.28024,
 38.24259]

In [14]:
from sklearn.metrics import mean_squared_error

In [15]:
# Mean squared error of the hold-out predictions (ground truth first, predictions second).
mse = mean_squared_error(y_test, y_pred)

In [16]:
mse

7.283201559739519

In [69]:
# Custom Python model class to include river model in mlflow
import mlflow.pyfunc
class ARFModel(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper around River's ``AdaptiveRandomForestRegressor``.

    The wrapper exposes River's one-sample-at-a-time API (``learn_one`` /
    ``predict_one``) and maps MLflow's ``predict`` entry point onto
    ``predict_one`` so the model can be logged with ``mlflow.pyfunc.log_model``.
    """

    def __init__(self, seed=42):
        """Create the wrapped regressor.

        Args:
            seed: Seed forwarded to the regressor. Defaults to 42, the value
                that was previously hard-coded.
        """
        self.arf_model = ensemble.AdaptiveRandomForestRegressor(seed=seed)

    def learn_one(self, input_data, target):
        """Update the wrapped model with a single observation.

        Args:
            input_data: Feature mapping for one sample (a dict in this notebook).
            target: Target value for that sample.

        Returns:
            Whatever the wrapped model's ``learn_one`` returns.
        """
        return self.arf_model.learn_one(input_data, target)

    def predict_one(self, model_input):
        """Return the wrapped model's prediction for a single feature mapping."""
        return self.arf_model.predict_one(model_input)

    def predict(self, context, model_input, params=None):
        """MLflow entry point; delegates to ``predict_one``.

        Args:
            context: MLflow model context; not used by this wrapper.
            model_input: Feature mapping for one sample, as accepted by
                ``predict_one``.
            params: Optional inference parameters. Accepted so the signature
                also matches MLflow versions that pass ``params``; ignored.

        Returns:
            The prediction produced by ``predict_one``.
        """
        return self.predict_one(model_input)

In [70]:
my_model = ARFModel()

In [71]:
# Train the MLflow wrapper on the same training pairs, one sample at a time.
# Unpacking into dedicated names avoids rebinding the notebook-level `data`
# list to a single sample.
for features, target in train:
    my_model.learn_one(features, target)

In [73]:
# Same hand-written sample as before, routed through the wrapper's predict_one.
my_model.predict_one(
    {
        'ordinal_date': 736489,
        'gallup': 37.843213,
        'ipsos': 38.07067899999999,
        'morning_consult': 42.318749,
        'rasmussen': 40.104692,
        'you_gov': 38.636914000000004,
    }
)

41.660705523772066

In [72]:
# Same sample through the MLflow-style entry point; no context is needed here.
my_model.predict(
    context=None,
    model_input={
        'ordinal_date': 736489,
        'gallup': 37.843213,
        'ipsos': 38.07067899999999,
        'morning_consult': 42.318749,
        'rasmussen': 40.104692,
        'you_gov': 38.636914000000004,
    },
)

41.660705523772066

In [67]:
# NOTE(review): the AttributeError recorded for this cell comes from an earlier
# execution (In [67]), before the class cell was run as In [69]; the same
# attribute is called successfully in In [73]. This looks like a stale
# leftover: re-run it or remove it.
my_model.predict_one

AttributeError: 'ARFModel' object has no attribute 'predict_one'

In [74]:
# Log the trained wrapper as an MLflow pyfunc model under the "model" artifact path;
# the returned info object supplies the model URI used by the BentoML import below.
model_info = mlflow.pyfunc.log_model(artifact_path="model", python_model=my_model)

In [75]:
model_info

<mlflow.models.model.ModelInfo at 0x7fc7db5ea2c0>

In [76]:
# Import the logged MLflow model into the BentoML model store under the name 'arf_model'.
bento_model = bentoml.mlflow.import_model('arf_model', model_info.model_uri)

In [77]:
bento_model

Model(tag="arf_model:v45cb2sqwwywjury", path="/Users/jaydeepsamanta/bentoml/models/arf_model/v45cb2sqwwywjury/")

In [78]:
!bentoml models list

[1m [0m[1mTag                  [0m[1m [0m[1m [0m[1mModule                [0m[1m [0m[1m [0m[1mSize      [0m[1m [0m[1m [0m[1mCreation Time      [0m[1m [0m
 arf_model:v45cb2sqww…  bentoml.mlflow          83.24 KiB   2024-08-02 10:57:53 
 arf_model:usg2o3cqws…  bentoml.mlflow          149.47 KiB  2024-08-02 10:50:25 
 arf_model:hh3qylcqws…  bentoml.mlflow          82.86 KiB   2024-08-02 10:47:26 
 arf_model:s23a7qsqwo…  bentoml.mlflow          82.85 KiB   2024-08-02 10:42:52 
 arima_forecast_model…  bentoml.picklable_mod…  1.04 MiB    2024-07-25 12:21:31 
 predictive_maintenan…  bentoml.xgboost         103.22 KiB  2024-02-16 13:15:56 
 iris_clf_with_featur…  bentoml.sklearn         6.36 KiB    2024-02-08 15:42:19 


In [102]:
# Fetch the stored model by the floating ":latest" tag.
# NOTE(review): the runner output further down shows a different tag
# (s5psddsqxwqurury) from the one imported above (v45cb2sqwwywjury), so the
# recorded cells presumably come from different runs. Consider fetching by
# the explicit tag.
bentoml_model = bentoml.mlflow.get("arf_model:latest")

In [103]:
runner = bentoml_model.to_runner()

In [104]:
# Start the runner in-process for notebook testing; the emitted warning states
# this call is for debugging/testing only and must be removed before deployment.
runner.init_local()

'Runner.init_local' is for debugging and testing only. Make sure to remove it before deploying to production.
 - mlflow (current: 2.3.2, required: mlflow==2.3)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


In [105]:
runner

Runner(name='arf_model', models=[Model(tag="arf_model:s5psddsqxwqurury", path="/Users/jaydeepsamanta/bentoml/models/arf_model/s5psddsqxwqurury")], resource_config=None, runnable_class=<class 'bentoml._internal.frameworks.mlflow.get_runnable.<locals>.MLflowPyfuncRunnable'>, embedded=False, runner_methods=[RunnerMethod(runner=..., name='predict', config=RunnableMethodConfig(batchable=False, batch_dim=(0, 0), input_spec=None, output_spec=None), max_batch_size=100, max_latency_ms=10000)], scheduling_strategy=<class 'bentoml._internal.runner.strategy.DefaultStrategy'>, workers_per_resource=1, runnable_init_params={}, _runner_handle=<bentoml._internal.runner.runner_handle.local.LocalRunnerRef object at 0x7fc7db7110c0>)

In [106]:
# End-to-end check: the same hand-written sample served through the BentoML runner.
runner.predict.run(
    {
        'ordinal_date': 736489,
        'gallup': 37.843213,
        'ipsos': 38.07067899999999,
        'morning_consult': 42.318749,
        'rasmussen': 40.104692,
        'you_gov': 38.636914000000004,
    }
)

41.660705523772066