## Install river and scikit-learn

In [41]:
pip install "./river/"

Processing ./river
Building wheels for collected packages: river
  Building wheel for river (setup.py) ... [?25ldone
[?25h  Created wheel for river: filename=river-1-cp39-cp39-macosx_10_9_x86_64.whl size=1195855 sha256=0c8a09779057bbafa33e90de91481d1278a41217290b4388a9d61cb884e6dca2
  Stored in directory: /private/var/folders/s0/cs0fw3px6tx5srp431pdvjc80000gn/T/pip-ephem-wheel-cache-zut_xzcu/wheels/d1/6b/a3/83d8a39007debc0733461c491a8263c5af566254c5860a1a1a
Successfully built river
Installing collected packages: river
  Attempting uninstall: river
    Found existing installation: river 1
    Uninstalling river-1:
      Successfully uninstalled river-1
Successfully installed river-1
You should consider upgrading via the '/Users/brianburns/ml/my_env/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install "./scikit-garden/"

Processing ./scikit-garden
Building wheels for collected packages: scikit-garden
  Building wheel for scikit-garden (setup.py) ... [?25ldone
[?25h  Created wheel for scikit-garden: filename=scikit_garden-0.1.3-cp39-cp39-macosx_10_9_x86_64.whl size=530087 sha256=20fe418083ddcd27e2fd796fb87a854e33f904287695b8362e70b74fd4741980
  Stored in directory: /Users/brianburns/Library/Caches/pip/wheels/cc/96/10/f1f98c7ebcbb916ea7b0a696a32ae8d73b3ed876c5d2911053
Successfully built scikit-garden
Installing collected packages: scikit-garden
  Attempting uninstall: scikit-garden
    Found existing installation: scikit-garden 0.1.3
    Uninstalling scikit-garden-0.1.3:
      Successfully uninstalled scikit-garden-0.1.3
Successfully installed scikit-garden-0.1.3
You should consider upgrading via the '/Users/brianburns/ml/my_env/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


## Imports

In [1]:
from river import stream
from river.ensemble import AdaptiveRandomForestRegressorCP, AdaptiveRandomForestRegressorQRF
from skgarden.mondrian import RiverMondrianForestRegressor
import datetime
import os
import pickle
from datetime import datetime
import time
import math
import numpy as np
from river import synth
import copy

## Experiment functions

In [2]:
def get_target_variable(file_path):
    """
    Our data files have different names for the target variable.
    This returns the string name of the target variable of a dataset in an arff file.
    """
    data_stream = stream.iter_arff(file_path)
    v = next(iter(data_stream))
    return list(v[0].keys())[-1]

In [3]:
def load_datastream(arff_file):
    target_name = get_target_variable(arff_file)
    datastream = stream.iter_arff(arff_file, target = target_name)
    return datastream

In [19]:
def run_experiment(datastream, dataset_name, models_with_names, metrics_with_names, alpha, parameter_info):
    """
    datastream is a river stream object, not the path of an arff file.
    """
    # Ensure iteration doesn't exhaust datastream
    datastream = list(datastream)
    
    all_results = dict()
    all_performances = dict()
        
    for model, model_name in models_with_names:
        results = []
        performances = dict()
        time_start = time.process_time()
        i = 0
        for x,y in datastream:
            i += 1
            interval = model.predict_interval(x, alpha)
            y_hat = model.predict_one(x)
            results.append([x,y,y_hat, interval, alpha])
            model.learn_one(x,y)
            if i % 1000 == 0:
                print(i)

        time_end = time.process_time()
        time_elapsed = time_end - time_start
        
        for metric, metric_name in metrics_with_names:
            performances[metric_name] = metric(results)
        performances["time"] = time_elapsed
        
        all_results[model_name] = results
        all_performances[model_name] = performances

    # Save results and performances
    experiment_id = str(datetime.now()).replace(":","-").replace(".","-").replace(" ", "-")
    #save_experiment(dataset_name, all_results, all_performances, alpha, experiment_id, parameter_info)

    return all_results, all_performances

In [5]:
def save_experiment(dataset_name, results, performances, alpha, experiment_id, parameter_info):
    for model_name in results.keys():
        filepath = "./results/" + dataset_name + "/" + model_name + "/" + experiment_id + "/"
        # Create directory for experiment, if it doesn't already exist
        os.makedirs(filepath, exist_ok=True)
        # save the result
        result = results[model_name]
        with open(filepath + "results.pckl", "wb") as pickle_file:
            pickle.dump(results, pickle_file)
        # save the metrics
        perf = performances[model_name]
        with open(filepath + "metrics.pckl", "wb") as pickle_file:
            pickle.dump(perf, pickle_file)
        with open(filepath + "parameters.txt","w") as params_file:
            params_file.writelines(parameter_info)

## Metrics

In [6]:
# results is a list with elements of the form [x, y, y_hat, interval, alpha]
def remove_inf_results(results):
    filtered_results = filter(lambda x: x[3][0] != -math.inf and x[3][1] != math.inf, results)
    return list(filtered_results)
    
def mean_error_rate(results):
    filtered_results = remove_inf_results(results)
    # sum instances where y isn't in confidence interval
    s = sum([x[1] < x[3][0] or x[1]> x[3][1] for x in filtered_results])
    n = len(filtered_results)
    return s/n
    

def relative_interval_size(results):
    filtered_results = remove_inf_results(results)
    y_vals = [item[1] for item in filtered_results]
    rho = max(y_vals) - min(y_vals)
    # sum length of intervals
    s = sum(x[3][1] - x[3][0] for x in filtered_results)
    n = len(filtered_results)
    return s/(rho*n)

    
def quantile_loss(results):
    filtered_results = remove_inf_results(results)
    alpha = results[0][4]
    a = alpha*relative_interval_size(results)
    def single_interval_loss(y, interval):
        return max(min(interval)-y, y- max(interval), 0)
    s = sum([single_interval_loss(x[1], x[3]) for x in filtered_results])
    y_vals = [item[1] for item in filtered_results]
    rho = max(y_vals) - min(y_vals)
    n = len(filtered_results)
    return a + (s/(n*rho))
         

def utility(results):
    filtered_results = remove_inf_results(results)
    alpha = results[0][4]
    gamma = 2*np.log(2)/alpha
    mer = mean_error_rate(results)
    ris = relative_interval_size(results)
    if mer <= alpha:
        return 1-ris
    else:
        return (1-ris)*np.exp(-gamma*(mer - alpha))

## Small-dataset experiments

In [7]:
experiment_parameters = """Experiment parameters

All river models
    - n_models = 10
    - max_features = "sqrt"
    - aggregation_method = "mean"
    - lambda_value = 1

OnlineCP
    - c_max = 1000
    - update_threshold = 50 (in case of CPExact)

OnlineQRF
    - K = 200

Mondrian forests
    - n_estimators = 10
    - min_samples_split = 2
"""

In [7]:
def instantiate_models():
    models_with_names = [
#         # Mondrian Forest 
#         (RiverMondrianForestRegressor(n_estimators = 10, min_samples_split =2),
#          "MondrianForest"),
        # CP Exact
#         (AdaptiveRandomForestRegressorCP(n_models = 10, max_features = "sqrt", 
#             aggregation_method = "mean", lambda_value = 1, cp_exact = True, c_max = 1000,
#             update_threshold = 50),
#           "CPExact")
        # OnlineQRF
        (AdaptiveRandomForestRegressorQRF(n_models = 10, max_features = "sqrt", 
            aggregation_method = "mean", lambda_value = 1, k_sketch = 200),
         "OnlineQRF"),
         
        # CP Approx
        (AdaptiveRandomForestRegressorCP(n_models = 10, max_features = "sqrt", 
            aggregation_method = "mean", lambda_value = 1, cp_exact = False, c_max = 1000),
         "CPApprox"),
    ]
    return models_with_names

In [9]:
metrics_with_names = [
        (mean_error_rate, "MER"),
        (relative_interval_size, "RIS"),
        (quantile_loss, "QL"),
        (utility, "Utility")
    ]

In [10]:
ds = list(load_datastream("data/stationary/2dplanes.arff"))
dataset_name = "2dplanes"
alpha = 0.1
models_with_names = instantiate_models()

In [198]:
res = run_experiment(ds[0:5000], dataset_name, models_with_names, metrics_with_names, alpha, experiment_parameters)

In [9]:
def run_small_experiments(n_repeats):
    datasets = os.listdir("data/stationary/")
    datasets.remove(".DS_Store")
    for i in range(n_repeats):
        for dataset in datasets:
            print(dataset)
            models_with_names = instantiate_models()
            alpha = 0.1
            datastream = list(load_datastream("data/stationary/" + dataset))
            dataset_name = dataset.replace(".arff","")
            run_experiment(datastream, dataset_name, models_with_names, metrics_with_names, alpha, experiment_parameters)

In [17]:
run_small_experiments(9)

yprop_4_1.arff
newsPopularity.arff
energy.arff
kin8nm.arff
elevators.arff
cpu_act.arff
house_8L.arff
puma8NH.arff
fried.arff
2dplanes.arff
calHousing.arff
house_16H.arff
ailerons.arff
sulfur.arff
yprop_4_1.arff
newsPopularity.arff
energy.arff
kin8nm.arff
elevators.arff
cpu_act.arff
house_8L.arff
puma8NH.arff
fried.arff
2dplanes.arff
calHousing.arff
house_16H.arff
ailerons.arff
sulfur.arff
yprop_4_1.arff
newsPopularity.arff
energy.arff
kin8nm.arff
elevators.arff
cpu_act.arff
house_8L.arff
puma8NH.arff
fried.arff
2dplanes.arff
calHousing.arff
house_16H.arff
ailerons.arff
sulfur.arff
yprop_4_1.arff
newsPopularity.arff
energy.arff
kin8nm.arff
elevators.arff
cpu_act.arff
house_8L.arff
puma8NH.arff
fried.arff
2dplanes.arff
calHousing.arff
house_16H.arff
ailerons.arff
sulfur.arff
yprop_4_1.arff
newsPopularity.arff
energy.arff
kin8nm.arff
elevators.arff
cpu_act.arff
house_8L.arff
puma8NH.arff
fried.arff
2dplanes.arff
calHousing.arff
house_16H.arff
ailerons.arff
sulfur.arff
yprop_4_1.arff
newsP

## Concept-drift experiments

In [11]:
# Experiment with Friedman data

In [12]:
# Functions which introduce concept-drift to an underlying Friedman datastream of 10 features

In [10]:
def cd_permute(datapoint:dict):
    d = copy.deepcopy(datapoint)
    d[0] = datapoint[3]
    d[1] = datapoint[4]
    d[2] = datapoint[1]
    d[3] = datapoint[0]
    d[4] = datapoint[2]
    return d

In [11]:
def cd_dataset(dataset):
    """
    Permute the positions of the independent variables starting 25% through the dataset.
    Revert to original permutation of independent variables starting 75% through the dataset.
    """
    n = len(dataset)
    n_start = int(0.25*n)
    n_end = int(0.75*n)
    dataset_new = copy.deepcopy(dataset)
    for i in range(n_start, n_end):
        dataset_new[i] = (cd_permute(dataset_new[i][0]), dataset_new[i][1])
    return dataset_new

In [12]:
# Load the Friedman data

In [13]:
datastream = synth.Friedman(seed=28)
datastream = list(datastream.take(1000000))
cd_datastream = cd_dataset(datastream)

In [17]:
# On large concept-drift data, we evaluate metrics on tumbling windows of size 10000.
# We modify our metrics to be evaluated on arithmetically-increasing subsets of our data.

In [10]:
# Given a metric, returns function which computes that metric on arithmetically-increasing
# subsets of the data, with tumbling window = window_size.
def tumbling_metric(metric, window_size):
    def t_metric(results):
        metric_results = []
        for i in range(window_size, len(results), window_size):
            perf = metric(results[i - window_size:i])
            metric_results.append([i, perf])
        return metric_results
    return t_metric

In [19]:
dataset_name = "cd_friedman1_global_abrupt"
models_with_names = instantiate_models()
metrics_with_names = [
        (tumbling_metric(mean_error_rate, 10000), "MER"),
        (tumbling_metric(relative_interval_size, 10000), "RIS"),
        (tumbling_metric(quantile_loss, 10000), "QL"),
        (tumbling_metric(utility, 10000), "Utility")
    ]
alpha = 0.1
parameter_info = """Experiment parameters

All river models
    - n_models = 10
    - max_features = "sqrt"
    - aggregation_method = "mean"
    - lambda_value = 1

OnlineCP
    - c_max = 1000
    - update_threshold = 50 (in case of CPExact)

OnlineQRF
    - K = 200

Mondrian forests
    - n_estimators = 10
    - min_samples_split = 2
"""

In [20]:
s = run_experiment(cd_datastream, dataset_name, models_with_names, metrics_with_names, alpha, parameter_info)

10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
210000
220000
230000
240000
250000
260000
270000
280000
290000
300000
310000
320000
330000
340000
350000
360000
370000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
480000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
700000
710000
720000
730000
740000
750000
760000
770000
780000
790000
800000
810000
820000
830000
840000
850000
860000
870000
880000
890000
900000
910000
920000
930000
940000
950000
960000
970000
980000
990000
1000000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
210000
220000
230000
240000
250000
260000
270000
280000
290000
300000
310000
320000
330000
340000
350000
360000
370000
380000
390000
400000
410000
420000
430000
440000
450000
46

In [21]:
s[1]

{'OnlineQRF': {'MER': [[10000, 0.0386038603860386],
   [20000, 0.0251],
   [30000, 0.0269],
   [40000, 0.0259],
   [50000, 0.023],
   [60000, 0.0237],
   [70000, 0.0227],
   [80000, 0.023],
   [90000, 0.0224],
   [100000, 0.0235],
   [110000, 0.0209],
   [120000, 0.0223],
   [130000, 0.0203],
   [140000, 0.0224],
   [150000, 0.0215],
   [160000, 0.0205],
   [170000, 0.021],
   [180000, 0.0201],
   [190000, 0.0191],
   [200000, 0.021],
   [210000, 0.0197],
   [220000, 0.0204],
   [230000, 0.0209],
   [240000, 0.0198],
   [250000, 0.0199],
   [260000, 0.1986],
   [270000, 0.125],
   [280000, 0.0905],
   [290000, 0.0682],
   [300000, 0.0512],
   [310000, 0.0415],
   [320000, 0.0407],
   [330000, 0.038],
   [340000, 0.0376],
   [350000, 0.0306],
   [360000, 0.0252],
   [370000, 0.0269],
   [380000, 0.0276],
   [390000, 0.0265],
   [400000, 0.0241],
   [410000, 0.0241],
   [420000, 0.0225],
   [430000, 0.0229],
   [440000, 0.0189],
   [450000, 0.0191],
   [460000, 0.0203],
   [470000, 0.019

In [110]:
cd_datastream[300000]

({0: 0.016410949070087333,
  1: 0.2782558469335449,
  2: 0.8185684229585304,
  3: 0.7683679899654309,
  4: 0.2148203408429732,
  5: 0.984567363665633,
  6: 0.5287647187545143,
  7: 0.7676221057186963,
  8: 0.4693229444473781,
  9: 0.28530809435445836},
 13.320901360071973)

In [111]:
datastream[300000]

({0: 0.7683679899654309,
  1: 0.8185684229585304,
  2: 0.2148203408429732,
  3: 0.016410949070087333,
  4: 0.2782558469335449,
  5: 0.984567363665633,
  6: 0.5287647187545143,
  7: 0.7676221057186963,
  8: 0.4693229444473781,
  9: 0.28530809435445836},
 13.320901360071973)

# Airlines dataset (700k)

In [8]:
airlines_ds = list(load_datastream("data/airlines/plane_700K_train.arff"))

In [11]:
dataset_name = "cd_airlines_700k"
models_with_names = instantiate_models()
metrics_with_names = [
        (tumbling_metric(mean_error_rate, 10000), "MER"),
        (tumbling_metric(relative_interval_size, 10000), "RIS"),
        (tumbling_metric(quantile_loss, 10000), "QL"),
        (tumbling_metric(utility, 10000), "Utility")
    ]
alpha = 0.1
parameter_info = """Experiment parameters

All river models
    - n_models = 10
    - max_features = "sqrt"
    - aggregation_method = "mean"
    - lambda_value = 1

OnlineCP
    - c_max = 1000
    - update_threshold = 50 (in case of CPExact)

OnlineQRF
    - K = 200

Mondrian forests
    - n_estimators = 10
    - min_samples_split = 2
"""

In [12]:
s = run_experiment(airlines_ds, dataset_name, models_with_names, metrics_with_names, alpha, parameter_info)

1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
92000
93000
94000
95000
96000
97000
98000
99000
100000
101000
102000
103000
104000
105000
106000
107000
108000
109000
110000
111000
112000
113000
114000
115000
116000
117000
118000
119000
120000
121000
122000
123000
124000
125000
126000
127000
128000
129000
130000
131000
132000
133000
134000
135000
136000
137000
138000
139000
140000
141000
142000
143000
144000
145000
146000
147000
148000
149000
150000
151000
152000
153000
154000
155000
156000
157000
158000
15

503000
504000
505000
506000
507000
508000
509000
510000
511000
512000
513000
514000
515000
516000
517000
518000
519000
520000
521000
522000
523000
524000
525000
526000
527000
528000
529000
530000
531000
532000
533000
534000
535000
536000
537000
538000
539000
540000
541000
542000
543000
544000
545000
546000
547000
548000
549000
550000
551000
552000
553000
554000
555000
556000
557000
558000
559000
560000
561000
562000
563000
564000
565000
566000
567000
568000
569000
570000
571000
572000
573000
574000
575000
576000
577000
578000
579000
580000
581000
582000
583000
584000
585000
586000
587000
588000
589000
590000
591000
592000
593000
594000
595000
596000
597000
598000
599000
600000
601000
602000
603000
604000
605000
606000
607000
608000
609000
610000
611000
612000
613000
614000
615000
616000
617000
618000
619000
620000
621000
622000
623000
624000
625000
626000
627000
628000
629000
630000
631000
632000
633000
634000
635000
636000
637000
638000
639000
640000
641000
642000
643000
644000
645000

In [None]:
6-7k in one minute, saving x.

## Figuring out what's going on with the data.

In [13]:
# Let's try wrapping these things in a standardscaler and see if it works at all better.

In [103]:
from river import compose
from river import linear_model
from river import preprocessing
from river import feature_extraction

model = compose.Pipeline(
    preprocessing.StandardScaler(),
    AdaptiveRandomForestRegressorCP(n_models = 10, max_features = "sqrt", 
            aggregation_method = "mean", lambda_value = 1, cp_exact = False, c_max = 1000)
)

In [104]:
# Add a pipeline predict_interval function

In [102]:
def pipeline_predict_interval(self, x, alpha):
    x_transformed, regressor = self._transform_one(x)
    return regressor.predict_interval(x_transformed, alpha)
compose.Pipeline.predict_interval = pipeline_predict_interval

In [105]:
ds = list(load_datastream("data/stationary/house_8L.arff"))
dataset_name = "houses"
alpha = 0.1
models_with_names = [(model, "pipeline_model")]
parameter_info = """Experiment parameters

All river models
    - n_models = 10
    - max_features = "sqrt"
    - aggregation_method = "mean"
    - lambda_value = 1

OnlineCP
    - c_max = 1000
    - update_threshold = 50 (in case of CPExact)

OnlineQRF
    - K = 200

Mondrian forests
    - n_estimators = 10
    - min_samples_split = 2
"""

In [83]:
res = run_experiment(ds[0:5000], dataset_name, models_with_names, metrics_with_names, alpha, parameter_info)

1000
2000
3000
4000
5000


In [84]:
res[0]

{'pipeline_model': [[{'P3': 7074.0,
    'P6p4': 0.0049639,
    'P11p3': 0.5074781,
    'P16p2': 0.5797286,
    'P19p2': 0.0366129,
    'H5p2': 0.0202439,
    'H15p1': 6.6187845,
    'H40p4': 0.7740586},
   130600.0,
   0.0,
   [-3, 3],
   0.1],
  [{'P3': 597.0,
    'P6p4': 0.003871,
    'P11p3': 0.48,
    'P16p2': 0.6951424,
    'P19p2': 0.0033501,
    'H5p2': 0.1707317,
    'H15p1': 7.1639344,
    'H40p4': 0.1428571},
   40500.0,
   2089.6,
   [-3, 3],
   0.1],
  [{'P3': 1931.0,
    'P6p4': 0.0023202,
    'P11p3': 0.4777473,
    'P16p2': 0.6835836,
    'P19p2': 0.0,
    'H5p2': 0.1176471,
    'H15p1': 6.1858476,
    'H40p4': 0.6875},
   28700.0,
   3638.592568532216,
   [-3, 3],
   0.1],
  [{'P3': 164.0,
    'P6p4': 0.0,
    'P11p3': 0.4925054,
    'P16p2': 0.7804878,
    'P19p2': 0.0,
    'H5p2': 0.1,
    'H15p1': 6.6198347,
    'H40p4': 1.0},
   28500.0,
   6712.89054377055,
   [-3, 3],
   0.1],
  [{'P3': 119.0,
    'P6p4': 0.0,
    'P11p3': 0.4806452,
    'P16p2': 0.7563025,
    'P

In [94]:
x_new = {'P3': 358.0,
    'P6p4': 0.0022247,
    'P11p3': 0.5038932,
    'P16p2': 0.7318436,
    'P19p2': 0.0,
    'H5p2': 0.1,
    'H15p1': 6.0774411,
    'H40p4': 1.0}
x, step = model._transform_one(x_new)
step.predict_interval(x, 0.9)

[30023.69974664437, 35087.562557606696]

In [95]:
model.predict_one(x_new)

32558.08305709588

In [100]:
model.predict_interval(x_new,0.1)

[-903926.8349685123, 969062.5412616465]

In [57]:
model["AdaptiveRandomForestRegressorCP"].predict_interval({'P3': -0.1361774465287634,
 'P6p4': -0.08098319201014761,
 'P11p3': -0.1796330695671541,
 'P16p2': 0.7085601024081527,
 'P19p2': -0.412830682880137,
 'H5p2': -1.2214024068998974,
 'H15p1': -0.1652797239817301,
 'H40p4': -1.4418811707937178},0.1)

[-31092.352995042398, 605635.8562030275]

In [52]:
model.steps

OrderedDict([('StandardScaler',
              StandardScaler (
                with_std=True
              )),
             ('AdaptiveRandomForestRegressorCP',
              [ForestMemberRegressor (
                index_original=0
                model=BaseTreeRegressor (
                  max_features=3
                  grace_period=50
                  max_depth=inf
                  split_confidence=0.01
                  tie_threshold=0.05
                  leaf_prediction="model"
                  leaf_model=LinearRegression (
                    optimizer=SGD (
                      lr=Constant (
                        learning_rate=0.01
                      )
                    )
                    loss=Squared ()
                    l2=0.
                    l1=0.
                    intercept_init=0.
                    intercept_lr=Constant (
                      learning_rate=0.01
                    )
                    clip_gradient=1e+12
                    initia

In [85]:
def pipeline_predict_interval(self, x, alpha):
    x_transformed, regressor = self._transform_one(x)
    return regressor.predict_interval(x_transformed, alpha)
compose.Pipeline.predict_interval = pipeline_predict_interval

In [86]:
compose.Pipeline.predict_interval = pipeline_predict_interval

In [93]:
model.predict_interval({'P3': 145.0,
    'P6p4': 0.0069767,
    'P11p3': 0.4744186,
    'P16p2': 0.7793103,
    'P19p2': 0.0,
    'H5p2': 0.0,
    'H15p1': 5.8571429,
    'H40p4': 0.0}, 0.9)

[-193519.7781227051, -188455.91531174278]

In [68]:
type(model)

river.compose.pipeline.Pipeline