In [1]:
from datetime import datetime
import math
import pandas as pd

# pip install river should only be used one to install the package into the environment, and
# should not be loaded again and again -- however, would be useful while using Google Colab
# !pip install river

# import river submodules -- should be written in alphabetical orders
from river import compose
from river import evaluate
from river import feature_extraction
from river import linear_model
from river import metrics
from river import model_selection
from river import optim
from river import preprocessing
from river import stats
from river import stream
from river import time_series
from river import utils

In [2]:
dataset = pd.read_csv("finance.csv")
dataset.head()

Unnamed: 0,time,value
0,2022-07-22 10:28:35,0.025808
1,2022-07-22 10:33:35,0.616177
2,2022-07-22 10:38:36,0.616177
3,2022-07-22 10:43:37,1.100507
4,2022-07-22 10:48:36,-0.044577


In [3]:
params = {'converters': {'value': float},'parse_dates': {'time': "%Y-%m-%d %H:%M:%S"}}

# double check that the dataset has been parsed 
for x, y in stream.iter_csv('finance.csv', target = 'value', **params):
    print(x, y)
    break

{'time': datetime.datetime(2022, 7, 22, 10, 28, 35)} 0.025808099608183845


In [4]:
import math

def get_time(x):
    return {
        "hour": x["time"].hour,
        "minute": x["time"].minute,
    }

def get_hour_sin_cos(x):
    return {
        "sin_hour": math.sin(2 * math.pi * (x['time'].hour) / 24),
        "cos_hour": math.cos(2 * math.pi * (x['time'].hour) / 24)
    }

def get_minute_sin_cos(x):
    return {
        "sin_minute": math.sin(2 * math.pi * (x['time'].minute) / 60),
        "cos_minute": math.cos(2 * math.pi * (x['time'].minute) / 60)
    }

def get_time_progress(x):
    return {
        "progress": x['time'].toordinal() - datetime(2022, 1, 1, 0, 0).toordinal()
    }

In [5]:
from river import anomaly
from river import tree

import math


model = (
    compose.FuncTransformer(get_time_progress) +
    compose.FuncTransformer(get_hour_sin_cos) +
    compose.FuncTransformer(get_minute_sin_cos) + (
        compose.FuncTransformer(get_time) | (
            feature_extraction.TargetAgg(by='hour', how=stats.EWMean(alpha=0.9)) +
            feature_extraction.TargetAgg(by='minute', how=stats.EWMean(alpha=0.9)) +
            feature_extraction.TargetAgg(by='hour', how=stats.EWMean(alpha=0.1)) +
            feature_extraction.TargetAgg(by='minute', how=stats.EWMean(alpha=0.1)) +
            feature_extraction.TargetAgg(by='hour', how=stats.EWVar(alpha=0.9)) +
            feature_extraction.TargetAgg(by='minute', how=stats.EWVar(alpha=0.9))
        )
    )
) | compose.Discard('time', 'hour', 'minute') | anomaly.QuantileFilter(
    anomaly_detector=anomaly.GaussianScorer(grace_period=10),
    q=0.98,
    protect_anomaly_detector=True
) | preprocessing.StandardScaler()

model |= preprocessing.TargetStandardScaler( 
    model_selection.UCBRegressor(
        models=[
            linear_model.LinearRegression(
                optimizer=optim.SGD(lr=lr)
            ) for lr in [0.05, 0.02, 0.01, 0.005, 0.002, 0.0001]
        ] + [
            tree.HoeffdingTreeRegressor(grace_period=20),
            linear_model.PARegressor(C=0.012, eps=0.05),
        ],
        delta=0.01, burn_in=100, seed=1
    )
)
model

ZeroDivisionError: division by zero

Pipeline (
  TransformerUnion (
    FuncTransformer (
      func="get_time_progress"
    ),
    FuncTransformer (
      func="get_hour_sin_cos"
    ),
    FuncTransformer (
      func="get_minute_sin_cos"
    ),
    Pipeline (
      FuncTransformer (
        func="get_time"
      ),
      TransformerUnion (
        TargetAgg (
          by=['hour']
          how=EWMean (
            alpha=0.9
          )
          target_name="y"
        ),
        TargetAgg (
          by=['minute']
          how=EWMean (
            alpha=0.9
          )
          target_name="y"
        ),
        TargetAgg (
          by=['hour']
          how=EWMean (
            alpha=0.1
          )
          target_name="y"
        ),
        TargetAgg (
          by=['minute']
          how=EWMean (
            alpha=0.1
          )
          target_name="y"
        ),
        TargetAgg (
          by=['hour']
          how=EWVar (
            alpha=0.9
          )
          target_name="y"
        ),
        

In [6]:
params = {'converters': {'value': float},'parse_dates': {'time': "%Y-%m-%d %H:%M:%S"}}
dataset = stream.iter_csv('finance.csv', target = 'value', **params)
metric = metrics.MAE() + metrics.R2()
evaluate.progressive_val_score(dataset, model, metric, print_every=50)

[50] MAE: 0.958706, R2: -0.03256
[100] MAE: 0.776177, R2: 0.160527
[150] MAE: 0.720166, R2: 0.209708
[200] MAE: 0.622409, R2: 0.2875
[250] MAE: 0.560003, R2: 0.285056
[300] MAE: 0.65832, R2: 0.206238
[350] MAE: 0.693999, R2: 0.140606
[400] MAE: 0.698142, R2: 0.128034
[450] MAE: 0.658838, R2: 0.159326
[500] MAE: 0.596721, R2: 0.182515
[550] MAE: 0.548987, R2: 0.194396
[600] MAE: 0.510623, R2: 0.19393
[650] MAE: 0.474306, R2: 0.193959
[700] MAE: 0.444191, R2: 0.194207
[750] MAE: 0.416676, R2: 0.197501
[800] MAE: 0.394486, R2: 0.211287
[850] MAE: 0.386587, R2: 0.220198
[900] MAE: 0.378572, R2: 0.217334
[950] MAE: 0.36713, R2: 0.216711
[1,000] MAE: 0.383136, R2: 0.196628


MAE: 0.383136, R2: 0.196628

In [7]:
from river import metrics
import matplotlib.pyplot as plt


def evaluate_model(model): 

    metric = utils.Rolling(metrics.MAE() + metrics.R2(), 10)

    dates = []
    y_trues = []
    y_preds = []

    for x, y in stream.iter_csv('finance.csv', target = 'value', **params):

        # Obtain the prior prediction and update the model in one go
        y_pred = model.predict_one(x)
        model.learn_one(x, y)

        # Update the error metric
        metric.update(y, y_pred)

        # Store the true value and the prediction
        dates.append(x['time'])
        y_trues.append(y)
        y_preds.append(y_pred)

    # Plot the results
    fig, ax = plt.subplots(figsize=(20, 6))
    ax.grid(alpha=0.75)
    ax.plot(dates, y_trues, lw=3, color='#2ecc71', alpha=0.8, label='Ground truth')
    ax.plot(dates, y_preds, lw=3, color='#e74c3c', alpha=0.8, label='Prediction')
    ax.legend()
    ax.set_title(metric)
evaluate_model(model)

AttributeError: module 'river.utils' has no attribute 'Rolling'