In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from river import compose
from river import datasets
from river import evaluate
from river import feature_extraction
from river import linear_model
from river import metrics
from river import optim
from river import preprocessing
from river import stats

In [3]:
dataset = datasets.Bikes()

In [4]:
model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))
model

In [5]:
metric = metrics.MAE() + metrics.RMSE() + metrics.R2()
evaluate.progressive_val_score(dataset, model, metric, print_every=20_000, show_time=True, show_memory=True)

[20,000] MAE: 4.912727, RMSE: 6.578071, R2: 0.389708 – 00:00:01 – 5.6 KB
[40,000] MAE: 5.333554, RMSE: 6.953025, R2: 0.368931 – 00:00:02 – 5.6 KB
[60,000] MAE: 5.330948, RMSE: 6.940436, R2: 0.364829 – 00:00:03 – 5.6 KB
[80,000] MAE: 5.392313, RMSE: 7.047605, R2: 0.356374 – 00:00:04 – 5.6 KB
[100,000] MAE: 5.423059, RMSE: 7.072087, R2: 0.352245 – 00:00:06 – 5.6 KB
[120,000] MAE: 5.541223, RMSE: 7.176361, R2: 0.344607 – 00:00:07 – 5.6 KB
[140,000] MAE: 5.613023, RMSE: 7.248915, R2: 0.336937 – 00:00:08 – 5.6 KB
[160,000] MAE: 5.622428, RMSE: 7.28009, R2: 0.336293 – 00:00:10 – 5.6 KB
[180,000] MAE: 5.567824, RMSE: 7.251344, R2: 0.341127 – 00:00:11 – 5.6 KB


MAE: 5.563893, RMSE: 7.247396, R2: 0.341716

In [6]:
def get_hour(x):
    x['hour'] = x['moment'].hour
    return x

model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model += (
    get_hour |
    feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
)
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))
model

In [7]:
metric = metrics.MAE() + metrics.RMSE() + metrics.R2()
evaluate.progressive_val_score(dataset, model, metric, print_every=20_000, show_time=True, show_memory=True)

[20,000] MAE: 3.721246, RMSE: 4.93913, R2: 0.655934 – 00:00:01 – 51.31 KB
[40,000] MAE: 3.829972, RMSE: 5.033206, R2: 0.669312 – 00:00:03 – 51.31 KB
[60,000] MAE: 3.845068, RMSE: 5.045273, R2: 0.66435 – 00:00:05 – 51.31 KB
[80,000] MAE: 3.910259, RMSE: 5.122218, R2: 0.66001 – 00:00:08 – 51.31 KB
[100,000] MAE: 3.888652, RMSE: 5.104473, R2: 0.662544 – 00:00:10 – 51.31 KB
[120,000] MAE: 3.923727, RMSE: 5.134803, R2: 0.664463 – 00:00:12 – 51.31 KB
[140,000] MAE: 3.980953, RMSE: 5.20237, R2: 0.658484 – 00:00:14 – 51.31 KB
[160,000] MAE: 3.950034, RMSE: 5.168122, R2: 0.665522 – 00:00:16 – 51.31 KB
[180,000] MAE: 3.934545, RMSE: 5.152266, R2: 0.66737 – 00:00:18 – 51.31 KB


MAE: 3.933498, RMSE: 5.149357, R2: 0.667681

In [8]:
def get_hour(x):
    x['hour'] = x['moment'].hour
    return x

model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model += (
    get_hour | (
        feature_extraction.TargetAgg(
            by=['station', 'hour'], how=stats.EWMean(alpha=0.9)
        ) +      
        feature_extraction.TargetAgg(
            by=['station', 'hour'], how=stats.EWMean(alpha=0.1)
        )
    )
    
)
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(
    optimizer=optim.SGD(0.001),
    intercept_lr=0.001,
    initializer=optim.initializers.Normal(mu=0, sigma=1, seed=42)
)
model

In [9]:
metric = metrics.MAE() + metrics.RMSE() + metrics.R2()
evaluate.progressive_val_score(dataset, model, metric, print_every=20_000, show_time=True, show_memory=True)

[20,000] MAE: 1.463061, RMSE: 2.598729, R2: 0.904751 – 00:00:02 – 81.64 KB
[40,000] MAE: 1.326403, RMSE: 2.467358, R2: 0.920532 – 00:00:04 – 81.64 KB
[60,000] MAE: 1.34619, RMSE: 2.519989, R2: 0.916264 – 00:00:07 – 81.64 KB
[80,000] MAE: 1.425589, RMSE: 2.667011, R2: 0.907828 – 00:00:09 – 81.64 KB
[100,000] MAE: 1.460641, RMSE: 2.723656, R2: 0.903923 – 00:00:12 – 81.64 KB
[120,000] MAE: 1.479141, RMSE: 2.746089, R2: 0.904033 – 00:00:14 – 81.64 KB
[140,000] MAE: 1.484774, RMSE: 2.749861, R2: 0.904582 – 00:00:17 – 81.64 KB
[160,000] MAE: 1.484678, RMSE: 2.748953, R2: 0.905368 – 00:00:19 – 81.64 KB
[180,000] MAE: 1.48759, RMSE: 2.757977, R2: 0.904688 – 00:00:22 – 81.64 KB


MAE: 1.48856, RMSE: 2.756881, R2: 0.904746

In [10]:
def get_hour_and_weekday(x):
    x['hour'] = x['moment'].hour
    x['weekday'] = x['moment'].weekday
    return x

model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model += (
    get_hour_and_weekday | (
        feature_extraction.TargetAgg(
            by=['station', 'hour'], how=stats.EWMean(alpha=0.9)
        ) +      
        feature_extraction.TargetAgg(
            by=['station', 'hour'], how=stats.EWMean(alpha=0.1)
        ) +
        feature_extraction.TargetAgg(
            by=['station', 'weekday'], how=stats.EWMean(alpha=0.9)
        )
    )
    
)
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(
    optimizer=optim.SGD(0.001),
    intercept_lr=0.001,
    initializer=optim.initializers.Normal(mu=0, sigma=1, seed=42)
)
model

In [11]:
metric = metrics.MAE() + metrics.RMSE() + metrics.R2()
evaluate.progressive_val_score(dataset, model, metric, print_every=20_000, show_time=True, show_memory=True)

[20,000] MAE: 1.413898, RMSE: 2.563973, R2: 0.907281 – 00:00:03 – 13.19 MB
[40,000] MAE: 1.30215, RMSE: 2.449201, R2: 0.921697 – 00:00:06 – 26.31 MB
[60,000] MAE: 1.330034, RMSE: 2.508159, R2: 0.917048 – 00:00:10 – 40.68 MB
[80,000] MAE: 1.413474, RMSE: 2.658635, R2: 0.908406 – 00:00:13 – 52.54 MB
[100,000] MAE: 1.450949, RMSE: 2.717097, R2: 0.904385 – 00:00:18 – 69.4 MB
[120,000] MAE: 1.471064, RMSE: 2.740669, R2: 0.904411 – 00:00:22 – 81.26 MB
[140,000] MAE: 1.477851, RMSE: 2.745222, R2: 0.904904 – 00:00:27 – 93.12 MB
[160,000] MAE: 1.478621, RMSE: 2.744893, R2: 0.905647 – 00:00:31 – 104.98 MB
[180,000] MAE: 1.482206, RMSE: 2.754381, R2: 0.904937 – 00:00:36 – 126.84 MB


MAE: 1.483248, RMSE: 2.753332, R2: 0.904991