In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from river import compose
from river import datasets
from river import evaluate
from river import feature_extraction
from river import linear_model
from river import metrics
from river import optim
from river import preprocessing
from river import stats

In [3]:
dataset = datasets.Bikes()

In [4]:
model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))
model

In [5]:
metric = metrics.MAE() + metrics.RMSE() + metrics.R2()
evaluate.progressive_val_score(dataset, model, metric, print_every=20_000, show_time=True, show_memory=True)

[20,000] MAE: 4.912727, RMSE: 6.578071, R2: 0.389708 – 00:00:02 – 5.6 KB
[40,000] MAE: 5.333554, RMSE: 6.953025, R2: 0.368931 – 00:00:05 – 5.6 KB
[60,000] MAE: 5.330948, RMSE: 6.940436, R2: 0.364829 – 00:00:07 – 5.6 KB
[80,000] MAE: 5.392313, RMSE: 7.047605, R2: 0.356374 – 00:00:09 – 5.6 KB
[100,000] MAE: 5.423059, RMSE: 7.072087, R2: 0.352245 – 00:00:12 – 5.6 KB
[120,000] MAE: 5.541223, RMSE: 7.176361, R2: 0.344607 – 00:00:14 – 5.6 KB
[140,000] MAE: 5.613023, RMSE: 7.248915, R2: 0.336937 – 00:00:17 – 5.6 KB
[160,000] MAE: 5.622428, RMSE: 7.28009, R2: 0.336293 – 00:00:19 – 5.6 KB
[180,000] MAE: 5.567824, RMSE: 7.251344, R2: 0.341127 – 00:00:22 – 5.6 KB


MAE: 5.563893, RMSE: 7.247396, R2: 0.341716

In [6]:
def get_hour(x):
    x['hour'] = x['moment'].hour
    return x

model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model += (
    get_hour |
    feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
)
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))
model

In [7]:
metric = metrics.MAE() + metrics.RMSE() + metrics.R2()
evaluate.progressive_val_score(dataset, model, metric, print_every=20_000, show_time=True, show_memory=True)

[20,000] MAE: 3.721246, RMSE: 4.93913, R2: 0.655934 – 00:00:03 – 51.31 KB
[40,000] MAE: 3.829972, RMSE: 5.033206, R2: 0.669312 – 00:00:06 – 51.31 KB
[60,000] MAE: 3.845068, RMSE: 5.045273, R2: 0.66435 – 00:00:10 – 51.31 KB
[80,000] MAE: 3.910259, RMSE: 5.122218, R2: 0.66001 – 00:00:13 – 51.31 KB
[100,000] MAE: 3.888652, RMSE: 5.104473, R2: 0.662544 – 00:00:17 – 51.31 KB
[120,000] MAE: 3.923727, RMSE: 5.134803, R2: 0.664463 – 00:00:20 – 51.31 KB
[140,000] MAE: 3.980953, RMSE: 5.20237, R2: 0.658484 – 00:00:24 – 51.31 KB
[160,000] MAE: 3.950034, RMSE: 5.168122, R2: 0.665522 – 00:00:27 – 51.31 KB
[180,000] MAE: 3.934545, RMSE: 5.152266, R2: 0.66737 – 00:00:31 – 51.31 KB


MAE: 3.933498, RMSE: 5.149357, R2: 0.667681

In [8]:
def get_hour(x):
    x['hour'] = x['moment'].hour
    return x

model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model += (
    get_hour | (
        feature_extraction.TargetAgg(
            by=['station', 'hour'], how=stats.EWMean(alpha=0.9)
        ) +      
        feature_extraction.TargetAgg(
            by=['station', 'hour'], how=stats.EWMean(alpha=0.1)
        )
    )
    
)
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(
    optimizer=optim.SGD(0.001),
    intercept_lr=0.001,
    initializer=optim.initializers.Normal(mu=0, sigma=1, seed=42)
)
model

In [9]:
metric = metrics.MAE() + metrics.RMSE() + metrics.R2()
evaluate.progressive_val_score(dataset, model, metric, print_every=20_000, show_time=True, show_memory=True)

[20,000] MAE: 1.457733, RMSE: 2.589659, R2: 0.905414 – 00:00:04 – 81.64 KB
[40,000] MAE: 1.323716, RMSE: 2.462595, R2: 0.920838 – 00:00:08 – 81.64 KB
[60,000] MAE: 1.344398, RMSE: 2.516882, R2: 0.91647 – 00:00:13 – 81.64 KB
[80,000] MAE: 1.424245, RMSE: 2.66481, R2: 0.90798 – 00:00:17 – 81.64 KB
[100,000] MAE: 1.459566, RMSE: 2.721932, R2: 0.904044 – 00:00:21 – 81.64 KB
[120,000] MAE: 1.478245, RMSE: 2.744664, R2: 0.904132 – 00:00:25 – 81.64 KB
[140,000] MAE: 1.484006, RMSE: 2.748641, R2: 0.904667 – 00:00:29 – 81.64 KB
[160,000] MAE: 1.484007, RMSE: 2.747885, R2: 0.905442 – 00:00:33 – 81.64 KB
[180,000] MAE: 1.486993, RMSE: 2.757031, R2: 0.904754 – 00:00:38 – 81.64 KB


MAE: 1.487971, RMSE: 2.755947, R2: 0.90481

In [10]:
def get_hour_and_weekday(x):
    x['hour'] = x['moment'].hour
    x['weekday'] = x['moment'].weekday
    return x

model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model += (
    get_hour_and_weekday | (
        feature_extraction.TargetAgg(
            by=['station', 'hour'], how=stats.EWMean(alpha=0.9)
        ) +      
        feature_extraction.TargetAgg(
            by=['station', 'hour'], how=stats.EWMean(alpha=0.1)
        ) +
        feature_extraction.TargetAgg(
            by=['station', 'weekday'], how=stats.EWMean(alpha=0.9)
        )
    )
    
)
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(
    optimizer=optim.SGD(0.001),
    intercept_lr=0.001,
    initializer=optim.initializers.Normal(mu=0, sigma=1, seed=42)
)
model

In [11]:
metric = metrics.MAE() + metrics.RMSE() + metrics.R2()
evaluate.progressive_val_score(dataset, model, metric, print_every=20_000, show_time=True, show_memory=True)

[20,000] MAE: 1.421956, RMSE: 2.566813, R2: 0.907076 – 00:00:05 – 13.19 MB
[40,000] MAE: 1.305277, RMSE: 2.450389, R2: 0.921621 – 00:00:10 – 26.31 MB
[60,000] MAE: 1.332087, RMSE: 2.508918, R2: 0.916998 – 00:00:16 – 40.68 MB
[80,000] MAE: 1.415009, RMSE: 2.659169, R2: 0.908369 – 00:00:23 – 52.54 MB
[100,000] MAE: 1.452175, RMSE: 2.717514, R2: 0.904356 – 00:00:29 – 69.4 MB
[120,000] MAE: 1.472085, RMSE: 2.741014, R2: 0.904387 – 00:00:37 – 81.26 MB
[140,000] MAE: 1.478726, RMSE: 2.745516, R2: 0.904883 – 00:00:44 – 93.12 MB
[160,000] MAE: 1.479387, RMSE: 2.745151, R2: 0.90563 – 00:00:53 – 104.98 MB
[180,000] MAE: 1.482886, RMSE: 2.754609, R2: 0.904921 – 00:01:01 – 126.84 MB


MAE: 1.48392, RMSE: 2.753557, R2: 0.904975