In [1]:
!pip install river

Collecting river
  Downloading river-0.11.1-cp38-cp38-win_amd64.whl (1.2 MB)
Collecting numpy>=1.22
  Downloading numpy-1.23.1-cp38-cp38-win_amd64.whl (14.7 MB)
Collecting pandas>=1.3
  Downloading pandas-1.4.3-cp38-cp38-win_amd64.whl (10.6 MB)
Installing collected packages: numpy, pandas, river
  Attempting uninstall: numpy
    Found existing installation: numpy 1.19.2
    Uninstalling numpy-1.19.2:
      Successfully uninstalled numpy-1.19.2
  Attempting uninstall: pandas
    Found existing installation: pandas 1.1.3
    Uninstalling pandas-1.1.3:
      Successfully uninstalled pandas-1.1.3
Successfully installed numpy-1.23.1 pandas-1.4.3 river-0.11.1


In [2]:
%load_ext autoreload
%autoreload 2


In [4]:
from river import compose
from river import linear_model
from river import metrics
from river import evaluate
from river import preprocessing
from river import optim


model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))

metric = metrics.MAE()

evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)


[20,000] MAE: 4.912727
[40,000] MAE: 5.333554
[60,000] MAE: 5.330948
[80,000] MAE: 5.392313
[100,000] MAE: 5.423059
[120,000] MAE: 5.541223
[140,000] MAE: 5.613023
[160,000] MAE: 5.622428
[180,000] MAE: 5.567824


MAE: 5.563893

What I learnt and concepts I don't understand:
- compose.Select: The parameters are put into a set and the columns of the features included in the set will be extracted
- preprocessing.StandardScaler: Each object in the class will take 4 parameters (A boolean to check if it is neccessary to get std equal 1, a counter to keep track of the number of datapoints learnt, a mean and a variance). Then learn_one will update the mean and the variance (if the boolean is True) while learn_many will update the mean and variance of each feature using itertool_zip
- linear reg: There are severals things I don't understand
    + Intercept_init and the _super() part in the class?
    + self.loss.mean_func(self._raw_dot_one(x)). Parameters that the loss function takes? Mean_func?
    + No learn_one function?
    
- MAE : Mean Absolute Error
- Progressive_val_score:
    + Metric in accordance with model?
    + utils.inspect?
    + next_checkpoint?
    
- Simualte_qa: Show datapoints in order of arrival




In [6]:
from river import feature_extraction
from river import stats

def get_hour(x):
    x['hour'] = x['moment'].hour
    return x

model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model += (
    get_hour |
    feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
)
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))

metric = metrics.MAE()

evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)


[20,000] MAE: 3.721246
[40,000] MAE: 3.829972
[60,000] MAE: 3.845068
[80,000] MAE: 3.910259
[100,000] MAE: 3.888652
[120,000] MAE: 3.923727
[140,000] MAE: 3.980953
[160,000] MAE: 3.950034
[180,000] MAE: 3.934545


MAE: 3.933498

In [7]:
from pprint import pprint
from river import datasets

dataset = datasets.Bikes()

def get_hour(x):
    x['hour'] = x['moment'].hour
    return x


print(type(dataset))
cache_weather = set()
for x, y in dataset:
    if x['description'] not in cache_weather:
        cache_weather.add(x['description'])
# print(cache_weather)
# print(len(cache_weather))
weather_list = list(cache_weather)
print(weather_list)


def get_weather(x):
    if x['description'] in {'light intensity drizzle rain', 'very heavy rain', 'moderate rain', 'drizzle', 'thunderstorm with heavy rain', 'heavy intensity rain', 'mist', 'overcast clouds', 'light intensity drizzle', 'thunderstorm with light rain', 'thunderstorm', 'thunderstorm with rain', 'light rain'}:
        x['rain'] = 2
    else:
        x['rain'] = 1
    return x

cache_station = set()
for x, y in dataset:
    if x['station'] not in cache_station:
        cache_station.add(x['station'])
# print(cache_station)
# print(len(cache_station))
station_list = list(cache_station)
# print(station_list)

def get_station(x):
    for station in station_list:
        if x['station'] == station:
            x[station] = 1
        else:
            x[station] = 0
    return x

def add_feature(x):
    get_weather(x)
    return x

model = add_feature
model |= compose.Discard('station', 'description', 'moment')
model += (
    get_hour |
    feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
)

model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))

metric = metrics.MAE()

evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)

<class 'river.datasets.bikes.Bikes'>
['scattered clouds', 'light rain', 'thunderstorm with heavy rain', 'few clouds', 'thunderstorm with light rain', 'light intensity drizzle rain', 'mist', 'drizzle', 'Sky is Clear', 'moderate rain', 'heavy intensity rain', 'thunderstorm', 'light intensity drizzle', 'overcast clouds', 'thunderstorm with rain', 'clear sky', 'very heavy rain', 'broken clouds']
[20,000] MAE: 3.723647
[40,000] MAE: 3.831113
[60,000] MAE: 3.847153
[80,000] MAE: 3.911021
[100,000] MAE: 3.888753
[120,000] MAE: 3.924125
[140,000] MAE: 3.981258
[160,000] MAE: 3.949619
[180,000] MAE: 3.93391


MAE: 3.932767

In [8]:
print(model.debug_one(x))

0. Input
--------
clouds: 88 (int)
description: overcast clouds (str)
humidity: 84 (int)
moment: 2016-10-05 09:57:18 (datetime)
pressure: 1,017.34000 (float)
station: pomme (str)
temperature: 17.45000 (float)
wind: 1.95000 (float)

1. Transformer union
--------------------
    1.0 add_feature | ~['description', 'moment', 'station']
    -------------------------------------------------------
    clouds: 88 (int)
    humidity: 84 (int)
    pressure: 1,017.34000 (float)
    rain: 2 (int)
    temperature: 17.45000 (float)
    wind: 1.95000 (float)

    1.1 get_hour | y_mean_by_station_and_hour
    -----------------------------------------
    y_mean_by_station_and_hour: 7.89396 (float)

clouds: 88 (int)
hour: 9 (int)
humidity: 84 (int)
pressure: 1,017.34000 (float)
rain: 2 (int)
temperature: 17.45000 (float)
wind: 1.95000 (float)
y_mean_by_station_and_hour: 7.89396 (float)

2. StandardScaler
-----------------
clouds: 1.54778 (float)
hour: 0.00000 (float)
humidity: 1.16366 (float)
pressure:

We want to make use of the weather feature so a naive method to convert classified values into numbers is to do seperate the classes into 2 and assign 1 to a group and 0 to the other. Here, there are 18 'description' classes and I classified them by rain and not rain.
By adding the feature weather, we see that the error decrease a little.

In [52]:
from pprint import pprint
from river import datasets

dataset = datasets.Bikes()

def get_hour(x):
    x['hour'] = x['moment'].hour
    return x


print(type(dataset))
cache_weather = set()
for x, y in dataset:
    if x['description'] not in cache_weather:
        cache_weather.add(x['description'])
# print(cache_weather)
# print(len(cache_weather))
weather_list = list(cache_weather)
print(weather_list)


def get_weather(x):
    if x['description'] in {'light intensity drizzle rain', 'very heavy rain', 'moderate rain', 'drizzle', 'thunderstorm with heavy rain', 'heavy intensity rain', 'mist', 'overcast clouds', 'light intensity drizzle', 'thunderstorm with light rain', 'thunderstorm', 'thunderstorm with rain', 'light rain'}:
        x['rain'] = 2
    else:
        x['rain'] = 1
    return x

cache_station = set()
for x, y in dataset:
    if x['station'] not in cache_station:
        cache_station.add(x['station'])
# print(cache_station)
# print(len(cache_station))
station_list = list(cache_station)
# print(station_list)

def get_station(x):
    for station in station_list:
        if x['station'] == station:
            x[station] = 1
        else:
            x[station] = 0
    return x

def add_feature(x):
    get_weather(x)
    get_station(x)
    return x

model = add_feature
model |= compose.Discard('station', 'description', 'moment')
model += (
    get_hour |
    feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
)

model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))

metric = metrics.MAE()

evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)

<class 'river.datasets.bikes.Bikes'>
['light intensity drizzle rain', 'very heavy rain', 'clear sky', 'Sky is Clear', 'moderate rain', 'drizzle', 'thunderstorm with heavy rain', 'heavy intensity rain', 'mist', 'overcast clouds', 'light intensity drizzle', 'thunderstorm with light rain', 'scattered clouds', 'thunderstorm', 'few clouds', 'thunderstorm with rain', 'light rain', 'broken clouds']
[20,000] MAE: 2.828958
[40,000] MAE: 2.920016
[60,000] MAE: 3.001646
[80,000] MAE: 3.097058
[100,000] MAE: 3.12772
[120,000] MAE: 3.126179
[140,000] MAE: 3.127185
[160,000] MAE: 3.126707
[180,000] MAE: 3.138914


MAE: 3.139136

Since there are only 5 stations and they share no common features, I set 5 new features and assign 1 if the location it is in x
and the other 4 get 0.
By adding the location feature, we see that the error get much lower.

In [7]:
import itertools

model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model += (
    get_hour |
    feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
)
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression()

for x, y in itertools.islice(dataset, 10000):
    y_pred = model.predict_one(x)
    model.learn_one(x, y)

x, y = next(iter(dataset))
print(model.debug_one(x))


0. Input
--------
clouds: 75 (int)
description: light rain (str)
humidity: 81 (int)
moment: 2016-04-01 00:00:07 (datetime)
pressure: 1,017.00000 (float)
station: metro-canal-du-midi (str)
temperature: 6.54000 (float)
wind: 9.30000 (float)

1. Transformer union
--------------------
    1.0 Select
    ----------
    clouds: 75 (int)
    humidity: 81 (int)
    pressure: 1,017.00000 (float)
    temperature: 6.54000 (float)
    wind: 9.30000 (float)

    1.1 get_hour | y_mean_by_station_and_hour
    -----------------------------------------
    y_mean_by_station_and_hour: 4.43243 (float)

clouds: 75 (int)
humidity: 81 (int)
pressure: 1,017.00000 (float)
temperature: 6.54000 (float)
wind: 9.30000 (float)
y_mean_by_station_and_hour: 4.43243 (float)

2. StandardScaler
-----------------
clouds: 0.47566 (float)
humidity: 0.42247 (float)
pressure: 1.05314 (float)
temperature: -1.22098 (float)
wind: 2.21104 (float)
y_mean_by_station_and_hour: -0.59098 (float)

3. LinearRegression
-----------------

In [8]:
import datetime as dt

evaluate.progressive_val_score(
    dataset=dataset,
    model=model.clone(),
    metric=metrics.MAE(),
    moment='moment',
    delay=dt.timedelta(minutes=30),
    print_every=20_000
)


[20,000] MAE: 4.203433
[40,000] MAE: 4.195404
[60,000] MAE: 4.130316
[80,000] MAE: 4.122796
[100,000] MAE: 4.069826
[120,000] MAE: 4.066034
[140,000] MAE: 4.088604
[160,000] MAE: 4.059282
[180,000] MAE: 4.026821


MAE: 4.024939

R2 score:
- Coefficient of determination: Basic idea is to see the ratio between square error of the line and square error of the labels. The smaller the square error of the line is, the better the model.
- Update by first updating the variance, then the square error of the line
    + sample_weight?
    




In [17]:
model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))

metric1 = metrics.R2()
metric2 = metrics.RMSE()


evaluate.progressive_val_score(dataset, model, metric1, print_every=20_000)
evaluate.progressive_val_score(dataset, model, metric2, print_every=20_000)

[20,000] R2: 0.389708
[40,000] R2: 0.368931
[60,000] R2: 0.364829
[80,000] R2: 0.356374
[100,000] R2: 0.352245
[120,000] R2: 0.344607
[140,000] R2: 0.336937
[160,000] R2: 0.336293
[180,000] R2: 0.341127
[20,000] RMSE: 6.691911
[40,000] RMSE: 7.026693
[60,000] RMSE: 6.993845
[80,000] RMSE: 7.090635
[100,000] RMSE: 7.108276
[120,000] RMSE: 7.206799
[140,000] RMSE: 7.275251
[160,000] RMSE: 7.303001
[180,000] RMSE: 7.271946


RMSE: 7.267737

With out the existence of time feature, the metric score is quite low (far from 1).

In [10]:
model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))

metric = metrics.MSE()

evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)

[20,000] MSE: 43.271017
[40,000] MSE: 48.344556
[60,000] MSE: 48.169653
[80,000] MSE: 49.668739
[100,000] MSE: 50.014417
[120,000] MSE: 51.500157
[140,000] MSE: 52.546765
[160,000] MSE: 52.999709
[180,000] MSE: 52.581995


MSE: 52.524742

The RMSE increases as we add datapoints which means we should add features or extract polynomial degree features. 

In [16]:
from river import feature_extraction
model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
model |= feature_extraction.PolynomialExtender(interaction_only=True)
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))

metric1 = metrics.MAE()
metric2 = metrics.RMSE()

evaluate.progressive_val_score(dataset, model, metric1, print_every=20_000)
evaluate.progressive_val_score(dataset, model, metric2, print_every=20_000)

[20,000] MAE: 4.933912
[40,000] MAE: 5.341118
[60,000] MAE: 5.325019
[80,000] MAE: 5.374263
[100,000] MAE: 5.394721
[120,000] MAE: 5.509585
[140,000] MAE: 5.58477
[160,000] MAE: 5.588667
[180,000] MAE: 5.530857
[20,000] RMSE: 6.667285
[40,000] RMSE: 7.009185
[60,000] RMSE: 6.980304
[80,000] RMSE: 7.068839
[100,000] RMSE: 7.081574
[120,000] RMSE: 7.181142
[140,000] RMSE: 7.250624
[160,000] RMSE: 7.27599
[180,000] RMSE: 7.245113


RMSE: 7.240081

Adding the polynomial features help decrease both RMSE and R2.

In [13]:
from pprint import pprint
from river import datasets

dataset = datasets.Bikes()

def get_hour(x):
    x['hour'] = x['moment'].hour
    return x


print(type(dataset))
cache_weather = set()
for x, y in dataset:
    if x['description'] not in cache_weather:
        cache_weather.add(x['description'])
# print(cache_weather)
# print(len(cache_weather))
weather_list = list(cache_weather)
print(weather_list)


def get_weather(x):
    if x['description'] in {'light intensity drizzle rain', 'very heavy rain', 'moderate rain', 'drizzle', 'thunderstorm with heavy rain', 'heavy intensity rain', 'mist', 'overcast clouds', 'light intensity drizzle', 'thunderstorm with light rain', 'thunderstorm', 'thunderstorm with rain', 'light rain'}:
        x['rain'] = 2
    else:
        x['rain'] = 1
    return x

cache_station = set()
for x, y in dataset:
    if x['station'] not in cache_station:
        cache_station.add(x['station'])
# print(cache_station)
# print(len(cache_station))
station_list = list(cache_station)
# print(station_list)

def get_station(x):
    for station in station_list:
        if x['station'] == station:
            x[station] = 1
        else:
            x[station] = 0
    return x

def add_feature(x):
    get_weather(x)
    get_station(x)
    return x

model = add_feature
model |= compose.Discard('station', 'description', 'moment')
model += (
    get_hour |
    feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
)
model |= feature_extraction.PolynomialExtender(interaction_only=True)
model |= preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))

metric1 = metrics.MAE()
metric2 = metrics.MSE()

evaluate.progressive_val_score(dataset, model, metric1, print_every=20_000)
evaluate.progressive_val_score(dataset, model, metric2, print_every=20_000)

<class 'river.datasets.bikes.Bikes'>
['scattered clouds', 'light rain', 'thunderstorm with heavy rain', 'few clouds', 'thunderstorm with light rain', 'light intensity drizzle rain', 'mist', 'drizzle', 'Sky is Clear', 'moderate rain', 'heavy intensity rain', 'thunderstorm', 'light intensity drizzle', 'overcast clouds', 'thunderstorm with rain', 'clear sky', 'very heavy rain', 'broken clouds']
[20,000] MAE: 1.645678
[40,000] MAE: 1.657413
[60,000] MAE: 1.68982
[80,000] MAE: 1.766984
[100,000] MAE: 1.824618
[120,000] MAE: 1.843591
[140,000] MAE: 1.870782
[160,000] MAE: 1.896705
[180,000] MAE: 1.913381
[20,000] MSE: 4.464201
[40,000] MSE: 4.645743
[60,000] MSE: 5.127339
[80,000] MSE: 5.7859
[100,000] MSE: 6.284571
[120,000] MSE: 6.426705
[140,000] MSE: 6.622598
[160,000] MSE: 6.836894
[180,000] MSE: 6.974476


MSE: 6.982597

As we combine both feature addition and polynomial extension, the error is approximately 2, which is much better than the original model.

In [9]:
from pprint import pprint
from river import datasets
from river import model_selection
from river import linear_model

dataset = datasets.Bikes()

def get_hour(x):
    x['hour'] = x['moment'].hour
    return x


print(type(dataset))
cache_weather = set()
for x, y in dataset:
    if x['description'] not in cache_weather:
        cache_weather.add(x['description'])
# print(cache_weather)
# print(len(cache_weather))
weather_list = list(cache_weather)
print(weather_list)


def get_weather(x):
    if x['description'] in {'light intensity drizzle rain', 'very heavy rain', 'moderate rain', 'drizzle', 'thunderstorm with heavy rain', 'heavy intensity rain', 'mist', 'overcast clouds', 'light intensity drizzle', 'thunderstorm with light rain', 'thunderstorm', 'thunderstorm with rain', 'light rain'}:
        x['rain'] = 2
    else:
        x['rain'] = 1
    return x

cache_station = set()
for x, y in dataset:
    if x['station'] not in cache_station:
        cache_station.add(x['station'])
# print(cache_station)
# print(len(cache_station))
station_list = list(cache_station)
# print(station_list)

def get_station(x):
    for station in station_list:
        if x['station'] == station:
            x[station] = 1
        else:
            x[station] = 0
    return x

def add_feature(x):
    get_weather(x)
    get_station(x)
    return x

models = [linear_model.LinearRegression(optimizer=optim.SGD(lr=lr)) for lr in [0.0001, 0.001, 1e-05, 0.01]]


model = add_feature
model |= compose.Discard('station', 'description', 'moment')
model += (
    get_hour |
    feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
)
model |= feature_extraction.PolynomialExtender(interaction_only=True)
model |= preprocessing.StandardScaler()
model |= model_selection.EpsilonGreedyRegressor(models,epsilon=0.1,decay=0.001,burn_in=100,seed=1)

metric = metrics.MSE()

evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)

<class 'river.datasets.bikes.Bikes'>
['scattered clouds', 'light rain', 'thunderstorm with heavy rain', 'few clouds', 'thunderstorm with light rain', 'light intensity drizzle rain', 'mist', 'drizzle', 'Sky is Clear', 'moderate rain', 'heavy intensity rain', 'thunderstorm', 'light intensity drizzle', 'overcast clouds', 'thunderstorm with rain', 'clear sky', 'very heavy rain', 'broken clouds']
[20,000] MAE: 1.649619
[40,000] MAE: 1.659618
[60,000] MAE: 1.691286
[80,000] MAE: 1.768075
[100,000] MAE: 1.825491
[120,000] MAE: 1.844346
[140,000] MAE: 1.871433
[160,000] MAE: 1.897282
[180,000] MAE: 1.913895


MAE: 1.916351

In [10]:
from pprint import pprint
from river import datasets
from river import model_selection
from river import linear_model

dataset = datasets.Bikes()

def get_hour(x):
    x['hour'] = x['moment'].hour
    return x


print(type(dataset))
cache_weather = set()
for x, y in dataset:
    if x['description'] not in cache_weather:
        cache_weather.add(x['description'])
# print(cache_weather)
# print(len(cache_weather))
weather_list = list(cache_weather)
print(weather_list)


def get_weather(x):
    if x['description'] in {'light intensity drizzle rain', 'very heavy rain', 'moderate rain', 'drizzle', 'thunderstorm with heavy rain', 'heavy intensity rain', 'mist', 'overcast clouds', 'light intensity drizzle', 'thunderstorm with light rain', 'thunderstorm', 'thunderstorm with rain', 'light rain'}:
        x['rain'] = 2
    else:
        x['rain'] = 1
    return x

cache_station = set()
for x, y in dataset:
    if x['station'] not in cache_station:
        cache_station.add(x['station'])
# print(cache_station)
# print(len(cache_station))
station_list = list(cache_station)
# print(station_list)

def get_station(x):
    for station in station_list:
        if x['station'] == station:
            x[station] = 1
        else:
            x[station] = 0
    return x

def add_feature(x):
    get_weather(x)
    get_station(x)
    return x

models = [linear_model.LinearRegression(optimizer=optim.SGD(lr=lr)) for lr in [0.0001, 0.001, 1e-05, 0.01]]
metric = metrics.MAE()

model = add_feature
model |= compose.Discard('station', 'description', 'moment')
model += (
    get_hour |
    feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
)
model |= feature_extraction.PolynomialExtender(interaction_only=True)
model |= preprocessing.StandardScaler()
model |= model_selection.GreedyRegressor(models, metric)

metric = metrics.MSE()

evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)

<class 'river.datasets.bikes.Bikes'>
['scattered clouds', 'light rain', 'thunderstorm with heavy rain', 'few clouds', 'thunderstorm with light rain', 'light intensity drizzle rain', 'mist', 'drizzle', 'Sky is Clear', 'moderate rain', 'heavy intensity rain', 'thunderstorm', 'light intensity drizzle', 'overcast clouds', 'thunderstorm with rain', 'clear sky', 'very heavy rain', 'broken clouds']
[20,000] MAE: 1.645991
[40,000] MAE: 1.65757
[60,000] MAE: 1.689924
[80,000] MAE: 1.767063
[100,000] MAE: 1.824681
[120,000] MAE: 1.843644
[140,000] MAE: 1.870827
[160,000] MAE: 1.896744
[180,000] MAE: 1.913416


MAE: 1.915881

In [None]:
from river import facto
from pprint import pprint
from river import datasets
from river import model_selection
from river import linear_model

dataset = datasets.Bikes()

def get_hour(x):
    x['hour'] = x['moment'].hour
    return x


print(type(dataset))
cache_weather = set()
for x, y in dataset:
    if x['description'] not in cache_weather:
        cache_weather.add(x['description'])
# print(cache_weather)
# print(len(cache_weather))
weather_list = list(cache_weather)
# print(weather_list)


def get_weather(x):
    if x['description'] in {'light intensity drizzle rain', 'very heavy rain', 'moderate rain', 'drizzle', 'thunderstorm with heavy rain', 'heavy intensity rain', 'mist', 'overcast clouds', 'light intensity drizzle', 'thunderstorm with light rain', 'thunderstorm', 'thunderstorm with rain', 'light rain'}:
        x['rain'] = 2
    else:
        x['rain'] = 1
    return x

cache_station = set()
for x, y in dataset:
    if x['station'] not in cache_station:
        cache_station.add(x['station'])
# print(cache_station)
# print(len(cache_station))
station_list = list(cache_station)
# print(station_list)

def get_station(x):
    for station in station_list:
        if x['station'] == station:
            x[station] = 1
        else:
            x[station] = 0
    return x

def add_feature(x):
    get_weather(x)
    get_station(x)
    return x

models = [linear_model.LinearRegression(optimizer=optim.SGD(lr=lr)) for lr in [0.0001, 0.001, 1e-05, 0.01]]
metric = metrics.MAE()

model = add_feature
model |= compose.Discard('station', 'description', 'moment')
model += (
    get_hour |
    feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
)
model |= feature_extraction.PolynomialExtender(interaction_only=True)
model |= preprocessing.StandardScaler()
model |= facto.FwFMRegressor(n_factors=10, intercept=5, seed=42)

metric = metrics.MSE()

evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)