In [1]:
# notebooks/train_model.ipynb

import sys
import os
import pandas as pd
import pickle

# Make the sibling ``scripts`` directory importable so that the project modules
# imported right after this (e.g. ``train_model``) can be resolved. The path is
# built relative to the current working directory.
# The membership check keeps this cell idempotent: re-running it does not stack
# duplicate entries onto sys.path.
scripts_dir = os.path.abspath(os.path.join('..', 'scripts'))
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)

from train_model import train_model, save_intermediate_results, load_intermediate_results


In [2]:
from utils import get_absolute_path

In [3]:
# Resolve the location of the already-processed dataset, then read that
# parquet file into a DataFrame used by the following cells.
processed_bike_data_path = get_absolute_path('../data/processed_bike_data.parquet')
bike_data = pd.read_parquet(path=processed_bike_data_path)


In [4]:
# Stations are handled in groups of `batch_size`
batch_size = 100

# Only the first 150 distinct station codes are kept (test-sized run)
unique_station_codes = bike_data['stationcode'].unique()
stations = unique_station_codes[:150]

# Accumulators that collect the results of every batch
all_models, all_scalers, all_nearby_station_results = {}, {}, {}

In [5]:
def _merge_batch_results(batch_models, batch_scalers, batch_nearby):
    """Fold one batch's three result dicts into the notebook-wide accumulators."""
    all_models.update(batch_models)
    all_scalers.update(batch_scalers)
    all_nearby_station_results.update(batch_nearby)


# Resume step: for each consecutive batch number whose model_batch_<n>.pkl
# exists, fetch that batch via load_intermediate_results and merge it.
current_batch = 0
while os.path.exists(get_absolute_path(f'../data/model_batch_{current_batch}.pkl')):
    models, scalers, nearby_station_results = load_intermediate_results(current_batch)
    _merge_batch_results(models, scalers, nearby_station_results)
    current_batch += 1

# Training step: start at the first station offset not covered by a batch
# found above and walk the remaining stations one batch at a time.
for i in range(current_batch * batch_size, len(stations), batch_size):
    batch_stations = stations[i:i + batch_size]
    models, scaler, nearby_station_results = train_model(bike_data, batch_stations)

    # train_model hands back a single scaler per call; map it to every
    # station of the batch so it can be looked up per station.
    batch_scalers = {station: scaler for station in batch_stations}
    _merge_batch_results(models, batch_scalers, nearby_station_results)

    # Hand this batch to save_intermediate_results under its batch number
    # before moving on to the next one.
    save_intermediate_results(models, batch_scalers, nearby_station_results, current_batch)
    current_batch += 1



2024-06-09 13:48:54,654 - INFO - Training models...


Creating features...


Training models:   0%|                                                                             | 0/100 [00:00<?, ?station/s]

Valid indices for station 10001: [1019, 7, 6, 1005, 1, 0, 1006, 10, 1004, 9]


2024-06-09 13:49:28,269 - INFO - Cross-Validation Score for station 10001: -0.0028942516236178896
2024-06-09 13:49:28,549 - INFO - Mean Squared Error for station 10001: 0.0005680624225173463
Training models:   1%|▋                                                                    | 1/100 [00:10<17:21, 10.52s/station]

Valid indices for station 10001_relais: [1019, 7, 6, 1005, 1, 0, 10, 1004, 9, 11]


2024-06-09 13:50:14,623 - INFO - Cross-Validation Score for station 10001_relais: -8.236932649540751e-07
2024-06-09 13:50:16,087 - INFO - Mean Squared Error for station 10001_relais: 4.02425500412383e-08
Training models:   2%|█▍                                                                   | 2/100 [00:58<52:44, 32.29s/station]

Valid indices for station 10003: [716, 759, 764, 798, 1444, 40, 4, 758, 44, 2, 3]


2024-06-09 13:51:00,034 - INFO - Cross-Validation Score for station 10003: -7.0781522307191555e-06
2024-06-09 13:51:02,202 - INFO - Mean Squared Error for station 10003: 2.1435161641763968e-07
Training models:   3%|██                                                                 | 3/100 [01:44<1:02:24, 38.60s/station]

Valid indices for station 10004: [1012, 1019, 63, 758, 44, 2, 3, 7, 5, 19]


2024-06-09 13:51:39,456 - INFO - Cross-Validation Score for station 10004: -8.632610025142804e-06
2024-06-09 13:51:40,394 - INFO - Mean Squared Error for station 10004: 1.0327131518954223e-05
Training models:   4%|██▋                                                                | 4/100 [02:22<1:01:30, 38.44s/station]

Out of bounds indices for station 10005: [1474]
Max valid index: 1460
Valid indices for station 10005: [764, 1444, 40, 1443, 4, 1441, 1434, 2, 19, 21]


2024-06-09 13:52:14,870 - INFO - Cross-Validation Score for station 10005: -1.0961488805887687e-05
2024-06-09 13:52:16,478 - INFO - Mean Squared Error for station 10005: 1.3907498522009203e-09
Training models:   5%|███▍                                                                 | 5/100 [02:58<59:31, 37.59s/station]

Valid indices for station 10006: [63, 44, 3, 7, 5, 16, 19, 17, 18, 67]


2024-06-09 13:52:52,097 - INFO - Cross-Validation Score for station 10006: -1.0069346002350575e-05
2024-06-09 13:52:53,089 - INFO - Mean Squared Error for station 10006: 4.515489005878964e-06
Training models:   6%|████▏                                                                | 6/100 [03:35<58:22, 37.26s/station]

Valid indices for station 10008: [1019, 7, 16, 6, 1, 0, 10, 9, 11, 15]


2024-06-09 13:52:59,903 - INFO - Cross-Validation Score for station 10008: -0.0015549460854655732
2024-06-09 13:53:00,132 - INFO - Mean Squared Error for station 10008: 0.00041003131782825275
Training models:   7%|████▊                                                                | 7/100 [03:42<42:26, 27.38s/station]

Valid indices for station 10009: [1019, 63, 3, 7, 5, 16, 17, 18, 62, 6, 1, 0, 9, 11, 15, 58]


2024-06-09 13:53:40,900 - INFO - Cross-Validation Score for station 10009: -1.3767940260894305e-05
2024-06-09 13:53:43,055 - INFO - Mean Squared Error for station 10009: 3.5618286589658606e-06
Training models:   8%|█████▌                                                               | 8/100 [04:25<49:34, 32.33s/station]

Valid indices for station 1001: [1336, 1332, 1331, 8, 145, 133]


2024-06-09 13:54:21,831 - INFO - Cross-Validation Score for station 1001: -1.8581330247958727e-05
2024-06-09 13:54:23,849 - INFO - Mean Squared Error for station 1001: 0.00013196750304597101
Training models:   9%|██████▏                                                              | 9/100 [05:05<53:02, 34.97s/station]

Valid indices for station 10010: [7, 6, 1, 0, 10, 1004, 9, 13, 11, 14, 15]


2024-06-09 13:55:08,423 - INFO - Cross-Validation Score for station 10010: -6.5571302670213165e-06
2024-06-09 13:55:10,429 - INFO - Mean Squared Error for station 10010: 6.731677961466758e-08
Training models:  10%|██████▊                                                             | 10/100 [05:52<57:50, 38.56s/station]

Valid indices for station 10011: [6, 1005, 1, 0, 10, 1004, 108, 109, 116, 9, 12, 13]


2024-06-09 13:55:37,855 - INFO - Cross-Validation Score for station 10011: -9.575876598410546e-06
2024-06-09 13:55:38,998 - INFO - Mean Squared Error for station 10011: 9.906115323696194e-30
Training models:  11%|███████▍                                                            | 11/100 [06:20<52:39, 35.50s/station]

Valid indices for station 10012: [7, 16, 17, 18, 62, 6, 1, 9, 11, 14, 15, 58, 23]


2024-06-09 13:56:24,026 - INFO - Cross-Validation Score for station 10012: -2.0329049129843214e-05
2024-06-09 13:56:26,144 - INFO - Mean Squared Error for station 10012: 1.695417836066926e-05
Training models:  12%|████████▏                                                           | 12/100 [07:08<57:15, 39.04s/station]

Valid indices for station 10013: [10, 108, 116, 12, 13, 68, 107, 48, 47, 32]


2024-06-09 13:57:10,846 - INFO - Cross-Validation Score for station 10013: -1.1601959824881502e-05
2024-06-09 13:57:11,630 - INFO - Mean Squared Error for station 10013: 5.718467988869307e-06
Training models:  13%|████████▊                                                           | 13/100 [07:53<59:26, 40.99s/station]

Valid indices for station 10014: [10, 1004, 108, 9, 12, 13, 68, 14, 48]


2024-06-09 13:57:57,668 - INFO - Cross-Validation Score for station 10014: -4.3514341583450965e-06
2024-06-09 13:57:59,234 - INFO - Mean Squared Error for station 10014: 1.4002145465055905e-05
Training models:  14%|█████████▏                                                        | 14/100 [08:41<1:01:37, 42.99s/station]

Valid indices for station 10015: [62, 9, 13, 68, 11, 14, 15, 58, 23]


2024-06-09 13:58:30,039 - INFO - Cross-Validation Score for station 10015: -1.0491913734052883e-05
2024-06-09 13:58:30,855 - INFO - Mean Squared Error for station 10015: 5.058280182117115e-05
Training models:  15%|██████████▏                                                         | 15/100 [09:12<56:02, 39.56s/station]

Valid indices for station 10016: [7, 16, 17, 18, 62, 67, 6, 9, 11, 14, 15, 58, 23]


2024-06-09 13:59:04,173 - INFO - Cross-Validation Score for station 10016: -4.199381811011742e-06
2024-06-09 13:59:05,289 - INFO - Mean Squared Error for station 10016: 1.4349119519763218e-08
Training models:  16%|██████████▉                                                         | 16/100 [09:47<53:13, 38.02s/station]

Valid indices for station 10017: [7, 5, 16, 17, 18, 62, 67, 22, 6, 11, 15, 58, 23]


2024-06-09 13:59:32,699 - INFO - Cross-Validation Score for station 10017: -3.992137781517181e-06
2024-06-09 13:59:33,680 - INFO - Mean Squared Error for station 10017: 5.841067377082121e-07
Training models:  17%|███████████▌                                                        | 17/100 [10:15<48:35, 35.12s/station]

Valid indices for station 10018: [7, 5, 16, 19, 17, 18, 62, 67, 22, 55, 11, 15, 58, 56, 54]


2024-06-09 14:00:13,011 - INFO - Cross-Validation Score for station 10018: -1.722839130528076e-05
2024-06-09 14:00:14,358 - INFO - Mean Squared Error for station 10018: 1.1900310851472883e-06
Training models:  18%|████████████▏                                                       | 18/100 [10:56<50:17, 36.79s/station]

Valid indices for station 10019: [4, 3, 5, 19, 17, 18, 67, 22, 21]


2024-06-09 14:00:55,377 - INFO - Cross-Validation Score for station 10019: -4.120082966687036e-06
2024-06-09 14:00:57,987 - INFO - Mean Squared Error for station 10019: 1.578892917863271e-06
Training models:  19%|████████████▉                                                       | 19/100 [11:39<52:26, 38.85s/station]

Valid indices for station 1002: [1127, 20, 30, 135, 133, 69, 134, 1141, 1143]


2024-06-09 14:01:29,594 - INFO - Cross-Validation Score for station 1002: -4.381782804579783e-06
2024-06-09 14:01:30,357 - INFO - Mean Squared Error for station 1002: 3.576178600199956e-30
Training models:  20%|█████████████▌                                                      | 20/100 [12:12<49:12, 36.90s/station]

Out of bounds indices for station 10020: [1474, 1475]
Max valid index: 1460
Valid indices for station 10020: [4, 1434, 1435, 19, 22, 21, 55, 56, 28, 29, 33]


2024-06-09 14:01:35,805 - INFO - Cross-Validation Score for station 10020: -0.0008504964752953943
2024-06-09 14:01:35,929 - INFO - Mean Squared Error for station 10020: 0.0069851316353744764
Training models:  21%|██████████████▎                                                     | 21/100 [12:17<36:12, 27.50s/station]

Valid indices for station 10021: [16, 19, 17, 18, 62, 67, 27, 22, 21, 55, 56, 54, 28, 33]


2024-06-09 14:02:09,724 - INFO - Cross-Validation Score for station 10021: -2.085957904539464e-05
2024-06-09 14:02:10,875 - INFO - Mean Squared Error for station 10021: 1.0071515302081187e-29
Training models:  22%|██████████████▉                                                     | 22/100 [12:52<38:39, 29.73s/station]

Valid indices for station 10023: [16, 62, 27, 11, 14, 15, 58, 23, 26]


2024-06-09 14:02:38,715 - INFO - Cross-Validation Score for station 10023: -8.498413298663958e-06
2024-06-09 14:02:39,680 - INFO - Mean Squared Error for station 10023: 2.3120795519487094e-06
Training models:  23%|███████████████▋                                                    | 23/100 [13:21<37:47, 29.45s/station]

Valid indices for station 10024: [106, 105, 47, 32, 61, 24, 38, 642]


2024-06-09 14:02:45,161 - INFO - Cross-Validation Score for station 10024: -0.0017510228011725093
2024-06-09 14:02:45,244 - INFO - Mean Squared Error for station 10024: 0.00042484770355980883
Training models:  24%|████████████████▎                                                   | 24/100 [13:27<28:13, 22.29s/station]

Valid indices for station 10025: [68, 25, 26, 59, 48, 37, 49, 46, 36]


2024-06-09 14:02:50,943 - INFO - Cross-Validation Score for station 10025: -0.0022729464728964877
2024-06-09 14:02:51,070 - INFO - Mean Squared Error for station 10025: 0.00035140729392528855
Training models:  25%|█████████████████                                                   | 25/100 [13:33<21:41, 17.35s/station]

Valid indices for station 10026: [27, 23, 25, 26, 59, 60, 31]


2024-06-09 14:03:29,087 - INFO - Cross-Validation Score for station 10026: -2.3002917881720714e-05
2024-06-09 14:03:30,082 - INFO - Mean Squared Error for station 10026: 2.2671883394769113e-05
Training models:  26%|█████████████████▋                                                  | 26/100 [14:12<29:24, 23.85s/station]

Valid indices for station 10027: [62, 67, 27, 22, 55, 58, 23, 26, 56, 54, 60]


2024-06-09 14:04:02,922 - INFO - Cross-Validation Score for station 10027: -9.2223609979012e-06
2024-06-09 14:04:03,699 - INFO - Mean Squared Error for station 10027: 1.4791636108412228e-07
Training models:  27%|██████████████████▎                                                 | 27/100 [14:45<32:34, 26.78s/station]

Out of bounds indices for station 10028: [1475, 1479]
Max valid index: 1460
Valid indices for station 10028: [1435, 22, 21, 55, 56, 54, 28, 29, 33, 45, 597]


2024-06-09 14:04:09,376 - INFO - Cross-Validation Score for station 10028: -0.03450311893941334
2024-06-09 14:04:09,521 - INFO - Mean Squared Error for station 10028: 0.00042021578980356076
Training models:  28%|███████████████████                                                 | 28/100 [14:51<24:35, 20.49s/station]

Out of bounds indices for station 10029: [1475, 1479]
Max valid index: 1460
Valid indices for station 10029: [1434, 1435, 21, 55, 56, 28, 29, 33, 45]


2024-06-09 14:04:31,754 - INFO - Cross-Validation Score for station 10029: -6.098735779516144e-06
2024-06-09 14:04:32,512 - INFO - Mean Squared Error for station 10029: 3.483770408261178e-05
Training models:  29%|███████████████████▋                                                | 29/100 [15:14<25:08, 21.24s/station]

Valid indices for station 1003: [1127, 20, 30, 135, 133, 69, 134, 41, 1140, 1141, 1142, 1143, 1147]


2024-06-09 14:05:15,192 - INFO - Cross-Validation Score for station 1003: -7.94618161280325e-05
2024-06-09 14:05:16,520 - INFO - Mean Squared Error for station 1003: 9.530929630786378e-06
Training models:  30%|████████████████████▍                                               | 30/100 [15:58<32:44, 28.07s/station]

Valid indices for station 10031: [26, 59, 34, 31, 35]


2024-06-09 14:05:23,840 - INFO - Cross-Validation Score for station 10031: -0.00304766105681656
2024-06-09 14:05:24,075 - INFO - Mean Squared Error for station 10031: 0.0003954365779886666
Training models:  31%|█████████████████████                                               | 31/100 [16:06<25:12, 21.92s/station]

Valid indices for station 10032: [12, 106, 48, 49, 47, 32, 61, 24, 38]


2024-06-09 14:06:03,646 - INFO - Cross-Validation Score for station 10032: -1.6055979680254138e-05
2024-06-09 14:06:05,710 - INFO - Mean Squared Error for station 10032: 2.8930731756288172e-05
Training models:  32%|█████████████████████▊                                              | 32/100 [16:47<31:32, 27.83s/station]

Out of bounds indices for station 10033: [1475, 1479]
Max valid index: 1460
Valid indices for station 10033: [1435, 22, 21, 55, 56, 28, 29, 33, 45, 597]


2024-06-09 14:06:27,069 - INFO - Cross-Validation Score for station 10033: -7.67498454188738e-05
2024-06-09 14:06:27,649 - INFO - Mean Squared Error for station 10033: 3.931699163304187e-06
Training models:  33%|██████████████████████▍                                             | 33/100 [17:09<29:06, 26.06s/station]

Valid indices for station 10034: [39, 561, 60, 611, 34, 599, 31]


2024-06-09 14:07:01,569 - INFO - Cross-Validation Score for station 10034: -1.1832908568947945e-05
2024-06-09 14:07:03,206 - INFO - Mean Squared Error for station 10034: 4.551427867060283e-06
Training models:  34%|███████████████████████                                             | 34/100 [17:45<31:48, 28.91s/station]

Valid indices for station 10036: [31, 587, 35, 36, 658, 618, 617]


2024-06-09 14:07:33,742 - INFO - Cross-Validation Score for station 10036: -9.061807938073401e-06
2024-06-09 14:07:34,802 - INFO - Mean Squared Error for station 10036: 3.302607095237157e-05
Training models:  35%|███████████████████████▊                                            | 35/100 [18:16<32:11, 29.72s/station]

Valid indices for station 10037: [25, 59, 37, 46, 35, 36, 618, 617]


2024-06-09 14:08:10,572 - INFO - Cross-Validation Score for station 10037: -0.00014741458682665225
2024-06-09 14:08:12,494 - INFO - Mean Squared Error for station 10037: 0.00022110327500599267
Training models:  36%|████████████████████████▍                                           | 36/100 [18:54<34:15, 32.11s/station]

Valid indices for station 10038: [25, 59, 48, 37, 49, 46, 61, 679, 36]


2024-06-09 14:08:46,602 - INFO - Cross-Validation Score for station 10038: -3.261555854005297e-05
2024-06-09 14:08:47,848 - INFO - Mean Squared Error for station 10038: 2.1158624614089027e-05
Training models:  37%|█████████████████████████▏                                          | 37/100 [19:29<34:44, 33.08s/station]

Valid indices for station 10039: [32, 61, 24, 38, 679, 678, 642]


2024-06-09 14:09:19,610 - INFO - Cross-Validation Score for station 10039: -2.8835102996875237e-05
2024-06-09 14:09:20,496 - INFO - Mean Squared Error for station 10039: 6.434820710689716e-07
Training models:  38%|█████████████████████████▊                                          | 38/100 [20:02<34:03, 32.95s/station]

Valid indices for station 10041: [45, 39, 597, 561, 564, 560, 60, 611, 34]


2024-06-09 14:09:43,431 - INFO - Cross-Validation Score for station 10041: -1.366502363331207e-05
2024-06-09 14:09:44,017 - INFO - Mean Squared Error for station 10041: 1.845251675551321e-06
Training models:  39%|██████████████████████████▌                                         | 39/100 [20:25<30:37, 30.12s/station]

Out of bounds indices for station 10042: [1474]
Max valid index: 1460
Valid indices for station 10042: [764, 798, 1444, 40, 1443, 4, 758, 44, 2]


2024-06-09 14:10:13,965 - INFO - Cross-Validation Score for station 10042: -0.0001546543015141209
2024-06-09 14:10:14,755 - INFO - Mean Squared Error for station 10042: 0.00016837266752302998
Training models:  40%|███████████████████████████▏                                        | 40/100 [20:56<30:18, 30.31s/station]

Valid indices for station 1006: [30, 69, 134, 41, 42, 1142, 1143, 1144, 1147, 699, 90, 689, 1027, 707]


2024-06-09 14:10:51,786 - INFO - Cross-Validation Score for station 1006: -1.3849149323832353e-05
2024-06-09 14:10:52,882 - INFO - Mean Squared Error for station 1006: 7.585379891217228e-06
Training models:  41%|███████████████████████████▉                                        | 41/100 [21:34<32:06, 32.65s/station]

Valid indices for station 1007: [135, 69, 134, 41, 42, 1143, 1147, 774, 724, 716, 699, 90, 689, 707]


2024-06-09 14:11:34,755 - INFO - Cross-Validation Score for station 1007: -6.837022650051169e-07
2024-06-09 14:11:36,347 - INFO - Mean Squared Error for station 1007: 6.730461046073442e-09
Training models:  42%|████████████████████████████▌                                       | 42/100 [22:18<34:42, 35.90s/station]

Valid indices for station 1008: [145, 43, 51, 50, 73, 74, 72, 726, 725, 133, 774, 724, 699, 763]


2024-06-09 14:12:10,565 - INFO - Cross-Validation Score for station 1008: -1.3042185745501396e-05
2024-06-09 14:12:11,492 - INFO - Mean Squared Error for station 1008: 9.428078944875104e-06
Training models:  43%|█████████████████████████████▏                                      | 43/100 [22:53<33:53, 35.67s/station]

Valid indices for station 10105: [716, 759, 798, 40, 1012, 707, 63, 758, 44, 2, 3, 5]


2024-06-09 14:12:52,950 - INFO - Cross-Validation Score for station 10105: -1.322420167106355e-05
2024-06-09 14:12:54,574 - INFO - Mean Squared Error for station 10105: 1.0686132291304192e-06
Training models:  44%|█████████████████████████████▉                                      | 44/100 [23:36<35:22, 37.89s/station]

Out of bounds indices for station 10107: [1475, 1479]
Max valid index: 1460
Valid indices for station 10107: [1435, 55, 559, 56, 28, 29, 33, 45, 39, 597]


2024-06-09 14:13:29,771 - INFO - Cross-Validation Score for station 10107: -3.403006615770708e-05
2024-06-09 14:13:31,280 - INFO - Mean Squared Error for station 10107: 5.924605584710382e-05
Training models:  45%|██████████████████████████████▌                                     | 45/100 [24:13<34:24, 37.54s/station]

Valid indices for station 10110: [25, 59, 37, 49, 46, 36]


2024-06-09 14:13:59,811 - INFO - Cross-Validation Score for station 10110: -3.6135163852517904e-05
2024-06-09 14:14:00,429 - INFO - Mean Squared Error for station 10110: 1.5980893237570182e-06
Training models:  46%|███████████████████████████████▎                                    | 46/100 [24:42<31:31, 35.02s/station]

Valid indices for station 10113: [12, 107, 106, 105, 47, 32, 61, 24]


2024-06-09 14:14:44,078 - INFO - Cross-Validation Score for station 10113: -1.1857555611876013e-05
2024-06-09 14:14:46,428 - INFO - Mean Squared Error for station 10113: 4.4262176924156726e-05
Training models:  47%|███████████████████████████████▉                                    | 47/100 [25:28<33:50, 38.31s/station]

Valid indices for station 10114: [12, 13, 68, 25, 48, 37, 49, 32, 61]


2024-06-09 14:15:28,945 - INFO - Cross-Validation Score for station 10114: -9.097253991059006e-06
2024-06-09 14:15:29,812 - INFO - Mean Squared Error for station 10114: 7.02296106064041e-06
Training models:  48%|████████████████████████████████▋                                   | 48/100 [26:11<34:31, 39.84s/station]

Valid indices for station 10115: [68, 25, 59, 48, 37, 49, 46, 32, 61]


2024-06-09 14:16:02,286 - INFO - Cross-Validation Score for station 10115: -2.4613707991630335e-05
2024-06-09 14:16:03,788 - INFO - Mean Squared Error for station 10115: 1.5001543130832987e-06
Training models:  49%|█████████████████████████████████▎                                  | 49/100 [26:45<32:21, 38.08s/station]

Valid indices for station 1012: [145, 43, 51, 50, 73, 74, 72, 726, 725, 133, 724]


2024-06-09 14:16:35,116 - INFO - Cross-Validation Score for station 1012: -9.718503868071657e-06
2024-06-09 14:16:35,912 - INFO - Mean Squared Error for station 1012: 2.5709257544396104e-06
Training models:  50%|██████████████████████████████████                                  | 50/100 [27:17<30:14, 36.29s/station]

Valid indices for station 1013: [71, 53, 145, 43, 51, 50, 73, 74, 72]


2024-06-09 14:16:42,137 - INFO - Cross-Validation Score for station 1013: -0.0015442963957389053
2024-06-09 14:16:42,374 - INFO - Mean Squared Error for station 1013: 0.0017042609502999774
Training models:  51%|██████████████████████████████████▋                                 | 51/100 [27:24<22:19, 27.34s/station]

Valid indices for station 1014: [71, 52, 53, 64, 57, 132, 805, 760, 804]


2024-06-09 14:17:14,691 - INFO - Cross-Validation Score for station 1014: -1.2111647799165809e-05
2024-06-09 14:17:15,468 - INFO - Mean Squared Error for station 1014: 4.6672712919637343e-07
Training models:  52%|███████████████████████████████████▎                                | 52/100 [27:57<23:15, 29.07s/station]

Valid indices for station 1015: [71, 52, 53, 57, 132, 805, 760, 51, 74, 804]


2024-06-09 14:17:21,037 - INFO - Cross-Validation Score for station 1015: -0.0007758399002245859
2024-06-09 14:17:21,157 - INFO - Mean Squared Error for station 1015: 0.0005356932732092153
Training models:  53%|████████████████████████████████████                                | 53/100 [28:03<17:16, 22.05s/station]

Valid indices for station 10151: [17, 18, 62, 67, 27, 22, 55, 56, 54, 28, 60]


2024-06-09 14:18:05,360 - INFO - Cross-Validation Score for station 10151: -7.43327794642838e-07
2024-06-09 14:18:06,540 - INFO - Mean Squared Error for station 10151: 1.9674182790424495e-09
Training models:  54%|████████████████████████████████████▋                               | 54/100 [28:48<22:16, 29.05s/station]

Valid indices for station 10152: [17, 18, 67, 27, 22, 21, 55, 56, 54, 28, 29, 33, 45]


2024-06-09 14:18:38,569 - INFO - Cross-Validation Score for station 10152: -2.1825750092976415e-05
2024-06-09 14:18:39,833 - INFO - Mean Squared Error for station 10152: 9.564452553715963e-06
Training models:  55%|█████████████████████████████████████▍                              | 55/100 [29:21<22:44, 30.33s/station]

Valid indices for station 10153: [17, 18, 67, 27, 22, 21, 55, 56, 54, 28, 29, 33, 45, 60]


2024-06-09 14:19:14,765 - INFO - Cross-Validation Score for station 10153: -3.0852766039423282e-06
2024-06-09 14:19:16,074 - INFO - Mean Squared Error for station 10153: 4.711900403082476e-06
Training models:  56%|██████████████████████████████████████                              | 56/100 [29:58<23:32, 32.10s/station]

Valid indices for station 1016: [71, 52, 53, 64, 57, 132, 805, 760, 70, 753, 814, 804, 744, 757]


2024-06-09 14:19:22,273 - INFO - Cross-Validation Score for station 1016: -0.0006974230241792185
2024-06-09 14:19:22,438 - INFO - Mean Squared Error for station 1016: 0.0011069110022564835
Training models:  57%|██████████████████████████████████████▊                             | 57/100 [30:04<17:28, 24.38s/station]

Valid indices for station 10161: [7, 16, 17, 18, 62, 67, 27, 11, 14, 15, 58, 23]


2024-06-09 14:19:56,103 - INFO - Cross-Validation Score for station 10161: -1.1526737564192768e-05
2024-06-09 14:19:56,866 - INFO - Mean Squared Error for station 10161: 3.711837676625082e-05
Training models:  58%|███████████████████████████████████████▍                            | 58/100 [30:38<19:10, 27.39s/station]

Valid indices for station 10162: [25, 26, 59, 37, 49, 46, 31, 36]


2024-06-09 14:20:29,548 - INFO - Cross-Validation Score for station 10162: -1.3347816966653354e-05
2024-06-09 14:20:31,017 - INFO - Mean Squared Error for station 10162: 9.281015581868662e-07
Training models:  59%|████████████████████████████████████████                            | 59/100 [31:12<20:06, 29.42s/station]

Valid indices for station 10163: [27, 26, 56, 54, 39, 60, 611, 34]


2024-06-09 14:21:07,254 - INFO - Cross-Validation Score for station 10163: -3.970822717814196e-05
2024-06-09 14:21:08,629 - INFO - Mean Squared Error for station 10163: 5.156944653261796e-05
Training models:  60%|████████████████████████████████████████▊                           | 60/100 [31:50<21:15, 31.88s/station]

Valid indices for station 10164: [48, 37, 49, 47, 32, 61, 24, 38, 679, 678]


2024-06-09 14:21:43,583 - INFO - Cross-Validation Score for station 10164: -9.712132650056497e-05
2024-06-09 14:21:45,137 - INFO - Mean Squared Error for station 10164: 6.9294793396990665e-06
Training models:  61%|█████████████████████████████████████████▍                          | 61/100 [32:27<21:37, 33.27s/station]

Valid indices for station 10165: [7, 16, 17, 18, 62, 67, 27, 22, 11, 14, 15, 58, 23, 54]


2024-06-09 14:22:29,098 - INFO - Cross-Validation Score for station 10165: -3.4246502694200045e-05
2024-06-09 14:22:31,048 - INFO - Mean Squared Error for station 10165: 1.8146527035420665e-05
Training models:  62%|██████████████████████████████████████████▏                         | 62/100 [33:13<23:28, 37.06s/station]

Valid indices for station 10166: [1012, 1019, 63, 758, 44, 3, 7, 5]


2024-06-09 14:23:03,583 - INFO - Cross-Validation Score for station 10166: -3.3307422771298536e-06
2024-06-09 14:23:04,168 - INFO - Mean Squared Error for station 10166: 1.673789149971118e-07
Training models:  63%|██████████████████████████████████████████▊                         | 63/100 [33:46<22:07, 35.88s/station]

Valid indices for station 1017: [65, 52, 64, 57, 132, 760, 70, 753]


2024-06-09 14:23:43,630 - INFO - Cross-Validation Score for station 1017: -5.993343450568962e-06
2024-06-09 14:23:45,744 - INFO - Mean Squared Error for station 1017: 2.2092835460786918e-05
Training models:  64%|███████████████████████████████████████████▌                        | 64/100 [34:27<22:33, 37.59s/station]

Valid indices for station 1018: [65, 66, 64, 70]
Out of bounds indices for station 1018: [1463, 1472, 1462, 1468]
Max valid index: 1460
Valid indices for station 1018: [1358, 1357, 1430, 65, 66, 1383, 1356, 71, 52, 53, 64, 57, 132, 805, 760, 70, 753, 804, 744, 757]


2024-06-09 14:24:20,131 - INFO - Cross-Validation Score for station 1018: -2.029560734285366e-05
2024-06-09 14:24:21,184 - INFO - Mean Squared Error for station 1018: 1.9078648344313673e-06
Training models:  65%|████████████████████████████████████████████▏                       | 65/100 [35:03<21:33, 36.94s/station]

Out of bounds indices for station 1020: [1463]
Max valid index: 1460
Valid indices for station 1020: [1430, 65, 66]
Out of bounds indices for station 1020: [1463, 1472, 1462, 1471]
Max valid index: 1460
Valid indices for station 1020: [1358, 1359, 1357, 1430, 65, 66, 1383, 1392, 1391, 1384, 1386, 64, 760, 70, 753]


2024-06-09 14:25:04,895 - INFO - Cross-Validation Score for station 1020: -4.066969825492118e-06
2024-06-09 14:25:06,649 - INFO - Mean Squared Error for station 1020: 6.1352072128091355e-06
Training models:  66%|████████████████████████████████████████████▉                       | 66/100 [35:48<22:22, 39.50s/station]

Valid indices for station 10201: [5, 16, 19, 17, 18, 62, 67, 27, 22, 55, 15, 58, 56, 54]


2024-06-09 14:25:12,864 - INFO - Cross-Validation Score for station 10201: -0.007272254617620814
2024-06-09 14:25:13,088 - INFO - Mean Squared Error for station 10201: 0.001994590679869914
Training models:  67%|█████████████████████████████████████████████▌                      | 67/100 [35:55<16:16, 29.58s/station]

Valid indices for station 10202: [12, 13, 68, 14, 25, 48, 49]


2024-06-09 14:25:19,827 - INFO - Cross-Validation Score for station 10202: -0.001430540663244267
2024-06-09 14:25:20,027 - INFO - Mean Squared Error for station 10202: 0.00028110833199204943
Training models:  68%|██████████████████████████████████████████████▏                     | 68/100 [36:01<12:09, 22.79s/station]

Valid indices for station 1021: [20, 30, 135, 133, 69, 134, 41, 42, 1143, 1147, 699, 90]


2024-06-09 14:25:52,253 - INFO - Cross-Validation Score for station 1021: -1.1265960069689703e-05
2024-06-09 14:25:52,815 - INFO - Mean Squared Error for station 1021: 2.5406990116056056e-07
Training models:  69%|██████████████████████████████████████████████▉                     | 69/100 [36:34<13:19, 25.79s/station]

Out of bounds indices for station 1022: [1463, 1472]
Max valid index: 1460
Valid indices for station 1022: [65, 64, 57, 132, 760, 70, 753, 757]


2024-06-09 14:26:33,025 - INFO - Cross-Validation Score for station 1022: -4.502473617459232e-06
2024-06-09 14:26:35,003 - INFO - Mean Squared Error for station 1022: 1.786249842765693e-06
Training models:  70%|███████████████████████████████████████████████▌                    | 70/100 [37:16<15:21, 30.71s/station]

Valid indices for station 1023: [71, 52, 53, 57, 132, 805, 760, 51, 804]


2024-06-09 14:27:08,221 - INFO - Cross-Validation Score for station 1023: -2.6989368010166454e-05
2024-06-09 14:27:09,391 - INFO - Mean Squared Error for station 1023: 1.120263820910453e-06
Training models:  71%|████████████████████████████████████████████████▎                   | 71/100 [37:51<15:22, 31.81s/station]

Valid indices for station 1024: [145, 43, 51, 50, 73, 74, 72, 726, 725, 133, 774, 724, 699, 763]


2024-06-09 14:27:44,926 - INFO - Cross-Validation Score for station 1024: -2.188664871044553e-05
2024-06-09 14:27:45,705 - INFO - Mean Squared Error for station 1024: 4.255879218756108e-06
Training models:  72%|████████████████████████████████████████████████▉                   | 72/100 [38:27<15:28, 33.16s/station]

Valid indices for station 1025: [145, 43, 51, 50, 73, 74, 72, 133]


2024-06-09 14:27:51,981 - INFO - Cross-Validation Score for station 1025: -0.0008861082539028949
2024-06-09 14:27:52,098 - INFO - Mean Squared Error for station 1025: 0.00012015059476438753
Training models:  73%|█████████████████████████████████████████████████▋                  | 73/100 [38:34<11:18, 25.13s/station]

Valid indices for station 1026: [53, 145, 43, 51, 50, 73, 74, 72, 726, 725]


2024-06-09 14:28:39,411 - INFO - Cross-Validation Score for station 1026: -8.176234849995151e-06
2024-06-09 14:28:41,694 - INFO - Mean Squared Error for station 1026: 2.3377151678921016e-06
Training models:  74%|██████████████████████████████████████████████████▎                 | 74/100 [39:23<14:04, 32.47s/station]

Valid indices for station 11001: [1131, 138, 1145, 75, 76, 1148, 104]


2024-06-09 14:29:27,847 - INFO - Cross-Validation Score for station 11001: -6.205319357240632e-06
2024-06-09 14:29:29,725 - INFO - Mean Squared Error for station 11001: 9.862737686917243e-07
Training models:  75%|███████████████████████████████████████████████████                 | 75/100 [40:11<15:28, 37.14s/station]

Valid indices for station 11002: [78, 130, 75, 76, 77, 104, 96]


2024-06-09 14:30:05,395 - INFO - Cross-Validation Score for station 11002: -1.0274913464898076e-05
2024-06-09 14:30:06,886 - INFO - Mean Squared Error for station 11002: 1.9752681350628394e-05
Training models:  76%|███████████████████████████████████████████████████▋                | 76/100 [40:48<14:51, 37.15s/station]

Valid indices for station 11003: [78, 130, 76, 77, 96, 79]


2024-06-09 14:30:50,201 - INFO - Cross-Validation Score for station 11003: -6.7301328990717684e-06
2024-06-09 14:30:52,614 - INFO - Mean Squared Error for station 11003: 2.175305119133094e-06
Training models:  77%|████████████████████████████████████████████████████▎               | 77/100 [41:34<15:13, 39.72s/station]

Valid indices for station 11004: [78, 130, 76, 77]
Valid indices for station 11004: [192, 1131, 142, 173, 193, 143, 139, 172, 78, 130, 138, 1145, 75, 76, 77, 1148, 104, 96, 1001, 121, 112, 144, 140, 119, 80, 79, 95]


2024-06-09 14:31:35,074 - INFO - Cross-Validation Score for station 11004: -1.0757785381165115e-05
2024-06-09 14:31:37,312 - INFO - Mean Squared Error for station 11004: 2.1832240438649877e-06
Training models:  78%|█████████████████████████████████████████████████████               | 78/100 [42:19<15:06, 41.21s/station]

Valid indices for station 11006: [130, 77, 119, 79, 91, 93, 95]


2024-06-09 14:32:21,904 - INFO - Cross-Validation Score for station 11006: -6.86044878051786e-06
2024-06-09 14:32:23,243 - INFO - Mean Squared Error for station 11006: 5.057161122528807e-06
Training models:  79%|█████████████████████████████████████████████████████▋              | 79/100 [43:05<14:55, 42.63s/station]

Valid indices for station 11007: [119, 80, 118, 124, 128, 91]


2024-06-09 14:33:09,966 - INFO - Cross-Validation Score for station 11007: -7.905009416570538e-06
2024-06-09 14:33:11,836 - INFO - Mean Squared Error for station 11007: 3.172379919888539e-05
Training models:  80%|██████████████████████████████████████████████████████▍             | 80/100 [43:53<14:48, 44.42s/station]

Valid indices for station 11009: [127, 120, 131, 81, 82, 84, 86, 89]


2024-06-09 14:33:19,078 - INFO - Cross-Validation Score for station 11009: -0.0007378310678106687
2024-06-09 14:33:19,337 - INFO - Mean Squared Error for station 11009: 0.0017828830107069946
Training models:  81%|███████████████████████████████████████████████████████             | 81/100 [44:01<10:33, 33.34s/station]

Valid indices for station 11010: [120, 131, 176, 81, 198, 82, 84, 86, 137, 136]


2024-06-09 14:33:25,296 - INFO - Cross-Validation Score for station 11010: -0.003494652987677429
2024-06-09 14:33:25,416 - INFO - Mean Squared Error for station 11010: 0.0008403003650429126
Training models:  82%|███████████████████████████████████████████████████████▊            | 82/100 [44:07<07:32, 25.16s/station]

Valid indices for station 11011: [83, 121, 115, 112, 123]


2024-06-09 14:33:59,703 - INFO - Cross-Validation Score for station 11011: -1.0427850263332676e-05
2024-06-09 14:34:00,508 - INFO - Mean Squared Error for station 11011: 8.098974612934457e-07
Training models:  83%|████████████████████████████████████████████████████████▍           | 83/100 [44:42<07:58, 28.14s/station]

Valid indices for station 11012: [131, 176, 81, 198, 82, 84, 86, 137, 136]


2024-06-09 14:34:32,155 - INFO - Cross-Validation Score for station 11012: -1.5638257361518766e-05
2024-06-09 14:34:32,937 - INFO - Mean Squared Error for station 11012: 4.7732098174454206e-06
Training models:  84%|█████████████████████████████████████████████████████████           | 84/100 [45:14<07:50, 29.43s/station]

Valid indices for station 11014: [198, 152, 680, 149, 137, 136, 85, 719]


2024-06-09 14:35:19,678 - INFO - Cross-Validation Score for station 11014: -3.416324624043713e-05
2024-06-09 14:35:21,506 - INFO - Mean Squared Error for station 11014: 3.4936011401408543e-05
Training models:  85%|█████████████████████████████████████████████████████████▊          | 85/100 [46:03<08:47, 35.17s/station]

Valid indices for station 11016: [120, 131, 81, 82, 84, 86, 137, 136, 87, 122]


2024-06-09 14:35:55,340 - INFO - Cross-Validation Score for station 11016: -0.0005405845392958452
2024-06-09 14:35:56,755 - INFO - Mean Squared Error for station 11016: 5.2275740659463926e-05
Training models:  86%|██████████████████████████████████████████████████████████▍         | 86/100 [46:38<08:12, 35.19s/station]

Valid indices for station 11017: [86, 87, 719, 692, 122, 694, 695, 693]


2024-06-09 14:36:31,614 - INFO - Cross-Validation Score for station 11017: -5.8819281974741964e-05
2024-06-09 14:36:32,657 - INFO - Mean Squared Error for station 11017: 7.139591587551827e-06
Training models:  87%|███████████████████████████████████████████████████████████▏        | 87/100 [47:14<07:40, 35.41s/station]

Valid indices for station 11018: [98, 112, 88, 101, 103, 123, 102, 97, 99]


2024-06-09 14:37:05,801 - INFO - Cross-Validation Score for station 11018: -2.8915872250215693e-05
2024-06-09 14:37:06,579 - INFO - Mean Squared Error for station 11018: 5.220083288885882e-05
Training models:  88%|███████████████████████████████████████████████████████████▊        | 88/100 [47:48<06:59, 34.96s/station]

Valid indices for station 11019: [127, 120, 81, 748, 89, 92, 122, 694]


2024-06-09 14:37:13,778 - INFO - Cross-Validation Score for station 11019: -0.0006467103669067357
2024-06-09 14:37:14,021 - INFO - Mean Squared Error for station 11019: 0.0008573327508893961
Training models:  89%|████████████████████████████████████████████████████████████▌       | 89/100 [47:55<04:53, 26.71s/station]

Valid indices for station 1102: [69, 134, 41, 42, 1147, 774, 724, 716, 699, 90, 759, 689, 707]


2024-06-09 14:38:00,086 - INFO - Cross-Validation Score for station 1102: -7.680791075663506e-06
2024-06-09 14:38:02,506 - INFO - Mean Squared Error for station 1102: 1.0854790465048315e-06
Training models:  90%|█████████████████████████████████████████████████████████████▏      | 90/100 [48:44<05:32, 33.24s/station]

Valid indices for station 11020: [80, 128, 127, 79, 91, 93, 94]


2024-06-09 14:38:42,665 - INFO - Cross-Validation Score for station 11020: -1.0567853502524115e-05
2024-06-09 14:38:44,262 - INFO - Mean Squared Error for station 11020: 3.12997065927563e-06
Training models:  91%|█████████████████████████████████████████████████████████████▉      | 91/100 [49:26<05:22, 35.79s/station]

Valid indices for station 11021: [126, 748, 89, 92, 122]


2024-06-09 14:39:16,273 - INFO - Cross-Validation Score for station 11021: -4.411714870293275e-06
2024-06-09 14:39:17,287 - INFO - Mean Squared Error for station 11021: 1.2265077096142108e-07
Training models:  92%|██████████████████████████████████████████████████████████████▌     | 92/100 [49:59<04:39, 34.96s/station]

Valid indices for station 11022: [79, 91, 93, 95, 94, 97]


2024-06-09 14:40:00,939 - INFO - Cross-Validation Score for station 11022: -5.251088741011807e-06
2024-06-09 14:40:03,265 - INFO - Mean Squared Error for station 11022: 1.0326520654737353e-06
Training models:  93%|███████████████████████████████████████████████████████████████▏    | 93/100 [50:45<04:27, 38.27s/station]

Valid indices for station 11023: [91, 93, 94, 126, 97]
Valid indices for station 11023: [130, 77, 98, 112, 88, 119, 80, 118, 124, 128, 127, 120, 81, 79, 91, 93, 95, 94, 126, 708, 97, 99, 100, 748, 89, 92]


2024-06-09 14:40:42,294 - INFO - Cross-Validation Score for station 11023: -8.283352911540938e-06
2024-06-09 14:40:43,401 - INFO - Mean Squared Error for station 11023: 1.5081569499913165e-05
Training models:  94%|███████████████████████████████████████████████████████████████▉    | 94/100 [51:25<03:52, 38.83s/station]

Valid indices for station 11024: [98, 112, 79, 93, 95, 97]


2024-06-09 14:40:50,512 - INFO - Cross-Validation Score for station 11024: -0.0009984429361704506
2024-06-09 14:40:50,781 - INFO - Mean Squared Error for station 11024: 0.00018777319759854025
Training models:  95%|████████████████████████████████████████████████████████████████▌   | 95/100 [51:32<02:26, 29.39s/station]

Valid indices for station 11025: [76, 77, 104, 96, 121]


2024-06-09 14:41:34,128 - INFO - Cross-Validation Score for station 11025: -1.015012559734007e-05
2024-06-09 14:41:36,050 - INFO - Mean Squared Error for station 11025: 4.014018772778435e-06
Training models:  96%|█████████████████████████████████████████████████████████████████▎  | 96/100 [52:18<02:16, 34.16s/station]

Valid indices for station 11026: [98, 88, 93, 95, 94, 97, 99]


2024-06-09 14:42:19,542 - INFO - Cross-Validation Score for station 11026: -3.318448706519295e-05
2024-06-09 14:42:20,770 - INFO - Mean Squared Error for station 11026: 1.4334556419549704e-05
Training models:  97%|█████████████████████████████████████████████████████████████████▉  | 97/100 [53:02<01:51, 37.33s/station]

Valid indices for station 11027: [98, 112, 88, 101, 123, 102, 95, 97, 99]


2024-06-09 14:42:59,784 - INFO - Cross-Validation Score for station 11027: -2.508178952815528e-05
2024-06-09 14:43:00,855 - INFO - Mean Squared Error for station 11027: 7.621162092883336e-06
Training models:  98%|██████████████████████████████████████████████████████████████████▋ | 98/100 [53:42<01:16, 38.15s/station]

Valid indices for station 11028: [98, 88, 97, 99, 100]
Valid indices for station 11028: [83, 98, 112, 88, 101, 103, 123, 102, 79, 91, 93, 95, 94, 126, 708, 97, 99, 709, 100, 756, 715, 717, 710]


2024-06-09 14:43:43,639 - INFO - Cross-Validation Score for station 11028: -3.1505622274120975e-05
2024-06-09 14:43:45,716 - INFO - Mean Squared Error for station 11028: 1.0830199721655492e-05
Training models:  99%|███████████████████████████████████████████████████████████████████▎| 99/100 [54:27<00:40, 40.17s/station]

Valid indices for station 11029: [99, 100, 715, 717, 710]
Valid indices for station 11029: [98, 88, 101, 103, 123, 102, 642, 93, 95, 94, 126, 708, 97, 99, 709, 100, 756, 677, 676, 715, 654, 717, 738, 710, 751, 649, 711]


2024-06-09 14:44:28,095 - INFO - Cross-Validation Score for station 11029: -7.994659756817819e-06
2024-06-09 14:44:30,621 - INFO - Mean Squared Error for station 11029: 2.3268904923238363e-06
Training models: 100%|███████████████████████████████████████████████████████████████████| 100/100 [55:12<00:00, 33.13s/station]
2024-06-09 14:44:34,236 - INFO - Intermediate results saved for batch 0
2024-06-09 14:44:34,237 - INFO - Training models...


Creating features...


Training models:   0%|                                                                              | 0/50 [00:00<?, ?station/s]

Valid indices for station 11030: [98, 105, 88, 101, 103, 123, 102]


2024-06-09 14:45:42,832 - INFO - Cross-Validation Score for station 11030: -1.922185980644323e-06
2024-06-09 14:45:45,034 - INFO - Mean Squared Error for station 11030: 9.671177614646758e-06
Training models:   2%|█▍                                                                    | 1/50 [00:44<36:36, 44.82s/station]

Valid indices for station 11032: [88, 101, 103, 123, 102, 756, 715]


2024-06-09 14:46:18,563 - INFO - Cross-Validation Score for station 11032: -3.785892310766281e-06
2024-06-09 14:46:19,625 - INFO - Mean Squared Error for station 11032: 1.3667520461631112e-07
Training models:   4%|██▊                                                                   | 2/50 [01:19<31:02, 38.80s/station]

Valid indices for station 11033: [1145, 75, 76, 1148, 104, 96, 1001, 121]


2024-06-09 14:47:10,458 - INFO - Cross-Validation Score for station 11033: -3.128530510007776e-06
2024-06-09 14:47:12,988 - INFO - Mean Squared Error for station 11033: 5.640264629741695e-06
Training models:   6%|████▏                                                                 | 3/50 [02:12<35:36, 45.45s/station]

Valid indices for station 11034: [125, 110, 129, 106, 105, 101, 123, 102, 47, 24]


2024-06-09 14:48:05,583 - INFO - Cross-Validation Score for station 11034: -2.060750492508675e-05
2024-06-09 14:48:07,465 - INFO - Mean Squared Error for station 11034: 5.47605918282325e-06
Training models:   8%|█████▌                                                                | 4/50 [03:07<37:34, 49.01s/station]

Valid indices for station 11035: [125, 110, 107, 129, 106, 105, 47, 32, 24]


2024-06-09 14:48:16,488 - INFO - Cross-Validation Score for station 11035: -0.00249957781798475
2024-06-09 14:48:16,689 - INFO - Mean Squared Error for station 11035: 0.0016368835368462
Training models:  10%|███████                                                               | 5/50 [03:16<25:59, 34.67s/station]

Valid indices for station 11036: [108, 109, 116, 111, 12, 125, 110, 113, 107, 129, 106, 47]


2024-06-09 14:48:23,112 - INFO - Cross-Validation Score for station 11036: -0.0019364814522869298
2024-06-09 14:48:23,246 - INFO - Mean Squared Error for station 11036: 0.0006568357705912561
Training models:  12%|████████▍                                                             | 6/50 [03:23<18:24, 25.11s/station]

Valid indices for station 11037: [10, 1004, 108, 109, 116, 111, 12, 13, 107]


2024-06-09 14:49:08,043 - INFO - Cross-Validation Score for station 11037: -3.15679680325676e-06
2024-06-09 14:49:10,105 - INFO - Mean Squared Error for station 11037: 1.8229545054633092e-06
Training models:  14%|█████████▊                                                            | 7/50 [04:09<23:05, 32.22s/station]

Valid indices for station 11038: [1003, 114, 1006, 10, 1004, 108, 109, 116, 111, 110, 113, 107, 129]


2024-06-09 14:50:00,957 - INFO - Cross-Validation Score for station 11038: -3.965762594784222e-06
2024-06-09 14:50:03,755 - INFO - Mean Squared Error for station 11038: 2.9561403568121947e-06
Training models:  16%|███████████▏                                                          | 8/50 [05:03<27:19, 39.04s/station]

Valid indices for station 11039: [114, 115, 109, 116, 111, 125, 110, 113, 107, 129, 106, 105]


2024-06-09 14:50:49,577 - INFO - Cross-Validation Score for station 11039: -6.886049175476953e-06
2024-06-09 14:50:51,272 - INFO - Mean Squared Error for station 11039: 4.7553610742914574e-07
Training models:  18%|████████████▌                                                         | 9/50 [05:51<28:29, 41.69s/station]

Valid indices for station 11040: [1003, 114, 117, 1006, 1004, 108, 109, 116, 111, 110, 113, 107, 129]


2024-06-09 14:51:43,528 - INFO - Cross-Validation Score for station 11040: -9.039574367012538e-06
2024-06-09 14:51:46,387 - INFO - Mean Squared Error for station 11040: 1.9693192647922225e-05
Training models:  20%|█████████████▊                                                       | 10/50 [06:46<30:33, 45.84s/station]

Valid indices for station 11041: [83, 98, 112, 88, 123, 102, 95]


2024-06-09 14:52:27,892 - INFO - Cross-Validation Score for station 11041: -1.4765711012203195e-05
2024-06-09 14:52:28,751 - INFO - Mean Squared Error for station 11041: 2.9862978766266845e-06
Training models:  22%|███████████████▏                                                     | 11/50 [07:28<29:06, 44.77s/station]

Valid indices for station 11042: [114, 117, 115, 109, 116, 111, 125, 110, 113, 107, 129]


2024-06-09 14:53:16,165 - INFO - Cross-Validation Score for station 11042: -8.38928864808814e-06
2024-06-09 14:53:18,226 - INFO - Mean Squared Error for station 11042: 1.5433095857546557e-05
Training models:  24%|████████████████▌                                                    | 12/50 [08:18<29:15, 46.21s/station]

Valid indices for station 11043: [1003, 114, 117, 115, 109, 116, 111, 110, 113]


2024-06-09 14:53:55,606 - INFO - Cross-Validation Score for station 11043: -2.730562638646504e-06
2024-06-09 14:53:56,617 - INFO - Mean Squared Error for station 11043: 3.573310981456889e-07
Training models:  26%|█████████████████▉                                                   | 13/50 [08:56<27:01, 43.84s/station]

Valid indices for station 11044: [114, 117, 83, 121, 115, 125, 110, 113, 129, 123]


2024-06-09 14:54:25,316 - INFO - Cross-Validation Score for station 11044: -2.9014239398959377e-06
2024-06-09 14:54:26,436 - INFO - Mean Squared Error for station 11044: 3.261202458321451e-07
Training models:  28%|███████████████████▎                                                 | 14/50 [09:26<23:45, 39.60s/station]

Valid indices for station 11046: [114, 1006, 10, 1004, 108, 109, 116, 111, 12, 110, 113, 107, 129]


2024-06-09 14:55:07,527 - INFO - Cross-Validation Score for station 11046: -3.510383959414052e-07
2024-06-09 14:55:08,790 - INFO - Mean Squared Error for station 11046: 2.1124318046838754e-07
Training models:  30%|████████████████████▋                                                | 15/50 [10:08<23:35, 40.43s/station]

Valid indices for station 11047: [1002, 1003, 114, 117, 1001, 121, 115, 111, 113]


2024-06-09 14:55:42,290 - INFO - Cross-Validation Score for station 11047: -2.6616996074211636e-06
2024-06-09 14:55:42,936 - INFO - Mean Squared Error for station 11047: 2.42605524708376e-07
Training models:  32%|██████████████████████                                               | 16/50 [10:42<21:50, 38.54s/station]

Valid indices for station 11048: [119, 80, 118, 124, 128, 199]


2024-06-09 14:55:50,873 - INFO - Cross-Validation Score for station 11048: -0.0013894271153796274
2024-06-09 14:55:51,141 - INFO - Mean Squared Error for station 11048: 0.0012327497459130583
Training models:  34%|███████████████████████▍                                             | 17/50 [10:50<16:10, 29.42s/station]

Valid indices for station 11101: [130, 140, 119, 80, 118, 124, 79]


2024-06-09 14:55:57,927 - INFO - Cross-Validation Score for station 11101: -0.0017121743404196628
2024-06-09 14:55:58,149 - INFO - Mean Squared Error for station 11101: 0.0006102745258722982
Training models:  36%|████████████████████████▊                                            | 18/50 [10:57<12:05, 22.68s/station]

Valid indices for station 11102: [127, 120, 131, 81, 82, 86, 89]


2024-06-09 14:56:35,417 - INFO - Cross-Validation Score for station 11102: -5.126212552971443e-05
2024-06-09 14:56:36,743 - INFO - Mean Squared Error for station 11102: 3.5698627526106823e-06
Training models:  38%|██████████████████████████▏                                          | 19/50 [11:36<14:11, 27.46s/station]

Valid indices for station 11103: [117, 104, 96, 1001, 83, 121, 115]


2024-06-09 14:57:19,605 - INFO - Cross-Validation Score for station 11103: -8.087307779327404e-06
2024-06-09 14:57:21,101 - INFO - Mean Squared Error for station 11103: 9.37595492267352e-06
Training models:  40%|███████████████████████████▌                                         | 20/50 [12:20<16:16, 32.54s/station]

Valid indices for station 11104: [86, 87, 89, 92, 122, 694]


2024-06-09 14:57:27,871 - INFO - Cross-Validation Score for station 11104: -0.0033374160578719613
2024-06-09 14:57:28,073 - INFO - Mean Squared Error for station 11104: 0.00036705809873027647
Training models:  42%|████████████████████████████▉                                        | 21/50 [12:27<12:00, 24.86s/station]

Valid indices for station 11105: [83, 98, 115, 112, 125, 105, 88, 101, 103, 123, 102]


2024-06-09 14:57:35,500 - INFO - Cross-Validation Score for station 11105: -0.0010280865939644556
2024-06-09 14:57:35,711 - INFO - Mean Squared Error for station 11105: 0.0010777666706157391
Training models:  44%|██████████████████████████████▎                                      | 22/50 [12:35<09:11, 19.69s/station]

Valid indices for station 11107: [119, 80, 118, 124, 128]


2024-06-09 14:58:13,392 - INFO - Cross-Validation Score for station 11107: -9.546971125784167e-06
2024-06-09 14:58:14,454 - INFO - Mean Squared Error for station 11107: 1.375082216493224e-05
Training models:  46%|███████████████████████████████▋                                     | 23/50 [13:14<11:26, 25.41s/station]

Valid indices for station 11109: [115, 125, 110, 113, 107, 129, 106, 105, 123]


2024-06-09 14:58:20,525 - INFO - Cross-Validation Score for station 11109: -0.0010852934924752027
2024-06-09 14:58:20,626 - INFO - Mean Squared Error for station 11109: 0.0005861853478604144
Training models:  48%|█████████████████████████████████                                    | 24/50 [13:20<08:30, 19.64s/station]

Valid indices for station 11110: [127, 94, 126, 708, 748, 92]


2024-06-09 14:58:52,973 - INFO - Cross-Validation Score for station 11110: -9.75171755443353e-05
2024-06-09 14:58:54,293 - INFO - Mean Squared Error for station 11110: 3.6342828637094744e-05
Training models:  50%|██████████████████████████████████▌                                  | 25/50 [13:54<09:56, 23.85s/station]

Valid indices for station 11111: [128, 127, 120, 81, 91, 126, 89]


2024-06-09 14:59:27,526 - INFO - Cross-Validation Score for station 11111: -9.849227585165687e-06
2024-06-09 14:59:28,284 - INFO - Mean Squared Error for station 11111: 4.994498395971715e-06
Training models:  52%|███████████████████████████████████▉                                 | 26/50 [14:28<10:45, 26.89s/station]

Valid indices for station 11112: [80, 118, 124, 128, 127, 91]


2024-06-09 15:00:12,422 - INFO - Cross-Validation Score for station 11112: -1.2260883311107968e-05
2024-06-09 15:00:14,892 - INFO - Mean Squared Error for station 11112: 2.9081421443558145e-06
Training models:  54%|█████████████████████████████████████▎                               | 27/50 [15:14<12:34, 32.81s/station]

Valid indices for station 11113: [115, 109, 116, 111, 125, 110, 113, 107, 129, 106, 105]


2024-06-09 15:01:00,614 - INFO - Cross-Validation Score for station 11113: -4.6041436276637574e-05
2024-06-09 15:01:02,143 - INFO - Mean Squared Error for station 11113: 1.687359316038114e-05
Training models:  56%|██████████████████████████████████████▋                              | 28/50 [16:01<13:37, 37.14s/station]

Valid indices for station 11114: [78, 130, 76, 77, 140, 119, 79]


2024-06-09 15:01:44,696 - INFO - Cross-Validation Score for station 11114: -4.603399952850085e-06
2024-06-09 15:01:46,658 - INFO - Mean Squared Error for station 11114: 9.638429087605852e-08
Training models:  58%|████████████████████████████████████████                             | 29/50 [16:46<13:46, 39.35s/station]

Valid indices for station 11115: [199, 120, 131, 176, 81, 82, 84, 86]


2024-06-09 15:02:20,953 - INFO - Cross-Validation Score for station 11115: -7.175379472714073e-06
2024-06-09 15:02:21,631 - INFO - Mean Squared Error for station 11115: 7.507934450159668e-07
Training models:  60%|█████████████████████████████████████████▍                           | 30/50 [17:21<12:40, 38.04s/station]

Valid indices for station 1116: [71, 52, 53, 64, 57, 132, 805, 760, 70, 753, 814, 804, 744, 757]


2024-06-09 15:03:17,292 - INFO - Cross-Validation Score for station 1116: -2.6524874196689178e-05
2024-06-09 15:03:21,356 - INFO - Mean Squared Error for station 1116: 1.361077616688274e-06
Training models:  62%|██████████████████████████████████████████▊                          | 31/50 [18:21<14:06, 44.54s/station]

Valid indices for station 1117: [8, 145, 43, 50, 73, 72, 20, 30, 135, 133, 69]


2024-06-09 15:04:11,612 - INFO - Cross-Validation Score for station 1117: -2.4059659828404928e-05
2024-06-09 15:04:13,446 - INFO - Mean Squared Error for station 1117: 6.736976751970007e-06
Training models:  64%|████████████████████████████████████████████▏                        | 32/50 [19:13<14:02, 46.81s/station]

Valid indices for station 1119: [20, 30, 135, 69, 134, 41, 42, 1140, 1141, 1142, 1143, 1144, 1147, 90, 1027]


2024-06-09 15:05:21,400 - INFO - Cross-Validation Score for station 1119: -9.031833060931568e-06
2024-06-09 15:05:24,915 - INFO - Mean Squared Error for station 1119: 3.1201134755650128e-06
Training models:  66%|█████████████████████████████████████████████▌                       | 33/50 [20:24<15:21, 54.21s/station]

Valid indices for station 1120: [1127, 20, 30, 135, 133, 69, 134, 42, 1141, 1143, 1147]


2024-06-09 15:06:21,844 - INFO - Cross-Validation Score for station 1120: -2.1127566286942383e-05
2024-06-09 15:06:24,845 - INFO - Mean Squared Error for station 1120: 1.5573104243261302e-05
Training models:  68%|██████████████████████████████████████████████▉                      | 34/50 [21:24<14:54, 55.92s/station]

Valid indices for station 11201: [176, 198, 152, 82, 84, 86, 137, 136, 85]


2024-06-09 15:07:42,968 - INFO - Cross-Validation Score for station 11201: -4.784858699523587e-05
2024-06-09 15:07:46,307 - INFO - Mean Squared Error for station 11201: 7.1109232535387895e-06
Training models:  70%|████████████████████████████████████████████████▎                    | 35/50 [22:46<15:53, 63.59s/station]

Valid indices for station 12001: [1131, 139, 138, 1145, 75, 1148]


2024-06-09 15:08:32,578 - INFO - Cross-Validation Score for station 12001: -3.8080811150452424e-06
2024-06-09 15:08:34,183 - INFO - Mean Squared Error for station 12001: 1.0264879451246038e-05
Training models:  72%|█████████████████████████████████████████████████▋                   | 36/50 [23:33<13:44, 58.87s/station]

Valid indices for station 12002: [192, 1131, 173, 193, 139, 138]


2024-06-09 15:08:42,662 - INFO - Cross-Validation Score for station 12002: -0.0015278056394928082
2024-06-09 15:08:42,871 - INFO - Mean Squared Error for station 12002: 0.0008639444180587991
Training models:  74%|███████████████████████████████████████████████████                  | 37/50 [23:42<09:29, 43.82s/station]

Valid indices for station 12005: [143, 172, 130, 144, 140, 119]


2024-06-09 15:09:38,691 - INFO - Cross-Validation Score for station 12005: -4.476791767432329e-05
2024-06-09 15:09:40,504 - INFO - Mean Squared Error for station 12005: 2.469022958567878e-06
Training models:  76%|████████████████████████████████████████████████████▍                | 38/50 [24:40<09:35, 47.96s/station]

Valid indices for station 12006: [269, 142, 173, 141, 193]


2024-06-09 15:10:43,608 - INFO - Cross-Validation Score for station 12006: -7.278908394662421e-06
2024-06-09 15:10:46,122 - INFO - Mean Squared Error for station 12006: 3.697227474336307e-06
Training models:  78%|█████████████████████████████████████████████████████▊               | 39/50 [25:45<09:45, 53.26s/station]

Valid indices for station 12007: [142, 173, 141, 193, 143, 172]


2024-06-09 15:11:53,571 - INFO - Cross-Validation Score for station 12007: -2.6986545601417176e-07
2024-06-09 15:11:54,870 - INFO - Mean Squared Error for station 12007: 1.4012938297762084e-07
Training models:  80%|███████████████████████████████████████████████████████▏             | 40/50 [26:54<09:39, 57.91s/station]

Valid indices for station 12008: [142, 143, 172, 144, 175, 140]


2024-06-09 15:13:00,315 - INFO - Cross-Validation Score for station 12008: -3.885074950979128e-06
2024-06-09 15:13:02,060 - INFO - Mean Squared Error for station 12008: 8.007899545975412e-06
Training models:  82%|████████████████████████████████████████████████████████▌            | 41/50 [28:01<09:06, 60.69s/station]

Valid indices for station 12009: [143, 172, 144, 175, 147, 140]


2024-06-09 15:14:04,652 - INFO - Cross-Validation Score for station 12009: -1.9127507494343697e-05
2024-06-09 15:14:08,071 - INFO - Mean Squared Error for station 12009: 1.8310373450150323e-07
Training models:  84%|█████████████████████████████████████████████████████████▉           | 42/50 [29:07<08:18, 62.29s/station]

Valid indices for station 1201: [8, 145, 43, 51, 50, 73, 74, 72, 133]


2024-06-09 15:14:17,658 - INFO - Cross-Validation Score for station 1201: -0.0008961434260177914
2024-06-09 15:14:17,926 - INFO - Mean Squared Error for station 1201: 0.0008584895586171736
Training models:  86%|███████████████████████████████████████████████████████████▎         | 43/50 [29:17<05:25, 46.56s/station]

Valid indices for station 12010: [168, 146, 196, 167, 155]


2024-06-09 15:14:58,923 - INFO - Cross-Validation Score for station 12010: -5.686674394099612e-05
2024-06-09 15:15:00,210 - INFO - Mean Squared Error for station 12010: 4.568258908608307e-06
Training models:  88%|████████████████████████████████████████████████████████████▋        | 44/50 [29:59<04:31, 45.28s/station]

Valid indices for station 12011: [144, 175, 147, 171, 158, 199]


2024-06-09 15:15:10,519 - INFO - Cross-Validation Score for station 12011: -0.0013821083024350127
2024-06-09 15:15:10,910 - INFO - Mean Squared Error for station 12011: 0.0012604058771132091
Training models:  90%|██████████████████████████████████████████████████████████████       | 45/50 [30:10<02:54, 34.90s/station]

Valid indices for station 12013: [158, 178, 148, 159, 176]


2024-06-09 15:15:51,497 - INFO - Cross-Validation Score for station 12013: -1.3848225050050136e-05
2024-06-09 15:15:53,478 - INFO - Mean Squared Error for station 12013: 1.8490932893175017e-06
Training models:  92%|███████████████████████████████████████████████████████████████▍     | 46/50 [30:53<02:28, 37.20s/station]

Valid indices for station 12015: [152, 680, 149, 150, 85]


2024-06-09 15:16:39,328 - INFO - Cross-Validation Score for station 12015: -5.270456083483028e-05
2024-06-09 15:16:41,182 - INFO - Mean Squared Error for station 12015: 1.842342395723786e-05
Training models:  94%|████████████████████████████████████████████████████████████████▊    | 47/50 [31:40<02:01, 40.35s/station]

Valid indices for station 12016: [680, 149, 150, 151]
Valid indices for station 12016: [179, 146, 196, 198, 152, 165, 166, 153, 680, 149, 167, 150, 155, 151, 137, 136, 85, 719, 692, 686, 749, 740, 197, 682, 685]


2024-06-09 15:17:50,831 - INFO - Cross-Validation Score for station 12016: -2.311510708353459e-05
2024-06-09 15:17:53,788 - INFO - Mean Squared Error for station 12016: 1.9280515208504337e-05
Training models:  96%|██████████████████████████████████████████████████████████████████▏  | 48/50 [32:53<01:40, 50.03s/station]

Valid indices for station 12017: [150, 155, 151, 682]
Valid indices for station 12017: [146, 196, 152, 153, 680, 149, 167, 150, 155, 151, 85, 686, 749, 687, 195, 197, 154, 722, 682, 684, 685]


2024-06-09 15:18:45,413 - INFO - Cross-Validation Score for station 12017: -1.4479296512144236e-05
2024-06-09 15:18:46,430 - INFO - Mean Squared Error for station 12017: 2.9206573659434223e-05
Training models:  98%|███████████████████████████████████████████████████████████████████▌ | 49/50 [33:46<00:50, 50.81s/station]

Valid indices for station 12018: [198, 152, 153, 680, 149, 137, 136, 85]


2024-06-09 15:19:42,221 - INFO - Cross-Validation Score for station 12018: -0.0003230846812102185
2024-06-09 15:19:45,434 - INFO - Mean Squared Error for station 12018: 1.7649660817461223e-05
Training models: 100%|█████████████████████████████████████████████████████████████████████| 50/50 [34:45<00:00, 41.70s/station]
2024-06-09 15:19:50,073 - INFO - Intermediate results saved for batch 1


In [6]:
# Destinations of the final, merged artifacts, each resolved through
# get_absolute_path. The three names below are read by the save cell that follows.
scaler_file_path, model_file_path, nearby_stations_file_path = map(
    get_absolute_path,
    (
        '../data/scaler_final.pkl',
        '../data/model_final.pkl',
        '../data/nearby_stations_final.pkl',
    ),
)

In [7]:
def _dump_pickle_safely(obj, destination, label):
    """Pickle ``obj`` to ``destination`` without ever leaving a partial file there.

    The payload is first written to a temporary sibling (``<destination>.tmp``)
    and only moved onto ``destination`` once the dump has finished, so an
    interrupted or failing dump cannot truncate a previously saved artifact.

    Args:
        obj: The object to serialise with ``pickle.dump``.
        destination: Final path of the pickle file (str or path-like).
        label: Human-readable name used in the confirmation message.

    Raises:
        Whatever ``open``, ``pickle.dump`` or ``os.replace`` raise; the
        temporary file is removed before the exception propagates.
    """
    tmp_path = f"{destination}.tmp"
    try:
        with open(tmp_path, 'wb') as f:
            pickle.dump(obj, f)
        # Rename onto the final path (atomic on POSIX) only after a complete dump.
        os.replace(tmp_path, destination)
    finally:
        # After a successful replace the temp file no longer exists; this only
        # cleans up the leftover of a failed or interrupted dump.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    print(f"{label} saved to {destination}")


# Persist the results merged across all batches: scalers, models, then the
# nearby-station results (same order and messages as before).
_dump_pickle_safely(all_scalers, scaler_file_path, "Scaler")
_dump_pickle_safely(all_models, model_file_path, "Models")
_dump_pickle_safely(all_nearby_station_results, nearby_stations_file_path, "Nearby station results")

print("Model training complete.")

Scaler saved to /Users/anthonybellon/Comp_Documents/VelibVisualisation/data/scaler_final.pkl
Models saved to /Users/anthonybellon/Comp_Documents/VelibVisualisation/data/model_final.pkl
Nearby station results saved to /Users/anthonybellon/Comp_Documents/VelibVisualisation/data/nearby_stations_final.pkl
Model training complete.


In [None]:
# Save the notebook (optional)
!jupyter nbconvert --to notebook --execute train_model.ipynb

[NbConvertApp] Converting notebook train_model.ipynb to notebook
