In [1]:
import os
import pandas as pd
import numpy as np
import datetime

In [2]:
!find data -type f -name '*.csv'

data/swpc/kpindices-2022.csv
data/swpc/kpindices-2023.csv
data/dscovr/dsc_fc_summed_spectra_2022_v01.csv
data/dscovr/dsc_fc_summed_spectra_2023_v01.csv


In [145]:
x_df = pd.read_csv("./data/dscovr/dsc_fc_summed_spectra_2022_v01.csv", delimiter = ',', parse_dates=[0], infer_datetime_format=True, na_values='0', header = None)
y_df = pd.read_csv("./data/swpc/kpindices-2022.csv", delimiter = ',', parse_dates=[0], infer_datetime_format=True, na_values='0', header = 0)

In [146]:
x_df.describe()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,44,45,46,47,48,49,50,51,52,53
count,524450.0,524450.0,524450.0,511816.0,511811.0,511809.0,511809.0,511815.0,511815.0,511814.0,...,20115.0,11191.0,8416.0,3971.0,3859.0,213.0,139.0,44.0,40.0,35.0
mean,0.098939,-0.219225,0.060599,58.218162,9.514369,63.246343,56.563212,74.062121,65.428809,94.261894,...,384.711976,332.966637,388.589832,326.915073,294.150454,390.130451,384.123885,376.326977,403.068593,367.789749
std,3.943064,4.535931,3.667523,38.506281,17.031107,44.696797,45.047853,60.528255,82.509457,114.361904,...,40.774888,48.215582,38.280578,57.801942,85.771695,52.754546,51.58345,69.89348,124.156716,143.007012
min,-16.7123,-26.3765,-23.06,0.231726,0.231726,0.231726,0.231726,0.231726,0.231726,0.231726,...,214.319,188.544,211.826,205.87,108.95,202.247,112.29,187.778,0.231726,89.9962
25%,-3.00014,-3.305092,-1.967988,28.843,0.231726,32.3525,27.916,37.30155,25.6404,36.973425,...,365.23,302.5325,367.20375,274.956,211.853,359.223,352.9465,354.073,366.69175,241.622
50%,0.212469,-0.288369,0.034746,51.9893,0.386813,55.5706,46.9776,61.3553,45.4702,68.83805,...,383.495,326.713,393.8075,323.581,309.98,389.128,395.347,383.1295,427.067,403.663
75%,3.164427,3.017987,2.039108,84.7379,15.8128,90.0339,80.5927,100.403,84.1425,114.472,...,404.069,363.7825,409.08075,377.611,367.0345,416.967,410.484,411.97125,471.866,449.1185
max,19.7253,22.8347,27.9059,415.389,385.676,493.748,746.807,1136.67,1562.55,1804.56,...,662.013,735.132,772.122,521.745,541.136,637.731,568.053,528.47,655.892,646.111


In [147]:
y_df.head()

Unnamed: 0,date,3,6,9,12,15,18,21,24
0,2022-04-01,3.0,3.0,4.0,4.0,4.0,3.0,2.0,2.0
1,2022-04-02,4.0,5.0,4.0,2.0,2.0,3.0,4.0,3.0
2,2022-04-03,4.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0
3,2022-04-04,4.0,4.0,3.0,2.0,2.0,2.0,2.0,1.0
4,2022-04-05,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0


We can see that the timestamp is different between data sources and we will need to fix that. Because we want to train on a relative measure of time we will convert the data into training examples with an integer ordinal number.

What other changes will be needed? 

- convert date and time to an ordinal count of seconds since the start time of one example
- convert the kp indices to an integer
- transform dscovr NaNs into 0 (for now - maybe there is a better value to use) 
- filter kp rows that have invalid data (-1s)
- normalize dscovr mag field values
- normalize dscovr solar wind values

The last transform we need to make is to organize the data from dscovr into training examples having a shape of (4320, 53, 2). In axis 0 we have 4,320 minutes per 3 days.
In axis 1 we have the 53 data points for each minute.
In axis 2 we have the ISO date timestamp of the Kp index we are learning and the Kp index we want to learn to predict.

Note that we will generate a new training example for every 3 hour Kp index for each day so we want to cache the pr with evious data and shift it for efficiency.

We need to save our training set to files with random training/valid/test splits. We will split 10% of the examples for validation and save them as 'valid.h5' and the remaining 90% will be our training examples stored as 'train.h5'.which is

We still don't have a held-out test set for evaluating the performance of our model. We will use the 2022 data for training and validation sets and the 2023 data as our test set. Of course, you could come back and change this by combining 2022 and 2023 into one big set then splitting it 3 ways, maybe with an 80/10/10 split which is commonly used.



First we will remove any rows where the timestamp is missing

In [148]:
x_df = x_df.drop(x_df[x_df[1].isna()].index)
print(x_df.shape)

(524450, 54)


Now we will remove any rows where all of the solar wind values are missing (NaN)

In [149]:
x_df = x_df.drop(x_df[x_df.loc[:,4:].isnull().all(1)].index)
print(x_df.shape)

(511911, 54)


Now let's replace any remaining NaNs with 0

In [150]:
print(x_df.shape)
values = {}
for i in range(4, 54):
  values[i] = 0
print(values)
x_df = x_df.fillna(values)
print(x_df.shape)

(511911, 54)
{4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 11: 0, 12: 0, 13: 0, 14: 0, 15: 0, 16: 0, 17: 0, 18: 0, 19: 0, 20: 0, 21: 0, 22: 0, 23: 0, 24: 0, 25: 0, 26: 0, 27: 0, 28: 0, 29: 0, 30: 0, 31: 0, 32: 0, 33: 0, 34: 0, 35: 0, 36: 0, 37: 0, 38: 0, 39: 0, 40: 0, 41: 0, 42: 0, 43: 0, 44: 0, 45: 0, 46: 0, 47: 0, 48: 0, 49: 0, 50: 0, 51: 0, 52: 0, 53: 0}
(511911, 54)


In [151]:
x_df.describe()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,44,45,46,47,48,49,50,51,52,53
count,511911.0,511911.0,511911.0,511911.0,511911.0,511911.0,511911.0,511911.0,511911.0,511911.0,...,511911.0,511911.0,511911.0,511911.0,511911.0,511911.0,511911.0,511911.0,511911.0,511911.0
mean,0.115983,-0.245809,0.071499,58.207358,9.51251,63.233741,56.551942,74.048232,65.416539,94.244032,...,15.116849,7.279058,6.388556,2.535948,2.21743,0.162329,0.104302,0.032346,0.031495,0.025146
std,3.950377,4.545212,3.67673,38.510873,17.029962,44.701258,45.05044,60.531076,82.506584,114.358428,...,75.182786,49.208948,49.656902,29.129386,26.51012,8.028428,6.385241,3.547122,3.723996,3.256721
min,-16.7123,-26.3765,-23.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,-2.98153,-3.328865,-1.954055,28.8294,0.231726,32.3374,27.89995,37.2888,25.6254,36.95785,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.237204,-0.323418,0.042795,51.9797,0.386135,55.5538,46.9678,61.3369,45.4631,68.8172,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,3.17778,2.994075,2.049095,84.7337,15.8097,90.02565,80.5821,100.3975,84.1298,114.466,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,19.7253,22.8347,27.9059,415.389,385.676,493.748,746.807,1136.67,1562.55,1804.56,...,662.013,735.132,772.122,521.745,541.136,637.731,568.053,528.47,655.892,646.111


In [152]:
M = y_df.shape[0]*(y_df.shape[1]-1)
print(M, 'Kp indices')
print(4320*x_df.shape[1], 'DSCOVR data points for each Kp index')

2920 Kp indices
233280 DSCOVR data points for each Kp index


In [153]:
x_M = int(x_df.shape[0] / 4320)
print(y_df.shape)

(365, 9)


In [154]:
from sklearn.model_selection import train_test_split

train_ydf, valid_ydf = train_test_split(y_df, test_size=0.1)

In [155]:
train_M = train_ydf.shape[0] * (train_ydf.shape[1]-1)
valid_M = valid_ydf.shape[0] * (valid_ydf.shape[1]-1)
print(train_M, ' training examples and ', valid_M, ' validation examples')

2624  training examples and  296  validation examples


In [156]:
from datetime import timedelta
def get_X(t, indf):
  match = indf.loc[:,0]==t
  if indf[match].shape[0] == 1:
    x_idx = indf[match].index[0]
  else:
    x_idx = -1
    for h in range(24):
      nt = pd.to_datetime(t) - timedelta(hours=h)
      match = indf.loc[:,0]==nt
      if indf[match].shape[0] == 1:
        x_idx = indf[match].index[0]
        break
  return x_idx, x_idx+4320

Check that get_X performs as expected

In [157]:
t = valid_ydf.iloc[0,0]
begin, end = get_X(t, x_df)
print('Start at ',x_df.loc[begin,0])
print('End   at ',x_df.loc[end,0])
print(valid_M / 8)

Start at  2022-09-24 00:00:00
End   at  2022-09-27 00:00:00
37.0


Now step through each validation example and construct one training example with 4320 one minute (3 days) solar wind samples 

In [161]:
valid_X = np.zeros((valid_M, 4320, 53))
valid_Y = np.zeros((valid_M, 1))
for m in range(valid_M):
  y_0 = int(m / 8)
  y_1 = m % 8
  print(m, y_0, y_1, valid_ydf.shape, t)
  t = valid_ydf.iloc[y_0, 0]
  #dt = datetime.datetime.strptime(t, '%Y%m%d')
  #print(t, dt)
  seq = get_X(t, x_df)
  # check for exactly 3 days of data
  print(seq)
  if seq[1] < x_df.shape[0]:
    print(m, 'Start at ',x_df.iloc[seq[0],0], ' for ', x_df.iloc[seq[1],0] - x_df.iloc[seq[0],0])
    if x_df.iloc[seq[1],0] - x_df.iloc[seq[0],0] == '3 days 00:00:00':
      #print(seq[1], x_df.shape[0])
      valid_X[m] = x_df.iloc[seq[0]:seq[1],1:].to_numpy()
      valid_Y[m] = valid_ydf.iloc[y_0, y_1+1]
      break

0 0 0 (37, 9) 2022-09-24 00:00:00
(383040, 387360)
0 Start at  2022-10-01 14:12:00  for  3 days 00:00:00
1 0 1 (37, 9) 2022-09-24 00:00:00
(383040, 387360)
1 Start at  2022-10-01 14:12:00  for  3 days 00:00:00
2 0 2 (37, 9) 2022-09-24 00:00:00
(383040, 387360)
2 Start at  2022-10-01 14:12:00  for  3 days 00:00:00
3 0 3 (37, 9) 2022-09-24 00:00:00
(383040, 387360)
3 Start at  2022-10-01 14:12:00  for  3 days 00:00:00
4 0 4 (37, 9) 2022-09-24 00:00:00
(383040, 387360)
4 Start at  2022-10-01 14:12:00  for  3 days 00:00:00
5 0 5 (37, 9) 2022-09-24 00:00:00
(383040, 387360)
5 Start at  2022-10-01 14:12:00  for  3 days 00:00:00
6 0 6 (37, 9) 2022-09-24 00:00:00
(383040, 387360)
6 Start at  2022-10-01 14:12:00  for  3 days 00:00:00
7 0 7 (37, 9) 2022-09-24 00:00:00
(383040, 387360)
7 Start at  2022-10-01 14:12:00  for  3 days 00:00:00
8 1 0 (37, 9) 2022-09-24 00:00:00
(375840, 380160)
8 Start at  2022-09-26 14:10:00  for  3 days 00:02:00
9 1 1 (37, 9) 2022-09-19 00:00:00
(375840, 380160)
9 St

(416160, 420480)
152 Start at  2022-10-24 14:32:00  for  3 days 03:57:00
153 19 1 (37, 9) 2022-10-17 00:00:00
(416160, 420480)
153 Start at  2022-10-24 14:32:00  for  3 days 03:57:00
154 19 2 (37, 9) 2022-10-17 00:00:00
(416160, 420480)
154 Start at  2022-10-24 14:32:00  for  3 days 03:57:00
155 19 3 (37, 9) 2022-10-17 00:00:00
(416160, 420480)
155 Start at  2022-10-24 14:32:00  for  3 days 03:57:00
156 19 4 (37, 9) 2022-10-17 00:00:00
(416160, 420480)
156 Start at  2022-10-24 14:32:00  for  3 days 03:57:00
157 19 5 (37, 9) 2022-10-17 00:00:00
(416160, 420480)
157 Start at  2022-10-24 14:32:00  for  3 days 03:57:00
158 19 6 (37, 9) 2022-10-17 00:00:00
(416160, 420480)
158 Start at  2022-10-24 14:32:00  for  3 days 03:57:00
159 19 7 (37, 9) 2022-10-17 00:00:00
(416160, 420480)
159 Start at  2022-10-24 14:32:00  for  3 days 03:57:00
160 20 0 (37, 9) 2022-10-17 00:00:00
(243360, 247680)
160 Start at  2022-06-24 12:05:00  for  3 days 00:23:00
161 20 1 (37, 9) 2022-06-19 00:00:00
(243360, 2

In [162]:
train_X = np.zeros((train_M, 4320, 53))
train_Y = np.zeros((train_M, 1))
for m in range(train_M):
  y_0 = int(m / 8)
  y_1 = m % 8
  t = train_ydf.iloc[y_0, 0]
  print(m, y_0, y_1, train_ydf.shape, t)
  #dt = datetime.datetime.strptime(t, '%Y%m%d')
  #print(t, dt)
  seq = get_X(t, x_df)
  # check for exactly 3 days of data
  print(seq)
  if seq[1] < x_df.shape[0]:
    print(m, 'Start at ',x_df.iloc[seq[0],0], ' for ', x_df.iloc[seq[1],0] - x_df.iloc[seq[0],0])
    if x_df.iloc[seq[1],0] - x_df.iloc[seq[0],0] == '3 days 00:00:00':
      #print(seq[1], x_df.shape[0])
      train_X[m] = x_df.iloc[seq[0]:seq[1],1:].to_numpy()
      train_Y[m] = train_ydf.iloc[y_0, y_1+1]
      break

0 0 0 (328, 9) 2022-11-25 00:00:00
(472320, 476640)
0 Start at  2022-12-03 07:53:00  for  3 days 00:00:00
1 0 1 (328, 9) 2022-11-25 00:00:00
(472320, 476640)
1 Start at  2022-12-03 07:53:00  for  3 days 00:00:00
2 0 2 (328, 9) 2022-11-25 00:00:00
(472320, 476640)
2 Start at  2022-12-03 07:53:00  for  3 days 00:00:00
3 0 3 (328, 9) 2022-11-25 00:00:00
(472320, 476640)
3 Start at  2022-12-03 07:53:00  for  3 days 00:00:00
4 0 4 (328, 9) 2022-11-25 00:00:00
(472320, 476640)
4 Start at  2022-12-03 07:53:00  for  3 days 00:00:00
5 0 5 (328, 9) 2022-11-25 00:00:00
(472320, 476640)
5 Start at  2022-12-03 07:53:00  for  3 days 00:00:00
6 0 6 (328, 9) 2022-11-25 00:00:00
(472320, 476640)
6 Start at  2022-12-03 07:53:00  for  3 days 00:00:00
7 0 7 (328, 9) 2022-11-25 00:00:00
(472320, 476640)
7 Start at  2022-12-03 07:53:00  for  3 days 00:00:00
8 1 0 (328, 9) 2022-10-24 00:00:00
(426240, 430560)
8 Start at  2022-10-31 18:52:00  for  3 days 00:03:00
9 1 1 (328, 9) 2022-10-24 00:00:00
(426240, 43

(452160, 456480)
153 Start at  2022-11-19 01:01:00  for  3 days 00:00:00
154 19 2 (328, 9) 2022-11-11 00:00:00
(452160, 456480)
154 Start at  2022-11-19 01:01:00  for  3 days 00:00:00
155 19 3 (328, 9) 2022-11-11 00:00:00
(452160, 456480)
155 Start at  2022-11-19 01:01:00  for  3 days 00:00:00
156 19 4 (328, 9) 2022-11-11 00:00:00
(452160, 456480)
156 Start at  2022-11-19 01:01:00  for  3 days 00:00:00
157 19 5 (328, 9) 2022-11-11 00:00:00
(452160, 456480)
157 Start at  2022-11-19 01:01:00  for  3 days 00:00:00
158 19 6 (328, 9) 2022-11-11 00:00:00
(452160, 456480)
158 Start at  2022-11-19 01:01:00  for  3 days 00:00:00
159 19 7 (328, 9) 2022-11-11 00:00:00
(452160, 456480)
159 Start at  2022-11-19 01:01:00  for  3 days 00:00:00
160 20 0 (328, 9) 2022-05-10 00:00:00
(185760, 190080)
160 Start at  2022-05-15 00:01:00  for  3 days 00:00:00
161 20 1 (328, 9) 2022-05-10 00:00:00
(185760, 190080)
161 Start at  2022-05-15 00:01:00  for  3 days 00:00:00
162 20 2 (328, 9) 2022-05-10 00:00:00
(

(5760, 10080)
308 Start at  2022-01-05 09:27:00  for  3 days 09:42:00
309 38 5 (328, 9) 2022-01-05 00:00:00
(5760, 10080)
309 Start at  2022-01-05 09:27:00  for  3 days 09:42:00
310 38 6 (328, 9) 2022-01-05 00:00:00
(5760, 10080)
310 Start at  2022-01-05 09:27:00  for  3 days 09:42:00
311 38 7 (328, 9) 2022-01-05 00:00:00
(5760, 10080)
311 Start at  2022-01-05 09:27:00  for  3 days 09:42:00
312 39 0 (328, 9) 2022-03-05 00:00:00
(90720, 95040)
312 Start at  2022-03-08 21:54:00  for  3 days 00:01:00
313 39 1 (328, 9) 2022-03-05 00:00:00
(90720, 95040)
313 Start at  2022-03-08 21:54:00  for  3 days 00:01:00
314 39 2 (328, 9) 2022-03-05 00:00:00
(90720, 95040)
314 Start at  2022-03-08 21:54:00  for  3 days 00:01:00
315 39 3 (328, 9) 2022-03-05 00:00:00
(90720, 95040)
315 Start at  2022-03-08 21:54:00  for  3 days 00:01:00
316 39 4 (328, 9) 2022-03-05 00:00:00
(90720, 95040)
316 Start at  2022-03-08 21:54:00  for  3 days 00:01:00
317 39 5 (328, 9) 2022-03-05 00:00:00
(90720, 95040)
317 Star

(408960, 413280)
461 Start at  2022-10-19 14:29:00  for  3 days 00:03:00
462 57 6 (328, 9) 2022-10-12 00:00:00
(408960, 413280)
462 Start at  2022-10-19 14:29:00  for  3 days 00:03:00
463 57 7 (328, 9) 2022-10-12 00:00:00
(408960, 413280)
463 Start at  2022-10-19 14:29:00  for  3 days 00:03:00
464 58 0 (328, 9) 2022-12-04 00:00:00
(485280, 489600)
464 Start at  2022-12-12 07:59:00  for  3 days 01:08:00
465 58 1 (328, 9) 2022-12-04 00:00:00
(485280, 489600)
465 Start at  2022-12-12 07:59:00  for  3 days 01:08:00
466 58 2 (328, 9) 2022-12-04 00:00:00
(485280, 489600)
466 Start at  2022-12-12 07:59:00  for  3 days 01:08:00
467 58 3 (328, 9) 2022-12-04 00:00:00
(485280, 489600)
467 Start at  2022-12-12 07:59:00  for  3 days 01:08:00
468 58 4 (328, 9) 2022-12-04 00:00:00
(485280, 489600)
468 Start at  2022-12-12 07:59:00  for  3 days 01:08:00
469 58 5 (328, 9) 2022-12-04 00:00:00
(485280, 489600)
469 Start at  2022-12-12 07:59:00  for  3 days 01:08:00
470 58 6 (328, 9) 2022-12-04 00:00:00
(

(289440, 293760)
614 Start at  2022-07-28 04:47:00  for  3 days 00:00:00
615 76 7 (328, 9) 2022-07-21 00:00:00
(289440, 293760)
615 Start at  2022-07-28 04:47:00  for  3 days 00:00:00
616 77 0 (328, 9) 2022-02-06 00:00:00
(51840, 56160)
616 Start at  2022-02-09 05:52:00  for  3 days 00:13:00
617 77 1 (328, 9) 2022-02-06 00:00:00
(51840, 56160)
617 Start at  2022-02-09 05:52:00  for  3 days 00:13:00
618 77 2 (328, 9) 2022-02-06 00:00:00
(51840, 56160)
618 Start at  2022-02-09 05:52:00  for  3 days 00:13:00
619 77 3 (328, 9) 2022-02-06 00:00:00
(51840, 56160)
619 Start at  2022-02-09 05:52:00  for  3 days 00:13:00
620 77 4 (328, 9) 2022-02-06 00:00:00
(51840, 56160)
620 Start at  2022-02-09 05:52:00  for  3 days 00:13:00
621 77 5 (328, 9) 2022-02-06 00:00:00
(51840, 56160)
621 Start at  2022-02-09 05:52:00  for  3 days 00:13:00
622 77 6 (328, 9) 2022-02-06 00:00:00
(51840, 56160)
622 Start at  2022-02-09 05:52:00  for  3 days 00:13:00
623 77 7 (328, 9) 2022-02-06 00:00:00
(51840, 56160)


(88740, 93060)
740 Start at  2022-03-07 12:10:00  for  3 days 00:45:00
741 92 5 (328, 9) 2022-03-04 00:00:00
(88740, 93060)
741 Start at  2022-03-07 12:10:00  for  3 days 00:45:00
742 92 6 (328, 9) 2022-03-04 00:00:00
(88740, 93060)
742 Start at  2022-03-07 12:10:00  for  3 days 00:45:00
743 92 7 (328, 9) 2022-03-04 00:00:00
(88740, 93060)
743 Start at  2022-03-07 12:10:00  for  3 days 00:45:00
744 93 0 (328, 9) 2022-05-07 00:00:00
(181440, 185760)
744 Start at  2022-05-12 00:01:00  for  3 days 00:00:00
745 93 1 (328, 9) 2022-05-07 00:00:00
(181440, 185760)
745 Start at  2022-05-12 00:01:00  for  3 days 00:00:00
746 93 2 (328, 9) 2022-05-07 00:00:00
(181440, 185760)
746 Start at  2022-05-12 00:01:00  for  3 days 00:00:00
747 93 3 (328, 9) 2022-05-07 00:00:00
(181440, 185760)
747 Start at  2022-05-12 00:01:00  for  3 days 00:00:00
748 93 4 (328, 9) 2022-05-07 00:00:00
(181440, 185760)
748 Start at  2022-05-12 00:01:00  for  3 days 00:00:00
749 93 5 (328, 9) 2022-05-07 00:00:00
(181440, 

(354240, 358560)
875 Start at  2022-09-11 12:33:00  for  3 days 00:00:00
876 109 4 (328, 9) 2022-09-04 00:00:00
(354240, 358560)
876 Start at  2022-09-11 12:33:00  for  3 days 00:00:00
877 109 5 (328, 9) 2022-09-04 00:00:00
(354240, 358560)
877 Start at  2022-09-11 12:33:00  for  3 days 00:00:00
878 109 6 (328, 9) 2022-09-04 00:00:00
(354240, 358560)
878 Start at  2022-09-11 12:33:00  for  3 days 00:00:00
879 109 7 (328, 9) 2022-09-04 00:00:00
(354240, 358560)
879 Start at  2022-09-11 12:33:00  for  3 days 00:00:00
880 110 0 (328, 9) 2022-11-13 00:00:00
(455040, 459360)
880 Start at  2022-11-21 01:01:00  for  3 days 03:45:00
881 110 1 (328, 9) 2022-11-13 00:00:00
(455040, 459360)
881 Start at  2022-11-21 01:01:00  for  3 days 03:45:00
882 110 2 (328, 9) 2022-11-13 00:00:00
(455040, 459360)
882 Start at  2022-11-21 01:01:00  for  3 days 03:45:00
883 110 3 (328, 9) 2022-11-13 00:00:00
(455040, 459360)
883 Start at  2022-11-21 01:01:00  for  3 days 03:45:00
884 110 4 (328, 9) 2022-11-13 0

(302400, 306720)
1028 Start at  2022-08-06 04:48:00  for  3 days 00:01:00
1029 128 5 (328, 9) 2022-07-30 00:00:00
(302400, 306720)
1029 Start at  2022-08-06 04:48:00  for  3 days 00:01:00
1030 128 6 (328, 9) 2022-07-30 00:00:00
(302400, 306720)
1030 Start at  2022-08-06 04:48:00  for  3 days 00:01:00
1031 128 7 (328, 9) 2022-07-30 00:00:00
(302400, 306720)
1031 Start at  2022-08-06 04:48:00  for  3 days 00:01:00
1032 129 0 (328, 9) 2022-07-09 00:00:00
(272160, 276480)
1032 Start at  2022-07-15 21:29:00  for  3 days 00:00:00
1033 129 1 (328, 9) 2022-07-09 00:00:00
(272160, 276480)
1033 Start at  2022-07-15 21:29:00  for  3 days 00:00:00
1034 129 2 (328, 9) 2022-07-09 00:00:00
(272160, 276480)
1034 Start at  2022-07-15 21:29:00  for  3 days 00:00:00
1035 129 3 (328, 9) 2022-07-09 00:00:00
(272160, 276480)
1035 Start at  2022-07-15 21:29:00  for  3 days 00:00:00
1036 129 4 (328, 9) 2022-07-09 00:00:00
(272160, 276480)
1036 Start at  2022-07-15 21:29:00  for  3 days 00:00:00
1037 129 5 (32

(325440, 329760)
1143 Start at  2022-08-22 09:18:00  for  3 days 00:00:00
1144 143 0 (328, 9) 2022-07-08 00:00:00
(270720, 275040)
1144 Start at  2022-07-14 02:45:00  for  3 days 18:44:00
1145 143 1 (328, 9) 2022-07-08 00:00:00
(270720, 275040)
1145 Start at  2022-07-14 02:45:00  for  3 days 18:44:00
1146 143 2 (328, 9) 2022-07-08 00:00:00
(270720, 275040)
1146 Start at  2022-07-14 02:45:00  for  3 days 18:44:00
1147 143 3 (328, 9) 2022-07-08 00:00:00
(270720, 275040)
1147 Start at  2022-07-14 02:45:00  for  3 days 18:44:00
1148 143 4 (328, 9) 2022-07-08 00:00:00
(270720, 275040)
1148 Start at  2022-07-14 02:45:00  for  3 days 18:44:00
1149 143 5 (328, 9) 2022-07-08 00:00:00
(270720, 275040)
1149 Start at  2022-07-14 02:45:00  for  3 days 18:44:00
1150 143 6 (328, 9) 2022-07-08 00:00:00
(270720, 275040)
1150 Start at  2022-07-14 02:45:00  for  3 days 18:44:00
1151 143 7 (328, 9) 2022-07-08 00:00:00
(270720, 275040)
1151 Start at  2022-07-14 02:45:00  for  3 days 18:44:00
1152 144 0 (32

(509760, 514080)
1297 162 1 (328, 9) 2022-12-21 00:00:00
(509760, 514080)
1298 162 2 (328, 9) 2022-12-21 00:00:00
(509760, 514080)
1299 162 3 (328, 9) 2022-12-21 00:00:00
(509760, 514080)
1300 162 4 (328, 9) 2022-12-21 00:00:00
(509760, 514080)
1301 162 5 (328, 9) 2022-12-21 00:00:00
(509760, 514080)
1302 162 6 (328, 9) 2022-12-21 00:00:00
(509760, 514080)
1303 162 7 (328, 9) 2022-12-21 00:00:00
(509760, 514080)
1304 163 0 (328, 9) 2022-11-09 00:00:00
(449280, 453600)
1304 Start at  2022-11-17 01:01:00  for  3 days 00:00:00
1305 163 1 (328, 9) 2022-11-09 00:00:00
(449280, 453600)
1305 Start at  2022-11-17 01:01:00  for  3 days 00:00:00
1306 163 2 (328, 9) 2022-11-09 00:00:00
(449280, 453600)
1306 Start at  2022-11-17 01:01:00  for  3 days 00:00:00
1307 163 3 (328, 9) 2022-11-09 00:00:00
(449280, 453600)
1307 Start at  2022-11-17 01:01:00  for  3 days 00:00:00
1308 163 4 (328, 9) 2022-11-09 00:00:00
(449280, 453600)
1308 Start at  2022-11-17 01:01:00  for  3 days 00:00:00
1309 163 5 (32

(319680, 324000)
1451 Start at  2022-08-18 09:11:00  for  3 days 00:07:00
1452 181 4 (328, 9) 2022-08-11 00:00:00
(319680, 324000)
1452 Start at  2022-08-18 09:11:00  for  3 days 00:07:00
1453 181 5 (328, 9) 2022-08-11 00:00:00
(319680, 324000)
1453 Start at  2022-08-18 09:11:00  for  3 days 00:07:00
1454 181 6 (328, 9) 2022-08-11 00:00:00
(319680, 324000)
1454 Start at  2022-08-18 09:11:00  for  3 days 00:07:00
1455 181 7 (328, 9) 2022-08-11 00:00:00
(319680, 324000)
1455 Start at  2022-08-18 09:11:00  for  3 days 00:07:00
1456 182 0 (328, 9) 2022-11-04 00:00:00
(442080, 446400)
1456 Start at  2022-11-12 01:00:00  for  3 days 00:01:00
1457 182 1 (328, 9) 2022-11-04 00:00:00
(442080, 446400)
1457 Start at  2022-11-12 01:00:00  for  3 days 00:01:00
1458 182 2 (328, 9) 2022-11-04 00:00:00
(442080, 446400)
1458 Start at  2022-11-12 01:00:00  for  3 days 00:01:00
1459 182 3 (328, 9) 2022-11-04 00:00:00
(442080, 446400)
1459 Start at  2022-11-12 01:00:00  for  3 days 00:01:00
1460 182 4 (32

(126720, 131040)
1604 Start at  2022-04-03 15:15:00  for  3 days 00:00:00
1605 200 5 (328, 9) 2022-03-30 00:00:00
(126720, 131040)
1605 Start at  2022-04-03 15:15:00  for  3 days 00:00:00
1606 200 6 (328, 9) 2022-03-30 00:00:00
(126720, 131040)
1606 Start at  2022-04-03 15:15:00  for  3 days 00:00:00
1607 200 7 (328, 9) 2022-03-30 00:00:00
(126720, 131040)
1607 Start at  2022-04-03 15:15:00  for  3 days 00:00:00
1608 201 0 (328, 9) 2022-10-27 00:00:00
(430560, 434880)
1608 Start at  2022-11-03 18:55:00  for  3 days 00:01:00
1609 201 1 (328, 9) 2022-10-27 00:00:00
(430560, 434880)
1609 Start at  2022-11-03 18:55:00  for  3 days 00:01:00
1610 201 2 (328, 9) 2022-10-27 00:00:00
(430560, 434880)
1610 Start at  2022-11-03 18:55:00  for  3 days 00:01:00
1611 201 3 (328, 9) 2022-10-27 00:00:00
(430560, 434880)
1611 Start at  2022-11-03 18:55:00  for  3 days 00:01:00
1612 201 4 (328, 9) 2022-10-27 00:00:00
(430560, 434880)
1612 Start at  2022-11-03 18:55:00  for  3 days 00:01:00
1613 201 5 (32

(332640, 336960)
1758 Start at  2022-08-27 09:18:00  for  3 days 00:00:00
1759 219 7 (328, 9) 2022-08-20 00:00:00
(332640, 336960)
1759 Start at  2022-08-27 09:18:00  for  3 days 00:00:00
1760 220 0 (328, 9) 2022-05-15 00:00:00
(192960, 197280)
1760 Start at  2022-05-20 00:01:00  for  3 days 00:00:00
1761 220 1 (328, 9) 2022-05-15 00:00:00
(192960, 197280)
1761 Start at  2022-05-20 00:01:00  for  3 days 00:00:00
1762 220 2 (328, 9) 2022-05-15 00:00:00
(192960, 197280)
1762 Start at  2022-05-20 00:01:00  for  3 days 00:00:00
1763 220 3 (328, 9) 2022-05-15 00:00:00
(192960, 197280)
1763 Start at  2022-05-20 00:01:00  for  3 days 00:00:00
1764 220 4 (328, 9) 2022-05-15 00:00:00
(192960, 197280)
1764 Start at  2022-05-20 00:01:00  for  3 days 00:00:00
1765 220 5 (328, 9) 2022-05-15 00:00:00
(192960, 197280)
1765 Start at  2022-05-20 00:01:00  for  3 days 00:00:00
1766 220 6 (328, 9) 2022-05-15 00:00:00
(192960, 197280)
1766 Start at  2022-05-20 00:01:00  for  3 days 00:00:00
1767 220 7 (32

(501120, 505440)
1911 Start at  2022-12-23 14:32:00  for  3 days 05:32:00
1912 239 0 (328, 9) 2022-04-28 00:00:00
(168480, 172800)
1912 Start at  2022-05-02 23:58:00  for  3 days 00:00:00
1913 239 1 (328, 9) 2022-04-28 00:00:00
(168480, 172800)
1913 Start at  2022-05-02 23:58:00  for  3 days 00:00:00
1914 239 2 (328, 9) 2022-04-28 00:00:00
(168480, 172800)
1914 Start at  2022-05-02 23:58:00  for  3 days 00:00:00
1915 239 3 (328, 9) 2022-04-28 00:00:00
(168480, 172800)
1915 Start at  2022-05-02 23:58:00  for  3 days 00:00:00
1916 239 4 (328, 9) 2022-04-28 00:00:00
(168480, 172800)
1916 Start at  2022-05-02 23:58:00  for  3 days 00:00:00
1917 239 5 (328, 9) 2022-04-28 00:00:00
(168480, 172800)
1917 Start at  2022-05-02 23:58:00  for  3 days 00:00:00
1918 239 6 (328, 9) 2022-04-28 00:00:00
(168480, 172800)
1918 Start at  2022-05-02 23:58:00  for  3 days 00:00:00
1919 239 7 (328, 9) 2022-04-28 00:00:00
(168480, 172800)
1919 Start at  2022-05-02 23:58:00  for  3 days 00:00:00
1920 240 0 (32

(67680, 72000)
2065 Start at  2022-02-20 06:05:00  for  3 days 00:00:00
2066 258 2 (328, 9) 2022-02-17 00:00:00
(67680, 72000)
2066 Start at  2022-02-20 06:05:00  for  3 days 00:00:00
2067 258 3 (328, 9) 2022-02-17 00:00:00
(67680, 72000)
2067 Start at  2022-02-20 06:05:00  for  3 days 00:00:00
2068 258 4 (328, 9) 2022-02-17 00:00:00
(67680, 72000)
2068 Start at  2022-02-20 06:05:00  for  3 days 00:00:00
2069 258 5 (328, 9) 2022-02-17 00:00:00
(67680, 72000)
2069 Start at  2022-02-20 06:05:00  for  3 days 00:00:00
2070 258 6 (328, 9) 2022-02-17 00:00:00
(67680, 72000)
2070 Start at  2022-02-20 06:05:00  for  3 days 00:00:00
2071 258 7 (328, 9) 2022-02-17 00:00:00
(67680, 72000)
2071 Start at  2022-02-20 06:05:00  for  3 days 00:00:00
2072 259 0 (328, 9) 2022-02-13 00:00:00
(61920, 66240)
2072 Start at  2022-02-16 06:05:00  for  3 days 00:00:00
2073 259 1 (328, 9) 2022-02-13 00:00:00
(61920, 66240)
2073 Start at  2022-02-16 06:05:00  for  3 days 00:00:00
2074 259 2 (328, 9) 2022-02-13 0

(423360, 427680)
2200 Start at  2022-10-29 18:52:00  for  3 days 00:02:00
2201 275 1 (328, 9) 2022-10-22 00:00:00
(423360, 427680)
2201 Start at  2022-10-29 18:52:00  for  3 days 00:02:00
2202 275 2 (328, 9) 2022-10-22 00:00:00
(423360, 427680)
2202 Start at  2022-10-29 18:52:00  for  3 days 00:02:00
2203 275 3 (328, 9) 2022-10-22 00:00:00
(423360, 427680)
2203 Start at  2022-10-29 18:52:00  for  3 days 00:02:00
2204 275 4 (328, 9) 2022-10-22 00:00:00
(423360, 427680)
2204 Start at  2022-10-29 18:52:00  for  3 days 00:02:00
2205 275 5 (328, 9) 2022-10-22 00:00:00
(423360, 427680)
2205 Start at  2022-10-29 18:52:00  for  3 days 00:02:00
2206 275 6 (328, 9) 2022-10-22 00:00:00
(423360, 427680)
2206 Start at  2022-10-29 18:52:00  for  3 days 00:02:00
2207 275 7 (328, 9) 2022-10-22 00:00:00
(423360, 427680)
2207 Start at  2022-10-29 18:52:00  for  3 days 00:02:00
2208 276 0 (328, 9) 2022-12-20 00:00:00
(508320, 512640)
2209 276 1 (328, 9) 2022-12-20 00:00:00
(508320, 512640)
2210 276 2 (32

(99360, 103680)
2354 Start at  2022-03-14 21:56:00  for  3 days 01:17:00
2355 294 3 (328, 9) 2022-03-11 00:00:00
(99360, 103680)
2355 Start at  2022-03-14 21:56:00  for  3 days 01:17:00
2356 294 4 (328, 9) 2022-03-11 00:00:00
(99360, 103680)
2356 Start at  2022-03-14 21:56:00  for  3 days 01:17:00
2357 294 5 (328, 9) 2022-03-11 00:00:00
(99360, 103680)
2357 Start at  2022-03-14 21:56:00  for  3 days 01:17:00
2358 294 6 (328, 9) 2022-03-11 00:00:00
(99360, 103680)
2358 Start at  2022-03-14 21:56:00  for  3 days 01:17:00
2359 294 7 (328, 9) 2022-03-11 00:00:00
(99360, 103680)
2359 Start at  2022-03-14 21:56:00  for  3 days 01:17:00
2360 295 0 (328, 9) 2022-07-15 00:00:00
(280380, 284700)
2360 Start at  2022-07-21 21:41:00  for  3 days 00:06:00
2361 295 1 (328, 9) 2022-07-15 00:00:00
(280380, 284700)
2361 Start at  2022-07-21 21:41:00  for  3 days 00:06:00
2362 295 2 (328, 9) 2022-07-15 00:00:00
(280380, 284700)
2362 Start at  2022-07-21 21:41:00  for  3 days 00:06:00
2363 295 3 (328, 9) 

(115980, 120300)
2443 Start at  2022-03-27 00:36:00  for  3 days 03:36:00
2444 305 4 (328, 9) 2022-03-23 00:00:00
(115980, 120300)
2444 Start at  2022-03-27 00:36:00  for  3 days 03:36:00
2445 305 5 (328, 9) 2022-03-23 00:00:00
(115980, 120300)
2445 Start at  2022-03-27 00:36:00  for  3 days 03:36:00
2446 305 6 (328, 9) 2022-03-23 00:00:00
(115980, 120300)
2446 Start at  2022-03-27 00:36:00  for  3 days 03:36:00
2447 305 7 (328, 9) 2022-03-23 00:00:00
(115980, 120300)
2447 Start at  2022-03-27 00:36:00  for  3 days 03:36:00
2448 306 0 (328, 9) 2022-04-15 00:00:00
(149760, 154080)
2448 Start at  2022-04-19 17:17:00  for  3 days 03:07:00
2449 306 1 (328, 9) 2022-04-15 00:00:00
(149760, 154080)
2449 Start at  2022-04-19 17:17:00  for  3 days 03:07:00
2450 306 2 (328, 9) 2022-04-15 00:00:00
(149760, 154080)
2450 Start at  2022-04-19 17:17:00  for  3 days 03:07:00
2451 306 3 (328, 9) 2022-04-15 00:00:00
(149760, 154080)
2451 Start at  2022-04-19 17:17:00  for  3 days 03:07:00
2452 306 4 (32

(522720, 527040)
2570 321 2 (328, 9) 2022-12-30 00:00:00
(522720, 527040)
2571 321 3 (328, 9) 2022-12-30 00:00:00
(522720, 527040)
2572 321 4 (328, 9) 2022-12-30 00:00:00
(522720, 527040)
2573 321 5 (328, 9) 2022-12-30 00:00:00
(522720, 527040)
2574 321 6 (328, 9) 2022-12-30 00:00:00
(522720, 527040)
2575 321 7 (328, 9) 2022-12-30 00:00:00
(522720, 527040)
2576 322 0 (328, 9) 2022-01-01 00:00:00
(0, 4320)
2576 Start at  2022-01-01 00:00:00  for  3 days 05:48:00
2577 322 1 (328, 9) 2022-01-01 00:00:00
(0, 4320)
2577 Start at  2022-01-01 00:00:00  for  3 days 05:48:00
2578 322 2 (328, 9) 2022-01-01 00:00:00
(0, 4320)
2578 Start at  2022-01-01 00:00:00  for  3 days 05:48:00
2579 322 3 (328, 9) 2022-01-01 00:00:00
(0, 4320)
2579 Start at  2022-01-01 00:00:00  for  3 days 05:48:00
2580 322 4 (328, 9) 2022-01-01 00:00:00
(0, 4320)
2580 Start at  2022-01-01 00:00:00  for  3 days 05:48:00
2581 322 5 (328, 9) 2022-01-01 00:00:00
(0, 4320)
2581 Start at  2022-01-01 00:00:00  for  3 days 05:48:00

In [163]:
print(train_X.shape, train_Y.shape)
print(valid_X.shape, valid_Y.shape)

(2624, 4320, 53) (2624, 1)
(296, 4320, 53) (296, 1)


In [164]:
with open('train.dat', 'wb') as f:
    np.save(f, train_X)
    np.save(f, train_Y)

In [165]:
with open('valid.dat', 'wb') as f:
    np.save(f, valid_X)
    np.save(f, valid_Y)