In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [2]:
# Load the order log from the zipped CSV; fields in the file are separated by "|".
orders = pd.read_csv("orders.csv.zip", sep="|")

In [3]:
# Parse the timestamp column once and derive the calendar features from it.
timestamps = pd.to_datetime(orders['time'])
orders['time'] = timestamps
# Value taken directly from pandas' `dt.day_of_week` accessor.
orders['day_of_week'] = timestamps.dt.day_of_week
# Days 1-7 -> week 1, 8-14 -> week 2, ..., 29-31 -> week 5.
orders['week_of_month'] = (timestamps.dt.day - 1) // 7 + 1
# Line value: ordered quantity times the unit sales price.
orders['orderValue'] = orders['order'] * orders['salesPrice']

In [4]:
# Display the order table including the derived day_of_week, week_of_month and orderValue columns.
orders

Unnamed: 0,time,transactID,itemID,order,salesPrice,day_of_week,week_of_month,orderValue
0,2018-01-01 00:01:56,2278968,450,1,17.42,0,1,17.42
1,2018-01-01 00:01:56,2278968,83,1,5.19,0,1,5.19
2,2018-01-01 00:07:11,2255797,7851,2,20.47,0,1,40.94
3,2018-01-01 00:09:24,2278968,450,1,17.42,0,1,17.42
4,2018-01-01 00:09:24,2278968,83,1,5.19,0,1,5.19
...,...,...,...,...,...,...,...,...
2181950,2018-06-29 23:54:22,2040347,9217,1,23.50,4,5,23.50
2181951,2018-06-29 23:55:09,2260943,2175,1,11.60,4,5,11.60
2181952,2018-06-29 23:55:09,2260943,2061,1,1.03,4,5,1.03
2181953,2018-06-29 23:55:09,2260943,2195,1,18.73,4,5,18.73


In [5]:
# Build two day-by-item tables: one row per calendar date, one column per itemID.
# The date extraction and the grouping are computed once and shared by both
# aggregates instead of being rebuilt for each of them.
order_date = orders['time'].dt.date
daily_item_groups = orders.groupby([order_date, orders['itemID']])

# Units ordered per (date, item); date/item pairs without any order become 0.
total_orders = daily_item_groups['order'].sum().unstack(1).fillna(0)
# Summed orderValue (order * salesPrice) per (date, item), same layout.
total_values = daily_item_groups['orderValue'].sum().unstack(1).fillna(0)

# Sanity check: truthy only if a missing value survived the fill above.
total_orders.isna().sum().any()

np.False_

In [6]:
def split_series(series, window=36, horizon=14, train_end=166, stride=14):
    """Cut one series into sliding-window training samples plus one test sample.

    Training samples are taken only from the first ``train_end`` values.
    A window starts every ``stride`` positions; each sample pairs ``window``
    consecutive values with the sum of the ``horizon`` values that follow.
    Windows whose input or target would run past ``train_end`` are dropped.

    The single test sample uses the last ``window`` values before
    ``train_end`` as input and the sum of the ``horizon`` values starting at
    ``train_end`` as target.

    Parameters
    ----------
    series : pandas.Series (or any object exposing ``.values``)
        The values to split, in chronological order.
    window : int, default 36
        Number of input values per sample.
    horizon : int, default 14
        Number of following values summed into the target.
    train_end : int, default 166
        Position where the training region ends and the test target begins.
    stride : int, default 14
        Step between the starts of consecutive training windows.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, y_train, X_test, y_test)``. With the defaults and a series
        of at least ``train_end + horizon`` values, the shapes are
        ``(9, 36)``, ``(9,)``, ``(1, 36)`` and ``(1,)``.

    Notes
    -----
    The test slices are not length-checked: if the series is shorter than
    ``train_end + horizon`` the test target sums fewer values.
    """
    values = series.values
    train_values = values[:train_end]

    X_train, y_train = [], []
    for start in range(0, train_end, stride):
        target_start = start + window
        inputs = train_values[start:target_start]
        targets = train_values[target_start:target_start + horizon]
        # Keep only complete samples that lie fully inside the training region.
        if len(inputs) != window or len(targets) != horizon:
            continue
        X_train.append(inputs)
        y_train.append(targets.sum())

    # The test input ends exactly where the test target begins.
    test_start = train_end - window
    X_test = [values[test_start:train_end]]
    y_test = [values[train_end:train_end + horizon].sum()]

    return np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)

In [7]:
def get_train_data(df):
    """Pool the per-column samples of ``df`` into four flat arrays.

    Each column of ``df`` is passed to ``split_series``; the resulting train
    inputs, train targets, test inputs and test targets are concatenated in
    column order.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, y_train, X_test, y_test)`` stacked over all columns.
    """
    train_inputs, train_targets = [], []
    test_inputs, test_targets = [], []
    buckets = (train_inputs, train_targets, test_inputs, test_targets)

    for column in df.columns:
        # split_series returns its four parts in the same order as `buckets`.
        for bucket, part in zip(buckets, split_series(df[column])):
            bucket.extend(part)

    return (
        np.array(train_inputs),
        np.array(train_targets),
        np.array(test_inputs),
        np.array(test_targets),
    )

In [8]:
# Build the pooled train/test samples from the units-ordered table (one series per item column).
X_train, y_train, X_test, y_test = get_train_data(total_orders)

In [9]:
# Check the sample counts and window length of the train and test inputs.
X_train.shape, X_test.shape

((88560, 36), (9840, 36))

In [10]:
# Fix the global NumPy seed so the shuffle order below is reproducible.
np.random.seed(42)
# `i` is a random ordering of the training row positions.
i = np.random.permutation(len(X_train))

In [11]:
# Reorder inputs and targets with the same permutation so each pair stays aligned.
X_train, y_train = X_train[i], y_train[i]

In [12]:
# Global extremes over every value in the training inputs (single scalars, not per-feature).
_min, _max = X_train.min(), X_train.max()

In [13]:
# Min-max scale both input sets with the statistics taken from the training inputs only.
_span = _max - _min
X_train = (X_train - _min) / _span
X_test = (X_test - _min) / _span

In [14]:
# Confirm that scaling left the training input shape unchanged.
X_train.shape

(88560, 36)

In [15]:
# Add a trailing axis of size 1 so each sample is a (36, 1) sequence.
X_train = np.reshape(X_train, (-1, 36, 1))

In [16]:
# Model hyperparameters, read by the model-building cell below.
input_shape = X_train.shape[1:]  # per-sample shape: everything after the leading sample axis
num_blocks = 1  # how many times the stack of conv layers is repeated
num_layers_per_block = 3  # conv layers per block; layer k uses dilation 2**k
filters = 16  # output channels of every Conv1D layer
batch_norm = True  # insert BatchNormalization after each conv layer
dropout = 0.1  # dropout rate after each hidden dense layer; None disables dropout
dense_layers = 128  # width of the first dense layer; the next two use // 2 and // 4
output_shape = 1  # units in the final dense layer
out_activation = "linear"  # activation of the final dense layer
loss = 'mse'  # loss passed to model.compile
metrics = ['mae']  # metrics passed to model.compile

In [17]:
# Architecture: dilated causal Conv1D stack -> global average pooling -> dense regression head.
inputs = tf.keras.Input(shape=input_shape)
x = inputs
for _ in range(num_blocks):
    # The loop variable is not named `i`: at notebook level `i` holds the
    # shuffle permutation, and overwriting it here would corrupt any later
    # re-run of the shuffling cell.
    for layer_idx in range(num_layers_per_block):
        # Dilation doubles with every layer inside a block: 1, 2, 4, ...
        dilation_rate = 2 ** layer_idx
        x = tf.keras.layers.Conv1D(filters, kernel_size=2, dilation_rate=dilation_rate, padding='causal')(x)
        if batch_norm:
            x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation('relu')(x)

# Average over the time axis, leaving one value per filter.
x = tf.keras.layers.GlobalAveragePooling1D()(x)

# Dense head whose width shrinks from dense_layers to dense_layers // 4,
# with optional dropout after each hidden layer.
x = tf.keras.layers.Dense(dense_layers, activation='relu')(x)
if dropout is not None:
    x = tf.keras.layers.Dropout(dropout)(x)

x = tf.keras.layers.Dense(dense_layers // 2, activation='relu')(x)
if dropout is not None:
    x = tf.keras.layers.Dropout(dropout)(x)

x = tf.keras.layers.Dense(dense_layers // 4, activation='relu')(x)
if dropout is not None:
    x = tf.keras.layers.Dropout(dropout)(x)

# Final layer producing the prediction.
x = tf.keras.layers.Dense(output_shape, activation=out_activation)(x)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.compile(optimizer='adam', loss=loss, metrics=metrics)
model.summary()

In [18]:
# Train for 5 epochs with batches of 32 samples. No validation data is passed,
# so the log reports training loss and MAE only.
model.fit(X_train, y_train, epochs=5, batch_size=32)

Epoch 1/5
2768/2768 ━━━━━━━━━━━━━━━━━━━━ 13s 4ms/step - loss: 12016.5732 - mae: 37.2390
Epoch 2/5
2768/2768 ━━━━━━━━━━━━━━━━━━━━ 10s 3ms/step - loss: 11862.0400 - mae: 37.8632
Epoch 3/5
2768/2768 ━━━━━━━━━━━━━━━━━━━━ 10s 4ms/step - loss: 11843.9766 - mae: 37.7824
Epoch 4/5
2768/2768 ━━━━━━━━━━━━━━━━━━━━ 10s 4ms/step - loss: 12892.4395 - mae: 38.8125
Epoch 5/5
2768/2768 ━━━━━━━━━━━━━━━━━━━━ 9s 3ms/step - loss: 11680.3955 - mae: 38.1793


<keras.src.callbacks.history.History at 0x1cfd8671220>