In [33]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, Flatten, Concatenate

In [2]:
# Seed NumPy's global RNG so the randomly generated data below is reproducible.
SEED = 123
np.random.seed(SEED)

In [7]:
# Simulation parameters
n_stores = 3    # how many stores to simulate
n_days = 365    # number of daily observations per store
# Store labels of the form "Store_<index>".
store_ids = [f"Store_{idx}" for idx in range(n_stores)]

In [10]:
# Simulate a daily history for each store and stack the results into one frame.
# The np.random draws are kept in their original order (base level, promo flags,
# temperature jitter, sales noise) so a given seed yields the same data.
data = []

for store in store_ids:
    base_sales = np.random.randint(80, 120)

    # One sine cycle across the whole period.
    seasonality = 10 * np.sin(np.linspace(0, 2 * np.pi, n_days))

    # Daily 0/1 promo flag, drawn with probability 0.2.
    promo = np.random.binomial(1, 0.2, n_days)

    # Two sine cycles around 15, plus Gaussian jitter.
    temp_cycle = 15 + 10 * np.sin(np.linspace(0, 4 * np.pi, n_days))
    temp = temp_cycle + np.random.normal(0, 2, n_days)

    noise = np.random.normal(0, 5, n_days)
    sales = base_sales + seasonality + 5 * promo - 0.5 * temp + noise

    columns = {
        "Date": pd.date_range("2022-01-01", periods=n_days),
        "Store_ID": store,
        "Sales": sales,
        "Promo": promo,
        "Temperature": temp,
    }
    df_store = pd.DataFrame(columns)
    data.append(df_store)

# Stack the per-store frames and renumber the rows 0..N-1.
df = pd.concat(data, ignore_index=True)

In [14]:
df.sort_values(["Date","Store_ID"]).head()

Unnamed: 0,Date,Store_ID,Sales,Promo,Temperature
0,2022-01-01,Store_0,96.733955,0,17.218379
365,2022-01-01,Store_1,104.964523,1,17.631894
730,2022-01-01,Store_2,98.484318,0,13.112631
1,2022-01-02,Store_0,97.466878,0,17.656102
366,2022-01-02,Store_1,102.483851,0,15.146514


In [15]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the store labels, then store the integer code of each row
# in a new "Store_ID_encoded" column.
le = LabelEncoder().fit(df["Store_ID"])
df["Store_ID_encoded"] = le.transform(df["Store_ID"])

In [16]:
df.sort_values(["Date","Store_ID"]).head()

Unnamed: 0,Date,Store_ID,Sales,Promo,Temperature,Store_ID_encoded
0,2022-01-01,Store_0,96.733955,0,17.218379,0
365,2022-01-01,Store_1,104.964523,1,17.631894,1
730,2022-01-01,Store_2,98.484318,0,13.112631,2
1,2022-01-02,Store_0,97.466878,0,17.656102,0
366,2022-01-02,Store_1,102.483851,0,15.146514,1


In [17]:
def create_sequences(df, time_steps=14):
    """Build sliding-window samples, store by store, for next-step sales prediction.

    For every distinct ``Store_ID`` the rows are sorted by ``Date`` and each run
    of ``time_steps`` consecutive rows becomes one input window. The target is
    the ``Sales`` value of the row that immediately follows the window. Windows
    never span two stores.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Store_ID``, ``Date``, ``Sales``, ``Promo``,
        ``Temperature`` and ``Store_ID_encoded``.
    time_steps : int, default 14
        Number of consecutive rows in each input window.

    Returns
    -------
    sequences : numpy.ndarray, shape (n_samples, time_steps, 3)
        Windows over the columns ``[Sales, Promo, Temperature]``.
    targets : numpy.ndarray, shape (n_samples,)
        ``Sales`` of the row right after each window.
    store_ids : numpy.ndarray, shape (n_samples,)
        ``Store_ID_encoded`` of the store each window was taken from.

    When no store has more than ``time_steps`` rows, the arrays are empty but
    keep the shapes above (with ``n_samples == 0``), so code that reads
    ``sequences.shape[1]`` / ``sequences.shape[2]`` still works.
    """
    feature_cols = ["Sales", "Promo", "Temperature"]
    windows = []
    next_sales = []
    store_codes = []

    for store_id in df["Store_ID"].unique():
        df_store = df[df["Store_ID"] == store_id].sort_values("Date")
        features = df_store[feature_cols].values
        # The encoded id is read from the first row of the store's slice.
        store_encoded = df_store["Store_ID_encoded"].values[0]

        # A store with at most `time_steps` rows yields an empty range here.
        for start in range(len(features) - time_steps):
            end = start + time_steps
            windows.append(features[start:end])
            next_sales.append(features[end][0])  # Sales one step after the window
            store_codes.append(store_encoded)

    if not windows:
        # np.array([]) would collapse to shape (0,); return well-shaped empties.
        n_steps = max(time_steps, 0)
        return (
            np.empty((0, n_steps, len(feature_cols))),
            np.empty((0,)),
            np.empty((0,), dtype=int),
        )

    return np.array(windows), np.array(next_sales), np.array(store_codes)

# Build 21-step windows. Note: this rebinds `store_ids`, which from here on holds
# the per-sample encoded store ids returned by create_sequences.
X_seq, y, store_ids = create_sequences(df, time_steps=21)

In [32]:
X_seq[0]

array([[ 96.73395547,   0.        ,  17.21837897],
       [ 97.46687836,   0.        ,  17.65610187],
       [105.47296197,   0.        ,  16.77973395],
       [104.60549496,   0.        ,  16.90528932],
       [ 95.22110011,   0.        ,  15.21726274],
       [ 99.78980794,   0.        ,  17.77378505],
       [ 99.71626927,   0.        ,  19.42291076],
       [100.34467893,   1.        ,  20.22900481],
       [107.064007  ,   1.        ,  16.62695493],
       [104.34039125,   0.        ,  17.63608405],
       [106.02790665,   1.        ,  17.82406727],
       [102.05496872,   0.        ,  18.33264598],
       [100.09416368,   0.        ,  21.34020804],
       [102.58882601,   0.        ,  16.2015938 ],
       [ 96.80002051,   0.        ,  18.30626837],
       [101.89236542,   1.        ,  24.69641668],
       [101.20737493,   0.        ,  22.8239614 ],
       [ 98.67302774,   0.        ,  20.76499139],
       [100.11434766,   0.        ,  22.98175165],
       [ 97.07718602,   0.     

In [30]:
y

array([116.85997809, 103.94244817,  91.11231017, ..., 107.93276704,
        99.12227511,  99.9014225 ])

In [29]:
df.head(25)

Unnamed: 0,Date,Store_ID,Sales,Promo,Temperature,Store_ID_encoded
0,2022-01-01,Store_0,96.733955,0,17.218379,0
1,2022-01-02,Store_0,97.466878,0,17.656102,0
2,2022-01-03,Store_0,105.472962,0,16.779734,0
3,2022-01-04,Store_0,104.605495,0,16.905289,0
4,2022-01-05,Store_0,95.2211,0,15.217263,0
5,2022-01-06,Store_0,99.789808,0,17.773785,0
6,2022-01-07,Store_0,99.716269,0,19.422911,0
7,2022-01-08,Store_0,100.344679,1,20.229005,0
8,2022-01-09,Store_0,107.064007,1,16.626955,0
9,2022-01-10,Store_0,104.340391,0,17.636084,0


In [34]:
# Inputs: one feature window per sample, plus the integer-encoded store id.
window_shape = X_seq.shape[1:]  # (time steps, features per step), e.g. (21, 3)
seq_input = Input(shape=window_shape)
store_input = Input(shape=(1,))

# Store embedding: a 4-dimensional vector per store, flattened for concatenation.
store_embedding = Embedding(input_dim=n_stores, output_dim=4)(store_input)
store_embedding_flat = Flatten()(store_embedding)

# LSTM over the feature window (64 units).
lstm_features = LSTM(64)(seq_input)

# Fusion: concatenate the LSTM output with the store embedding, then map the
# result to a single output value.
x = Concatenate()([lstm_features, store_embedding_flat])
output = Dense(1)(x)

model = Model(inputs=[seq_input, store_input], outputs=output)
model.compile(optimizer="adam", loss="mse")

In [40]:
x

<KerasTensor shape=(None, 68), dtype=float32, sparse=False, ragged=False, name=keras_tensor_5>

In [41]:
output

<KerasTensor shape=(None, 1), dtype=float32, sparse=False, ragged=False, name=keras_tensor_6>

In [42]:
model

<Functional name=functional, built=True>