In [10]:
import pandas as pd
import numpy as np

#input the clean df (with only id, d, features, and sales)
def df_to_tensor(df, validation_day, train_day):
    # Read the CSV file
    df = df[(df['d'] > 1941 - validation_day - train_day) & (df['d'] <= 1941)]
    # Get unique ids, d values, and column names
    ids = df['id'].unique()

    columns = [col for col in df.columns if col not in ['id', 'd']]

    d_values_train = range(1941 - validation_day - train_day + 1, 1941 - validation_day + 1)
    d_values_validation = range(1941 - validation_day + 1, 1941 + 1)

    # Initialize the tensor
    num_ids = len(ids)
    num_d_values = len(d_values_train)
    depth = len(columns)

    tensor = np.zeros((num_ids, num_d_values, depth))

    # Initialize the sales tensor
    sales_tensor = np.zeros((num_ids, len(d_values_validation)))

    # Fill the tensor and sales_tensor
    for i, id_val in enumerate(ids):
        for j, d_val in enumerate(d_values_train):
            for k, col in enumerate(columns):
                value = df[(df['id'] == id_val) & (df['d'] == d_val)][col].values
                if len(value) > 0:
                    tensor[i, j, k] = value[0]
        
        # Fill sales_tensor
        for j, d_val in enumerate(d_values_validation):
            sales_value = df[(df['id'] == id_val) & (df['d'] == d_val)]['sales'].values
            if len(sales_value) > 0:
                sales_tensor[i, j] = sales_value[0]

    return tensor, sales_tensor

# Example usage:
# tensor, sales_tensor = df_to_tensor(df, lag_day)

def df_to_tensor_test(df, train_day):
    # Read the CSV file
    df = df[(df['d'] > 1941 - train_day) & (df['d'] <= 1941)]
    # Get unique ids, d values, and column names
    ids = df['id'].unique()

    columns = [col for col in df.columns if col not in ['id', 'd']]

    d_values_train = range(1941 - train_day + 1, 1941 + 1)

    # Initialize the tensor
    num_ids = len(ids)
    num_d_values = len(d_values_train)
    depth = len(columns)

    tensor = np.zeros((num_ids, num_d_values, depth))

    # Fill the tensor and sales_tensor
    for i, id_val in enumerate(ids):
        for j, d_val in enumerate(d_values_train):
            for k, col in enumerate(columns):
                value = df[(df['id'] == id_val) & (df['d'] == d_val)][col].values
                if len(value) > 0:
                    tensor[i, j, k] = value[0]
    
    return tensor, ids

In [11]:
#load data
import numpy as np
import pandas as pd
import os, sys, gc, time, warnings, pickle, psutil, random

raw_data_dir = '..\datasets\sales_train_evaluation.csv'
processed_data_dir = '..\datasets\\'
ORIGINAL = raw_data_dir
BASE     = processed_data_dir+'grid_part_1.pkl'
PRICE    = processed_data_dir+'grid_part_2.pkl'
CALENDAR = processed_data_dir+'grid_part_3.pkl'
LAGS     = processed_data_dir+'lags_df_28.pkl'
MEAN_ENC = processed_data_dir+'mean_encoding_df.pkl'

STORES_IDS = ['CA_1','CA_2','CA_3','CA_4','TX_1','TX_2','TX_3','WI_1','WI_2','WI_3']

#LIMITS and const
TARGET      = 'sales'            
START_TRAIN = 0                  
END_TRAIN   = 1941
P_HORIZON   = 28

mean_features   = ['enc_cat_id_mean','enc_cat_id_std',
                   'enc_dept_id_mean','enc_dept_id_std',
                   'enc_item_id_mean','enc_item_id_std'] 

remove_features = [TARGET,'id','state_id','store_id',
                    'item_id', 'dept_id', 'cat_id','date','wm_yr_wk','d', \
                    'release', 'price_min','price_max',  \
                    'price_norm', 'price_nunique', 'item_nunique','price_std', \
                    'price_momentum', 'price_momentum_y',  'price_momentum_m', 'price_mean', \
                    'tm_d', 'tm_w', 'tm_m', 'tm_y', 'tm_wm', 'tm_dw', 'tm_w_end', \
                    'enc_cat_id_std', 'event_type_1', 'event_name_2', 'event_type_2', \
                    'enc_dept_id_std', 'enc_item_id_std', \
                    'rolling_std_7', 'rolling_std_14', \
                    'rolling_std_30', 'rolling_mean_tmp_1_7', 'rolling_mean_tmp_1_14', \
                    'rolling_mean_tmp_1_30', 'rolling_mean_tmp_7_7', \
                    'rolling_mean_tmp_7_14', 'rolling_mean_tmp_7_30', \
                    'rolling_mean_tmp_14_7', 'rolling_mean_tmp_14_14', \
                    'sales_lag_36', 'sales_lag_37', 'sales_lag_38', 'sales_lag_39', 'sales_lag_40', 'sales_lag_41', 'sales_lag_42', \
                    'rolling_mean_7', 'rolling_mean_14', 'rolling_mean_30', 'rolling_mean_60', 'rolling_std_60', 'rolling_mean_180', 'rolling_std_180', 'rolling_mean_tmp_1_60', 'rolling_mean_tmp_7_60', 'rolling_mean_tmp_14_60', \
                    'rolling_mean_tmp_14_30','sales_lag_28', 'sales_lag_29', 'sales_lag_30', 'sales_lag_31', 'sales_lag_32', \
                    'sales_lag_33', 'sales_lag_34', 'sales_lag_35']

# Read data
def get_data_by_store(store):
    
    # Read and contact basic feature
    df = pd.concat([pd.read_pickle(BASE),
                    pd.read_pickle(PRICE).iloc[:,2:],
                    pd.read_pickle(CALENDAR).iloc[:,2:]],
                    axis=1)
    

    df = df[df['d']>=START_TRAIN]
    
    df = df[df['store_id']==store]

    df2 = pd.read_pickle(MEAN_ENC)[mean_features]
    df2 = df2[df2.index.isin(df.index)]
    
    df3 = pd.read_pickle(LAGS).iloc[:,3:]
    df3 = df3[df3.index.isin(df.index)]
    
    df = pd.concat([df, df2], axis=1)
    del df2
    
    df = pd.concat([df, df3], axis=1)
    del df3

    state = "snap_" + store.split('_')[0]
    states = ['snap_CA','snap_TX','snap_WI']
    deleted_states = []
    for i in states:
        if i != state:
            deleted_states.append(i)
            
    features = ['event_name_1_lag'] + [col for col in list(df) if (col not in remove_features and col not in deleted_states)]
    
    df['event_name_1'] = df['event_name_1'].notnull().astype(int)

    # and 'time' is the column indicating the time point
    lag = 1  # Define the lag you want, e.g., lag of 1 time point

    # Sort the DataFrame by 'id' and 'time' columns
    df = df.sort_values(by=['id', 'd'])

    # Create the lagged column
    df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)

    df = df[['id','d',TARGET]+features]
    
    df = df.reset_index(drop=True)
    
    return df, features

# Recombine Test set after training
def get_base_test():
    base_test = pd.DataFrame()

    for store_id in STORES_IDS:
        temp_df = pd.read_pickle(processed_data_dir+'test_'+store_id+'.pkl')
        temp_df['store_id'] = store_id
        base_test = pd.concat([base_test, temp_df]).reset_index(drop=True)
    
    return base_test

In [12]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization

# Assuming tensor_train is already created as per the previous code snippets
# tensor_train shape: (num_ids, num_d_values, depth)
for store_id in STORES_IDS:
    df, features = get_data_by_store(store_id)
    tensor_train, tensor_label = df_to_tensor(df, train_day=56, validation_day= 28)

    # Save tensor_train as a pickle file
    np.save(f'tensor_train_{store_id}.npy', tensor_train)
    # Save tensor_label as a pickle file
    np.save(f'tensor_label_{store_id}.npy', tensor_label)
    # Reshape the tensor_train to fit the Transformer input shape
    num_ids, num_d_values, depth = tensor_train.shape
    input_shape = (num_d_values, depth)

    # Define the Transformer model with linear output
    def transformer_model(input_shape):
        inputs = Input(shape=input_shape)
        # Transformer layers
        transformer_layer = MultiHeadAttention(num_heads=12, key_dim=depth)
        x = transformer_layer(inputs, inputs)
        x = LayerNormalization(epsilon=1e-6)(x)
        x = Dropout(0.2)(x)
        # Flatten the output
        x = tf.keras.layers.Flatten()(x)
        outputs = Dense(28, activation='linear')(x)  # Linear activation for continuous output
        model = Model(inputs=inputs, outputs=outputs)
        return model

    # Instantiate the model
    model = transformer_model(input_shape)

    from tensorflow.keras.callbacks import EarlyStopping

    # Define early stopping criteria
    early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')  # Use mean squared error for regression

    # Train the model (assuming you have labels for training)
    # Replace tensor_train and y_train with your actual training data and labels
    model.fit(tensor_train, tensor_label, epochs=200, batch_size=50, validation_split=0.2, callbacks=[early_stopping])

    # Save the model
    model.save(f"transformer_model_{store_id}_12heads.h5")


  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200


  saving_api.save_model(
  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200


  saving_api.save_model(
  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200


  saving_api.save_model(
  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200


  saving_api.save_model(
  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200


  saving_api.save_model(
  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200


  saving_api.save_model(
  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200


  saving_api.save_model(
  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200


  saving_api.save_model(
  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200


  saving_api.save_model(
  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200


  saving_api.save_model(


In [13]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model

# Assuming df_test is your test dataframe
for store_id in STORES_IDS:
    columns = ['id'] + [f'F{i}' for i in range(1, 29)]
    df_test = pd.DataFrame(columns=columns)
    # Load the saved model
    model = load_model(f"transformer_model_{store_id}_12heads.h5")

    df, features = get_data_by_store(store_id)
    tensor_test, id = df_to_tensor_test(df, train_day=56)

    # Preprocess df_test to match the input shape expected by the model
    # Assuming you have a function preprocess_test_data() for this purpose


    # Make predictions
    predictions = model.predict(tensor_test)

    # Ensure predictions have shape (num_samples, 28)
    assert predictions.shape[1] == 28, "The model's output shape is not as expected"

    # Do something with the predictions, like save them to a dataframe or use them for further analysis
    # For example, if you want to add predictions to df_test
    df_test['id'] = id
    df_test.iloc[:,1:] = predictions

    df_test.to_csv(f"predicted_12heads_{store_id}",index=False)

# Now df_test contains predictions for each store


  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)




  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)




  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)




  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)




  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)




  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)




  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)




  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)




  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)




  df['event_name_1_lag'] = df.groupby('id')['event_name_1'].shift(lag)




In [15]:
import pandas as pd
columns = ['id'] + [f'F{i}' for i in range(1, 29)]
df_submission = pd.DataFrame(columns=columns)

for store_id in STORES_IDS:
    df_new = pd.read_csv(f"predicted_12heads_{store_id}")
    df_submission = pd.concat([df_submission,df_new],axis=0)

# Read the first CSV file
df1 = pd.read_csv("..\datasets\sample_submission.csv")

# Merge the two DataFrames on the 'id' column
merged_df = pd.merge(df1, df_submission, on='id', how="left", suffixes=('_original', ''))

# Replace values in columns F1 to F28 in df1 with corresponding values from df2
for col in df1.columns[1:]:
    merged_df[col+'_original'] = merged_df[col+'_original'].fillna(merged_df[col])

# Drop the extra columns
merged_df.drop(columns=[col+'_original' for col in df1.columns[1:]], inplace=True)
merged_df.fillna(0, inplace=True)

for column in merged_df.columns[1:]:
    # Replace negative values with zero
    merged_df[column] = merged_df[column].apply(lambda x: max(0, x) if isinstance(x, (int, float)) else x)
# Save the updated DataFrame to a new CSV file
merged_df.to_csv("merged_file_transformer_12heads.csv", index=False)

  df_submission = pd.concat([df_submission,df_new],axis=0)
