# Loading important libraries

In [None]:
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

import tensorflow as tf
import tensorflow.keras.backend as K

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Input, Embedding, concatenate, Flatten
from tensorflow.keras.optimizers import Adam

# Loading the data

In [None]:
# Read the training subset and the test set; the `date` column of each file
# is parsed into datetimes while loading.
train_subset = pd.read_csv(
    "/content/drive/MyDrive/FinalHack Datasets/train_subset.csv",
    parse_dates=['date'],
)

test = pd.read_csv(
    "/content/drive/MyDrive/FinalHack Datasets/test.csv",
    parse_dates=['date'],
)

In [None]:
# Derive calendar features from `date`, then drop the raw date together with
# the leftover 'Unnamed: 0' column.
train_dates = pd.DatetimeIndex(train_subset['date'])

train_subset['Month'] = train_dates.month.astype('int8')
train_subset['Day'] = train_dates.day.astype('int8')
# NOTE: despite its name, `Week` stores DatetimeIndex.weekday (day of week),
# not a week number.
train_subset['Week'] = train_dates.weekday.astype('int8')

train_subset = train_subset.drop(columns=['Unnamed: 0', 'date'])

train_subset.head()

Unnamed: 0,locationId,item_id,unit_sales,onpromotion,Month,Day,Week
0,location_25,item_105574,12.0,False,1,1,0
1,location_25,item_105575,9.0,False,1,1,0
2,location_25,item_105857,3.0,False,1,1,0
3,location_25,item_108634,3.0,False,1,1,0
4,location_25,item_108701,2.0,True,1,1,0


In [None]:
# Same calendar features as for the training frame.
test_dates = pd.DatetimeIndex(test['date'])

test['Month'] = test_dates.month.astype('int8')
test['Day'] = test_dates.day.astype('int8')
# NOTE: `Week` stores DatetimeIndex.weekday (day of week), not a week number.
test['Week'] = test_dates.weekday.astype('int8')

# Keep the row ids aside for the submission file before dropping them.
ID = test['id']

test = test.drop(columns=['id', 'date'])

test.head()

Unnamed: 0,locationId,item_id,onpromotion,Month,Day,Week
0,location_25,item_99197,False,1,1,1
1,location_25,item_103665,False,1,1,1
2,location_25,item_105574,False,1,1,1
3,location_25,item_105857,False,1,1,1
4,location_25,item_106716,False,1,1,1


In [None]:
### Keeping only rows with strictly positive unit_sales
### (this drops negative values and zeros alike)

train_subset = train_subset.loc[train_subset['unit_sales'].gt(0)]

In [None]:
### Removing outliers with the 1.5 * IQR rule on unit_sales

Q1, Q3 = (train_subset['unit_sales'].quantile(q) for q in (0.25, 0.75))
print(Q1,Q3)

IQR = Q3 - Q1
print(IQR)

lower_limit, upper_limit = Q1 - 1.5*IQR, Q3 + 1.5*IQR
print( lower_limit,upper_limit)

# Only the upper fence is applied; `lower_limit` is printed for reference
# but never used as a filter. The comparison is strict, so rows equal to
# `upper_limit` are removed too.
train_subset = train_subset.loc[train_subset['unit_sales'].lt(upper_limit)]

2.0 8.0
6.0
-7.0 17.0


In [None]:
## Label Encoding 


from sklearn import preprocessing
def df_lbl_enc(df, encoders=None):
    """Label-encode every object-dtype column of ``df`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose object-dtype columns are replaced by integer codes.
        The frame is modified in place and also returned.
    encoders : dict, optional
        Maps column name -> already fitted ``LabelEncoder``. A column found
        in the dict is encoded with that encoder (``transform`` only), so
        several frames can share one label -> code mapping. A column missing
        from the dict gets a fresh encoder fitted on this frame alone, and
        that encoder is stored in the dict. When ``encoders`` is omitted,
        every column gets its own fresh encoder.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Raises
    ------
    ValueError
        Propagated from ``LabelEncoder.transform`` when a pre-fitted encoder
        meets a label it was not fitted on.
    """
    for c in df.columns:
        if df[c].dtype == 'object':
            lbl = None if encoders is None else encoders.get(c)
            if lbl is None:
                lbl = preprocessing.LabelEncoder()
                df[c] = lbl.fit_transform(df[c])
                if encoders is not None:
                    encoders[c] = lbl
            else:
                df[c] = lbl.transform(df[c])
            print(c)
    return df


# Fit ONE encoder per categorical column on the labels of train and test
# together. Fitting each frame separately numbers the labels independently,
# so the same locationId / item_id string could get different codes in
# train and in test.
label_encoders = {}
for c in train_subset.columns:
    if train_subset[c].dtype == 'object' and c in test.columns:
        all_labels = set(train_subset[c].unique()) | set(test[c].unique())
        label_encoders[c] = preprocessing.LabelEncoder().fit(list(all_labels))

train_subset = df_lbl_enc(train_subset, label_encoders)
X_test = df_lbl_enc(test, label_encoders)

locationId
item_id
locationId
item_id


In [None]:
 from sklearn.preprocessing import LabelEncoder

 # Turn the training `onpromotion` flag into integer codes. `lb` stays
 # fitted on the training labels after this cell.
 lb = LabelEncoder()
 lb.fit(train_subset['onpromotion'])

 train_subset['onpromotion'] = lb.transform(train_subset['onpromotion'])

# Reuse the encoder already fitted on the training labels: re-fitting on the
# test frame would number its labels independently, so an `onpromotion` value
# could end up with a different code than the one the model was trained on.
# `transform` raises ValueError if the test frame holds a label that was
# absent from the training data.
X_test['onpromotion'] = lb.transform(X_test['onpromotion'])

In [None]:
# Preview the first rows of the encoded training frame.
train_subset.head(n=5)

Unnamed: 0,locationId,item_id,unit_sales,onpromotion,Month,Day,Week
0,17,120,12.0,0,1,1,0
1,17,121,9.0,0,1,1,0
2,17,133,3.0,0,1,1,0
3,17,194,3.0,0,1,1,0
4,17,197,2.0,1,1,1,0


In [None]:
# Preview the first rows of the encoded test frame.
X_test.head(n=5)

Unnamed: 0,locationId,item_id,onpromotion,Month,Day,Week
0,17,4008,0,1,1,1
1,17,45,0,1,1,1
2,17,120,0,1,1,1
3,17,133,0,1,1,1
4,17,144,0,1,1,1


In [None]:
#cat_cols = ['locationId','item_id','onpromotion','category_of_item','class','Month','Day','Week']

In [None]:
# Separate the target (`unit_sales`) from the feature columns.
y_train = train_subset['unit_sales'].to_numpy()

X_train = train_subset.drop(columns=['unit_sales'])


In [None]:
# Display the training feature frame.
X_train

Unnamed: 0,locationId,item_id,onpromotion,Month,Day,Week
0,17,120,0,1,1,0
1,17,121,0,1,1,0
2,17,133,0,1,1,0
3,17,194,0,1,1,0
4,17,197,1,1,1,0
...,...,...,...,...,...,...
21679746,48,2399,0,8,15,2
21679747,48,2400,0,8,15,2
21679748,48,2402,0,8,15,2
21679749,48,2403,0,8,15,2


In [None]:
# Train on log1p(unit_sales); predictions are mapped back with np.expm1
# before the submission is written.
# NOTE(review): this cell overwrites y_train, so running it twice applies
# the transform twice.
y_train = np.log1p(y_train)

In [None]:
# Month and Day come out of the date parts 1-based; shift them to 0-based
# indices for the embedding lookups.
# NOTE(review): not idempotent - re-running the cell shifts the values again.
for calendar_col in ('Month', 'Day'):
    X_train[calendar_col] = X_train[calendar_col] - 1

In [None]:
# Display the test feature frame.
X_test

Unnamed: 0,locationId,item_id,onpromotion,Month,Day,Week
0,17,4008,0,1,1,1
1,17,45,0,1,1,1
2,17,120,0,1,1,1
3,17,133,0,1,1,1
4,17,144,0,1,1,1
...,...,...,...,...,...,...
23517675,48,2736,0,8,15,3
23517676,48,2746,1,8,15,3
23517677,48,2751,0,8,15,3
23517678,48,2759,1,8,15,3


In [None]:
# Same 1-based -> 0-based shift as applied to the training frame.
# NOTE(review): not idempotent - re-running the cell shifts the values again.
for calendar_col in ('Month', 'Day'):
    X_test[calendar_col] = X_test[calendar_col] - 1

# Getting the number of unique levels for each categorical feature

In [None]:
# Train Data Attributes
# One integer array per feature that is fed to the model. item_id is not
# extracted because it is not among the model inputs.

loc_attr = X_train['locationId'].to_numpy()
onpromotion_attr = X_train['onpromotion'].to_numpy()
month_attr = X_train['Month'].to_numpy()
day_attr = X_train['Day'].to_numpy()
week_attr = X_train['Week'].to_numpy()

In [None]:
# Test Data Attributes
# Same features, in the same roles, as the training arrays.

test_loc_attr = X_test['locationId'].to_numpy()
test_onpromotion_attr = X_test['onpromotion'].to_numpy()
test_month_attr = X_test['Month'].to_numpy()
test_day_attr = X_test['Day'].to_numpy()
test_week_attr = X_test['Week'].to_numpy()

In [None]:
def _embedding_vocab_size(*index_arrays):
    """Return the Embedding ``input_dim`` required by the given index arrays.

    The result is the largest index found in any of the arrays plus one, so
    every index in every array addresses a valid embedding row.
    """
    return int(max(np.max(indices) for indices in index_arrays)) + 1


# Size each vocabulary from the train AND test indices. Counting the distinct
# training values only gives a valid `input_dim` when the indices are
# contiguous from 0 and the test set never holds a larger index; otherwise
# the embedding lookup goes out of range.
loc_attr_level = _embedding_vocab_size(loc_attr, test_loc_attr)
onpromotion_attr_level = _embedding_vocab_size(onpromotion_attr, test_onpromotion_attr)
month_attr_level = _embedding_vocab_size(month_attr, test_month_attr)
day_attr_level = _embedding_vocab_size(day_attr, test_day_attr)
week_attr_level = _embedding_vocab_size(week_attr, test_week_attr)

Categorical Embedding for locationId

In [None]:
# locationId: one index per sample, looked up in a 5-dimensional embedding.
loc_input = Input(name="loc", shape=(1,))
loc_embed = Embedding(loc_attr_level, 5)(loc_input)

Categorical Embedding for item_id

In [None]:
#item_input = Input(shape=(1, ), name="item")
#item_embed = Embedding(input_dim=item_attr_level, output_dim=5,)(item_input)

Categorical Embedding for onpromotion

In [None]:
# onpromotion: one index per sample, looked up in a 2-dimensional embedding.
onpromo_input = Input(name="onpromo", shape=(1,))
onpromo_embed = Embedding(onpromotion_attr_level, 2)(onpromo_input)

Categorical Embedding for category of items

In [None]:
#cat_item_input = Input(shape=(1, ), name="cat_item")
#cat_item_embed = Embedding(input_dim=cat_item_attr_level, output_dim=5,)(cat_item_input)

Categorical Embedding for class

In [None]:
#class_input = Input(shape=(1, ), name="class")
#class_embed = Embedding(input_dim=class_attr_level, output_dim=5,)(class_input)

Categorical Embedding for month

In [None]:
# Month: one index per sample, looked up in a 5-dimensional embedding.
month_input = Input(name="month", shape=(1,))
month_embed = Embedding(month_attr_level, 5)(month_input)

Categorical Embedding for day

In [None]:
# Day of month: one index per sample, looked up in a 5-dimensional embedding.
day_input = Input(name="day", shape=(1,))
day_embed = Embedding(day_attr_level, 5)(day_input)

Categorical Embedding for week (the `Week` column holds the day-of-week index)

In [None]:
# `Week` (the weekday index): one index per sample, 5-dimensional embedding.
week_input = Input(name="week", shape=(1,))
week_embed = Embedding(week_attr_level, 5)(week_input)

Merging and flattening

In [None]:
# Join the five embedding outputs and flatten them into one feature vector
# per sample.
embedded_features = [loc_embed, onpromo_embed, month_embed, day_embed, week_embed]
merge_emb = concatenate(embedded_features)
merge_emb_flat = Flatten()(merge_emb)

In [None]:
# A single 12-unit ReLU hidden layer followed by one linear output unit.
merged_layer = Dense(units=12, activation='relu')(merge_emb_flat)
output_layer = Dense(units=1, activation='linear')(merged_layer)

# The input order here must match the order of the arrays passed to
# fit() and predict().
model_inputs = [loc_input, onpromo_input, month_input, day_input, week_input]
model = Model(inputs=model_inputs, outputs=output_layer)

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
loc (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
onpromo (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
month (InputLayer)              [(None, 1)]          0                                            
__________________________________________________________________________________________________
day (InputLayer)                [(None, 1)]          0                                            
______________________________________________________________________________________________

In [None]:
# NOTE(review): y_train is log1p-transformed before fitting, so this loss and
# metric are percentage errors on the log scale, not on raw unit_sales.
model.compile(
    optimizer='adam',
    loss="mean_absolute_percentage_error",
    metrics=['mape'],
)

In [None]:
# Array order follows the `inputs` list given to Model(...).
train_inputs = [loc_attr, onpromotion_attr, month_attr, day_attr, week_attr]
model.fit(train_inputs, y_train, batch_size=1024, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fb460139710>

In [None]:
# Drop the names of frames that are no longer needed.
# NOTE(review): X_test was obtained from df_lbl_enc(test), which returns the
# frame it was given, so the test data stays alive through X_test.
del train_subset, test

In [None]:
# Score the test set; array order follows the `inputs` list given to Model(...).
prediction = model.predict(
    [
        test_loc_attr,
        test_onpromotion_attr,
        test_month_attr,
        test_day_attr,
        test_week_attr,
    ]
)


In [None]:
# Undo the log1p applied to the training target so predictions are back on
# the unit_sales scale.
test_prediction = np.expm1(prediction)

In [None]:
# Display the back-transformed predictions.
test_prediction

array([[1.9930723],
       [1.9930723],
       [1.9930723],
       ...,
       [1.6557469],
       [1.7866127],
       [1.6557469]], dtype=float32)

In [None]:
# Build the submission frame: one row per test id with its predicted sales.
# pd.concat(axis=1) pairs rows by index label, not by position. The ids keep
# whatever index the test frame had, while the predictions get a fresh
# 0..n-1 index, so the id index is reset to guarantee positional pairing.
ID = pd.DataFrame(ID).reset_index(drop=True)
res = pd.DataFrame(test_prediction)

res = res.rename(columns={res.columns[0]: 'unit_sales'})

gb = pd.concat([ID, res], axis=1)

gb['unit_sales'] = gb['unit_sales'].round(2)

In [None]:
# Display the submission frame (id, unit_sales).
gb

Unnamed: 0,id,unit_sales
0,0,1.99
1,1,1.99
2,2,1.99
3,3,1.99
4,4,1.99
...,...,...
23517675,23808256,1.66
23517676,23808257,1.79
23517677,23808258,1.66
23517678,23808259,1.79


In [None]:
# Write the submission without the DataFrame index column.
gb.to_csv(path_or_buf="Categorical Embeddings.csv", index=False)

MAPE on the test data: 64.19