In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from tensorflow import keras

In [2]:
df = pd.read_csv("./Supermart_Grocery_Sales_Retail_Analytics_Dataset.csv")
df.head()

Unnamed: 0,Order ID,Customer Name,Category,Sub Category,City,Order Date,Region,Sales,Discount,Profit,State
0,OD1,Harish,Oil & Masala,Masalas,Vellore,11-08-2017,North,1254,0.12,401.28,Tamil Nadu
1,OD2,Sudha,Beverages,Health Drinks,Krishnagiri,11-08-2017,South,749,0.18,149.8,Tamil Nadu
2,OD3,Hussain,Food Grains,Atta & Flour,Perambalur,06-12-2017,West,2360,0.21,165.2,Tamil Nadu
3,OD4,Jackson,Fruits & Veggies,Fresh Vegetables,Dharmapuri,10-11-2016,South,896,0.25,89.6,Tamil Nadu
4,OD5,Ridhesh,Food Grains,Organic Staples,Ooty,10-11-2016,South,2355,0.26,918.45,Tamil Nadu


In [3]:
df['Category'].value_counts()

Category
Snacks               1514
Eggs, Meat & Fish    1490
Fruits & Veggies     1418
Bakery               1413
Beverages            1400
Food Grains          1398
Oil & Masala         1361
Name: count, dtype: int64

In [4]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['Category'] = le.fit_transform(df['Category'])
df['Sub Category'] = le.fit_transform(df['Sub Category'])
df['City'] = le.fit_transform(df['City'])
df['Order Date'] = le.fit_transform(df['Order Date'])
df['Region'] = le.fit_transform(df['Region'])
df['State'] = le.fit_transform(df['State'])
df['Sales'] = le.fit_transform(df['Sales'])
df.head()

Unnamed: 0,Order ID,Customer Name,Category,Sub Category,City,Order Date,Region,Sales,Discount,Profit,State
0,OD1,Harish,5,14,21,539,2,748,0.12,401.28,0
1,OD2,Sudha,1,13,8,539,3,246,0.18,149.8,0
2,OD3,Hussain,3,0,13,226,4,1849,0.21,165.2,0
3,OD4,Jackson,4,12,4,439,3,393,0.25,89.6,0
4,OD5,Ridhesh,3,18,12,439,3,1845,0.26,918.45,0


In [5]:
count_class = le.classes_
print(count_class)

[ 500  501  502 ... 2498 2499 2500]


In [6]:
X = df.drop(columns=['Order ID','Customer Name', 'Sales'])
y = df['Sales']
X.head(3)

Unnamed: 0,Category,Sub Category,City,Order Date,Region,Discount,Profit,State
0,5,14,21,539,2,0.12,401.28,0
1,1,13,8,539,3,0.18,149.8,0
2,3,0,13,226,4,0.21,165.2,0


In [7]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=69)

In [14]:
y_train.shape

(6995,)

In [8]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
print(x_train[:1])
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
print(x_train[:1])
print(x_test[:1])

      Category  Sub Category  City  Order Date  Region  Discount  Profit  \
8634         6             6    14          14       3      0.23  616.64   

      State  
8634      0  
[[ 1.48231664 -0.7558514   0.3559583  -1.86003455  0.59488587  0.04389688
   0.99653771  0.        ]]
[[ 0.48622606 -0.00964363 -1.65912905 -0.25550309  1.21072525 -0.35766202
   0.00937814  0.        ]]


In [15]:
y_train = keras.utils.to_categorical(y_train, num_classes=6995)
print(y_train[:5])

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [11]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

model = Sequential()
model.add(Dense(units=32, activation='relu', input_shape=(x_train.shape[-1], )))
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=256, activation='relu'))  # Additional hidden layer
model.add(Dense(units=128, activation='relu'))   # Another additional hidden layer
model.add(Dense(units=64, activation='relu'))   # You can add more layers if needed
model.add(Dropout(0.5))
model.add(Dense(units=22, activation='softmax'))

In [12]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                96        
                                                                 
 dense_1 (Dense)             (None, 64)                2112      
                                                                 
 dense_2 (Dense)             (None, 256)               16640     
                                                                 
 dense_3 (Dense)             (None, 128)               32896     
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_5 (Dense)             (None, 22)                1

In [13]:
model.fit(x_train, y_train, epochs=1000, verbose=2)

Epoch 1/1000
1/1 - 1s - loss: 3.1013 - accuracy: 0.0400 - 720ms/epoch - 720ms/step
Epoch 2/1000
1/1 - 0s - loss: 3.0803 - accuracy: 0.0400 - 3ms/epoch - 3ms/step
Epoch 3/1000
1/1 - 0s - loss: 3.0703 - accuracy: 0.1200 - 3ms/epoch - 3ms/step
Epoch 4/1000
1/1 - 0s - loss: 3.0774 - accuracy: 0.0800 - 3ms/epoch - 3ms/step
Epoch 5/1000
1/1 - 0s - loss: 3.0418 - accuracy: 0.1600 - 2ms/epoch - 2ms/step
Epoch 6/1000
1/1 - 0s - loss: 3.0561 - accuracy: 0.1600 - 2ms/epoch - 2ms/step
Epoch 7/1000
1/1 - 0s - loss: 3.0247 - accuracy: 0.2000 - 3ms/epoch - 3ms/step
Epoch 8/1000
1/1 - 0s - loss: 3.0150 - accuracy: 0.1200 - 3ms/epoch - 3ms/step
Epoch 9/1000
1/1 - 0s - loss: 2.9976 - accuracy: 0.1200 - 3ms/epoch - 3ms/step
Epoch 10/1000
1/1 - 0s - loss: 3.0398 - accuracy: 0.1200 - 3ms/epoch - 3ms/step
Epoch 11/1000
1/1 - 0s - loss: 2.9829 - accuracy: 0.2000 - 3ms/epoch - 3ms/step
Epoch 12/1000
1/1 - 0s - loss: 2.9724 - accuracy: 0.1600 - 3ms/epoch - 3ms/step
Epoch 13/1000
1/1 - 0s - loss: 2.9438 - accur

<keras.src.callbacks.History at 0x1e856825e20>

In [14]:
prediction = model.predict(x_test)



In [15]:
model.save('item_model.h5', model)

  saving_api.save_model(
