# Training the "small" model

The code below loads the training data and trains the so-called "small" model.

For every user, the model looks at a prescribed number of the most recent orders (given by the parameter `seq_len`). Of those orders, only the order date and the bill amount are taken into account; the model ignores all other order features.

In [7]:
from pndapetzim.data import load_datasets, LABEL_FILE_NAME, ORDER_FILE_NAME
from pndapetzim.models import CategoricalFeatureDescriptor

# Settings forwarded to load_datasets below.
seq_len = 20
returning_weight = 4.0

# Both input files are read from the same relative directory.
data_dir = '../data/'
train, test, encodings = load_datasets(
    order_path=data_dir + ORDER_FILE_NAME,
    label_path=data_dir + LABEL_FILE_NAME,
    seq_len=seq_len,
    train_ratio=100,
    returning_weight=returning_weight,
)

# Embedding size chosen for each categorical feature.
embedding_sizes = {
    'restaurant_id': 15,
    'city_id': 12,
    'payment_id': 3,
    'platform_id': 4,
    'transmission_id': 4,
}

# One descriptor per feature; the vocabulary size is taken from the
# encodings returned by load_datasets.
cat_features = {
    feature: CategoricalFeatureDescriptor(
        vocab_size=encodings[feature].vocab_size, embedding_size=width
    )
    for feature, width in embedding_sizes.items()
}

In [14]:
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import AUC, Recall
from tensorflow.keras.optimizers import Adam

from pndapetzim.models import build_large_model, CategoricalFeatureDescriptor

# Training configuration.
batch_size = 128
optimiser = Adam(learning_rate=0.01)
lossm = BinaryCrossentropy()

# NOTE(review): the section heading talks about the "small" model, but the
# builder called here is build_large_model with categorical features —
# confirm which one is intended.
model = build_large_model(seq_len, cat_features)

# Metric objects are kept in named variables so they remain accessible
# after compilation.
aucm, recallm = AUC(), Recall()
metrics = ['accuracy', aucm, recallm]

model.compile(optimizer=optimiser, loss=lossm, metrics=metrics)

In [None]:
# Feed the training data in batches (prefetch argument of 10) and train
# for a single epoch.
train_batches = train.batch(batch_size).prefetch(10)
model.fit(train_batches, epochs=1)

    175/Unknown - 112s 624ms/step - loss: 1.3492 - accuracy: 0.6301 - auc_4: 0.6605 - recall_4: 0.5972

In [None]:
# Evaluate on the test split. The model was compiled with three metrics
# ('accuracy', AUC, Recall), so evaluate yields the loss followed by those
# three values, unpacked here in that order.
loss, accuracy, auc, recall = model.evaluate(test.batch(batch_size))
# Report the scalar results returned by evaluate (not the metric objects).
print(f'loss: {loss}, accuracy: {accuracy}, AUC: {auc}, recall: {recall}')

In [4]:
# Display the categorical feature descriptors passed to the model builder.
# NOTE(review): the saved output of this cell lists embedding sizes that
# differ from the values set where cat_features is defined — it looks stale;
# re-run the notebook from a fresh kernel to refresh it.
cat_features

{'restaurant_id': CategoricalFeatureDescriptor(vocab_size=13570, embedding_size=10),
 'city_id': CategoricalFeatureDescriptor(vocab_size=3750, embedding_size=3),
 'payment_id': CategoricalFeatureDescriptor(vocab_size=6, embedding_size=2),
 'platform_id': CategoricalFeatureDescriptor(vocab_size=15, embedding_size=2),
 'transmission_id': CategoricalFeatureDescriptor(vocab_size=11, embedding_size=2)}