In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

import pathlib

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchinfo import summary
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np

from dataset import CriteoAdDataset
from model import DeepFM

## Dataset


In [3]:
# Fix the RNG state up front so a fresh "Restart & Run All" reproduces the
# same shuffled batches (and the same random initialisation of any module
# created afterwards).
SEED = 42
torch.manual_seed(SEED)

data_dir = pathlib.Path("data/criteo-ad-data")
train_dataset = CriteoAdDataset(data_dir, type="train")

# A tiny batch size keeps the smoke tests below cheap. The dedicated, seeded
# generator makes the shuffle order independent of how much global RNG state
# other cells have consumed.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=10,
    shuffle=True,
    generator=torch.Generator().manual_seed(SEED),
)


In [4]:
# Pull one batch from the training loader and split it into its three parts;
# the tensors are reused by the cells below as example model inputs.
first_batch = next(iter(train_dataloader))
label, count_features, category_features = first_batch

# Display the shape of each part as the cell output.
tuple(part.shape for part in (label, count_features, category_features))


(torch.Size([10]), torch.Size([10, 13]), torch.Size([10, 26]))

## DeepFM

In [5]:
# --- Hand-picked hyperparameters -------------------------------------------
embedding_dims = 20
dense_embedding_hidden_features = 30
deep_layer_out_features = 10

# --- Values read from the training dataset ---------------------------------
category_feature_names = train_dataset.category_feature_columns
category_cardinalities = train_dataset.category_cardinalities
# One dense input per count-feature column.
dense_embedding_in_features = len(train_dataset.count_feature_columns)

# --- Build the model --------------------------------------------------------
deepfm_config = {
    "embedding_dims": embedding_dims,
    "category_cardinalities": category_cardinalities,
    "dense_embedding_in_features": dense_embedding_in_features,
    "dense_embedding_hidden_features": dense_embedding_hidden_features,
    "deep_layer_out_features": deep_layer_out_features,
}
deepfm = DeepFM(**deepfm_config)

# --- Smoke test: one forward pass on the batch sampled above ---------------
forward_inputs = {
    "count_features": count_features,
    "category_features": category_features,
    "category_feature_names": category_feature_names,
}
logits = deepfm(**forward_inputs)

# Show the output shape as the cell result.
logits.shape


torch.Size([10, 1])

In [6]:
# Print the model's textual representation to inspect its submodules.
print(deepfm)

DeepFM(
  (sparse_embedding): SparseEmbedding(
    (category_embeddings): ModuleDict(
      (category_feature_0): EmbeddingBag(541, 20, mode='mean')
      (category_feature_1): EmbeddingBag(497, 20, mode='mean')
      (category_feature_10): EmbeddingBag(3799, 20, mode='mean')
      (category_feature_11): EmbeddingBag(41312, 20, mode='mean')
      (category_feature_12): EmbeddingBag(2796, 20, mode='mean')
      (category_feature_13): EmbeddingBag(26, 20, mode='mean')
      (category_feature_14): EmbeddingBag(5238, 20, mode='mean')
      (category_feature_15): EmbeddingBag(34617, 20, mode='mean')
      (category_feature_16): EmbeddingBag(10, 20, mode='mean')
      (category_feature_17): EmbeddingBag(2548, 20, mode='mean')
      (category_feature_18): EmbeddingBag(1303, 20, mode='mean')
      (category_feature_19): EmbeddingBag(4, 20, mode='mean')
      (category_feature_2): EmbeddingBag(43870, 20, mode='mean')
      (category_feature_20): EmbeddingBag(38618, 20, mode='mean')
      (categ

In [7]:
# torchinfo needs example inputs to trace the model; the extra keyword
# arguments are the same ones used for the forward pass on the sample batch.
summary_inputs = {
    "count_features": count_features,
    "category_features": category_features,
    "category_feature_names": category_feature_names,
}
summary(deepfm, **summary_inputs)


Layer (type:depth-idx)                   Param #
DeepFM                                   --
├─SparseEmbedding: 1-1                   --
│    └─ModuleDict: 2-1                   --
│    │    └─EmbeddingBag: 3-1            10,820
│    │    └─EmbeddingBag: 3-2            9,940
│    │    └─EmbeddingBag: 3-3            75,980
│    │    └─EmbeddingBag: 3-4            826,240
│    │    └─EmbeddingBag: 3-5            55,920
│    │    └─EmbeddingBag: 3-6            520
│    │    └─EmbeddingBag: 3-7            104,760
│    │    └─EmbeddingBag: 3-8            692,340
│    │    └─EmbeddingBag: 3-9            200
│    │    └─EmbeddingBag: 3-10           50,960
│    │    └─EmbeddingBag: 3-11           26,060
│    │    └─EmbeddingBag: 3-12           80
│    │    └─EmbeddingBag: 3-13           877,400
│    │    └─EmbeddingBag: 3-14           772,360
│    │    └─EmbeddingBag: 3-15           220
│    │    └─EmbeddingBag: 3-16           280
│    │    └─EmbeddingBag: 3-17           246,700
│    │    └─Em