In [8]:
from multiprocessing.sharedctypes import Value
import os
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np


class DeepFM(nn.Module):
    def __init__(self, args) -> None:
        super(DeepFM, self).__init__()
        os.environ['CUDA_VISIBLE_DEVICES'] = args['gpuid']

        self.lr = args['lr']
        self.l2_reg = args['l2_reg']
        self.epochs = args['epochs']

        self.num_fetures = args['num_features']
        self.emb_dim = args['embedding_dim']
        self.feature_embs = nn.Embedding(sum(args['field_size']), args['embedding_dim'])
        self.bias_embs = nn.Embedding(sum(args['field_size']), 1)

        # self.num_layers = args['num_layers'] # 2
        self.deep_neurons = args['dense_size']
        self.early_stop = True
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.batch_norm = args['batch_norm']
        self.opt = args['opt_name']

        if args['deep_layer_act'] == 'relu':
            self.deep_layer_act = nn.ReLU()
        else:
            raise ValueError('Invalid activation function name for deep layers')

        self.dropout_fm_1o = nn.Dropout(p=args['1o_dropout_p'])
        self.dropout_fm_2o = nn.Dropout(p=args['2o_dropout_p'])

        deep_modules = []
        layers_size = [self.num_fetures * self.emb_dim] + args['dense_size']
        for in_size, out_size in zip(layers_size[:-1], layers_size[1:]):
            deep_modules.append(nn.Linear(in_size, out_size))
            if self.batch_norm:
                deep_modules.append(nn.BatchNorm1d(num_features=out_size))
            deep_modules.append(self.deep_layer_act)
            deep_modules.append(nn.Dropout(p=args['deep_dropout_p']))
        self.deep = nn.Sequential(*deep_modules)

        self.output = nn.Linear(args['dense_size'][-1] + self.num_fetures + self.emb_dim, 1, bias=False) # concat projection

        # self._init_weights()

    def _init_weights(self):
        nn.init.normal_(self.feature_embs.weight, std=0.01)

        glorot = np.sqrt(2.0 / (self.num_fetures * self.emb_dim + self.deep_neurons[0]))

        for la in self.deep:
            if isinstance(la, nn.Linear):
                nn.init.normal_(la.weight, std=glorot)
                nn.init.constant_(la.bias, 0.)
                glorot = np.sqrt(2.0 / (la.weight.size()[0] + la.weight.size()[1]))

        glorot = np.sqrt(2.0 / (self.deep_neurons[-1] + self.num_fetures + self.emb_dim + 1))
        nn.init.normal_(self.output.weight, std=glorot)

    def forward(self, idxs, vals): # idx/vals: batchsize * feature_size
        feat_emb = self.feature_embs(idxs)  # batch_size * feature_size * embedding_size
        feat_emb = torch.multiply(feat_emb, torch.reshape(vals, [feat_emb.size(0), feat_emb.size(1), 1]).expand([feat_emb.size(0), feat_emb.size(1), self.emb_dim])) # batch_size * feature_size * embedding_size
        # first order part
        y_first_order = self.bias_embs(idxs) # batch_size * feature_size * 1
        y_first_order = torch.multiply(y_first_order, torch.reshape(vals, [feat_emb.size(0), feat_emb.size(1), 1])) # batch_size * feature_size * 1
        y_first_order = torch.sum(y_first_order, dim=2) # batch_size * feature_size
        y_first_order = self.dropout_fm_1o(y_first_order) # batch_size * feature_size
        # second order part
        summed_features_emb = torch.sum(feat_emb, 1)  # batch_size * embedding_size
        summed_features_emb_square = torch.square(summed_features_emb) # batch_size * embedding_size
        squared_features_emb = torch.square(feat_emb)  # batch_size * feature_size * embedding_size
        squared_sum_features_emb = torch.sum(squared_features_emb, 1)  # batch_size * embedding_size
        y_second_order = 0.5 * torch.subtract(summed_features_emb_square, squared_sum_features_emb) # batch_size * embedding_size
        y_second_order = self.dropout_fm_2o(y_second_order) # batch_size * embedding_size
        # deep part
        y_deep = feat_emb.view(feat_emb.size(0), -1) # batch_size * (feature_size * embedding_size)
        y_deep = self.deep(y_deep)
        
        concat_input = torch.cat([y_first_order, y_second_order, y_deep], dim=1) # batchsize * (embedding_size+feature_size+last_layer_out_size)
        output = self.output(concat_input)   # batch_size * 1
        out = torch.sigmoid(output)

        return out.view(-1)

    def fit(self, train_loader, valid_loader=None):
        if torch.cuda.is_available():
            self.cuda()
        else:
            self.cpu()
        if self.opt == 'adam':
            optimizer = optim.Adam(self.parameters(), lr=self.lr)
        elif self.opt == 'adagrad':
            optimizer = optim.Adagrad(self.parameters(), lr=self.lr)
        elif self.opt == 'sgd':
            optimizer = optim.SGD(self.parameters(), lr=self.lr)
        else:
            raise ValueError(f'Invalid optimizer name: {self.opt}')

        criterion = nn.BCEWithLogitsLoss(reduction='sum') # CE_log_loss for binary classification
        
        last_loss = 0.
        for epoch in range(1, self.epochs + 1):
            self.train()
            current_loss = 0.
            total_sample_num = 0
            for labels, idxs, vals in train_loader:
                total_sample_num += labels.size()[0]
                if torch.cuda.is_available():
                    labels = labels.cuda()
                    idxs = idxs.cuda()
                    vals = vals.cuda()
                else:
                    labels = labels.cpu()
                    idxs = idxs.cpu()
                    vals = vals.cpu()
                self.zero_grad()
                # TODO remember to let batches in loader put on GPU or CPU
                prediction = self.forward(idxs, vals)
                loss = criterion(prediction, labels)
                loss += self.l2_reg * self.output.weight.norm()
                for la in self.deep:
                    if la is nn.Linear:
                        loss += self.l2_reg * la.weight.norm()
                if torch.isnan(loss):
                    raise ValueError(f'Loss=Nan or Infinity: current settings does not fit the recommender')
                
                loss.backward()
                optimizer.step()
                current_loss += loss.item()

            print(f'[Epoch {epoch:03d}] - training loss={current_loss / total_sample_num:.4f}')
            delta_loss = float(current_loss - last_loss)
            if (abs(delta_loss) < 1e-5) and self.early_stop:
                print('Satisfy early stop mechanism')
                break
            else:
                last_loss = current_loss

            if valid_loader is not None:
                self.eval()
                # TODO if need valdiation
                pass

    def predict(self, test_loader):
        self.eval()
        _, idxs, vals = next(iter(test_loader))
        idxs = idxs.to(self.device)
        vals = vals.to(self.device)
        preds = self.forward(idxs, vals).cpu().detach()

        return preds

In [9]:
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score, recall_score

train_data = pd.read_csv('./train_data.csv').reset_index(drop=True)
test_data = pd.read_csv('./test_data.csv').reset_index(drop=True)

category_cols = ['FLAG_DOCUMENT_14_0.0',
 'FLAG_DOCUMENT_14_1.0',
 'FLAG_DOCUMENT_2_0.0',
 'FLAG_PHONE_0.0',
 'FLAG_PHONE_1.0',
 'FLAG_DOCUMENT_9_0.0',
 'FLAG_DOCUMENT_9_1.0',
 'FLAG_DOCUMENT_11_0.0',
 'FLAG_DOCUMENT_11_1.0',
 'FLAG_DOCUMENT_20_0.0',
 'FLAG_DOCUMENT_20_1.0',
 'FLAG_DOCUMENT_19_0.0',
 'FLAG_DOCUMENT_19_1.0',
 'WEEKDAY_APPR_PROCESS_START_FRIDAY',
 'WEEKDAY_APPR_PROCESS_START_MONDAY',
 'WEEKDAY_APPR_PROCESS_START_SATURDAY',
 'WEEKDAY_APPR_PROCESS_START_SUNDAY',
 'WEEKDAY_APPR_PROCESS_START_THURSDAY',
 'WEEKDAY_APPR_PROCESS_START_TUESDAY',
 'WEEKDAY_APPR_PROCESS_START_WEDNESDAY',
 'FLAG_DOCUMENT_16_0.0',
 'FLAG_DOCUMENT_16_1.0',
 'FLAG_DOCUMENT_12_0.0',
 'FLAG_MOBIL_1.0',
 'FLAG_DOCUMENT_7_0.0',
 'FLAG_DOCUMENT_10_0.0',
 'NAME_INCOME_TYPE_Commercial associate',
 'NAME_INCOME_TYPE_Pensioner',
 'NAME_INCOME_TYPE_State servant',
 'NAME_INCOME_TYPE_Working',
 'NAME_CONTRACT_TYPE_Cash loans',
 'NAME_CONTRACT_TYPE_Revolving loans',
 'FLAG_DOCUMENT_17_0.0',
 'FLAG_DOCUMENT_17_1.0',
 'NAME_HOUSING_TYPE_Co-op apartment',
 'NAME_HOUSING_TYPE_House / apartment',
 'NAME_HOUSING_TYPE_Municipal apartment',
 'NAME_HOUSING_TYPE_Office apartment',
 'NAME_HOUSING_TYPE_Rented apartment',
 'NAME_HOUSING_TYPE_With parents',
 'ORGANIZATION_TYPE_Advertising',
 'ORGANIZATION_TYPE_Agriculture',
 'ORGANIZATION_TYPE_Bank',
 'ORGANIZATION_TYPE_Business Entity Type 1',
 'ORGANIZATION_TYPE_Business Entity Type 2',
 'ORGANIZATION_TYPE_Business Entity Type 3',
 'ORGANIZATION_TYPE_Cleaning',
 'ORGANIZATION_TYPE_Construction',
 'ORGANIZATION_TYPE_Culture',
 'ORGANIZATION_TYPE_Electricity',
 'ORGANIZATION_TYPE_Emergency',
 'ORGANIZATION_TYPE_Government',
 'ORGANIZATION_TYPE_Hotel',
 'ORGANIZATION_TYPE_Housing',
 'ORGANIZATION_TYPE_Industry: type 1',
 'ORGANIZATION_TYPE_Industry: type 10',
 'ORGANIZATION_TYPE_Industry: type 11',
 'ORGANIZATION_TYPE_Industry: type 12',
 'ORGANIZATION_TYPE_Industry: type 13',
 'ORGANIZATION_TYPE_Industry: type 2',
 'ORGANIZATION_TYPE_Industry: type 3',
 'ORGANIZATION_TYPE_Industry: type 4',
 'ORGANIZATION_TYPE_Industry: type 5',
 'ORGANIZATION_TYPE_Industry: type 7',
 'ORGANIZATION_TYPE_Industry: type 8',
 'ORGANIZATION_TYPE_Industry: type 9',
 'ORGANIZATION_TYPE_Insurance',
 'ORGANIZATION_TYPE_Kindergarten',
 'ORGANIZATION_TYPE_Legal Services',
 'ORGANIZATION_TYPE_Medicine',
 'ORGANIZATION_TYPE_Military',
 'ORGANIZATION_TYPE_Mobile',
 'ORGANIZATION_TYPE_Other',
 'ORGANIZATION_TYPE_Police',
 'ORGANIZATION_TYPE_Postal',
 'ORGANIZATION_TYPE_Realtor',
 'ORGANIZATION_TYPE_Religion',
 'ORGANIZATION_TYPE_Restaurant',
 'ORGANIZATION_TYPE_School',
 'ORGANIZATION_TYPE_Security',
 'ORGANIZATION_TYPE_Security Ministries',
 'ORGANIZATION_TYPE_Self-employed',
 'ORGANIZATION_TYPE_Services',
 'ORGANIZATION_TYPE_Telecom',
 'ORGANIZATION_TYPE_Trade: type 1',
 'ORGANIZATION_TYPE_Trade: type 2',
 'ORGANIZATION_TYPE_Trade: type 3',
 'ORGANIZATION_TYPE_Trade: type 4',
 'ORGANIZATION_TYPE_Trade: type 5',
 'ORGANIZATION_TYPE_Trade: type 6',
 'ORGANIZATION_TYPE_Trade: type 7',
 'ORGANIZATION_TYPE_Transport: type 1',
 'ORGANIZATION_TYPE_Transport: type 2',
 'ORGANIZATION_TYPE_Transport: type 3',
 'ORGANIZATION_TYPE_Transport: type 4',
 'ORGANIZATION_TYPE_University',
 'ORGANIZATION_TYPE_XNA',
 'FLAG_DOCUMENT_3_0.0',
 'FLAG_DOCUMENT_3_1.0',
 'FLAG_DOCUMENT_21_0.0',
 'FLAG_DOCUMENT_21_1.0',
 'FLAG_WORK_PHONE_0.0',
 'FLAG_WORK_PHONE_1.0',
 'FLAG_OWN_CAR_0',
 'FLAG_OWN_CAR_1',
 'FLAG_CONT_MOBILE_0.0',
 'FLAG_CONT_MOBILE_1.0',
 'FLAG_DOCUMENT_8_0.0',
 'FLAG_DOCUMENT_8_1.0',
 'FLAG_EMP_PHONE_0.0',
 'FLAG_EMP_PHONE_1.0',
 'FLAG_DOCUMENT_4_0.0',
 'FLAG_DOCUMENT_18_0.0',
 'FLAG_DOCUMENT_18_1.0',
 'NAME_TYPE_SUITE_Children',
 'NAME_TYPE_SUITE_Family',
 'NAME_TYPE_SUITE_Group of people',
 'NAME_TYPE_SUITE_Other_A',
 'NAME_TYPE_SUITE_Other_B',
 'NAME_TYPE_SUITE_Spouse, partner',
 'NAME_TYPE_SUITE_Unaccompanied',
 'FLAG_DOCUMENT_15_0.0',
 'FLAG_DOCUMENT_15_1.0',
 'NAME_EDUCATION_TYPE_Academic degree',
 'NAME_EDUCATION_TYPE_Higher education',
 'NAME_EDUCATION_TYPE_Incomplete higher',
 'NAME_EDUCATION_TYPE_Lower secondary',
 'NAME_EDUCATION_TYPE_Secondary / secondary special',
 'FLAG_DOCUMENT_6_0.0',
 'FLAG_DOCUMENT_6_1.0',
 'FLAG_OWN_REALTY_0',
 'FLAG_OWN_REALTY_1',
 'FLAG_DOCUMENT_5_0.0',
 'FLAG_DOCUMENT_5_1.0',
 'NAME_FAMILY_STATUS_Civil marriage',
 'NAME_FAMILY_STATUS_Married',
 'NAME_FAMILY_STATUS_Separated',
 'NAME_FAMILY_STATUS_Single / not married',
 'NAME_FAMILY_STATUS_Widow',
 'FLAG_DOCUMENT_13_0.0',
 'FLAG_DOCUMENT_13_1.0',
 'FLAG_EMAIL_0.0',
 'FLAG_EMAIL_1.0']

dummy_cols = ['Unnamed: 0']
target_col = 'TARGET'
numeric_cols = list(set(train_data.columns) - set(category_cols + dummy_cols + [target_col]))

def data_massage(data,  category_cols, numeric_cols):
    feat_cols = category_cols + numeric_cols
    fields = []
    for feat_col in feat_cols:
        if feat_col not in category_cols:
            fields.append(1)
        else:
            fields.append(data[feat_col].nunique())
    start_idx = [0] + np.cumsum(fields)[:-1].tolist()

    return feat_cols, start_idx, fields

class FMDataset(Dataset):
    def __init__(self, data, feat_start_idx, fields_size, feat_cols, target_col):
        self.data = data
        self.label = np.asarray(self.data[target_col])

        self.feat_cols = feat_cols
        self.fields = fields_size
        self.start_idx = feat_start_idx

    def __getitem__(self, index):
        row = self.data.loc[index, self.feat_cols]
        idxs = list()
        vals = list()
        # label = self.data.loc[index, self.]
        label = self.label[index]
        for i in range(len(row)):
            if self.fields[i] == 1:
                idxs.append(self.start_idx[i])
                vals.append(row[i])
            else:
                idxs.append(int(self.start_idx[i] + row[i]))
                vals.append(1)

        label = torch.tensor(label, dtype=torch.float32)
        idxs = torch.tensor(idxs, dtype=torch.long)
        vals = torch.tensor(vals, dtype=torch.float32)
        
        return label, idxs, vals

    def __len__(self):
        return len(self.data)

feat_cols, feat_start_idx, fields_size = data_massage(train_data,  category_cols, numeric_cols)

args = {
    'batch_size': 128,
    'gpuid': '0',
    'lr': 0.001,
    'l2_reg': 0.,
    'epochs': 30,
    'num_features': len(feat_cols),
    'embedding_dim': 8,
    'field_size': fields_size,
    'dense_size': [256, 512, 256, 128, 32],# [256, 512, 256, 128, 32]
    '1o_dropout_p': 1, 
    '2o_dropout_p': 1, 
    'deep_dropout_p': 0.2,
    'batch_norm': True,
    'deep_layer_act': 'relu',
    'opt_name': 'adam'
}

# train_data, test_data = train_test_split(data, test_size=0.2)
train_data, test_data = train_data.reset_index(drop=True), test_data.reset_index(drop=True)

train_dataset = FMDataset(train_data, feat_start_idx, fields_size, feat_cols, target_col)
train_loader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True)

test_dataset = FMDataset(test_data, feat_start_idx, fields_size, feat_cols, target_col)
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))

model = DeepFM(args)
model.fit(train_loader)

prediction = model.predict(test_loader)
print('AUC score: ', roc_auc_score(test_data[target_col].values, prediction))
prediction_label = np.where(prediction.detach().numpy() >= 0.5, 1, 0)
print('Recall score: ', recall_score(test_data[target_col].values, prediction_label))

[Epoch 001] - training loss=0.8532
[Epoch 002] - training loss=0.7397
[Epoch 003] - training loss=0.7021
[Epoch 004] - training loss=0.6904
[Epoch 005] - training loss=0.6849
[Epoch 006] - training loss=0.6817
[Epoch 007] - training loss=0.6802
[Epoch 008] - training loss=0.6783
[Epoch 009] - training loss=0.6775
[Epoch 010] - training loss=0.6773
[Epoch 011] - training loss=0.6766
[Epoch 012] - training loss=0.6762
[Epoch 013] - training loss=0.6761
[Epoch 014] - training loss=0.6767
[Epoch 015] - training loss=0.6756
[Epoch 016] - training loss=0.6755
[Epoch 017] - training loss=0.6748
[Epoch 018] - training loss=0.6750
[Epoch 019] - training loss=0.6750
[Epoch 020] - training loss=0.6746
[Epoch 021] - training loss=0.6745
[Epoch 022] - training loss=0.6747
[Epoch 023] - training loss=0.6755
[Epoch 024] - training loss=0.6743
[Epoch 025] - training loss=0.6743
[Epoch 026] - training loss=0.6748
[Epoch 027] - training loss=0.6747
[Epoch 028] - training loss=0.6743
[Epoch 029] - traini