In [None]:
import sys
sys.path.append('../')
import os
import logging
from fuxictr import datasets
from datetime import datetime
from fuxictr.utils import load_config, set_logger, print_to_json
from fuxictr.features import FeatureMap
from fuxictr.pytorch.torch_utils import seed_everything
from fuxictr.pytorch.dataloaders import H5DataLoader
from model_zoo import DeepFM
import pickle
# ^^suppress output

# Load params from config files
config_dir = './config/full_h5_config'
experiment_id = 'full_h5_initial'  # corresponds to h5 input `data/full_h5`
params = load_config(config_dir, experiment_id)

# Set up logger and random seed
set_logger(params)
logging.info("Params: " + print_to_json(params))
seed_everything(seed=params['seed'])

# Load feature_map from json
data_dir = os.path.join(params['data_root'], params['dataset_id'])
feature_map_json = os.path.join(data_dir, "feature_map.json")
feature_map = FeatureMap(params['dataset_id'], data_dir)
# Called for its side effect on `feature_map`. No `_ =` is needed to silence
# output: this is not the cell's last expression, and rebinding `_` would
# interfere with IPython's own last-output variable.
feature_map.load(feature_map_json, params)
# logging.info returns None, so it renders no cell output even as the last line.
logging.info("Feature specs: " + print_to_json(feature_map.features))


In [2]:
# Build the train/validation batch iterators from the h5 files named in params.
# The loader settings are gathered first so they can be read at a glance.
loader_options = {
    'stage': 'train',
    'train_data': params['train_data'],
    'valid_data': params['valid_data'],
    'batch_size': params['batch_size'],
    'shuffle': params['shuffle'],
}
train_gen, valid_gen = H5DataLoader(feature_map, **loader_options).make_iterator()


2023-05-15 16:39:24,486 P71318 INFO Loading data...
2023-05-15 16:39:24,486 P71318 INFO Loading data from h5: ../data/full_h5/smadex_train.h5
2023-05-15 16:42:50,151 P71318 INFO Train samples: total/307843, blocks/1
2023-05-15 16:42:50,169 P71318 INFO Loading data from h5: ../data/full_h5/smadex_val.h5
2023-05-15 16:45:41,474 P71318 INFO Validation samples: total/241205, blocks/1
2023-05-15 16:45:41,491 P71318 INFO Loading train and validation data done.


In [3]:
# Build the DeepFM model from the loaded feature map and config params.
# NOTE(review): gpu=1 is hard-coded; the recorded run printed
# "gpu NOT available", so confirm which device index is actually intended.
model = DeepFM(feature_map, gpu=1, **params)
num_epochs = params['epochs']

# Fit on the training iterator and record the wall-clock duration as a
# timedelta in `train_time` (consumed later when results are dumped).
start_time = datetime.now()
model.fit(train_gen, validation_data=valid_gen, epochs=num_epochs)
train_time = datetime.now() - start_time



gpu NOT available. False 1


2023-05-15 16:45:42,565 P71318 INFO Start training: 2406 batches/epoch
2023-05-15 16:45:42,565 P71318 INFO ************ Epoch=1 start ************


100%|█████████▉| 2405/2406 [18:44<00:00,  2.20it/s]

2023-05-15 17:04:28,035 P71318 INFO Train loss: 1.426142
2023-05-15 17:04:28,037 P71318 INFO Evaluation @epoch 1 - batch 2406: 


100%|██████████| 1885/1885 [01:51<00:00, 16.84it/s]

2023-05-15 17:06:20,088 P71318 INFO [Metrics] AUC: 0.839460
2023-05-15 17:06:20,089 P71318 INFO Save best model: monitor(max)=0.839460



100%|██████████| 2406/2406 [20:37<00:00,  1.94it/s]

2023-05-15 17:06:20,435 P71318 INFO ************ Epoch=1 end ************
2023-05-15 17:06:20,435 P71318 INFO Training finished.
2023-05-15 17:06:20,436 P71318 INFO Load best model: /Users/home/PycharmProjects/CTR_repos/FuxiCTR/demo/checkpoints/full_h5/full_h5_initial.model





In [4]:

# Time one full prediction pass over the validation iterator; `pred_time`
# is a timedelta.
start_time = datetime.now()
y_pred = model.predict(valid_gen)
pred_time = datetime.now() - start_time

# Read the raw model config text; it is passed to dump_results further down.
# The encoding is given explicitly so the read does not depend on the
# platform's locale default (which is not UTF-8 on every system).
model_config_path = os.path.join(config_dir, 'model_config.yaml')
with open(model_config_path, 'r', encoding='utf-8') as cfg_file:
    model_config = cfg_file.read()

100%|██████████| 1885/1885 [01:48<00:00, 17.34it/s]


In [6]:
import numpy as np
from dump_results import dump_results

# dump_results receives both durations as strings, so convert the timedeltas.
pred_time, train_time = str(pred_time), str(train_time)

# Collect the ground-truth labels from every validation batch into a single
# flat float64 vector.
# NOTE(review): y_pred was produced by an earlier pass over valid_gen; this
# assumes the iterator yields batches in the same order on each pass — confirm
# the validation iterator is not shuffled.
y_true = np.array(
    [
        label
        for batch_data in valid_gen
        for label in model.get_labels(batch_data).data.cpu().numpy().reshape(-1)
    ],
    np.float64,
)

dump_results(y_true, y_pred, model_config, experiment_id, pred_time, train_time, model, model_type='DeepFM')
