In [1]:
import numpy as np 
import pandas as pd 
import tensorflow as tf 
import os 

from tqdm.autonotebook import tqdm 
import warnings
warnings.filterwarnings("ignore")



In [2]:
from utils import *

# 读取和处理数据

In [3]:
# Load the Criteo sample CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer="../data/criteo/criteo_data.csv")

In [4]:
# Preview the first five rows (five is also the default for head()).
data.head(n=5)

Unnamed: 0,Label,I1,I2,I3,I4,I5,I6,I7,I8,I9,...,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26
0,0,1.0,1,5.0,0.0,1382.0,4.0,15.0,2.0,181.0,...,e5ba7672,f54016b9,21ddcdc9,b1252a9d,07b5194c,,3a171ecb,c5c50484,e8b83407,9727dd16
1,0,2.0,0,44.0,1.0,102.0,8.0,2.0,2.0,4.0,...,07c540c4,b04e4670,21ddcdc9,5840adea,60f6221e,,3a171ecb,43f13e8b,e8b83407,731c3655
2,0,2.0,0,1.0,14.0,767.0,89.0,4.0,2.0,245.0,...,8efede7f,3412118d,,,e587c466,ad3062eb,3a171ecb,3b183c5c,,
3,0,,893,,,4392.0,,0.0,0.0,0.0,...,1e88c74f,74ef3502,,,6b3a5ca6,,3a171ecb,9117a34a,,
4,0,3.0,-1,,0.0,2.0,0.0,3.0,0.0,0.0,...,1e88c74f,26b3c7a7,,,21c9516a,,32c7478e,b34f3128,,


In [5]:
# Split the column names into continuous features (names starting with "I")
# and categorical features (names starting with "C").
con = list(filter(lambda column: column.startswith("I"), data.columns))
cat = list(filter(lambda column: column.startswith("C"), data.columns))

In [6]:
# Create the FieldHandler object from the CSV and the two column groups.
field_handler = FieldHandler(
    train_file_path="../data/criteo/criteo_data.csv",
    continuation_columns=con,
    category_columns=cat,
)

In [7]:
# Obtain the input features and the label values for the model.
features, labels = transformation_data(
    file_path="../data/criteo/criteo_data.csv",
    field_hander=field_handler,  # keyword spelled as the callee expects it
    label="Label",
)

cat:  C1
cat:  C2
cat:  C3
cat:  C4
cat:  C5
cat:  C6
cat:  C7
cat:  C8
cat:  C9
cat:  C10
cat:  C11
cat:  C12
cat:  C13
cat:  C14
cat:  C15
cat:  C16
cat:  C17
cat:  C18
cat:  C19
cat:  C20
cat:  C21
cat:  C22
cat:  C23
cat:  C24
cat:  C25
cat:  C26
con:  I1
con:  I2
con:  I3
con:  I4
con:  I5
con:  I6
con:  I7
con:  I8
con:  I9
con:  I10
con:  I11
con:  I12
con:  I13


# 构造模型

## 定义基础配置

In [8]:
class Config(dict):
    """Settings container for the FM / FFM / DeepFM experiments.

    Most settings are stored as plain instance attributes (``config.lr``).
    Only ``num_epochs``, ``checkpoint_dir`` and ``summary_dir`` are stored as
    dictionary items and must be accessed as ``config["..."]``.

    Args:
        field_handler: Object whose ``feature_nums`` attribute is copied into
            the config.
    """

    def __init__(self, field_handler):
        settings = vars(self)  # the instance attribute storage

        # Model parameters
        settings.update(
            use_deep=False,
            model="fm",  # "fm" or "ffm"
            deep_input_keep_dropout=0.9,
            deep_mid_keep_dropout=0.8,
            deep_output_keep_dropout=0.9,
            line_output_keep_dropout=0.9,
            fm_output_keep_dropout=0.9,
            layers=[20, 20],
        )

        # Training parameters
        settings.update(
            field_nums=39,
            feature_nums=field_handler.feature_nums,
            lr=0.01,
            embedding_size=5,
            use_batch_norm=False,
            seed=2019,
            activation="relu",  # "tanh" and "sigmoid" are the alternatives
            threshold=0.5,
        )

        # Other parameters
        settings.update(
            batch_size=128,
            max_to_keep=5,
            eval=1000,
            checkpoint=1000,
        )

        # Settings kept as dictionary items rather than attributes
        self.update(
            num_epochs=5,
            checkpoint_dir="../model/FM/checkpoint",
            summary_dir="../model/FM/summary",
        )

## 定义基础模型

In [10]:
class Model(BaseModel):
    """TensorFlow 1.x graph for a binary classifier built on FM or FFM.

    Builds the input placeholders, the sub-model selected by
    ``config.model``, a sigmoid cross-entropy loss and an Adam training op.
    ``self.config`` and ``self.global_step_tensor`` are read here but not
    assigned in this class; they are presumably provided by ``BaseModel``
    (from ``utils``) — confirm there.
    """

    def __init__(self, config):
        super().__init__(config)
        self.build_model()
        self.init_saver()

    def build_model(self):
        """Create placeholders, logits, predictions, loss and the train op.

        Raises:
            ValueError: If ``config.model`` is neither ``"fm"`` nor ``"ffm"``.
        """
        # Input placeholders: two [batch, field_nums] inputs and a
        # [batch, 1] label column.
        # NOTE(review): df_i / df_v look like per-field feature indices and
        # feature values — confirm against FM/FFM in utils.
        self.df_i = tf.placeholder(tf.int64, [None, self.config.field_nums], name="df_i")
        self.df_v = tf.placeholder(tf.float32, [None, self.config.field_nums], name="df_v")
        self.labels = tf.placeholder(tf.float32, [None, 1], name="labels")

        if self.config.model == "fm":
            model = FM(self.config, self.df_i, self.df_v)
        elif self.config.model == "ffm":
            model = FFM(self.config, self.df_i, self.df_v)
        else:
            # Without this branch an unknown value only surfaced later as an
            # UnboundLocalError on `model`.
            raise ValueError(
                "Unsupported config.model {!r}; expected 'fm' or 'ffm'".format(
                    self.config.model))

        with tf.name_scope("output"):
            self.logits = model.logits
            # Predicted probabilities
            self.predictions = tf.cast(tf.sigmoid(self.logits), tf.float32, name="predictions")

        with tf.name_scope("loss"):
            losses = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.logits,
                labels=tf.cast(tf.reshape(self.labels, [-1, 1]), dtype=tf.float32))

            self.loss = tf.reduce_mean(losses)

        # Make the train op depend on everything registered in UPDATE_OPS so
        # those ops run with every optimisation step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_step = tf.train.AdamOptimizer(self.config.lr).minimize(
                self.loss, global_step=self.global_step_tensor)

    def init_saver(self):
        """Create the checkpoint saver, keeping ``config.max_to_keep`` files."""
        self.saver = tf.train.Saver(max_to_keep=self.config.max_to_keep)

## 定义训练类

In [11]:
class Trainer(BaseTrain):
    """Training loop with periodic validation and checkpointing.

    ``self.sess``, ``self.model``, ``self.config`` and ``self.logger`` are
    read here but not assigned in this class; they are presumably set by
    ``BaseTrain.__init__`` (from ``utils``) — confirm there.
    """

    # Keep-probabilities written onto the model config before each training
    # step; evaluation writes 1.0 for every one of these names instead.
    # NOTE(review): these constants overwrite whatever the Config object was
    # given, so a customised keep-probability is replaced on the first step.
    _TRAIN_KEEP_DROPOUT = (
        ("deep_input_keep_dropout", 0.9),
        ("deep_mid_keep_dropout", 0.8),
        ("deep_output_keep_dropout", 0.9),
        ("line_output_keep_dropout", 0.9),
        ("fm_output_keep_dropout", 0.9),
    )

    def __init__(self, sess, model, data, config, logger):
        """Store the training and validation data sets.

        Args:
            sess, model, config, logger: Forwarded unchanged to ``BaseTrain``.
            data: Sequence where ``data[0]`` is the training set and
                ``data[1]`` is the validation set.
        """
        super().__init__(sess, model, data, config, logger)
        self.train = data[0]
        self.eval = data[1]

    def _set_keep_dropout(self, training):
        """Write training or evaluation keep-probabilities to the model config.

        NOTE(review): this only changes attributes on the config object. If
        the graph read these values when it was built (not visible from this
        file), changing them here has no effect on the running graph — verify
        in the FM/FFM implementation.
        """
        for name, train_keep_prob in self._TRAIN_KEEP_DROPOUT:
            setattr(self.model.config, name, train_keep_prob if training else 1.0)

    def _run_validation(self, step):
        """Evaluate on the full validation set, then print and log the result.

        Args:
            step: Global step used as the x-coordinate of the logged summary.
        """
        eval_losses = []
        eval_pred = []
        eval_true = []
        for batch_eval in self.eval.iter_all(self.config.batch_size):
            batch_loss, predictions = self.eval_step(batch_eval[0], batch_eval[1], batch_eval[2])
            eval_losses.append(batch_loss)
            eval_pred.extend(predictions)
            eval_true.extend(batch_eval[2])

        eval_metrics = Metric(np.array(eval_pred), np.array(eval_true), self.config).get_metrics()
        acc_mean = np.round(eval_metrics["accuracy"], 5)
        gini_mean = np.round(eval_metrics["gini_norm"], 5)
        loss_mean = np.round(np.mean(eval_losses), 5)
        print("Eval | Loss: {} | Accuracy: {} | Gini: {}".format(
            loss_mean, acc_mean, gini_mean))

        summaries_dict = {"loss": np.array(loss_mean),
                          "accuracy": np.array(acc_mean),
                          "gini": np.array(gini_mean)}
        self.logger.summarize(step, summarizer="test", scope="test_summary",
                              summaries_dict=summaries_dict)

    def train_epoch(self):
        """Run one epoch of training steps.

        Every step is logged to the "train" summary. Every ``config.eval``
        steps the validation set is evaluated, and every
        ``config.checkpoint`` steps the model is saved.
        """
        # Number of full batches in the training set; a trailing partial
        # batch is not counted.
        num_iter_per_epoch = self.train.length // self.config.batch_size

        for _ in tqdm(range(num_iter_per_epoch)):
            loss, metrics, step = self.train_step()
            train_acc, train_f_score = metrics["accuracy"], metrics["f_score"]

            # Record the training loss and metrics for this step.
            summaries_dict = {"loss": loss,
                              "acc": np.array(train_acc),
                              "f_score": np.array(train_f_score)}
            self.logger.summarize(step, summarizer="train", scope="train_summary",
                                  summaries_dict=summaries_dict)

            if step % self.config.eval == 0:
                print("Train - Step: {} | Loss: {} | Acc: {} | F1_Score: {}".format(
                    step, loss, train_acc, train_f_score))
                self._run_validation(step)
            if step % self.config.checkpoint == 0:
                self.model.save(self.sess)

    def train_step(self):
        """Run one optimisation step on a single training batch.

        Returns:
            Tuple ``(loss, metrics, step)``: the batch loss, the result of
            ``Metric(...).get_metrics()`` for the batch, and the global step
            fetched in the same ``sess.run`` call.
        """
        # NOTE(review): a new generator is created on every call and only its
        # first batch is consumed. Whether successive calls see different
        # batches depends on DataGenerator.next_batch — confirm it shuffles
        # or samples.
        batch_i, batch_v, batch_y = next(self.train.next_batch(self.config.batch_size))

        self._set_keep_dropout(training=True)

        feed_dict = {self.model.df_i: batch_i, self.model.df_v: batch_v,
                     self.model.labels: batch_y}
        _, loss, predictions, step = self.sess.run([self.model.train_step,
                                                    self.model.loss,
                                                    self.model.predictions,
                                                    self.model.global_step_tensor],
                                                   feed_dict=feed_dict)
        metrics = Metric(predictions, batch_y, self.config).get_metrics()

        return loss, metrics, step

    def eval_step(self, batch_i, batch_v, batch_y):
        """Compute loss and predictions for one validation batch.

        No training op is run, so the model weights are not updated.

        Returns:
            Tuple ``(loss, predictions)`` for the batch.
        """
        self._set_keep_dropout(training=False)

        feed_dict = {self.model.df_i: batch_i, self.model.df_v: batch_v,
                     self.model.labels: batch_y}
        loss, predictions = self.sess.run([self.model.loss, self.model.predictions],
                                          feed_dict=feed_dict)
        return loss, predictions

# 训练过程

In [12]:
def create_dirs(dirs):
    """Create every directory in ``dirs`` (with parents) unless it exists.

    Args:
        dirs: Iterable of directory paths.

    Returns:
        0 once every requested directory exists.

    Raises:
        SystemExit: With code -1 when a directory cannot be created, for
            example because the path exists as a regular file. The underlying
            error is printed first.
    """
    try:
        for dir_ in dirs:
            # exist_ok avoids the race between checking for the directory and
            # creating it, and still fails when the path exists but is not a
            # directory.
            os.makedirs(dir_, exist_ok=True)
        return 0
    except OSError as e:
        print("Creating directories error: {}".format(e))
        # Raise SystemExit directly instead of calling exit(): that helper is
        # injected for interactive use and is not meant for program code.
        raise SystemExit(-1)

## 使用FM

In [13]:
# Split the dataset by position: the first 80% of the rows are used for
# training and the remaining rows for validation.
train_idx = slice(0, int(len(labels) * 0.8))
val_idx = slice(train_idx.stop, len(labels))

train_df_i = features["df_i"][train_idx]
train_df_v = features["df_v"][train_idx]
train_df_y = labels[train_idx]

val_df_i = features["df_i"][val_idx]
val_df_v = features["df_v"][val_idx]
val_df_y = labels[val_idx]

In [14]:
# Wrap each split in a DataGenerator: labels first, then the df_i / df_v inputs.
train = DataGenerator(
    train_df_y,
    df_i=train_df_i,
    df_v=train_df_v,
)
val = DataGenerator(
    val_df_y,
    df_i=val_df_i,
    df_v=val_df_v,
)

In [22]:
def main():
    """Train the plain FM model for 4 epochs using the default ``Config``.

    Summaries and checkpoints go to the default ``../model/FM`` directories.
    """
    config = Config(field_handler)
    config['num_epochs'] = 4

    create_dirs([config['summary_dir'], config['checkpoint_dir']])
    tf.reset_default_graph()
    sess = tf.Session()
    try:
        model = Model(config)

        pack_data = [train, val]
        logger = Logger(sess, config)
        trainer = Trainer(sess, model, pack_data, config, logger)
        trainer.train_all()
    finally:
        # Release the session's resources even when training fails; the
        # session was previously never closed.
        sess.close()

In [23]:
# Run the FM training entry point defined in the previous cell.
main()


当前正处于第1次迭代


HBox(children=(IntProgress(value=0, max=6250), HTML(value='')))

Train - Step: 1000 | Loss: 2.843748092651367 | Acc: 0.71875 | F1_Score: 0.4
Eval | Loss: 3.6068599224090576 | Accuracy: 0.67274 | Gini: 0.21771
Saving model...
Model saved
Train - Step: 2000 | Loss: 2.1741790771484375 | Acc: 0.71094 | F1_Score: 0.4127
Eval | Loss: 2.8014800548553467 | Accuracy: 0.68362 | Gini: 0.24614
Saving model...
Model saved
Train - Step: 3000 | Loss: 1.608072280883789 | Acc: 0.76562 | F1_Score: 0.55882
Eval | Loss: 2.516469955444336 | Accuracy: 0.68789 | Gini: 0.25821
Saving model...
Model saved
Train - Step: 4000 | Loss: 1.2308402061462402 | Acc: 0.77344 | F1_Score: 0.45283
Eval | Loss: 2.3469998836517334 | Accuracy: 0.68445 | Gini: 0.25876
Saving model...
Model saved
Train - Step: 5000 | Loss: 2.0178942680358887 | Acc: 0.65625 | F1_Score: 0.38889
Eval | Loss: 2.3405799865722656 | Accuracy: 0.68592 | Gini: 0.25957
Saving model...
Model saved
Train - Step: 6000 | Loss: 1.3102518320083618 | Acc: 0.77344 | F1_Score: 0.56716
Eval | Loss: 2.291599988937378 | Accuracy:

HBox(children=(IntProgress(value=0, max=6250), HTML(value='')))

Train - Step: 7000 | Loss: 1.4917993545532227 | Acc: 0.77344 | F1_Score: 0.50847
Eval | Loss: 2.2232799530029297 | Accuracy: 0.68946 | Gini: 0.25929
Saving model...
Model saved
Train - Step: 8000 | Loss: 1.2748816013336182 | Acc: 0.77344 | F1_Score: 0.59155
Eval | Loss: 2.22514009475708 | Accuracy: 0.69567 | Gini: 0.25955
Saving model...
Model saved
Train - Step: 9000 | Loss: 1.349900484085083 | Acc: 0.74219 | F1_Score: 0.44068
Eval | Loss: 2.1889400482177734 | Accuracy: 0.6809 | Gini: 0.26281
Saving model...
Model saved
Train - Step: 10000 | Loss: 0.588290274143219 | Acc: 0.82812 | F1_Score: 0.60714
Eval | Loss: 2.1901400089263916 | Accuracy: 0.68746 | Gini: 0.26089
Saving model...
Model saved
Train - Step: 11000 | Loss: 1.0311897993087769 | Acc: 0.83594 | F1_Score: 0.63158
Eval | Loss: 2.165800094604492 | Accuracy: 0.69102 | Gini: 0.26242
Saving model...
Model saved
Train - Step: 12000 | Loss: 0.7797985076904297 | Acc: 0.84375 | F1_Score: 0.67742
Eval | Loss: 2.1755499839782715 | Acc

HBox(children=(IntProgress(value=0, max=6250), HTML(value='')))

Train - Step: 13000 | Loss: 0.9808182716369629 | Acc: 0.80469 | F1_Score: 0.59016
Eval | Loss: 2.1798601150512695 | Accuracy: 0.69346 | Gini: 0.26678
Saving model...
Model saved
Train - Step: 14000 | Loss: 0.6256831884384155 | Acc: 0.82031 | F1_Score: 0.64615
Eval | Loss: 2.1508700847625732 | Accuracy: 0.69184 | Gini: 0.26643
Saving model...
Model saved
Train - Step: 15000 | Loss: 1.1319273710250854 | Acc: 0.75 | F1_Score: 0.51515
Eval | Loss: 2.1723899841308594 | Accuracy: 0.68795 | Gini: 0.25702
Saving model...
Model saved
Train - Step: 16000 | Loss: 1.1819818019866943 | Acc: 0.75 | F1_Score: 0.40741
Eval | Loss: 2.1847100257873535 | Accuracy: 0.68586 | Gini: 0.25853
Saving model...
Model saved
Train - Step: 17000 | Loss: 0.8569005727767944 | Acc: 0.79688 | F1_Score: 0.51852
Eval | Loss: 2.205310106277466 | Accuracy: 0.69638 | Gini: 0.26599
Saving model...
Model saved
Train - Step: 18000 | Loss: 1.0183281898498535 | Acc: 0.75781 | F1_Score: 0.53731
Eval | Loss: 2.187380075454712 | Ac

HBox(children=(IntProgress(value=0, max=6250), HTML(value='')))

Train - Step: 19000 | Loss: 0.8229737281799316 | Acc: 0.75781 | F1_Score: 0.56338
Eval | Loss: 2.252729892730713 | Accuracy: 0.68994 | Gini: 0.25784
Saving model...
Model saved
Train - Step: 20000 | Loss: 1.1135423183441162 | Acc: 0.75 | F1_Score: 0.48387
Eval | Loss: 2.222640037536621 | Accuracy: 0.68914 | Gini: 0.25884
Saving model...
Model saved
Train - Step: 21000 | Loss: 0.9212350249290466 | Acc: 0.84375 | F1_Score: 0.61538
Eval | Loss: 2.2360999584198 | Accuracy: 0.68886 | Gini: 0.25451
Saving model...
Model saved
Train - Step: 22000 | Loss: 0.4012300670146942 | Acc: 0.875 | F1_Score: 0.76471
Eval | Loss: 2.26882004737854 | Accuracy: 0.68136 | Gini: 0.25109
Saving model...
Model saved
Train - Step: 23000 | Loss: 0.645366370677948 | Acc: 0.84375 | F1_Score: 0.64286
Eval | Loss: 2.2310800552368164 | Accuracy: 0.69132 | Gini: 0.25892
Saving model...
Model saved
Train - Step: 24000 | Loss: 0.36616775393486023 | Acc: 0.9375 | F1_Score: 0.88235
Eval | Loss: 2.2476799488067627 | Accurac

HBox(children=(IntProgress(value=0, max=6250), HTML(value='')))

Train - Step: 26000 | Loss: 0.6189393997192383 | Acc: 0.80469 | F1_Score: 0.63768
Eval | Loss: 2.263309955596924 | Accuracy: 0.69127 | Gini: 0.25439
Saving model...
Model saved
Train - Step: 27000 | Loss: 0.7212117910385132 | Acc: 0.89062 | F1_Score: 0.79412
Eval | Loss: 2.2644500732421875 | Accuracy: 0.68914 | Gini: 0.25256
Saving model...
Model saved
Train - Step: 28000 | Loss: 0.7462536096572876 | Acc: 0.84375 | F1_Score: 0.69697
Eval | Loss: 2.294990062713623 | Accuracy: 0.69116 | Gini: 0.25378
Saving model...
Model saved
Train - Step: 29000 | Loss: 0.5516830086708069 | Acc: 0.89062 | F1_Score: 0.75
Eval | Loss: 2.3077900409698486 | Accuracy: 0.6961 | Gini: 0.25338
Saving model...
Model saved
Train - Step: 30000 | Loss: 0.4610145688056946 | Acc: 0.88281 | F1_Score: 0.78261
Eval | Loss: 2.3410799503326416 | Accuracy: 0.68512 | Gini: 0.24818
Saving model...
Model saved
Train - Step: 31000 | Loss: 0.5864483714103699 | Acc: 0.79688 | F1_Score: 0.59375
Eval | Loss: 2.3536999225616455 | 

## 使用DeepFM

In [24]:
def main():
    """Train DeepFM: the "fm" model with ``use_deep`` enabled, for 4 epochs.

    Summaries and checkpoints go to the ``../model/DeepFM`` directories.
    """
    config = Config(field_handler)
    config['summary_dir'] = "../model/DeepFM/summary"
    config["checkpoint_dir"] = "../model/DeepFM/checkpoint"
    config.model = "fm"
    config.use_deep = True
    config["num_epochs"] = 4

    create_dirs([config['summary_dir'], config['checkpoint_dir']])
    tf.reset_default_graph()
    sess = tf.Session()
    try:
        model = Model(config)

        pack_data = [train, val]
        logger = Logger(sess, config)
        trainer = Trainer(sess, model, pack_data, config, logger)
        trainer.train_all()
    finally:
        # Release the session's resources even when training fails; the
        # session was previously never closed.
        sess.close()

In [25]:
# Run the DeepFM training entry point defined in the previous cell.
main()

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use keras.layers.dense instead.

当前正处于第1次迭代


HBox(children=(IntProgress(value=0, max=6250), HTML(value='')))

Train - Step: 1000 | Loss: 0.581747829914093 | Acc: 0.73438 | F1_Score: 0.26087
Eval | Loss: 0.49772000312805176 | Accuracy: 0.76687 | Gini: 0.46901
Saving model...
Model saved
Train - Step: 2000 | Loss: 0.48941609263420105 | Acc: 0.8125 | F1_Score: 0.53846
Eval | Loss: 0.493910014629364 | Accuracy: 0.76818 | Gini: 0.48269
Saving model...
Model saved
Train - Step: 3000 | Loss: 0.40516722202301025 | Acc: 0.82031 | F1_Score: 0.53061
Eval | Loss: 0.5389000177383423 | Accuracy: 0.75066 | Gini: 0.41398
Saving model...
Model saved
Train - Step: 4000 | Loss: 0.3600786328315735 | Acc: 0.84375 | F1_Score: 0.6
Eval | Loss: 0.49531999230384827 | Accuracy: 0.76842 | Gini: 0.49019
Saving model...
Model saved
Train - Step: 5000 | Loss: 0.4578368365764618 | Acc: 0.79688 | F1_Score: 0.45833
Eval | Loss: 0.5147299766540527 | Accuracy: 0.76698 | Gini: 0.48147
Saving model...
Model saved
Train - Step: 6000 | Loss: 0.4350472092628479 | Acc: 0.79688 | F1_Score: 0.48
Eval | Loss: 0.5027700066566467 | Accura

HBox(children=(IntProgress(value=0, max=6250), HTML(value='')))

Train - Step: 7000 | Loss: 0.46058157086372375 | Acc: 0.82031 | F1_Score: 0.59649
Eval | Loss: 0.5013399720191956 | Accuracy: 0.76744 | Gini: 0.48816
Saving model...
Model saved
Train - Step: 8000 | Loss: 0.3820863962173462 | Acc: 0.83594 | F1_Score: 0.55319
Eval | Loss: 0.5048999786376953 | Accuracy: 0.76422 | Gini: 0.47266
Saving model...
Model saved
Train - Step: 9000 | Loss: 0.4069592356681824 | Acc: 0.80469 | F1_Score: 0.5614
Eval | Loss: 0.5099300146102905 | Accuracy: 0.7629 | Gini: 0.45977
Saving model...
Model saved
Train - Step: 10000 | Loss: 0.43662944436073303 | Acc: 0.82031 | F1_Score: 0.56604
Eval | Loss: 0.5204799771308899 | Accuracy: 0.76193 | Gini: 0.46175
Saving model...
Model saved
Train - Step: 11000 | Loss: 0.42212411761283875 | Acc: 0.8125 | F1_Score: 0.6129
Eval | Loss: 0.5224199891090393 | Accuracy: 0.76186 | Gini: 0.46046
Saving model...
Model saved
Train - Step: 12000 | Loss: 0.28619515895843506 | Acc: 0.91406 | F1_Score: 0.76596
Eval | Loss: 0.5160899758338928

HBox(children=(IntProgress(value=0, max=6250), HTML(value='')))

Train - Step: 13000 | Loss: 0.3746969401836395 | Acc: 0.83594 | F1_Score: 0.64407
Eval | Loss: 0.5327200293540955 | Accuracy: 0.75678 | Gini: 0.45474
Saving model...
Model saved
Train - Step: 14000 | Loss: 0.3635534644126892 | Acc: 0.82031 | F1_Score: 0.53061
Eval | Loss: 0.518589973449707 | Accuracy: 0.7596 | Gini: 0.45193
Saving model...
Model saved
Train - Step: 15000 | Loss: 0.34279581904411316 | Acc: 0.83594 | F1_Score: 0.53333
Eval | Loss: 0.5327699780464172 | Accuracy: 0.76061 | Gini: 0.45727
Saving model...
Model saved
Train - Step: 16000 | Loss: 0.3331886827945709 | Acc: 0.85156 | F1_Score: 0.59574
Eval | Loss: 0.5227900147438049 | Accuracy: 0.75966 | Gini: 0.45849
Saving model...
Model saved
Train - Step: 17000 | Loss: 0.34837234020233154 | Acc: 0.84375 | F1_Score: 0.58333
Eval | Loss: 0.5496000051498413 | Accuracy: 0.75528 | Gini: 0.43057
Saving model...
Model saved
Train - Step: 18000 | Loss: 0.38770627975463867 | Acc: 0.82812 | F1_Score: 0.59259
Eval | Loss: 0.554170012474

HBox(children=(IntProgress(value=0, max=6250), HTML(value='')))

Train - Step: 19000 | Loss: 0.41824907064437866 | Acc: 0.79688 | F1_Score: 0.63889
Eval | Loss: 0.5507799983024597 | Accuracy: 0.74864 | Gini: 0.43594
Saving model...
Model saved
Train - Step: 20000 | Loss: 0.3997281789779663 | Acc: 0.82031 | F1_Score: 0.58182
Eval | Loss: 0.6031799912452698 | Accuracy: 0.73928 | Gini: 0.43306
Saving model...
Model saved
Train - Step: 21000 | Loss: 0.36520686745643616 | Acc: 0.83594 | F1_Score: 0.67692
Eval | Loss: 0.5749899744987488 | Accuracy: 0.74745 | Gini: 0.43242
Saving model...
Model saved
Train - Step: 22000 | Loss: 0.38950446248054504 | Acc: 0.82812 | F1_Score: 0.7027
Eval | Loss: 0.5671200156211853 | Accuracy: 0.74701 | Gini: 0.43944
Saving model...
Model saved
Train - Step: 23000 | Loss: 0.41431522369384766 | Acc: 0.8125 | F1_Score: 0.6129
Eval | Loss: 0.6001600027084351 | Accuracy: 0.73604 | Gini: 0.41352
Saving model...
Model saved
Train - Step: 24000 | Loss: 0.2607157230377197 | Acc: 0.89062 | F1_Score: 0.76667
Eval | Loss: 0.586619973182

HBox(children=(IntProgress(value=0, max=6250), HTML(value='')))

Train - Step: 26000 | Loss: 0.30157357454299927 | Acc: 0.82031 | F1_Score: 0.59649
Eval | Loss: 0.5966799855232239 | Accuracy: 0.74396 | Gini: 0.4023
Saving model...
Model saved
Train - Step: 27000 | Loss: 0.409065306186676 | Acc: 0.82812 | F1_Score: 0.60714
Eval | Loss: 0.582889974117279 | Accuracy: 0.74978 | Gini: 0.40913
Saving model...
Model saved
Train - Step: 28000 | Loss: 0.2873910963535309 | Acc: 0.875 | F1_Score: 0.66667
Eval | Loss: 0.54066002368927 | Accuracy: 0.75663 | Gini: 0.43325
Saving model...
Model saved
Train - Step: 29000 | Loss: 0.31948184967041016 | Acc: 0.85938 | F1_Score: 0.71875
Eval | Loss: 0.5696099996566772 | Accuracy: 0.7462 | Gini: 0.41049
Saving model...
Model saved
Train - Step: 30000 | Loss: 0.29437991976737976 | Acc: 0.83594 | F1_Score: 0.66667
Eval | Loss: 0.6128299832344055 | Accuracy: 0.73604 | Gini: 0.40549
Saving model...
Model saved
Train - Step: 31000 | Loss: 0.40698811411857605 | Acc: 0.83594 | F1_Score: 0.53333
Eval | Loss: 0.6116799712181091 

## 使用FFM

In [15]:
def main():
    """Train the FFM model for a single epoch.

    Summaries and checkpoints go to the ``../model/FFM`` directories.
    """
    config = Config(field_handler)
    config['summary_dir'] = "../model/FFM/summary"
    config["checkpoint_dir"] = "../model/FFM/checkpoint"
    config.model = "ffm"

    config["num_epochs"] = 1

    create_dirs([config['summary_dir'], config['checkpoint_dir']])
    tf.reset_default_graph()
    sess = tf.Session()
    try:
        model = Model(config)

        pack_data = [train, val]
        logger = Logger(sess, config)
        trainer = Trainer(sess, model, pack_data, config, logger)
        trainer.train_all()
    finally:
        # Release the session's resources even when training fails or is
        # interrupted; the session was previously never closed.
        sess.close()

In [None]:
# Run the FFM training entry point defined in the previous cell.
main()

Instructions for updating:
Colocations handled automatically by placer.
正在使用FFM进行预测...
当前将第0个特征和第1个特征进行组合...
当前将第0个特征和第2个特征进行组合...
当前将第0个特征和第3个特征进行组合...
当前将第0个特征和第4个特征进行组合...
当前将第0个特征和第5个特征进行组合...
当前将第0个特征和第6个特征进行组合...
当前将第0个特征和第7个特征进行组合...
当前将第0个特征和第8个特征进行组合...
当前将第0个特征和第9个特征进行组合...
当前将第0个特征和第10个特征进行组合...
当前将第0个特征和第11个特征进行组合...
当前将第0个特征和第12个特征进行组合...
当前将第0个特征和第13个特征进行组合...
当前将第0个特征和第14个特征进行组合...
当前将第0个特征和第15个特征进行组合...
当前将第0个特征和第16个特征进行组合...
当前将第0个特征和第17个特征进行组合...
当前将第0个特征和第18个特征进行组合...
当前将第0个特征和第19个特征进行组合...
当前将第0个特征和第20个特征进行组合...
当前将第0个特征和第21个特征进行组合...
当前将第0个特征和第22个特征进行组合...
当前将第0个特征和第23个特征进行组合...
当前将第0个特征和第24个特征进行组合...
当前将第0个特征和第25个特征进行组合...
当前将第0个特征和第26个特征进行组合...
当前将第0个特征和第27个特征进行组合...
当前将第0个特征和第28个特征进行组合...
当前将第0个特征和第29个特征进行组合...
当前将第0个特征和第30个特征进行组合...
当前将第0个特征和第31个特征进行组合...
当前将第0个特征和第32个特征进行组合...
当前将第0个特征和第33个特征进行组合...
当前将第0个特征和第34个特征进行组合...
当前将第0个特征和第35个特征进行组合...
当前将第0个特征和第36个特征进行组合...
当前将第0个特征和第37个特征进行组合...
当前将第0个特征和第38个特征进行组合...
当前将第1个特征和第2个特征进行组合...
当前将第1个特征和第3个特征进行组合...
当前将第

当前将第10个特征和第30个特征进行组合...
当前将第10个特征和第31个特征进行组合...
当前将第10个特征和第32个特征进行组合...
当前将第10个特征和第33个特征进行组合...
当前将第10个特征和第34个特征进行组合...
当前将第10个特征和第35个特征进行组合...
当前将第10个特征和第36个特征进行组合...
当前将第10个特征和第37个特征进行组合...
当前将第10个特征和第38个特征进行组合...
当前将第11个特征和第12个特征进行组合...
当前将第11个特征和第13个特征进行组合...
当前将第11个特征和第14个特征进行组合...
当前将第11个特征和第15个特征进行组合...
当前将第11个特征和第16个特征进行组合...
当前将第11个特征和第17个特征进行组合...
当前将第11个特征和第18个特征进行组合...
当前将第11个特征和第19个特征进行组合...
当前将第11个特征和第20个特征进行组合...
当前将第11个特征和第21个特征进行组合...
当前将第11个特征和第22个特征进行组合...
当前将第11个特征和第23个特征进行组合...
当前将第11个特征和第24个特征进行组合...
当前将第11个特征和第25个特征进行组合...
当前将第11个特征和第26个特征进行组合...
当前将第11个特征和第27个特征进行组合...
当前将第11个特征和第28个特征进行组合...
当前将第11个特征和第29个特征进行组合...
当前将第11个特征和第30个特征进行组合...
当前将第11个特征和第31个特征进行组合...
当前将第11个特征和第32个特征进行组合...
当前将第11个特征和第33个特征进行组合...
当前将第11个特征和第34个特征进行组合...
当前将第11个特征和第35个特征进行组合...
当前将第11个特征和第36个特征进行组合...
当前将第11个特征和第37个特征进行组合...
当前将第11个特征和第38个特征进行组合...
当前将第12个特征和第13个特征进行组合...
当前将第12个特征和第14个特征进行组合...
当前将第12个特征和第15个特征进行组合...
当前将第12个特征和第16个特征进行组合...
当前将第12个特征和第17个特征进行组合...
当前将第12个特征和第18个特征

当前将第29个特征和第36个特征进行组合...
当前将第29个特征和第37个特征进行组合...
当前将第29个特征和第38个特征进行组合...
当前将第30个特征和第31个特征进行组合...
当前将第30个特征和第32个特征进行组合...
当前将第30个特征和第33个特征进行组合...
当前将第30个特征和第34个特征进行组合...
当前将第30个特征和第35个特征进行组合...
当前将第30个特征和第36个特征进行组合...
当前将第30个特征和第37个特征进行组合...
当前将第30个特征和第38个特征进行组合...
当前将第31个特征和第32个特征进行组合...
当前将第31个特征和第33个特征进行组合...
当前将第31个特征和第34个特征进行组合...
当前将第31个特征和第35个特征进行组合...
当前将第31个特征和第36个特征进行组合...
当前将第31个特征和第37个特征进行组合...
当前将第31个特征和第38个特征进行组合...
当前将第32个特征和第33个特征进行组合...
当前将第32个特征和第34个特征进行组合...
当前将第32个特征和第35个特征进行组合...
当前将第32个特征和第36个特征进行组合...
当前将第32个特征和第37个特征进行组合...
当前将第32个特征和第38个特征进行组合...
当前将第33个特征和第34个特征进行组合...
当前将第33个特征和第35个特征进行组合...
当前将第33个特征和第36个特征进行组合...
当前将第33个特征和第37个特征进行组合...
当前将第33个特征和第38个特征进行组合...
当前将第34个特征和第35个特征进行组合...
当前将第34个特征和第36个特征进行组合...
当前将第34个特征和第37个特征进行组合...
当前将第34个特征和第38个特征进行组合...
当前将第35个特征和第36个特征进行组合...
当前将第35个特征和第37个特征进行组合...
当前将第35个特征和第38个特征进行组合...
当前将第36个特征和第37个特征进行组合...
当前将第36个特征和第38个特征进行组合...
当前将第37个特征和第38个特征进行组合...
特征组合完成，得到FFM结果...
得到预测结果...
Instructions for updating:
Use tf.ca

HBox(children=(IntProgress(value=0, max=6250), HTML(value='')))

正在进行第一步训练...


FFM 的训练速度太慢了（39 个特征域需要两两组合，共 741 组），跑不下去，因此这里没有 FFM 的训练结果。