In [1]:
import os
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from scipy.stats import entropy, kurtosis
import xgboost as xgb
import seaborn as sns
from xgboost import plot_importance
import warnings
import matplotlib.pyplot as plt
import pandas as pd
from math import *
import numpy as np
import tensorflow as tf
import time

pd.set_option('display.max_columns', None)
from IPython.display import display

warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
pathf = os.path.join("..", "data", "particles")
model_path = os.path.join(pathf, "model")
log_path = os.path.join(pathf, "model")
trainpd = pd.read_csv(os.path.join(pathf, "train.csv"))
print(trainpd.head(1))
trainshape = trainpd.shape
print(trainshape)
eventpd = pd.read_csv(os.path.join(pathf, "event.csv"))
print(eventpd.head(1))
print(eventpd.shape)
testpd = pd.read_csv(os.path.join(pathf, "test.csv"))
testshape = testpd.shape
print(testpd.head(1))
print(testpd.shape)

data = pd.concat([trainpd, testpd], ignore_index=True)
data = pd.merge(data, eventpd, on='event_id', how='left')

       x      y  z        t   terror        q  flag  event_id  hit_id
0 -142.5 -147.5  0  767.879  2.02966  1.05052     0         7       1
(9473201, 9)
   event_id  nhit  nhitreal  energymc  thetamc    phimc   xcmc    ycmc
0         7   426        70   48348.9  63.1686  11.0982 -40.83  114.03
(13315, 8)
       x      y  z        t  terror        q  event_id  hit_id
0 -142.5 -127.5  0  848.061  1.9984  1.15067         9       1
(4086511, 8)


In [3]:
# (k(q,mc)*(t0+l))^2 + dis^2 -dis*cos(phi)*sin(thmc)*(t0+l) = (t+l)^2
data['fx'] = data['x'] - data['xcmc']
data['fy'] = data['y'] - data['ycmc']
data['phimc'] = data['phimc'] * np.pi / 180.
data['fphi'] = np.arctan2(data['fy'], data['fx']) - data['phimc']
data['fdis'] = np.sqrt(data['fx'] ** 2 + data['fy'] ** 2)
data['thetamc'] = data['thetamc'] * np.pi / 180.
data['fsinthmc'] = np.sin(data['thetamc'])
data['fcosphi'] = np.cos(data['fphi'])

data['ft2'] = data['t'] ** 2
data['fdis2'] = data['fdis'] ** 2
data['fsencond'] = data['fdis'] * data['fcosphi'] * data['fsinthmc']

data['fttrue'] = data['t'] / data['terror']
data['nhitratio'] = data['nhit'] / data['nhitreal']

data['fenergymc2'] = data['energymc'] ** 2

del data['fx']
del data['fy']
del data['x']
del data['y']
del data['z']


In [4]:
info_new = pd.DataFrame()
info_new["event_id"] = data.groupby(["event_id"])["event_id"].mean()
info_new["fdis_mean"] = data.groupby(["event_id"])["fdis"].mean()
info_new["fdis_std"] = data.groupby(["event_id"])["fdis"].std()
info_new["fdis_stdmean"] = info_new["fdis_std"] / info_new["fdis_mean"]
info_new["ft_mean"] = data.groupby(["event_id"])["t"].mean()
info_new["ft_std"] = data.groupby(["event_id"])["t"].std()
info_new["ft_stdmean"] = info_new["ft_std"] / info_new["ft_mean"]
info_new["ft_mean2"] = info_new["ft_mean"] ** 2
info_new.reset_index(drop=True, inplace=True)
data = pd.merge(data, info_new, on='event_id', how='left')

data['fsencond2'] = data['fsencond'] * data['ft_mean']

In [5]:
trainpd = data[:trainshape[0]].reset_index()
testpd = data[trainshape[0]:].reset_index()
del data

In [6]:
print(trainpd.columns)
feature = [x for x in trainpd.columns if x not in ['flag', 'index', 'hit_id', 'event_id']]
labels = trainpd['flag']
del trainpd['flag']
del testpd['flag']

Index(['index', 'event_id', 'flag', 'hit_id', 'q', 't', 'terror', 'nhit',
       'nhitreal', 'energymc', 'thetamc', 'phimc', 'xcmc', 'ycmc', 'fx', 'fy',
       'fdis', 'fsinth', 'fcosth', 'fphi', 'fsinphi', 'fcosphi', 'fttrue',
       'nhitratio', 'vfsinphi', 'vfcosphi', 'vfdis', 'vfsinth', 'vfcosth',
       'fdis_stdmean'],
      dtype='object')


In [7]:
def batch_iter_list(data_list, batch_size, num_epochs, shuffle=True):
    data_size = len(data_list[0])
    num_batches_per_epoch = data_size // batch_size  # 每个epoch中包含的batch数量
    for epoch in range(num_epochs):
        # 每个epoch是否进行shuflle
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data_list = [data[shuffle_indices] for data in data_list]
        else:
            shuffled_data_list = data_list

        for batch_num in range(num_batches_per_epoch + 1):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield [shuffled_data[start_index:end_index] for shuffled_data in shuffled_data_list]

class AbstractModeltensor(object):
    def __init__(self, config=None):
        self.config = config

    # You need to override this method.
    def buildModel(self):
        raise NotImplementedError("You need to implement your own model.")


class NeurousNet(AbstractModeltensor):
    def __init__(self, xlenth, config=None):
        super(NeurousNet, self).__init__(config)
        self.graph = tf.Graph()  # 为每个类(实例)单独创建一个graph
        self.modeldic = {
            "cnn_dense_less": self._cnn_dense_less_model,
            "nomul_model": self._nomul_model,
        }
        self.ydim = 1
        self.keep_prob_ph = config["dropout"]
        self.input_dim = xlenth
        self.out_dim = 1
        with self.graph.as_default():
            with tf.name_scope('Inputs'):
                self.input_p = tf.placeholder(tf.float32, [None, self.input_dim])
                self.learn_rate_p = tf.placeholder(dtype=tf.float32, shape=[], name="lr")
                self.lr_decay = tf.placeholder(dtype=tf.float32, shape=[])
            with tf.name_scope('Outputs'):
                self.target_y = tf.placeholder(dtype=tf.float32, shape=[None, self.out_dim])

    def buildModel(self):
        tf.reset_default_graph()
        with self.graph.as_default():
            # 不同选择加载
            self.modeldic[self.config["modelname"]]()
            # 打印打包
            self.merged = tf.summary.merge_all()
            # 损失目标
            tvars = tf.trainable_variables()  # 返回需要训练的variable
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.train_list, tvars), 2)
            grads_and_vars = tuple(zip(grads, tvars))
            self.train_op = tf.train.AdamOptimizer(self.learn_rate_p).apply_gradients(grads_and_vars)
            #             self.train_op = []
            #             for i2 in self.train_list:
            #                 self.train_op.append(tf.train.AdamOptimizer(self.learn_rate_p).minimize(i2))
            # 同一保存加载
            self.saver = tf.train.Saver(tf.global_variables())
            # [print(n.name) for n in tf.get_default_graph().as_graph_def().node]
            # return self.saver

    def _cnn_dense_less_model(self):
        with self.graph.as_default():
            # 部分1，预测值
            dense1 = tf.layers.dense(inputs=self.input_p, units=self.input_dim, activation=tf.nn.softmax,
                                     name="layer_dense1")
            tf.summary.histogram('dense1', dense1)  # 记录标量的变化
            mult_layer1 = tf.nn.softmax(dense1 * self.input_p, name='mult_layer1')
            mult_layer2 = tf.nn.softmax(mult_layer1 * self.input_p, name='mult_layer2')
            concat1 = tf.concat([self.input_p, dense1, mult_layer1, mult_layer2], 1, name='concat1')
            tf.summary.histogram('concat1', concat1)  # 记录标量的变化
            denseo1 = tf.nn.dropout(concat1, keep_prob=self.keep_prob_ph)
            denseo2 = tf.layers.dense(inputs=denseo1, units=self.input_dim, activation=tf.nn.elu, name="layer_dense2")
            denseo3 = tf.layers.dense(inputs=denseo2, units=self.input_dim // 4, activation=tf.nn.elu,
                                      name="layer_dense3")
            y_res_t = tf.layers.dense(inputs=denseo3, units=self.out_dim, activation=None)
            y_res_v = tf.nn.sigmoid(y_res_t, name="y_res_v")
            tf.summary.histogram('y_res_v', y_res_v)  # 记录标量的变化
            # 损失返回值
            y_los = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_res_t, labels=self.target_y, name="y_los")
            y_loss_t = tf.reduce_mean(y_los, name="y_loss_t")
            y_loss_v = tf.add(y_loss_t, 0, name="y_loss_v")

            one = tf.ones_like(y_res_t)
            zero = tf.zeros_like(y_res_t)
            label_bool = tf.where(y_res_t < 0.5, x=zero, y=one)
            self.auc_value, self.auc_op = tf.metrics.auc(self.target_y, label_bool, num_thresholds=4000)
            # 猜错的获取 实际盈利值的负数
            self.train_list = [y_loss_t]
            self.valid_list = [y_loss_v]
            self.pred_list = [y_res_v]
            # 打印信息
            tf.summary.scalar('y_loss_t', y_loss_t)  # 记录标量的变化
            tf.summary.scalar('y_loss_v', y_loss_v)  # 记录标量的变化
            tf.summary.histogram('mult_layer1', mult_layer1)  # 记录标量的变化
            tf.summary.histogram('mult_layer2', mult_layer2)  # 记录标量的变化

            tf.summary.scalar('lr', self.learn_rate_p)  # 记录标量的变化
            return None

    def _nomul_model(self):
        with self.graph.as_default():
            # 部分1，预测值
            dense1 = tf.layers.dense(inputs=self.input_p, units=self.input_dim, activation=tf.nn.softmax,
                                     name="layer_dense1")
            tf.summary.histogram('dense1', dense1)  # 记录标量的变化
            dense2 = tf.layers.dense(inputs=dense1, units=self.input_dim, activation=tf.nn.elu, name="layer_dense2")
            dense3 = tf.layers.dense(inputs=dense2, units=self.input_dim, activation=tf.nn.elu, name="layer_dense3")
            dense4 = tf.layers.dense(inputs=dense3, units=self.input_dim, activation=tf.nn.elu, name="layer_dense4")
            dense5 = tf.layers.dense(inputs=dense4, units=self.input_dim, activation=tf.nn.elu, name="layer_dense5")
            dense6 = tf.layers.dense(inputs=dense5, units=self.input_dim, activation=tf.nn.elu, name="layer_dense6")
            dense7 = tf.layers.dense(inputs=dense6, units=self.input_dim, activation=tf.nn.elu, name="layer_dense7")
            dense8 = tf.layers.dense(inputs=dense7, units=self.input_dim, activation=tf.nn.elu, name="layer_dense8")
            concat1 = tf.concat([self.input_p, dense1, dense2, dense3, dense4, dense5, dense6, dense7, dense8], 1,
                                name='concat1')
            tf.summary.histogram('concat1', concat1)  # 记录标量的变化
            denseo1 = tf.nn.dropout(concat1, keep_prob=self.keep_prob_ph)
            denseo2 = tf.layers.dense(inputs=denseo1, units=self.input_dim * 4, activation=tf.nn.elu,
                                      name="layer_denseo2")
            denseo3 = tf.layers.dense(inputs=denseo2, units=self.input_dim, activation=tf.nn.elu,
                                      name="layer_denseo3")
            denseo4 = tf.layers.dense(inputs=denseo3, units=self.input_dim // 4, activation=tf.nn.elu,
                                      name="layer_denseo4")
            y_res_t = tf.layers.dense(inputs=denseo4, units=self.out_dim, activation=None)
            y_res_v = tf.nn.sigmoid(y_res_t, name="y_res_v")
            tf.summary.histogram('y_res_v', y_res_v)  # 记录标量的变化
            # 损失返回值
            y_los = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_res_t, labels=self.target_y, name="y_los")
            y_loss_t = tf.reduce_mean(y_los, name="y_loss_t")
            y_loss_v = tf.add(y_loss_t, 0, name="y_loss_v")

            one = tf.ones_like(y_res_t)
            zero = tf.zeros_like(y_res_t)
            label_bool = tf.where(y_res_t < 0.5, x=zero, y=one)
            self.auc_value, self.auc_op = tf.metrics.auc(self.target_y, label_bool, num_thresholds=4000)
            # 猜错的获取 实际盈利值的负数
            self.train_list = [y_loss_t]
            self.valid_list = [y_loss_v]
            self.pred_list = [y_res_v]
            # 打印信息
            tf.summary.scalar('y_loss_t', y_loss_t)  # 记录标量的变化
            tf.summary.scalar('y_loss_v', y_loss_v)  # 记录标量的变化

            tf.summary.scalar('lr', self.learn_rate_p)  # 记录标量的变化
            return None

    def batch_train(self, trainpd, labels, batch_size=8, num_epochs=1, retrain=True):
        # 设置
        sess = tf.Session(graph=self.graph)
        with sess.as_default():
            with self.graph.as_default():
                if self.config["retrain"] == 1:
                    model_dir = os.path.join(model_path, "modelevery_%s" % self.config["tailname"])
                    latest_ckpt = tf.train.latest_checkpoint(model_dir)
                    if os.path.isfile("{}.index".format(latest_ckpt)):
                        self.saver.restore(sess, latest_ckpt)
                        sess.run(tf.local_variables_initializer())
                        print("retraining {}".format(latest_ckpt))
                    else:
                        sess.run(tf.global_variables_initializer())
                        sess.run(tf.local_variables_initializer())
                        print("no old model, training new----")
                writer = tf.summary.FileWriter(os.path.join(log_path, "logsevery_%s" % self.config["tailname"]),
                                               sess.graph)
                global_n = 0
                stop_n = 0
                startt = time.time()
                pre_t_base_loss = pre_t_much_loss = pre_v_much_loss = pre_v_base_loss = 100000

                n_splits = 5
                kf = KFold(n_splits=n_splits, shuffle=True, random_state=4389)
                for epoch in range(num_epochs):
                    if self.config["learn_rate"]>0.00001:
                        self.config["learn_rate"] *= 0.8
                    for train_index, valid_index in kf.split(trainpd):
                        inputs_t = np.array(trainpd[feature].iloc[train_index])
                        output_t = np.expand_dims(np.array(labels[train_index]),-1)
                        inputs_v = np.array(trainpd[feature].iloc[valid_index])
                        output_v = np.expand_dims(np.array(labels[valid_index]),-1)
                        dataiter = batch_iter_list([inputs_t,output_t], batch_size, num_epochs)
                        starte = time.time()
                        print("iter_trainnum", inputs_t.shape[0] // batch_size + 1)
                        redi = inputs_t.shape[0] % batch_size
                        lenth = inputs_t.shape[0] // batch_size
                        if 0 != redi:
                            lenth += 1
                        counter = 0
                        for batch_num in range(lenth):
                            # 获取数据
                            r_inputs_t,r_output_t = next(dataiter)
                            feed_dict_t = {
                                self.input_p: r_inputs_t,
                                self.target_y: r_output_t,
                                self.learn_rate_p: self.config["learn_rate"],
                                self.lr_decay: 1,
                            }
                            # 更新学习率
                            sess.run(self.train_op, feed_dict_t)
                            global_n += 1
                            losslist_t = sess.run(self.train_list, feed_dict_t)
                            sess.run(self.auc_op, feed_dict=feed_dict_t)
                            accu = sess.run(self.auc_value)
                            result = sess.run(self.merged, feed_dict_t)
                            if batch_num % 200 == 0:
                                writer.add_summary(result, global_n)
                                self.saver.save(sess,
                                                os.path.join(model_path, 'modelevery_%s' % self.config["tailname"],
                                                             self.config["modelfile"]), global_step=global_n)
                                print("epocht {}, batch_num {}, step {}, time: {} s, accu: {}, loss_yt: {}".format(
                                    epoch, batch_num, global_n, time.time() - starte, accu, *losslist_t))
                        # valid part
                        dataiterv = batch_iter_list([inputs_v,output_v], batch_size, num_epochs)
                        redi = inputs_v.shape[0] % batch_size
                        vnum_iter = inputs_v.shape[0] // batch_size
                        if 0 != redi:
                            vnum_iter += 1
                        counter = 0
                        print("iter_validnum", vnum_iter)
                        losslist_va = 0
                        accu_va = 0
                        dataiter = batch_iter_list([inputs_v,output_v], batch_size, num_epochs)
                        for batch_num in range(vnum_iter):
                            # 获取数据
                            r_inputs_v,r_output_v = next(dataiter)
                            feed_dict_v = {
                                self.input_p: r_inputs_v,
                                self.target_y: r_output_v,
                                self.learn_rate_p: self.config["learn_rate"],
                                self.lr_decay: 1,
                            }
                            losslist_v = sess.run(self.valid_list, feed_dict_v)
                            sess.run(self.auc_op, feed_dict=feed_dict_v)
                            accu = sess.run(self.auc_value)
                            losslist_va += losslist_v[0]
                            accu_va += accu
                        losslist_va /= vnum_iter
                        accu_va /= vnum_iter
                        result = sess.run(self.merged, feed_dict_v)
                        writer.add_summary(result, global_n)
                        if losslist_t[0] < pre_t_base_loss and losslist_va < pre_v_base_loss:
                            stop_n += 1
                            if stop_n > self.config["early_stop"]:
                                break
                            else:
                                self.saver.save(sess,
                                                os.path.join(model_path, 'modelevery_%s' % self.config["tailname"],
                                                             self.config["modelfile"]), global_step=global_n)
                        else:
                            stop_n = 0
                            self.saver.save(sess, os.path.join(model_path, 'modelevery_%s' % self.config["tailname"],
                                                               self.config["modelfile"]), global_step=global_n)
                        print("epochv {}, step {}, stop_n {}, time: {} s, accu_va: {}, loss_yv: {}".format(
                            epoch, global_n, stop_n, time.time() - starte, accu_va, losslist_va))
                        pre_t_base_loss = losslist_t[0]
                        pre_v_base_loss = losslist_va
                writer.close()
                print("total time: %s s" % (time.time() - startt))
        # 结束
        print("train finished!")
        return None

    def predict(self, inputs):
        model_dir = os.path.join(model_path, "modelevery_%s" % self.config["tailname"])
        print("loading model...")
        latest_ckpt = tf.train.latest_checkpoint(model_dir)

        sess = tf.Session(graph=self.graph)
        with sess.as_default():
            with self.graph.as_default():
                if os.path.isfile("{}.index".format(latest_ckpt)):
                    self.saver.restore(sess, latest_ckpt)
                else:
                    raise Exception("没有找到模型:{}".format(latest_ckpt))
                nplist = []
                oneiter = 2000
                redi = inputs.shape[0] % oneiter
                lenth = inputs.shape[0] // oneiter
                if 0 != redi:
                    lenth += 1
                counter = 0
                for num in range(lenth):
                    # 获取数据
                    startindex = num * oneiter
                    if num == lenth - 1 and redi != 0:
                        endindex = num * oneiter + redi
                    else:
                        endindex = (num + 1) * oneiter
                    tmppd = inputs.iloc[startindex:endindex][feature]
                    r_inputs_v = np.array(tmppd)
                    feed_dict = {
                        self.input_p: r_inputs_v,
                    }
                    teslis = sess.run(self.pred_list, feed_dict)
                    nplist.append(teslis)
                feed_dict = {
                    self.input_p: inputs,
                }
                teslist = np.concatenate(nplist, axis=1)
                return teslist


trainconfig = {
    "dropout": 0.5,
    "early_stop": 100,
#     "tailname": "nomul_modeltail",
#     "modelname": "nomul_model",
    "tailname": "mul_verse",
    "modelname": "cnn_dense_less",
    "modelfile": "v2",
    "learn_rate": 0.01,
    "retrain": 1
}
modelcrnn = NeurousNet(len(feature), config=trainconfig)
modelcrnn.buildModel()

Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [None]:
# batch_size, num_epochs = 4096, 1000
batch_size, num_epochs = 512, 1000
print(trainpd.head())
globalstep = modelcrnn.batch_train(trainpd, labels, batch_size, num_epochs)

   index  event_id  hit_id          q         t   terror  nhit  nhitreal  \
0      0         7       1   1.050520  767.8790  2.02966   426        70   
1      1         7       2   0.999853  -70.5552  2.02966   426        70   
2      2         7       3   2.052540 -837.8410  1.85146   426        70   
3      3         7       4  19.513100 -973.1950  1.39994   426        70   
4      4         7       5   0.800334 -159.1400  2.02966   426        70   

   energymc  thetamc    phimc   xcmc    ycmc      fx      fy        fdis  \
0   48348.9  63.1686  11.0982 -40.83  114.03 -101.67 -261.53  280.597095   
1   48348.9  63.1686  11.0982 -40.83  114.03  -96.67 -266.53  283.519540   
2   48348.9  63.1686  11.0982 -40.83  114.03  -96.67 -246.53  264.805834   
3   48348.9  63.1686  11.0982 -40.83  114.03 -101.67 -231.53  252.869393   
4   48348.9  63.1686  11.0982 -40.83  114.03  -96.67 -231.53  250.900837   

     fsinth    fcosth      fphi   fsinphi   fcosphi      fttrue  nhitratio  \
0  0.892

epocht 0, batch_num 9400, step 9401, time: 163.67134022712708 s, accu: 0.8248606324195862, loss_yt: 0.34573012590408325
epocht 0, batch_num 9600, step 9601, time: 166.72716808319092 s, accu: 0.8255152702331543, loss_yt: 0.2683960497379303
epocht 0, batch_num 9800, step 9801, time: 169.83289766311646 s, accu: 0.8261365294456482, loss_yt: 0.3087097704410553
epocht 0, batch_num 10000, step 10001, time: 173.16594982147217 s, accu: 0.8265796899795532, loss_yt: 0.32787764072418213
epocht 0, batch_num 10200, step 10201, time: 176.2556881904602 s, accu: 0.8270098567008972, loss_yt: 0.38646483421325684
epocht 0, batch_num 10400, step 10401, time: 179.41327095031738 s, accu: 0.8274662494659424, loss_yt: 0.3124796450138092
epocht 0, batch_num 10600, step 10601, time: 182.48706555366516 s, accu: 0.8279534578323364, loss_yt: 0.3220323920249939
epocht 0, batch_num 10800, step 10801, time: 185.7363383769989 s, accu: 0.8284055590629578, loss_yt: 0.2865031361579895
epocht 0, batch_num 11000, step 11001

epocht 0, batch_num 7800, step 22603, time: 137.93930315971375 s, accu: 0.8477323651313782, loss_yt: 0.26592591404914856
epocht 0, batch_num 8000, step 22803, time: 141.09785676002502 s, accu: 0.847889244556427, loss_yt: 0.32570064067840576
epocht 0, batch_num 8200, step 23003, time: 144.40501356124878 s, accu: 0.8480260372161865, loss_yt: 0.32439878582954407
epocht 0, batch_num 8400, step 23203, time: 147.63139390945435 s, accu: 0.848183274269104, loss_yt: 0.3243264853954315
epocht 0, batch_num 8600, step 23403, time: 151.14499044418335 s, accu: 0.8483421206474304, loss_yt: 0.3092232942581177
epocht 0, batch_num 8800, step 23603, time: 154.55989003181458 s, accu: 0.848510205745697, loss_yt: 0.279360294342041
epocht 0, batch_num 9000, step 23803, time: 157.793212890625 s, accu: 0.8486669659614563, loss_yt: 0.2782863974571228
epocht 0, batch_num 9200, step 24003, time: 160.95376229286194 s, accu: 0.8488179445266724, loss_yt: 0.28636598587036133
epocht 0, batch_num 9400, step 24203, time

epocht 0, batch_num 6200, step 35805, time: 108.60123634338379 s, accu: 0.8556593656539917, loss_yt: 0.3121698796749115
epocht 0, batch_num 6400, step 36005, time: 112.04801917076111 s, accu: 0.8557577729225159, loss_yt: 0.24884596467018127
epocht 0, batch_num 6600, step 36205, time: 115.14174699783325 s, accu: 0.8558434844017029, loss_yt: 0.27199894189834595
epocht 0, batch_num 6800, step 36405, time: 118.23151278495789 s, accu: 0.855943500995636, loss_yt: 0.2650752067565918
epocht 0, batch_num 7000, step 36605, time: 121.40675163269043 s, accu: 0.8560405373573303, loss_yt: 0.28499794006347656
epocht 0, batch_num 7200, step 36805, time: 124.80067658424377 s, accu: 0.8561513423919678, loss_yt: 0.2623201906681061
epocht 0, batch_num 7400, step 37005, time: 128.22548532485962 s, accu: 0.8562653064727783, loss_yt: 0.2875611186027527
epocht 0, batch_num 7600, step 37205, time: 131.5665512084961 s, accu: 0.8563296794891357, loss_yt: 0.2658633291721344
epocht 0, batch_num 7800, step 37405, t

epocht 0, batch_num 4800, step 49207, time: 85.93022012710571 s, accu: 0.8614141941070557, loss_yt: 0.3409787714481354
epocht 0, batch_num 5000, step 49407, time: 89.12766861915588 s, accu: 0.8614843487739563, loss_yt: 0.27936074137687683
epocht 0, batch_num 5200, step 49607, time: 92.10668206214905 s, accu: 0.8615529537200928, loss_yt: 0.26920533180236816
epocht 0, batch_num 5400, step 49807, time: 95.33405208587646 s, accu: 0.8615960478782654, loss_yt: 0.2327905148267746
epocht 0, batch_num 5600, step 50007, time: 98.70503878593445 s, accu: 0.8616555333137512, loss_yt: 0.2992474436759949
epocht 0, batch_num 5800, step 50207, time: 101.96834564208984 s, accu: 0.8617175221443176, loss_yt: 0.3618507385253906
epocht 0, batch_num 6000, step 50407, time: 105.06602907180786 s, accu: 0.8617626428604126, loss_yt: 0.31552350521087646
epocht 0, batch_num 6200, step 50607, time: 108.08196473121643 s, accu: 0.8618224263191223, loss_yt: 0.21974018216133118
epocht 0, batch_num 6400, step 50807, tim

epocht 0, batch_num 3200, step 62409, time: 60.373520374298096 s, accu: 0.8653562068939209, loss_yt: 0.2703211009502411
epocht 0, batch_num 3400, step 62609, time: 63.60488271713257 s, accu: 0.8654029965400696, loss_yt: 0.2585136592388153
epocht 0, batch_num 3600, step 62809, time: 66.81432437896729 s, accu: 0.8654552698135376, loss_yt: 0.2443966269493103
epocht 0, batch_num 3800, step 63009, time: 70.11946129798889 s, accu: 0.8654957413673401, loss_yt: 0.23500649631023407
epocht 0, batch_num 4000, step 63209, time: 73.35780048370361 s, accu: 0.8655359745025635, loss_yt: 0.27146071195602417
epocht 0, batch_num 4200, step 63409, time: 76.67891883850098 s, accu: 0.8655788898468018, loss_yt: 0.26053690910339355
epocht 0, batch_num 4400, step 63609, time: 79.99109959602356 s, accu: 0.8656208515167236, loss_yt: 0.24989436566829681
epocht 0, batch_num 4600, step 63809, time: 82.98808097839355 s, accu: 0.8656437397003174, loss_yt: 0.2728290557861328
epocht 0, batch_num 4800, step 64009, time:

epocht 1, batch_num 1600, step 75611, time: 36.57519578933716 s, accu: 0.8660430312156677, loss_yt: 0.30058273673057556
epocht 1, batch_num 1800, step 75811, time: 39.87038493156433 s, accu: 0.8660626411437988, loss_yt: 0.2427511215209961
epocht 1, batch_num 2000, step 76011, time: 42.919262647628784 s, accu: 0.8661043643951416, loss_yt: 0.22428469359874725
epocht 1, batch_num 2200, step 76211, time: 46.014954805374146 s, accu: 0.8661428689956665, loss_yt: 0.24438323080539703
epocht 1, batch_num 2400, step 76411, time: 49.58943247795105 s, accu: 0.8661736845970154, loss_yt: 0.27414199709892273
epocht 1, batch_num 2600, step 76611, time: 52.74096894264221 s, accu: 0.8662055134773254, loss_yt: 0.2616119682788849
epocht 1, batch_num 2800, step 76811, time: 55.98133635520935 s, accu: 0.8662356734275818, loss_yt: 0.26608437299728394
epocht 1, batch_num 3000, step 77011, time: 59.18274426460266 s, accu: 0.866268515586853, loss_yt: 0.259907066822052
epocht 1, batch_num 3200, step 77211, time:

iter_trainnum 14802
epocht 1, batch_num 0, step 88813, time: 9.505575895309448 s, accu: 0.8689063191413879, loss_yt: 0.21974530816078186
epocht 1, batch_num 200, step 89013, time: 12.973304986953735 s, accu: 0.8689393997192383, loss_yt: 0.26399850845336914
epocht 1, batch_num 400, step 89213, time: 16.283453464508057 s, accu: 0.8689720034599304, loss_yt: 0.2834821045398712
epocht 1, batch_num 600, step 89413, time: 19.373223304748535 s, accu: 0.8690106868743896, loss_yt: 0.2646888196468353
epocht 1, batch_num 800, step 89613, time: 22.523805379867554 s, accu: 0.8690456748008728, loss_yt: 0.2991027235984802
epocht 1, batch_num 1000, step 89813, time: 25.692320108413696 s, accu: 0.8690794110298157, loss_yt: 0.24480682611465454
epocht 1, batch_num 1200, step 90013, time: 29.00842523574829 s, accu: 0.8691179752349854, loss_yt: 0.2688157558441162
epocht 1, batch_num 1400, step 90213, time: 32.065285205841064 s, accu: 0.8691517114639282, loss_yt: 0.2132166624069214
epocht 1, batch_num 1600, 

epocht 1, batch_num 13800, step 102613, time: 229.16306900978088 s, accu: 0.8707882165908813, loss_yt: 0.28867435455322266
epocht 1, batch_num 14000, step 102813, time: 232.33854460716248 s, accu: 0.8708097338676453, loss_yt: 0.26806968450546265
epocht 1, batch_num 14200, step 103013, time: 235.53103280067444 s, accu: 0.8708351850509644, loss_yt: 0.2924177050590515
epocht 1, batch_num 14400, step 103213, time: 238.9139904975891 s, accu: 0.8708558082580566, loss_yt: 0.26534655690193176
epocht 1, batch_num 14600, step 103413, time: 241.9977171421051 s, accu: 0.8708789944648743, loss_yt: 0.29542097449302673
epocht 1, batch_num 14800, step 103613, time: 245.00467443466187 s, accu: 0.8708940148353577, loss_yt: 0.28411564230918884
iter_validnum 3701
epochv 1, step 103614, stop_n 2, time: 271.6095678806305 s, accu_va: 0.8711548039185876, loss_yv: 0.26518468712591153
iter_trainnum 14802
epocht 1, batch_num 0, step 103615, time: 9.361968040466309 s, accu: 0.8714136481285095, loss_yt: 0.27158841

epocht 1, batch_num 12200, step 115815, time: 208.32753729820251 s, accu: 0.8726115226745605, loss_yt: 0.2765395939350128
epocht 1, batch_num 12400, step 116015, time: 211.48008179664612 s, accu: 0.872626543045044, loss_yt: 0.29547202587127686
epocht 1, batch_num 12600, step 116215, time: 214.55585741996765 s, accu: 0.8726393580436707, loss_yt: 0.340549111366272
epocht 1, batch_num 12800, step 116415, time: 217.76627445220947 s, accu: 0.8726555109024048, loss_yt: 0.30099666118621826
epocht 1, batch_num 13000, step 116615, time: 221.0255582332611 s, accu: 0.8726626634597778, loss_yt: 0.26294195652008057
epocht 1, batch_num 13200, step 116815, time: 224.41748809814453 s, accu: 0.8726800084114075, loss_yt: 0.2869107127189636
epocht 1, batch_num 13400, step 117015, time: 227.66882753372192 s, accu: 0.872697651386261, loss_yt: 0.3098662197589874
epocht 1, batch_num 13600, step 117215, time: 230.66581177711487 s, accu: 0.8727190494537354, loss_yt: 0.32089605927467346
epocht 1, batch_num 1380

epocht 1, batch_num 10600, step 129017, time: 182.39007711410522 s, accu: 0.8737886548042297, loss_yt: 0.3241809010505676
epocht 1, batch_num 10800, step 129217, time: 185.5905418395996 s, accu: 0.8738038539886475, loss_yt: 0.26461392641067505
epocht 1, batch_num 11000, step 129417, time: 189.13702917099 s, accu: 0.8738196492195129, loss_yt: 0.327557772397995
epocht 1, batch_num 11200, step 129617, time: 192.3544249534607 s, accu: 0.8738354444503784, loss_yt: 0.27171632647514343
epocht 1, batch_num 11400, step 129817, time: 195.43521976470947 s, accu: 0.8738511204719543, loss_yt: 0.27836012840270996
epocht 1, batch_num 11600, step 130017, time: 198.6286473274231 s, accu: 0.8738694190979004, loss_yt: 0.2313752919435501
epocht 1, batch_num 11800, step 130217, time: 201.98869466781616 s, accu: 0.8738833069801331, loss_yt: 0.25407928228378296
epocht 1, batch_num 12000, step 130417, time: 205.1961169242859 s, accu: 0.8738982677459717, loss_yt: 0.32672005891799927
epocht 1, batch_num 12200, 

epocht 1, batch_num 9000, step 142219, time: 157.81499814987183 s, accu: 0.8750262260437012, loss_yt: 0.28408950567245483
epocht 1, batch_num 9200, step 142419, time: 160.77905011177063 s, accu: 0.8750466108322144, loss_yt: 0.3162574768066406
epocht 1, batch_num 9400, step 142619, time: 164.2188491821289 s, accu: 0.8750596642494202, loss_yt: 0.23587597906589508
epocht 1, batch_num 9600, step 142819, time: 167.43425011634827 s, accu: 0.8750666975975037, loss_yt: 0.3402005434036255
epocht 1, batch_num 9800, step 143019, time: 170.77032995224 s, accu: 0.875074565410614, loss_yt: 0.2538970708847046
epocht 1, batch_num 10000, step 143219, time: 174.1113986968994 s, accu: 0.8750888109207153, loss_yt: 0.3092503845691681
epocht 1, batch_num 10200, step 143419, time: 177.4076144695282 s, accu: 0.8751048445701599, loss_yt: 0.24154463410377502
epocht 1, batch_num 10400, step 143619, time: 180.64293003082275 s, accu: 0.8751221299171448, loss_yt: 0.3337242007255554
epocht 1, batch_num 10600, step 1

epocht 2, batch_num 7400, step 155421, time: 129.91959381103516 s, accu: 0.876240074634552, loss_yt: 0.2430693805217743
epocht 2, batch_num 7600, step 155621, time: 132.99938488006592 s, accu: 0.8762562274932861, loss_yt: 0.28564032912254333
epocht 2, batch_num 7800, step 155821, time: 136.36937856674194 s, accu: 0.8762744069099426, loss_yt: 0.2872575521469116
epocht 2, batch_num 8000, step 156021, time: 139.63062500953674 s, accu: 0.8762850165367126, loss_yt: 0.2668343782424927
epocht 2, batch_num 8200, step 156221, time: 142.8420376777649 s, accu: 0.8763025403022766, loss_yt: 0.2532953917980194
epocht 2, batch_num 8400, step 156421, time: 146.32971620559692 s, accu: 0.8763168454170227, loss_yt: 0.2839909493923187
epocht 2, batch_num 8600, step 156621, time: 149.58803153038025 s, accu: 0.876333475112915, loss_yt: 0.2591026723384857
epocht 2, batch_num 8800, step 156821, time: 153.14751887321472 s, accu: 0.8763490319252014, loss_yt: 0.27529236674308777
epocht 2, batch_num 9000, step 15

epocht 2, batch_num 5800, step 168623, time: 104.30611491203308 s, accu: 0.8776137828826904, loss_yt: 0.25603848695755005
epocht 2, batch_num 6000, step 168823, time: 107.44768261909485 s, accu: 0.8776317834854126, loss_yt: 0.2370939999818802
epocht 2, batch_num 6200, step 169023, time: 110.84260582923889 s, accu: 0.8776495456695557, loss_yt: 0.24300052225589752
epocht 2, batch_num 6400, step 169223, time: 114.28938722610474 s, accu: 0.8776641488075256, loss_yt: 0.24862053990364075
epocht 2, batch_num 6600, step 169423, time: 117.42802858352661 s, accu: 0.8776770234107971, loss_yt: 0.27730047702789307
epocht 2, batch_num 6800, step 169623, time: 120.856853723526 s, accu: 0.8776891827583313, loss_yt: 0.26290878653526306
epocht 2, batch_num 7000, step 169823, time: 124.08120465278625 s, accu: 0.8777051568031311, loss_yt: 0.2599285840988159
epocht 2, batch_num 7200, step 170023, time: 127.17096662521362 s, accu: 0.8777178525924683, loss_yt: 0.22684645652770996
epocht 2, batch_num 7400, st

epocht 2, batch_num 4200, step 181825, time: 78.24673056602478 s, accu: 0.8786988258361816, loss_yt: 0.2849293351173401
epocht 2, batch_num 4400, step 182025, time: 81.72542762756348 s, accu: 0.8787044286727905, loss_yt: 0.2917308211326599
epocht 2, batch_num 4600, step 182225, time: 84.9987063407898 s, accu: 0.878704845905304, loss_yt: 0.3006320595741272
epocht 2, batch_num 4800, step 182425, time: 88.22704315185547 s, accu: 0.8787080645561218, loss_yt: 0.26243796944618225
epocht 2, batch_num 5000, step 182625, time: 91.58406472206116 s, accu: 0.8787160515785217, loss_yt: 0.28280875086784363
epocht 2, batch_num 5200, step 182825, time: 94.78553771972656 s, accu: 0.8787283897399902, loss_yt: 0.28291356563568115
epocht 2, batch_num 5400, step 183025, time: 98.20037174224854 s, accu: 0.8787387609481812, loss_yt: 0.2894778549671173
epocht 2, batch_num 5600, step 183225, time: 101.25623869895935 s, accu: 0.8787485361099243, loss_yt: 0.23957812786102295
epocht 2, batch_num 5800, step 183425

epocht 2, batch_num 2600, step 195027, time: 51.499322175979614 s, accu: 0.8795456886291504, loss_yt: 0.26595786213874817
epocht 2, batch_num 2800, step 195227, time: 55.19843077659607 s, accu: 0.8795549273490906, loss_yt: 0.24880680441856384
epocht 2, batch_num 3000, step 195427, time: 58.200403690338135 s, accu: 0.8795626759529114, loss_yt: 0.2658672332763672
epocht 2, batch_num 3200, step 195627, time: 61.41477656364441 s, accu: 0.8795709609985352, loss_yt: 0.21942125260829926
epocht 2, batch_num 3400, step 195827, time: 64.69102025032043 s, accu: 0.879571795463562, loss_yt: 0.2737627625465393
epocht 2, batch_num 3600, step 196027, time: 67.79674339294434 s, accu: 0.8795732855796814, loss_yt: 0.2874433398246765
epocht 2, batch_num 3800, step 196227, time: 71.14977812767029 s, accu: 0.8795819282531738, loss_yt: 0.3144981861114502
epocht 2, batch_num 4000, step 196427, time: 74.28436875343323 s, accu: 0.8795874714851379, loss_yt: 0.2875139117240906
epocht 2, batch_num 4200, step 19662

In [None]:
y_pred = modelcrnn.predict(testpd[feature])
y_pred = np.squeeze(y_pred)

In [None]:
#阈值大概在0.2-0.4之间 本题对召回率较敏感，可适当降低一下阈值
thre = 0.5
#生成提交文件
sub = pd.DataFrame()
sub['hit_id'] = testpd['hit_id']
sub['flag_pred'] = y_pred
sub['event_id'] = testpd['event_id']
sub['flag_pred'] = sub['flag_pred'].apply(lambda x: 1 if x >= thre else 0)
sub.to_csv(os.path.join(pathf, "subsample.csv").format(sub['flag_pred'].mean()), index=False)