In [7]:
import os 
import pandas as pd 
import time 
import datetime 
import logging
import json 
import random
import math

import warnings
warnings.filterwarnings("ignore")

from tqdm.autonotebook import tqdm
from collections import Counter
import gensim
import numpy as np 
import tensorflow as tf 


In [1]:
from utils import *

# 定义参数配置类

In [8]:
class Config(dict):
    def __init__(self, path=None):
        super().__init__()
        ## 定义训练参数
        self['num_epochs'] = 5 
        self['evaluateEvery'] = 100 
        self['checkpointEvery'] = 100 
        self['learningRate'] = 0.001 
        
        ## 定义模型参数
        self['embeddingSize'] = 200 
        self['numFilters'] = 128 
        self['filterSizes'] = [2, 3, 4, 5]
        self['dropoutProb'] = 0.5 
        self['l2RegLambda'] = 0.0 
        
        ## 定义基础参数
        self['sequenceLength'] = 200 
        self['batch_size'] = 64 
        self['dataSource'] = path
        self['stopWordSource'] = "../data/english"
        self['numClasses'] = 1  
        self['train_size'] = 0.8   # 训练集和测试集比例
        self.threshold = 0.5
        
        ## 保存模型参数
        self['checkpoint_dir'] = "../model/textCNN/imdb/checkpoint"
        self['summary_dir'] = "../model/textCNN/imdb/summary"
        self['max_to_keep'] = 5

In [9]:
# 定义数据预处理类
class Dataset(object):
    def __init__(self, config):
        self.config = config
        self._dataSource = config['dataSource']
        self._stopWordSource = config['stopWordSource']
        
        self._sequenceLength = config['sequenceLength'] # 设置序列的输入藏毒
        self._embeddingSize = config['embeddingSize']
        self._batchSize = config['batch_size']
        self._trainRate = config['train_size']
        
        self._stopWordDict = {}
        self.trainReviews = []
        self.trainLabels = []
        self.evalReviews = []
        self.evalLabels = []
        
        self.wordEmbedding = None
        self.labelList = []
        
    def _readData(self, filePath):
        '''
        从csv文件中读取数据集
        '''
        df = pd.read_csv(filePath)
        if self.config['numClasses'] == 1:          
            if "sentiment" in df.columns:
                labels = df["sentiment"].tolist()
            if "emotion" in df.columns:
                labels = df["emotion"].tolist()
        
        elif self.config['numClasses'] > 1: 
            labels = df["rate"].tolist()
        
        review = df['review'].tolist()
        reviews = [line.strip().split() for line in review]
        
        return reviews, labels
    
    def _laeblToIndex(self, labels, label2idx):
        '''
        将标签转换为索引表示
        '''
        labelIds = [label2idx[label] for label in labels]
        return labelIds
    
    def _wordToIndex(self, reviews, word2idx):
        '''
        将词转换为索引表示
        '''
        reviewIds = [[word2idx.get(item, word2idx["UNK"]) for item in review] for review in reviews]
        return reviewIds
    
    def _genTrainEvalData(self, x, y, word2idx, rate):
        '''
        生成训练集和验证集
        '''
        reviews = []
        for review in x: 
            if len(review) >= self._sequenceLength:
                reviews.append(review[:self._sequenceLength])
            else:
                reviews.append(review + [word2idx["PAD"]] * (self._sequenceLength - len(review)))
        
        trainIndex = int(len(x) * rate)
        
        trainReviews = np.asarray(reviews[:trainIndex], dtype="int64")
        trainLabels = np.array(y[:trainIndex], dtype="float32")
        
        evalReviews = np.asarray(reviews[trainIndex:], dtype="int64")
        evalLabels = np.array(y[trainIndex:], dtype="float32")
        
        return trainReviews, trainLabels, evalReviews, evalLabels
    
    def _genVocabulary(self, reviews, labels, path, prefix=""):
        '''
        生成向量和词汇-索引映射字典
        '''
        allWords = [word for review in reviews for word in review]
        # 去掉停用词
        subWords = [word for word in allWords if word not in self.stopWordDict]
        wordCount = Counter(subWords)  # 统计各个词的词频
        sortWordCount = sorted(wordCount.items(), key=lambda x: x[1], reverse=True)
        
        # 去除低频词
        words = [item[0] for item in sortWordCount if item[1] >= 5]
        
        
        vocab, wordEmbedding = self._getWordEmbedding(words, path)
        self.wordEmbedding = wordEmbedding
        
        #print(len(vocab), vocab[10])
        word2idx = dict(zip(vocab, range(len(vocab))))
        
        uniqueLabel = list(set(labels))
        label2idx = dict(zip(uniqueLabel, list(range(len(uniqueLabel)))))
        self.labelList = list(range(len(uniqueLabel)))
        
        # 将词汇表-索引映射表保存为json数据，之后inference时直接加载处理数据
        save_path = "../data/wordJson"
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        with open(os.path.join(save_path, prefix+"_word2idx.json"), "w", encoding="utf8") as f: 
            json.dump(word2idx, f)
        
        with open(os.path.join(save_path, prefix+"_label2idx.json"), "w", encoding="utf8") as f: 
            json.dump(label2idx, f)
        
        return word2idx, label2idx
    
    def _getWordEmbedding(self, words, path):
        '''
        按照数据集中的单词去除训练好的词向量
        '''
        wordVec = gensim.models.KeyedVectors.load_word2vec_format(os.path.join(path, "wordvector.bin"),
                                                                 binary=True)
        
        vocab = []
        wordEmbedding = []
        
        # 添加"pad"和"UNK"
        vocab.append("PAD")
        vocab.append("UNK")
        
        wordEmbedding.append(np.zeros(self._embeddingSize))  # 表示对"PAD"用全0向量表示
        wordEmbedding.append(np.random.randn(self._embeddingSize))  # 对"UNK"用随机向量表示
        
        for word in words:
            try:
                vector = wordVec.wv[word]
                vocab.append(word)
                wordEmbedding.append(vector)
            except:
                pass
        
        return vocab, np.array(wordEmbedding)
    
    def _readStopword(self, stopWordPath):
        '''
        读取停用词
        '''
        with open(stopWordPath, "r") as f: 
            stopWords = f.read()
            stopWordList = stopWords.splitlines()
            # 转换成字典的形式，使用hash查找效率更高
            self.stopWordDict = dict(zip(stopWordList, list(range(len(stopWordList)))))
            
    def dataGen(self, path, prefix=""):
        '''
        初始化训练集和验证集 
        prefix: 表示生成单词到索引的文件的前缀
        path: 表示wordvector文件的位置
        '''
        # 初始化停用词
        self._readStopword(self._stopWordSource)
        # 初始化数据集
        reviews, labels = self._readData(self._dataSource)
        # 初始化词汇-索引映射表和词向量矩阵
        word2idx, label2idx = self._genVocabulary(reviews, labels, path, prefix)
        # 将标签和句子数值化
        labelIds = self._laeblToIndex(labels, label2idx)
        reviewsIds = self._wordToIndex(reviews, word2idx)
        
        # 初始化训练集和测试集
        trainReviews, trainLabels, evalReviews, evalLabels = self._genTrainEvalData(reviewsIds, 
                                                                                  labelIds,
                                                                                  word2idx, 
                                                                                  self._trainRate)
        self.trainReviews = trainReviews
        self.trainLabels = trainLabels
        self.evalReviews = evalReviews
        self.evalLabels = evalLabels

# 构建模型

## 定义数据的迭代类

In [10]:

class DataGenerator:
    def __init__(self, x, y):
        self.x = x 
        self.y = y
        self.length = len(y)

    def next_batch(self, batch_size):
        '''
        生成每一个batch的数据集
        '''
        idx = np.random.choice(self.length, batch_size)
        yield self.x[idx], self.y[idx]
        
    def iter_all(self, batch_size):
        '''
        按照batch迭代所有数据 
        '''        
        numBatches = self.length // batch_size
        for i in range(numBatches):
            start = i * batch_size
            end = start + batch_size
            batchX = np.array(self.x[start:end], dtype='int64')
            batchY = np.array(self.y[start:end], dtype="float32")
            yield batchX, batchY

## 定义每个模型都要继承的基类

In [11]:
class BaseModel:
    def __init__(self, config):
        self.config = config 
        self.init_global_step()
        self.init_cur_epoch()
        
    def save(self, sess):
        print("Saving model...")
        self.saver.save(sess, self.config['checkpoint_dir']+"/my_model", self.global_step_tensor)
        print("Model saved")
        
    def load(self, sess):
        ## 获取最近的chekpoint
        latest_checkpoint = tf.train.latest_checkpoint(self.config['checkpoint_dir'])
        if latest_checkpoint:
            print("Loading model checkpoint {} ... \n".format(latest_checkpoint))
            self.saver.restore(sess, latest_checkpoint)
            print("Model loaded")
    
    # 表示每执行一个epoch，对应的变量+1
    def init_cur_epoch(self):
        with tf.variable_scope("cur_epoch"):
            self.cur_epoch_tensor = tf.Variable(0, trainable=False, name="cur_epoch")
            self.increment_cur_epoch_tensor = tf.assign(self.cur_epoch_tensor, self.cur_epoch_tensor+1)
    
    def init_global_step(self):
        # 表示当前模型一共迭代的step
        ## 每次执行都需要放到trainer里面
        with tf.variable_scope("global_step"):
            self.global_step_tensor = tf.Variable(0, trainable=False, name="global_step")
    
    def init_saver(self):
        # 通常只需要在子类中拷贝下面的语句即可
        # self.saver = tf.train.Saver(max_to_keep=self.config['max_to_keep'])
        raise NotImplementedError
        
    def build_model(self):
        raise NotImplementedError

In [12]:
class BaseTrain:
    def __init__(self, sess, model, data, config, logger):
        self.model = model 
        self.logger = logger
        self.config = config
        self.data = data 
        self.sess = sess
        self.init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        self.sess.run(self.init)
        
    def train_all(self):
        for cur_epoch in range(self.model.cur_epoch_tensor.eval(self.sess), self.config['num_epochs']+1, 1):
            print(f"\n当前正处于第{cur_epoch+1}次迭代")
            self.train_epoch()
            ## 将对应的epoch+1
            self.sess.run(self.model.increment_cur_epoch_tensor)
    
    def train_epoch(self):
        '''
        实现一个epoch训练的代码
        - 在config规定的迭代次数上迭代，调用train_step
        - 添加summary
        '''
        raise NotImplementedError
    
    def train_step(self):
        '''
        实现单步训练的逻辑代码
        '''
        raise NotImplementedError

## 定义记录训练过程中一些信息的类

In [13]:
class Logger:
    def __init__(self, sess, config):
        self.sess = sess 
        self.config = config 
        self.summary_placeholders = {}
        self.summary_ops = {}
        self.train_sumary_writer = tf.summary.FileWriter(os.path.join(self.config['summary_dir'], "train"),
                                                        self.sess.graph)
        self.test_summary_writer = tf.summary.FileWriter(os.path.join(self.config['summary_dir'], "test"))
        
    # 保存scalars和images
    def summarize(self, step, summarizer="train", scope="", summaries_dict=None):
        '''
        step: 表示summary的时间步
        summarizer: 表示使用 train 还是 test
        scope: 表示变量空间名 
        summaries_dict: 表示要summaries的值，格式是(tag, value)
        '''
        summary_writer = self.train_sumary_writer if summarizer == "train" else self.test_summary_writer
        with tf.variable_scope(scope):
            if summaries_dict is not None:
                summary_list = []
                for tag, value in summaries_dict.items():
                    if tag not in self.summary_ops:
                        if len(value.shape) <= 1:
                            self.summary_placeholders[tag] = tf.placeholder(tf.float32,shape=value.shape, name=tag)
                        else:
                            self.summary_placeholders[tag] = tf.placeholder("float32", 
                                                                            [None]+list(value.shape[1:]), 
                                                                           name=tag)                                     
                        if len(value.shape) <= 1:
                            ## 添加标量
                            self.summary_ops[tag] = tf.summary.scalar(tag, self.summary_placeholders[tag])
                        else:
                            ## 添加为图片
                            self.summary_ops[tag] = tf.summary.image(tag, self.summary_placeholders[tag])

                    summary_list.append(self.sess.run(self.summary_ops[tag], 
                                                      {self.summary_placeholders[tag]: value}))
                for summary in summary_list:
                    summary_writer.add_summary(summary, step)
                summary_writer.flush()

## 定义模型类

In [15]:
class TextCNN(BaseModel):
    def __init__(self, config, wordEmbedding):
        super(TextCNN, self).__init__(config)
        self.wordEmbedding = wordEmbedding
        self.build_model()
        self.init_saver()
        
    def build_model(self):
        # 定义模型的输入
        self.inputX = tf.placeholder(tf.int32, [None, self.config['sequenceLength']], name="inputX")
        self.inputY = tf.placeholder(tf.int32, [None], name="inputY")
        
        self.dropoutProb = tf.placeholder(tf.float32, name="dropoutProb")
        
        
        # 定义L2损失值
        l2Loss = tf.constant(0.0)
        
        # 词嵌入层
        with tf.name_scope("embedding"):
            ## 利用预训练的词向量，设置trainable=True，表示不冻结可以训练
            self.W = tf.Variable(tf.cast(self.wordEmbedding, dtype=tf.float32, name="word2Vec"), 
                                 name="W", trainable=False)
            ## 利用词嵌入矩阵将输入的数据中的词转换成词向量，输出为 [batch, seq_len, embed_size]
            self.embeddedWords = tf.nn.embedding_lookup(self.W, self.inputX)
            ## 卷积的输入形状是[batch, heigth, width, channel]，所以需要扩维
            self.embeddedWordsExpand = tf.expand_dims(self.embeddedWords, -1)
            
        # 创建卷积层和池化层
        pooledOutputs = []
        ## 根据自己定义的不同的filter_size，将输出进行融合
        for i, filter_size, in enumerate(self.config['filterSizes']):
            ## 卷积层，卷积核尺寸为 filter_size * embeded_size
            filter_shape = [filter_size, self.config['embeddingSize'], 1, self.config['numFilters']]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[self.config['numFilters']]), name="b")
            conv = tf.nn.conv2d(self.embeddedWordsExpand, W, strides=[1,1,1,1],
                               padding="VALID", name="conv")
            ## 利用relu进行非线性映射
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            
            ## 池化层，进行最大池化之后得到一个值
            ## 其中ksize的形状表示 [batch, height, width, channels]
            pooled = tf.nn.max_pool(h, ksize=[1, self.config['sequenceLength']-filter_size+1, 1, 1],
                                   strides=[1,1,1,1], padding="VALID", name="pool")
            pooledOutputs.append(pooled)
            
        # 得到CNN网络的输出长度
        numFiltersTotal = self.config['numFilters'] * len(self.config['filterSizes'])
        ## 池化之后维度为[batch, 1, 1, channels]，按照最后一维进行concat
        self.hPool = tf.concat(pooledOutputs, 3)
        ## 摊平成二维数据输入到全连接层
        self.hPoolFlat = tf.reshape(self.hPool, [-1, numFiltersTotal])
        
        # Dropout层
        with tf.name_scope("dropout"):
            self.hDrop = tf.nn.dropout(self.hPoolFlat, rate=self.dropoutProb)
        
        # 全连接层
        with tf.name_scope("output"):

            self.logits = tf.layers.dense(self.hDrop, self.config['numClasses'], name="dense",
                                         kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=2019),
                                         bias_initializer=tf.constant_initializer(0.1),
                                         activation=None)
            ## 获取该层的权重
            with tf.variable_scope("dense", reuse=True):
                outputW = tf.get_variable("kernel")
            l2Loss += tf.nn.l2_loss(outputW)
            
            if self.config['numClasses'] == 1: 
                self.predictions = tf.sigmoid(self.logits)
            elif self.config['numClasses'] > 1: 
                self.predictions = tf.nn.softmax(self.logits, dim=1)
            
        # 计算二元交叉熵损失
        with tf.name_scope("loss"):
            if self.config['numClasses'] == 1: 
                losses = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,
                                                                labels=tf.cast(tf.reshape(self.inputY, [-1, 1]),
                                                                              dtype=tf.float32))
            elif self.config['numClasses'] > 1: 
                losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits,
                                                                       labels=self.inputY)
            
            self.loss = tf.reduce_mean(losses) + self.config["l2RegLambda"] * l2Loss           
            
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_op = tf.train.AdamOptimizer(
                    self.config['learningRate']).minimize(self.loss, global_step=self.global_step_tensor)
            

    def init_saver(self):
        '''
        初始化用于保存模型的对象
        '''
        self.saver = tf.train.Saver(max_to_keep=self.config['max_to_keep'])

## 定义训练的类

In [16]:
class Trainer(BaseTrain):
    def __init__(self, sess, model, data, config, logger):
        '''
        这里的data要求是元组的形式，data[0]表示train对象，data[1]表示eval对象
        '''
        super(Trainer, self).__init__(sess, model, data, config, logger)
        self.train = data[0]
        self.eval = data[1]
    
    def train_epoch(self):
        num_iter_per_epoch = self.train.length // self.config["batch_size"]

        for _ in tqdm(range(num_iter_per_epoch)):
            loss, metrics, step = self.train_step()
            train_acc = metrics['accuracy']
            train_f_score = metrics['f_beta']
            
            ## 将训练过程中的损失写入
            summaries_dict = {"loss": loss, 
                              "acc": np.array(train_acc), 
                              "f_score": np.array(train_f_score)}
            self.logger.summarize(step, summarizer="train", scope="train_summary", summaries_dict=summaries_dict)
            
            if step % self.config['evaluateEvery'] == 0: 
                print("Train —— Step: {} | Loss: {} | Acc: {} | F1_Score: {}".format(
                        step, loss, train_acc, train_f_score))
                ## 对测试集进行评估
                print("\nEvaluation: \n")
                eval_losses = []
                eval_true = []
                eval_pred = []
                for batchEval in self.eval.iter_all(self.config["batch_size"]):
                    loss, predictions = self.eval_step(batchEval[0], batchEval[1])
                    eval_losses.append(loss)
                    eval_true.extend(batchEval[-1])
                    eval_pred.extend(predictions)
                getMetric = Metric(np.array(eval_pred), np.array(eval_true), self.config)
                metrics = getMetric.get_metrics()
                loss_mean = np.round(np.mean(eval_losses), 5)
                prec_mean = np.round(metrics['precision'], 5)
                recall_mean = np.round(metrics['recall'],5)
                time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S %p")
                
                print("{} | Loss: {} | Precision: {} | Recall: {}".format(time_str,
                                                                     loss_mean,
                                                                     prec_mean, recall_mean))
                summaries_dict = {"loss": np.array(loss_mean), 
                                  "precision": np.array(prec_mean), 
                                  "recall": np.array(recall_mean)}
                self.logger.summarize(step, summarizer="test", scope="test_summary", summaries_dict=summaries_dict)
            
            if step % self.config["checkpointEvery"] == 0: 
                self.model.save(self.sess)
        
    
    def train_step(self):
        batch_x, batch_y = next(self.train.next_batch(self.config["batch_size"]))
        feed_dict = {self.model.inputX: batch_x, self.model.inputY: batch_y,
                    self.model.dropoutProb: self.config['dropoutProb']}
        
        _, loss, predicitons, step = self.sess.run([self.model.train_op, self.model.loss, 
                                                   self.model.predictions, self.model.global_step_tensor],
                                                  feed_dict=feed_dict)
        
        getMetric = Metric(predicitons, batch_y, self.config)
        metrics = getMetric.get_metrics()
        
        return loss, metrics, step
    
    def eval_step(self, batch_x, batch_y):
        '''
        使用验证集进行测试
        '''
        feed_dict = {self.model.inputX: batch_x, self.model.inputY: batch_y,
                     self.model.dropoutProb: 0.0}
        loss, predictions = self.sess.run([self.model.loss, self.model.predictions],
                                          feed_dict=feed_dict)
        
        return loss, predictions
    

In [17]:
def create_dirs(dirs):
    try:
        for dir_ in dirs: 
            if not os.path.exists(dir_):
                os.makedirs(dir_)
        return 0 
    except Exception as e: 
        print("Creating directories error: {}".format(e))
        exit(-1)

# 使用数据集进行训练和预测

## 使用IMDB数据集进行训练和预测

In [30]:
def main():
    # 实例化配置参数对象
    ## 指定训练数据的文件名
    path = "../data/imdb/labeldTrain.csv"
    config = Config(path)
    
    
    create_dirs([config["summary_dir"], config["checkpoint_dir"]])
    
    data = Dataset(config)
    ## 生成训练集数据，第一个参数表示wordembedding文件所在的文件夹
    ## prefix表示生成word2idx和label2idx文件的前缀
    data.dataGen("../data/imdb", prefix="imdb")
    
    train_X, train_y, eval_X, eval_y = data.trainReviews, data.trainLabels, data.evalReviews, data.evalLabels
    wordEmbedding, labels = data.wordEmbedding, data.labelList
    
    train_data = DataGenerator(train_X, train_y)
    eval_data = DataGenerator(eval_X, eval_y)
    pack_data = [train_data, eval_data]
    
    tf.reset_default_graph()
    ## 设置计算图的配置
    session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    session_conf.gpu_options.allow_growth=True
    session_conf.gpu_options.per_process_gpu_memory_fraction=0.9  # 配置GPU占用率
    
    sess = tf.Session(config=session_conf)
    
    ## 创建一个实例
    model = TextCNN(config, wordEmbedding)
    
    logger = Logger(sess, config)
    
    trainer = Trainer(sess, model, pack_data, config, logger)
    
    
    trainer.train_all()

In [31]:
main()


当前正处于第1次迭代


HBox(children=(IntProgress(value=0, max=312), HTML(value='')))

Train —— Step: 100 | Loss: 0.7643935680389404 | Acc: 0.625 | F1_Score: 0.6

Evaluation: 

2019-08-03 14:52:49 PM | Loss: 0.5329700112342834 | Precision: 0.88223 | Recall: 0.50786
Saving model...
Model saved
Train —— Step: 200 | Loss: 0.4091358184814453 | Acc: 0.79688 | F1_Score: 0.80597

Evaluation: 

2019-08-03 14:52:52 PM | Loss: 0.39647001028060913 | Precision: 0.82581 | Recall: 0.84945
Saving model...
Model saved
Train —— Step: 300 | Loss: 0.5425980091094971 | Acc: 0.75 | F1_Score: 0.72414

Evaluation: 

2019-08-03 14:52:56 PM | Loss: 0.35016000270843506 | Precision: 0.85357 | Recall: 0.86007
Saving model...
Model saved


当前正处于第2次迭代


HBox(children=(IntProgress(value=0, max=312), HTML(value='')))

Train —— Step: 400 | Loss: 0.3960190713405609 | Acc: 0.82812 | F1_Score: 0.83582

Evaluation: 

2019-08-03 14:52:59 PM | Loss: 0.3298099935054779 | Precision: 0.84409 | Recall: 0.89221
Saving model...
Model saved
Train —— Step: 500 | Loss: 0.3414153456687927 | Acc: 0.84375 | F1_Score: 0.82758

Evaluation: 

2019-08-03 14:53:03 PM | Loss: 0.316540002822876 | Precision: 0.87798 | Recall: 0.86941
Saving model...
Model saved
Train —— Step: 600 | Loss: 0.2741347551345825 | Acc: 0.89062 | F1_Score: 0.86793

Evaluation: 

2019-08-03 14:53:06 PM | Loss: 0.30921998620033264 | Precision: 0.89112 | Recall: 0.85972
Saving model...
Model saved


当前正处于第3次迭代


HBox(children=(IntProgress(value=0, max=312), HTML(value='')))

Train —— Step: 700 | Loss: 0.21765583753585815 | Acc: 0.9375 | F1_Score: 0.93104

Evaluation: 

2019-08-03 14:53:10 PM | Loss: 0.30570998787879944 | Precision: 0.85079 | Recall: 0.90917
Saving model...
Model saved
Train —— Step: 800 | Loss: 0.24884681403636932 | Acc: 0.9375 | F1_Score: 0.95349

Evaluation: 

2019-08-03 14:53:14 PM | Loss: 0.2946999967098236 | Precision: 0.87991 | Recall: 0.88248
Saving model...
Model saved
Train —— Step: 900 | Loss: 0.3202097415924072 | Acc: 0.85938 | F1_Score: 0.86956

Evaluation: 

2019-08-03 14:53:17 PM | Loss: 0.2917500138282776 | Precision: 0.87662 | Recall: 0.8904
Saving model...
Model saved


当前正处于第4次迭代


HBox(children=(IntProgress(value=0, max=312), HTML(value='')))

Train —— Step: 1000 | Loss: 0.2240048348903656 | Acc: 0.90625 | F1_Score: 0.89655

Evaluation: 

2019-08-03 14:53:21 PM | Loss: 0.33980000019073486 | Precision: 0.93362 | Recall: 0.7835
Saving model...
Model saved
Train —— Step: 1100 | Loss: 0.14754396677017212 | Acc: 0.96875 | F1_Score: 0.97059

Evaluation: 

2019-08-03 14:53:24 PM | Loss: 0.30809998512268066 | Precision: 0.91803 | Recall: 0.83023
Saving model...
Model saved
Train —— Step: 1200 | Loss: 0.2614726424217224 | Acc: 0.90625 | F1_Score: 0.90322

Evaluation: 

2019-08-03 14:53:27 PM | Loss: 0.2829900085926056 | Precision: 0.88993 | Recall: 0.8876
Saving model...
Model saved


当前正处于第5次迭代


HBox(children=(IntProgress(value=0, max=312), HTML(value='')))

Train —— Step: 1300 | Loss: 0.14328737556934357 | Acc: 0.95312 | F1_Score: 0.95891

Evaluation: 

2019-08-03 14:53:31 PM | Loss: 0.29085999727249146 | Precision: 0.90815 | Recall: 0.86043
Saving model...
Model saved
Train —— Step: 1400 | Loss: 0.12715885043144226 | Acc: 0.9375 | F1_Score: 0.9375

Evaluation: 

2019-08-03 14:53:34 PM | Loss: 0.2922700047492981 | Precision: 0.91497 | Recall: 0.85079
Saving model...
Model saved
Train —— Step: 1500 | Loss: 0.1176362931728363 | Acc: 0.96875 | F1_Score: 0.96875

Evaluation: 

2019-08-03 14:53:38 PM | Loss: 0.2839199900627136 | Precision: 0.87163 | Recall: 0.89887
Saving model...
Model saved


当前正处于第6次迭代


HBox(children=(IntProgress(value=0, max=312), HTML(value='')))

Train —— Step: 1600 | Loss: 0.13549146056175232 | Acc: 0.95312 | F1_Score: 0.95238

Evaluation: 

2019-08-03 14:53:41 PM | Loss: 0.2854500114917755 | Precision: 0.89732 | Recall: 0.87396
Saving model...
Model saved
Train —— Step: 1700 | Loss: 0.177398219704628 | Acc: 0.90625 | F1_Score: 0.92308

Evaluation: 

2019-08-03 14:53:45 PM | Loss: 0.2809700071811676 | Precision: 0.89118 | Recall: 0.88056
Saving model...
Model saved
Train —— Step: 1800 | Loss: 0.11711357533931732 | Acc: 0.96875 | F1_Score: 0.97297

Evaluation: 

2019-08-03 14:53:48 PM | Loss: 0.28306999802589417 | Precision: 0.87729 | Recall: 0.89745
Saving model...
Model saved



- 不冻结word-embedding的情况下，验证集最优结果为 —— <font size=3 color=red>**P: 0.89297, R: 0.88057**</font> 

- 冻结word-embedding的情况下，验证集最优结果为  —— <font size=3 color=red>**P: 0.89903, R: 0.8614**</font>

In [32]:
def predict(x, config, word2idx_path, label2idx_path, checkpoint_path):
    with open(word2idx_path, "r", encoding="utf-8") as f: 
        word2idx = json.load(f)
    with open(label2idx_path, "r", encoding="utf-8") as f: 
        label2idx = json.load(f)
        
    idx2label  = {value:key for key, value in label2idx.items()}
    
    xIds = [word2idx.get(item, word2idx["UNK"]) for item in x.split(" ")]
    if len(xIds) >= config["sequenceLength"]:
        xIds = xIds[:config["sequenceLength"]]
    else:
        xIds = xIds + [word2idx["PAD"]] * (config["sequenceLength"] - len(xIds))
    
    g = tf.Graph()
    
    with g.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False, 
                                     gpu_options=gpu_options)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # 获取最后保存的模型文件
            checkpoint_file = tf.train.latest_checkpoint(checkpoint_path)
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            
            # 获取需要未给模型的参数
            inputX = g.get_operation_by_name("inputX").outputs[0]
            dropoutProb = g.get_operation_by_name("dropoutProb").outputs[0]
            
            # 获取输出的结果
            predictions = g.get_tensor_by_name("output/predictions:0")
            pred = sess.run(predictions, feed_dict={inputX: [xIds], dropoutProb: 1.0})[0]
            
            return pred

In [33]:
x = "this is bad !"

word2idx_path = "../data/wordJson/imdb_word2idx.json"
label2idx_path = "../data/wordJson/imdb_label2idx.json"
checkpoint_path = "../model/textCNN/imdb/checkpoint/"

config = Config()

predict(x, config, word2idx_path, label2idx_path, checkpoint_path)

INFO:tensorflow:Restoring parameters from ../model/textCNN/imdb/checkpoint/my_model-1800


array([0], dtype=int32)

## 使用yelps数据集

In [18]:
def main():
    # 实例化配置参数对象
    ## 指定训练数据的文件名
    ## 由于电脑性能限制，只能用少量的数据集训练
    path = "../data/yelps/yelps_test.csv"
    config = Config(path)
    
    ## 修改summary和checkpoint保存的路径
    config["summary_dir"] = "../model/textCNN/yelps/summary"
    config["checkpoint_dir"] = "../model/textCNN/yelps/checkpoint"
    
    create_dirs([config["summary_dir"], config["checkpoint_dir"]])
    
    data = Dataset(config)
    ## 生成训练集数据，第一个参数表示wordembedding文件所在的文件夹
    ## prefix表示生成word2idx和label2idx文件的前缀
    data.dataGen("../data/yelps/", prefix="yelps")
    
    train_X, train_y, eval_X, eval_y = data.trainReviews, data.trainLabels, data.evalReviews, data.evalLabels
    wordEmbedding, labels = data.wordEmbedding, data.labelList
    
    train_data = DataGenerator(train_X, train_y)
    eval_data = DataGenerator(eval_X, eval_y)
    pack_data = [train_data, eval_data]
    
    tf.reset_default_graph()
    ## 设置计算图的配置
    session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    session_conf.gpu_options.allow_growth=True
    session_conf.gpu_options.per_process_gpu_memory_fraction=0.9  # 配置GPU占用率
    
    sess = tf.Session(config=session_conf)
    
    ## 创建一个实例
    model = TextCNN(config, wordEmbedding)
    
    logger = Logger(sess, config)
    
    trainer = Trainer(sess, model, pack_data, config, logger)
    
    
    trainer.train_all()

In [19]:
main()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.dense instead.

当前正处于第1次迭代


HBox(children=(IntProgress(value=0, max=8939), HTML(value='')))

Train —— Step: 100 | Loss: 0.4236382246017456 | Acc: 0.78125 | F1_Score: 0.84091

Evaluation: 

2019-08-03 15:06:46 PM | Loss: 0.38133999705314636 | Precision: 0.80748 | Recall: 0.97098
Saving model...
Model saved
Train —— Step: 200 | Loss: 0.5492726564407349 | Acc: 0.76562 | F1_Score: 0.82759

Evaluation: 

2019-08-03 15:07:08 PM | Loss: 0.31501999497413635 | Precision: 0.8527 | Recall: 0.96115
Saving model...
Model saved
Train —— Step: 300 | Loss: 0.3025462329387665 | Acc: 0.85938 | F1_Score: 0.89655

Evaluation: 

2019-08-03 15:07:30 PM | Loss: 0.27430999279022217 | Precision: 0.90562 | Recall: 0.92164
Saving model...
Model saved
Train —— Step: 400 | Loss: 0.400593101978302 | Acc: 0.84375 | F1_Score: 0.86842

Evaluation: 

2019-08-03 15:07:52 PM | Loss: 0.2738099992275238 | Precision: 0.8861 | Recall: 0.94932
Saving model...
Model saved
Train —— Step: 500 | Loss: 0.2654377222061157 | Acc: 0.92188 | F1_Score: 0.9505

Evaluation: 

2019-08-03 15:08:23 PM | Loss: 0.2788200080394745 | P

Model saved
Train —— Step: 3800 | Loss: 0.2328280657529831 | Acc: 0.90625 | F1_Score: 0.925

Evaluation: 

2019-08-03 15:31:07 PM | Loss: 0.22649000585079193 | Precision: 0.91483 | Recall: 0.94975
Saving model...
Model saved
Train —— Step: 3900 | Loss: 0.16956540942192078 | Acc: 0.96875 | F1_Score: 0.97872

Evaluation: 

2019-08-03 15:31:49 PM | Loss: 0.24156999588012695 | Precision: 0.8933 | Recall: 0.96884
Saving model...
Model saved
Train —— Step: 4000 | Loss: 0.21481743454933167 | Acc: 0.90625 | F1_Score: 0.91667

Evaluation: 

2019-08-03 15:32:35 PM | Loss: 0.24254000186920166 | Precision: 0.94136 | Recall: 0.90577
Saving model...
Model saved
Train —— Step: 4100 | Loss: 0.30642974376678467 | Acc: 0.85938 | F1_Score: 0.89888

Evaluation: 

2019-08-03 15:33:22 PM | Loss: 0.22683000564575195 | Precision: 0.90764 | Recall: 0.95895
Saving model...
Model saved
Train —— Step: 4200 | Loss: 0.16877663135528564 | Acc: 0.9375 | F1_Score: 0.95555

Evaluation: 

2019-08-03 15:34:09 PM | Loss: 

2019-08-03 15:59:32 PM | Loss: 0.216389998793602 | Precision: 0.92555 | Recall: 0.94585
Saving model...
Model saved
Train —— Step: 7700 | Loss: 0.1541595607995987 | Acc: 0.92188 | F1_Score: 0.94253

Evaluation: 

2019-08-03 16:00:16 PM | Loss: 0.23015999794006348 | Precision: 0.90073 | Recall: 0.96766
Saving model...
Model saved
Train —— Step: 7800 | Loss: 0.2666506767272949 | Acc: 0.89062 | F1_Score: 0.92135

Evaluation: 

2019-08-03 16:01:04 PM | Loss: 0.2231999933719635 | Precision: 0.90582 | Recall: 0.96454
Saving model...
Model saved
Train —— Step: 7900 | Loss: 0.24289105832576752 | Acc: 0.875 | F1_Score: 0.90244

Evaluation: 

2019-08-03 16:01:46 PM | Loss: 0.21622000634670258 | Precision: 0.93118 | Recall: 0.93835
Saving model...
Model saved
Train —— Step: 8000 | Loss: 0.21055099368095398 | Acc: 0.89062 | F1_Score: 0.92473

Evaluation: 

2019-08-03 16:02:34 PM | Loss: 0.21768000721931458 | Precision: 0.93076 | Recall: 0.93861
Saving model...
Model saved
Train —— Step: 8100 | Los

HBox(children=(IntProgress(value=0, max=8939), HTML(value='')))

Train —— Step: 9000 | Loss: 0.19401875138282776 | Acc: 0.90625 | F1_Score: 0.925

Evaluation: 

2019-08-03 16:09:50 PM | Loss: 0.2135699987411499 | Precision: 0.92644 | Recall: 0.94642
Saving model...
Model saved
Train —— Step: 9100 | Loss: 0.2137695997953415 | Acc: 0.95312 | F1_Score: 0.96552

Evaluation: 

2019-08-03 16:10:33 PM | Loss: 0.23010000586509705 | Precision: 0.94327 | Recall: 0.91321
Saving model...
Model saved
Train —— Step: 9200 | Loss: 0.12573514878749847 | Acc: 0.9375 | F1_Score: 0.95556

Evaluation: 

2019-08-03 16:11:18 PM | Loss: 0.21741999685764313 | Precision: 0.91422 | Recall: 0.95879
Saving model...
Model saved
Train —— Step: 9300 | Loss: 0.21448464691638947 | Acc: 0.89062 | F1_Score: 0.91358

Evaluation: 

2019-08-03 16:12:02 PM | Loss: 0.2145799994468689 | Precision: 0.92162 | Recall: 0.95173
Saving model...
Model saved
Train —— Step: 9400 | Loss: 0.24163204431533813 | Acc: 0.875 | F1_Score: 0.90909

Evaluation: 

2019-08-03 16:12:42 PM | Loss: 0.2153999954462

2019-08-03 16:39:00 PM | Loss: 0.21281999349594116 | Precision: 0.92268 | Recall: 0.95196
Saving model...
Model saved
Train —— Step: 12900 | Loss: 0.13054189085960388 | Acc: 0.95312 | F1_Score: 0.96296

Evaluation: 

2019-08-03 16:39:47 PM | Loss: 0.21376000344753265 | Precision: 0.93455 | Recall: 0.93617
Saving model...
Model saved
Train —— Step: 13000 | Loss: 0.185133159160614 | Acc: 0.90625 | F1_Score: 0.93333

Evaluation: 

2019-08-03 16:40:32 PM | Loss: 0.22304999828338623 | Precision: 0.90469 | Recall: 0.96748
Saving model...
Model saved
Train —— Step: 13100 | Loss: 0.1918637454509735 | Acc: 0.9375 | F1_Score: 0.94872

Evaluation: 

2019-08-03 16:41:16 PM | Loss: 0.2257400006055832 | Precision: 0.902 | Recall: 0.96918
Saving model...
Model saved
Train —— Step: 13200 | Loss: 0.2740161716938019 | Acc: 0.82812 | F1_Score: 0.87641

Evaluation: 

2019-08-03 16:42:05 PM | Loss: 0.21270999312400818 | Precision: 0.93268 | Recall: 0.93939
Saving model...
Model saved
Train —— Step: 13300 |

Model saved
Train —— Step: 16700 | Loss: 0.28034573793411255 | Acc: 0.89062 | F1_Score: 0.91954

Evaluation: 

2019-08-03 17:07:39 PM | Loss: 0.20928999781608582 | Precision: 0.92846 | Recall: 0.94733
Saving model...
Model saved
Train —— Step: 16800 | Loss: 0.17404235899448395 | Acc: 0.9375 | F1_Score: 0.95122

Evaluation: 

2019-08-03 17:08:23 PM | Loss: 0.21435999870300293 | Precision: 0.93974 | Recall: 0.93036
Saving model...
Model saved
Train —— Step: 16900 | Loss: 0.23597440123558044 | Acc: 0.90625 | F1_Score: 0.93023

Evaluation: 

2019-08-03 17:09:05 PM | Loss: 0.2103700041770935 | Precision: 0.93214 | Recall: 0.94259
Saving model...
Model saved
Train —— Step: 17000 | Loss: 0.2546014189720154 | Acc: 0.92188 | F1_Score: 0.94506

Evaluation: 

2019-08-03 17:09:49 PM | Loss: 0.20945000648498535 | Precision: 0.9259 | Recall: 0.95023
Saving model...
Model saved
Train —— Step: 17100 | Loss: 0.18340665102005005 | Acc: 0.90625 | F1_Score: 0.93023

Evaluation: 

2019-08-03 17:10:31 PM | 

HBox(children=(IntProgress(value=0, max=8939), HTML(value='')))

Train —— Step: 17900 | Loss: 0.24011757969856262 | Acc: 0.9375 | F1_Score: 0.95238

Evaluation: 

2019-08-03 17:16:10 PM | Loss: 0.21127000451087952 | Precision: 0.91907 | Recall: 0.95692
Saving model...
Model saved
Train —— Step: 18000 | Loss: 0.14622238278388977 | Acc: 0.9375 | F1_Score: 0.95349

Evaluation: 

2019-08-03 17:16:51 PM | Loss: 0.20990000665187836 | Precision: 0.92851 | Recall: 0.9467
Saving model...
Model saved
Train —— Step: 18100 | Loss: 0.19263851642608643 | Acc: 0.89062 | F1_Score: 0.90909

Evaluation: 

2019-08-03 17:17:36 PM | Loss: 0.21367999911308289 | Precision: 0.91615 | Recall: 0.9601
Saving model...
Model saved
Train —— Step: 18200 | Loss: 0.23608559370040894 | Acc: 0.875 | F1_Score: 0.91304

Evaluation: 

2019-08-03 17:18:15 PM | Loss: 0.21076999604701996 | Precision: 0.93474 | Recall: 0.93829
Saving model...
Model saved
Train —— Step: 18300 | Loss: 0.23381909728050232 | Acc: 0.89062 | F1_Score: 0.92473

Evaluation: 

2019-08-03 17:18:57 PM | Loss: 0.209820

2019-08-03 17:43:33 PM | Loss: 0.20991000533103943 | Precision: 0.93234 | Recall: 0.94127
Saving model...
Model saved
Train —— Step: 21800 | Loss: 0.24980780482292175 | Acc: 0.84375 | F1_Score: 0.86487

Evaluation: 

2019-08-03 17:44:15 PM | Loss: 0.20847000181674957 | Precision: 0.9236 | Recall: 0.95238
Saving model...
Model saved
Train —— Step: 21900 | Loss: 0.2161463499069214 | Acc: 0.90625 | F1_Score: 0.92683

Evaluation: 

2019-08-03 17:45:02 PM | Loss: 0.20970000326633453 | Precision: 0.9294 | Recall: 0.94559
Saving model...
Model saved
Train —— Step: 22000 | Loss: 0.24621789157390594 | Acc: 0.89062 | F1_Score: 0.90411

Evaluation: 

2019-08-03 17:45:44 PM | Loss: 0.20890000462532043 | Precision: 0.92783 | Recall: 0.94808
Saving model...
Model saved
Train —— Step: 22100 | Loss: 0.2718270719051361 | Acc: 0.875 | F1_Score: 0.90698

Evaluation: 

2019-08-03 17:46:34 PM | Loss: 0.21511000394821167 | Precision: 0.94058 | Recall: 0.92876
Saving model...
Model saved
Train —— Step: 22200

2019-08-03 18:10:18 PM | Loss: 0.2091600000858307 | Precision: 0.92545 | Recall: 0.9517
Saving model...
Model saved
Train —— Step: 25600 | Loss: 0.20848730206489563 | Acc: 0.92188 | F1_Score: 0.93976

Evaluation: 

2019-08-03 18:11:04 PM | Loss: 0.2097799926996231 | Precision: 0.92137 | Recall: 0.95629
Saving model...
Model saved
Train —— Step: 25700 | Loss: 0.12075316905975342 | Acc: 0.9375 | F1_Score: 0.95238

Evaluation: 

2019-08-03 18:11:47 PM | Loss: 0.20895999670028687 | Precision: 0.93381 | Recall: 0.94212
Saving model...
Model saved
Train —— Step: 25800 | Loss: 0.10551349818706512 | Acc: 0.96875 | F1_Score: 0.97436

Evaluation: 

2019-08-03 18:12:33 PM | Loss: 0.20771999657154083 | Precision: 0.928 | Recall: 0.94943
Saving model...
Model saved
Train —— Step: 25900 | Loss: 0.11293695122003555 | Acc: 0.98438 | F1_Score: 0.98823

Evaluation: 

2019-08-03 18:13:19 PM | Loss: 0.20845000445842743 | Precision: 0.93178 | Recall: 0.945
Saving model...
Model saved
Train —— Step: 26000 |

HBox(children=(IntProgress(value=0, max=8939), HTML(value='')))

Train —— Step: 26900 | Loss: 0.22309647500514984 | Acc: 0.92188 | F1_Score: 0.93827

Evaluation: 

2019-08-03 18:21:11 PM | Loss: 0.22396999597549438 | Precision: 0.94941 | Recall: 0.91472
Saving model...
Model saved
Train —— Step: 27000 | Loss: 0.19705389440059662 | Acc: 0.92188 | F1_Score: 0.93976

Evaluation: 

2019-08-03 18:21:53 PM | Loss: 0.20962999761104584 | Precision: 0.92843 | Recall: 0.94918
Saving model...
Model saved
Train —— Step: 27100 | Loss: 0.19781804084777832 | Acc: 0.92188 | F1_Score: 0.93827

Evaluation: 

2019-08-03 18:22:41 PM | Loss: 0.21178999543190002 | Precision: 0.93903 | Recall: 0.93413
Saving model...
Model saved
Train —— Step: 27200 | Loss: 0.17466497421264648 | Acc: 0.92188 | F1_Score: 0.93671

Evaluation: 

2019-08-03 18:23:34 PM | Loss: 0.21049000322818756 | Precision: 0.93333 | Recall: 0.94183
Saving model...
Model saved
Train —— Step: 27300 | Loss: 0.11912639439105988 | Acc: 0.95312 | F1_Score: 0.96386

Evaluation: 

2019-08-03 18:24:15 PM | Loss: 0.

Model saved
Train —— Step: 30700 | Loss: 0.19359999895095825 | Acc: 0.95312 | F1_Score: 0.96385

Evaluation: 

2019-08-03 18:50:49 PM | Loss: 0.21052999794483185 | Precision: 0.94003 | Recall: 0.93377
Saving model...
Model saved
Train —— Step: 30800 | Loss: 0.15092867612838745 | Acc: 0.9375 | F1_Score: 0.95238

Evaluation: 

2019-08-03 18:51:34 PM | Loss: 0.20840999484062195 | Precision: 0.93057 | Recall: 0.94588
Saving model...
Model saved
Train —— Step: 30900 | Loss: 0.1694890558719635 | Acc: 0.92188 | F1_Score: 0.94506

Evaluation: 

2019-08-03 18:52:19 PM | Loss: 0.21960000693798065 | Precision: 0.90722 | Recall: 0.96839
Saving model...
Model saved
Train —— Step: 31000 | Loss: 0.20746499300003052 | Acc: 0.92188 | F1_Score: 0.92537

Evaluation: 

2019-08-03 18:53:04 PM | Loss: 0.21020999550819397 | Precision: 0.91905 | Recall: 0.95822
Saving model...
Model saved
Train —— Step: 31100 | Loss: 0.11856837570667267 | Acc: 0.96875 | F1_Score: 0.98

Evaluation: 

2019-08-03 18:53:51 PM | L

Model saved
Train —— Step: 34500 | Loss: 0.32780104875564575 | Acc: 0.84375 | F1_Score: 0.88372

Evaluation: 

2019-08-03 19:17:27 PM | Loss: 0.2070399969816208 | Precision: 0.92956 | Recall: 0.94896
Saving model...
Model saved
Train —— Step: 34600 | Loss: 0.24118295311927795 | Acc: 0.82812 | F1_Score: 0.87059

Evaluation: 

2019-08-03 19:18:14 PM | Loss: 0.20757000148296356 | Precision: 0.92459 | Recall: 0.95472
Saving model...
Model saved
Train —— Step: 34700 | Loss: 0.14100578427314758 | Acc: 0.9375 | F1_Score: 0.95455

Evaluation: 

2019-08-03 19:18:52 PM | Loss: 0.20980000495910645 | Precision: 0.93864 | Recall: 0.93661
Saving model...
Model saved
Train —— Step: 34800 | Loss: 0.20402762293815613 | Acc: 0.9375 | F1_Score: 0.95238

Evaluation: 

2019-08-03 19:19:30 PM | Loss: 0.2083899974822998 | Precision: 0.93303 | Recall: 0.94417
Saving model...
Model saved
Train —— Step: 34900 | Loss: 0.21991771459579468 | Acc: 0.85938 | F1_Score: 0.90722

Evaluation: 

2019-08-03 19:20:03 PM | 

HBox(children=(IntProgress(value=0, max=8939), HTML(value='')))

Train —— Step: 35800 | Loss: 0.1875038743019104 | Acc: 0.89062 | F1_Score: 0.91566

Evaluation: 

2019-08-03 19:26:05 PM | Loss: 0.20821000635623932 | Precision: 0.92733 | Recall: 0.95062
Saving model...
Model saved
Train —— Step: 35900 | Loss: 0.1608557552099228 | Acc: 0.9375 | F1_Score: 0.95238

Evaluation: 

2019-08-03 19:26:47 PM | Loss: 0.2091200053691864 | Precision: 0.9359 | Recall: 0.94036
Saving model...
Model saved
Train —— Step: 36000 | Loss: 0.10912846773862839 | Acc: 0.98438 | F1_Score: 0.98701

Evaluation: 

2019-08-03 19:27:21 PM | Loss: 0.2110999971628189 | Precision: 0.92161 | Recall: 0.95643
Saving model...
Model saved
Train —— Step: 36100 | Loss: 0.15164442360401154 | Acc: 0.95312 | F1_Score: 0.96907

Evaluation: 

2019-08-03 19:28:06 PM | Loss: 0.20930999517440796 | Precision: 0.92442 | Recall: 0.95467
Saving model...
Model saved
Train —— Step: 36200 | Loss: 0.18104305863380432 | Acc: 0.92188 | F1_Score: 0.95146

Evaluation: 

2019-08-03 19:28:46 PM | Loss: 0.208299

Model saved
Train —— Step: 39600 | Loss: 0.16608229279518127 | Acc: 0.92188 | F1_Score: 0.94253

Evaluation: 

2019-08-03 19:51:38 PM | Loss: 0.21069000661373138 | Precision: 0.92144 | Recall: 0.95747
Saving model...
Model saved
Train —— Step: 39700 | Loss: 0.26109030842781067 | Acc: 0.90625 | F1_Score: 0.93877

Evaluation: 

2019-08-03 19:52:22 PM | Loss: 0.21212999522686005 | Precision: 0.94075 | Recall: 0.93287
Saving model...
Model saved
Train —— Step: 39800 | Loss: 0.12692943215370178 | Acc: 0.96875 | F1_Score: 0.97959

Evaluation: 

2019-08-03 19:53:05 PM | Loss: 0.20819999277591705 | Precision: 0.92969 | Recall: 0.94799
Saving model...
Model saved
Train —— Step: 39900 | Loss: 0.37990131974220276 | Acc: 0.875 | F1_Score: 0.90244

Evaluation: 

2019-08-03 19:53:48 PM | Loss: 0.20960000157356262 | Precision: 0.92501 | Recall: 0.95379
Saving model...
Model saved
Train —— Step: 40000 | Loss: 0.16207563877105713 | Acc: 0.9375 | F1_Score: 0.95833

Evaluation: 

2019-08-03 19:54:29 PM |

Model saved
Train —— Step: 43400 | Loss: 0.11429419368505478 | Acc: 0.95312 | F1_Score: 0.96386

Evaluation: 

2019-08-03 20:16:25 PM | Loss: 0.20927000045776367 | Precision: 0.93475 | Recall: 0.94146
Saving model...
Model saved
Train —— Step: 43500 | Loss: 0.2092631459236145 | Acc: 0.89062 | F1_Score: 0.92632

Evaluation: 

2019-08-03 20:17:02 PM | Loss: 0.20975999534130096 | Precision: 0.9384 | Recall: 0.9367
Saving model...
Model saved
Train —— Step: 43600 | Loss: 0.22647346556186676 | Acc: 0.90625 | F1_Score: 0.92857

Evaluation: 

2019-08-03 20:17:39 PM | Loss: 0.21375000476837158 | Precision: 0.91557 | Recall: 0.96233
Saving model...
Model saved
Train —— Step: 43700 | Loss: 0.23423391580581665 | Acc: 0.92188 | F1_Score: 0.94117

Evaluation: 

2019-08-03 20:18:16 PM | Loss: 0.20986999571323395 | Precision: 0.92471 | Recall: 0.95336
Saving model...
Model saved
Train —— Step: 43800 | Loss: 0.1052577793598175 | Acc: 0.96875 | F1_Score: 0.97916

Evaluation: 

2019-08-03 20:18:54 PM | 

HBox(children=(IntProgress(value=0, max=8939), HTML(value='')))

Train —— Step: 44700 | Loss: 0.11068956553936005 | Acc: 0.96875 | F1_Score: 0.97222

Evaluation: 

2019-08-03 20:24:29 PM | Loss: 0.21177999675273895 | Precision: 0.92153 | Recall: 0.95681
Saving model...
Model saved
Train —— Step: 44800 | Loss: 0.239914208650589 | Acc: 0.90625 | F1_Score: 0.93182

Evaluation: 

2019-08-03 20:25:03 PM | Loss: 0.21142999827861786 | Precision: 0.92173 | Recall: 0.95732
Saving model...
Model saved
Train —— Step: 44900 | Loss: 0.13849587738513947 | Acc: 0.96875 | F1_Score: 0.97778

Evaluation: 

2019-08-03 20:25:38 PM | Loss: 0.2131900042295456 | Precision: 0.93705 | Recall: 0.93723
Saving model...
Model saved
Train —— Step: 45000 | Loss: 0.1840493232011795 | Acc: 0.9375 | F1_Score: 0.95349

Evaluation: 

2019-08-03 20:26:20 PM | Loss: 0.21424999833106995 | Precision: 0.91727 | Recall: 0.96136
Saving model...
Model saved
Train —— Step: 45100 | Loss: 0.2624504566192627 | Acc: 0.89062 | F1_Score: 0.91139

Evaluation: 

2019-08-03 20:27:02 PM | Loss: 0.209800

2019-08-03 20:50:29 PM | Loss: 0.21004000306129456 | Precision: 0.93328 | Recall: 0.94388
Saving model...
Model saved
Train —— Step: 48600 | Loss: 0.3269273340702057 | Acc: 0.82812 | F1_Score: 0.87912

Evaluation: 

2019-08-03 20:51:16 PM | Loss: 0.22269000113010406 | Precision: 0.90857 | Recall: 0.96691
Saving model...
Model saved
Train —— Step: 48700 | Loss: 0.17110371589660645 | Acc: 0.90625 | F1_Score: 0.92683

Evaluation: 

2019-08-03 20:51:57 PM | Loss: 0.2097100019454956 | Precision: 0.93078 | Recall: 0.94685
Saving model...
Model saved
Train —— Step: 48800 | Loss: 0.2394964098930359 | Acc: 0.89062 | F1_Score: 0.91765

Evaluation: 

2019-08-03 20:52:41 PM | Loss: 0.2102700024843216 | Precision: 0.93379 | Recall: 0.94336
Saving model...
Model saved
Train —— Step: 48900 | Loss: 0.24395102262496948 | Acc: 0.89062 | F1_Score: 0.91139

Evaluation: 

2019-08-03 20:53:17 PM | Loss: 0.21050000190734863 | Precision: 0.9265 | Recall: 0.95176
Saving model...
Model saved
Train —— Step: 4900

2019-08-03 21:13:17 PM | Loss: 0.21083000302314758 | Precision: 0.93255 | Recall: 0.94459
Saving model...
Model saved
Train —— Step: 52400 | Loss: 0.1620350480079651 | Acc: 0.92188 | F1_Score: 0.94624

Evaluation: 

2019-08-03 21:13:48 PM | Loss: 0.2122199982404709 | Precision: 0.92726 | Recall: 0.9514
Saving model...
Model saved
Train —— Step: 52500 | Loss: 0.18732553720474243 | Acc: 0.90625 | F1_Score: 0.93023

Evaluation: 

2019-08-03 21:14:20 PM | Loss: 0.21052999794483185 | Precision: 0.93184 | Recall: 0.9453
Saving model...
Model saved
Train —— Step: 52600 | Loss: 0.0948861762881279 | Acc: 0.98438 | F1_Score: 0.98877

Evaluation: 

2019-08-03 21:14:48 PM | Loss: 0.20923000574111938 | Precision: 0.93236 | Recall: 0.94618
Saving model...
Model saved
Train —— Step: 52700 | Loss: 0.13543882966041565 | Acc: 0.9375 | F1_Score: 0.94737

Evaluation: 

2019-08-03 21:15:30 PM | Loss: 0.21025000512599945 | Precision: 0.92613 | Recall: 0.95357
Saving model...
Model saved
Train —— Step: 52800

最终验证集结果

冻结Embedding的情况下 —— <font size=3 color=red>**Precision: 0.92241, Recall: 0.9568**</font>