In [1]:
import numpy as np
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
# import seaborn as sns
import pickle
import time
import gc
from tqdm import tqdm, tqdm_notebook

%matplotlib inline

#每次可以输出多个变量
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from pylab import rcParams
rcParams['figure.figsize'] = 14, 6

import warnings
warnings.filterwarnings("ignore")

#中文字体
import matplotlib
matplotlib.use('qt4agg')
#指定默认字体
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['font.family'] = 'sans-serif'
#解决负号'-'显示为方块的问题
matplotlib.rcParams['axes.unicode_minus'] = False

In [2]:
import os
import sys
import time
from datetime import timedelta
import logging

import numpy as np
import tensorflow as tf

from PIL import Image

sys.path.append("src")
from input import *
from model import Model
from sklearn.metrics import f1_score

Using TensorFlow backend.


In [3]:
logging.basicConfig(
        level = logging.INFO, 
        format = "[%(asctime)s] %(message)s",
        datefmt = "%Y-%m-%d %H:%M:%S",
        )

In [4]:
expname = "classification"

params = {
    "train_data": "./data/train.txt",
    "predict_data": "./data/predict.txt",
#     "train_data": "../../../feature/rnn_train.txt",
#     "predict_data": "../../../feature/rnn_predict.txt",

    "dictionary_path": "./output/dictionary",
    "model_path": "./output/model",
    "train_summary_path": "./output/summary",
    "eval_summary_path": "./output/eval",

    "train_epoch": 10,
    "batch_size": 64,
    "shuffle": False,
    "dictionary_cutoff": 0,
    "num_targets": 2,
    "word_embedding_dim": 200,
    "letter_embedding_dim": 200,
    "tag_embedding_dim": 100,
    "is_use_pretrained_word_embedding": True,
    "pretrained_emnedding_dir": "./w2v/dict_tencent",
    "keep_prob": 0.5,
    "cnn_layers": {
        "filter_width": 3,
        "output_channel": 64,
    },
    "gru_layers": {
        "padding_len": 10,
        "gru_size": 64,
        "gru_layers": 2,
        "attention_size": 32,
    },
    "dense_layers": [
        {"hidden_units": 2048},
        {"hidden_units": 512},
        {"hidden_units": 128},
    ],

    "optimizer_type": "adam",
    "learning_rate": 0.005,
    "lr_decay": 1,
    "lr_decay_steps": 10,
    "clip": 0.2,

    "steps_per_run": 10,
}

In [5]:
train_set, valid_set = load_data(params["train_data"])
dictionary = Dictionary(params, train_set)
dictionary.save(params["dictionary_path"])
pad_id = dictionary.pad_id()
print ("pad_id: ", pad_id)
vocab_size = dictionary.vocab_size()
print ("vocab_size: ", vocab_size)

pad_id:  {'tag': 1, 'words': 1, 'letters': 1}
vocab_size:  {'tag': 22, 'words': 9356, 'letters': 2707}


In [6]:
train_set = dictionary.to_id(train_set)
valid_set = dictionary.to_id(valid_set)

In [7]:
train_bm = BatchManager(train_set, 1, params, pad_id)
valid_bm = BatchManager(valid_set, 1, params, pad_id)

In [8]:
model = Model(params, vocab_size, dictionary)
merge_summary = tf.summary.merge_all()

In [9]:
def metrics(targets, preds):
    preds = [int(i[1]>=0.5) for i in preds]
    return f1_score(targets, preds), ""


def get_time_dif(start_time):
    end_time = time.time()
    time_dif = end_time - start_time
    return timedelta(seconds=int(round(time_dif)))

In [10]:
summary_path = os.path.join(params["train_summary_path"], expname)
os.system("rm -rf %s" % summary_path)
model_path = os.path.join(params["model_path"], expname)
os.system("rm -rf %s" % model_path)
eval_summary_path = os.path.join(params["eval_summary_path"], expname)
os.system("rm -rf %s" % eval_summary_path)
os.system("mkdir -p %s" % eval_summary_path)

train_writer = tf.summary.FileWriter(summary_path, tf.get_default_graph())

0

0

0

0

In [13]:
x = tf.constant([[1,2,3],[2,3,4]], dtype=tf.float32) 
y = tf.constant([[2,3,4],[3,4,5]], dtype=tf.float32)

config = tf.ConfigProto()
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    sess.run([x, y])
    print(x.shape, y.shape)
    sess.run(tf.diag_part(tf.matmul(x, y, transpose_b=True)))

[array([[1., 2., 3.],
        [2., 3., 4.]], dtype=float32), array([[2., 3., 4.],
        [3., 4., 5.]], dtype=float32)]

(2, 3) (2, 3)


array([20., 38.], dtype=float32)

In [11]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    initializer = tf.global_variables_initializer()
    sess.run(initializer)
    steps_per_run = params["steps_per_run"]
    global_step = 0
    best_f1 = 0.
    start_time = time.time()
    valid_step = 0
    for epoch in range(params["train_epoch"]):
        train_bm.init()
        while True:
            global_step, loss, n_steps, (f1, _) = model.train(sess, train_bm, steps_per_run, metrics, 
                    merge_summary=merge_summary, train_writer=train_writer)
            logging.info("TRAIN %d steps[%d]: loss %.4f  f1 %.4f" % (global_step, epoch, loss, f1))
            if train_bm.is_finished:
                break
            
            valid_step += 1
            if valid_step % 50 == 0:
                valid_bm.init()
                loss, (f1, confusion_matrix) = model.eval(sess, valid_bm, metrics)
                if f1 > best_f1:
                    best_f1 = f1
                    model.save(sess, save_path=model_path)
                    best_flag = '*'
                else:
                    best_flag = ''
                
                time_dif = get_time_dif(start_time)
                logging.info("EVALUATION: %d steps: loss %.4f  f1 %.4f  cost_time %s  %s" % (global_step, loss, f1, str(time_dif), best_flag))

[2018-10-30 22:38:31] TRAIN 10 steps[0]: loss 2.2118  f1 0.3843
[2018-10-30 22:38:51] TRAIN 20 steps[0]: loss 0.6659  f1 0.0103
[2018-10-30 22:39:09] TRAIN 30 steps[0]: loss 0.6956  f1 0.1074
[2018-10-30 22:39:13] TRAIN 32 steps[0]: loss 0.6891  f1 0.0000
[2018-10-30 22:39:34] TRAIN 42 steps[1]: loss 0.6871  f1 0.0000
[2018-10-30 22:39:54] TRAIN 52 steps[1]: loss 0.6687  f1 0.0000
[2018-10-30 22:40:13] TRAIN 62 steps[1]: loss 0.6661  f1 0.0000
[2018-10-30 22:40:17] TRAIN 64 steps[1]: loss 0.8626  f1 0.0000
[2018-10-30 22:40:37] TRAIN 74 steps[2]: loss 0.6713  f1 0.1571
[2018-10-30 22:40:57] TRAIN 84 steps[2]: loss 0.5723  f1 0.0000
[2018-10-30 22:41:17] TRAIN 94 steps[2]: loss 0.5712  f1 0.2637
[2018-10-30 22:41:20] TRAIN 96 steps[2]: loss 0.3578  f1 0.7500
[2018-10-30 22:41:40] TRAIN 106 steps[3]: loss 0.6743  f1 0.4962
[2018-10-30 22:42:00] TRAIN 116 steps[3]: loss 0.3770  f1 0.6582
[2018-10-30 22:42:19] TRAIN 126 steps[3]: loss 0.6315  f1 0.5862
[2018-10-30 22:42:23] TRAIN 128 steps