# 0. 데이터 불러오기

In [None]:
# 구글 드라이브와 마운트 먼저 해주세요

import pandas as pd

data_train = pd.read_csv('/content/drive/My Drive/data/emoint_train.csv')
data_test = pd.read_csv('/content/drive/My Drive/data/emoint_test.csv')
data_train = data_train[['sentence', 'emotion']]
data_test = data_test[['sentence', 'emotion']]

data_test.head()

Unnamed: 0,sentence,emotion
0,At the point today where if someone says somet...,anger
1,@CorningFootball IT'S GAME DAY!!!! T MIN...,anger
2,This game has pissed me off more than any othe...,anger
3,@spamvicious I've just found out it's Candice ...,anger
4,@moocowward @mrsajhargreaves @Melly77 @GaryBar...,anger


# 1. 데이터 전처리
트위터 문장인 점을 감안하여 전처리를 해줍니다. 또, 감정 레이블을 숫자형으로 변환시켜줍니다.

In [None]:
import re

def resub(text_series):
  ls = []
  for i in range(len(text_series)):
    sent = text_series[i]
    sent = re.sub('@\w+','',sent)
    sent = re.sub('!!+','!',sent)
    sent = re.sub('~+','~',sent)
    sent = re.sub('[?]+','?',sent) 
    sent = re.sub('[.][.]+','...',sent) 
    ls.append(sent)
  new_series = pd.Series(ls)
  return new_series
    
data_train['sentence'] = resub(data_train['sentence'])
data_test['sentence'] = resub(data_test['sentence'])

data_test.head()

Unnamed: 0,sentence,emotion
0,At the point today where if someone says somet...,anger
1,IT'S GAME DAY! T MINUS 14:30 #relentless,anger
2,This game has pissed me off more than any othe...,anger
3,I've just found out it's Candice and not Cand...,anger
4,if he can't come to my Mum'a 60th after 25...,anger


In [None]:
data_test['emotion'].unique()

array(['anger', 'fear', 'joy', 'sadness'], dtype=object)

In [None]:
def label(x):
  if x=='anger': return '0'
  elif x=='fear': return '1'
  elif x=='joy': return '2'
  elif x=='sadness': return '3'
  else: return x

data_train["emotion"] = data_train["emotion"].apply(label)
data_test["emotion"] = data_test["emotion"].apply(label)

data_test.head()

Unnamed: 0,sentence,emotion
0,At the point today where if someone says somet...,0
1,IT'S GAME DAY! T MINUS 14:30 #relentless,0
2,This game has pissed me off more than any othe...,0
3,I've just found out it's Candice and not Cand...,0
4,if he can't come to my Mum'a 60th after 25...,0


In [None]:
data_test['sentence'].str.contains("\n").unique()

array([False])

In [None]:
'''
# Saving dataframes to .tsv format as required by BERT
!mkdir emotion_csv
data_train.to_csv('/content/emotion_csv/train.tsv', sep='\t', index=False, header=False)
data_test.to_csv('/content/emotion_csv/test.tsv', sep='\t', index=False, header=True)
'''

"\n# Saving dataframes to .tsv format as required by BERT\n!mkdir emotion_csv\ndata_train.to_csv('/content/emotion_csv/train.tsv', sep='\t', index=False, header=False)\ndata_test.to_csv('/content/emotion_csv/test.tsv', sep='\t', index=False, header=True)\n"

## 2. <3. 학습 (TPU)>를 위한 학습데이터 전처리
TPU 사용 모델의 인풋 형태를 맞춰줍니다

In [None]:
train = pd.DataFrame()
train = pd.concat([data_train[['emotion']], pd.DataFrame([i for i in range(len(data_train))]), pd.DataFrame([i for i in range(len(data_train))]), data_train[['sentence']], data_train[['sentence']]], axis=1)
train.columns = ['Quality', '#1 ID', '#2 ID', '#1 String', '#2 String']
train.head()

test = pd.DataFrame()
test = pd.concat([pd.DataFrame([i for i in range(len(data_test))]), pd.DataFrame([i for i in range(len(data_test))]), pd.DataFrame([i for i in range(len(data_test))]), data_test[['sentence']], data_test[['sentence']]], axis=1)
test.columns = ['Index', '#1 ID', '#2 ID', '#1 String', '#2 String']
test.head()

Unnamed: 0,Index,#1 ID,#2 ID,#1 String,#2 String
0,0,0,0,At the point today where if someone says somet...,At the point today where if someone says somet...
1,1,1,1,IT'S GAME DAY! T MINUS 14:30 #relentless,IT'S GAME DAY! T MINUS 14:30 #relentless
2,2,2,2,This game has pissed me off more than any othe...,This game has pissed me off more than any othe...
3,3,3,3,I've just found out it's Candice and not Cand...,I've just found out it's Candice and not Cand...
4,4,4,4,if he can't come to my Mum'a 60th after 25...,if he can't come to my Mum'a 60th after 25...


In [None]:
# Saving dataframes to .tsv format as required by BERT
!mkdir emotion_csv
train.to_csv('/content/emotion_csv/train.tsv', sep='\t', index=False, header=False)
test.to_csv('/content/emotion_csv/test.tsv', sep='\t', index=False, header=True)


mkdir: cannot create directory ‘emotion_csv’: File exists


#3. 학습 (망함)

!python run_classifier.py 
    --task_name=cola 
    --do_train=true 
    --do_eval=true 
    --data_dir=\emoint_csv\ 
    --vocab_file=\BERT\uncased_L-12_H-768_A-12\uncased_L-12_H-768_A-12\vocab.txt 
    --bert_config_file=\BERT\uncased_L-12_H-768_A-12\uncased_L-12_H-768_A-12\bert_config.json
    --init_checkpoint=\BERT\uncased_L-12_H-768_A-12\uncased_L-12_H-768_A-12\bert_model.ckpt 
    --max_seq_length=64 
    --train_batch_size=2 
    --learning_rate=2e-5 
    --num_train_epochs=3.0 
    --output_dir=\BERT\bert_output\ 
    --do_lower_case=True
    --save_checkpoints_steps 10000
    

["0","1" ..] 에서 "가 특수문자인 문제 해결  
tf.compat.v1.train.Optimizer 문제 해결  
문제가 계속되어 2.3 삭제 후 1.14 설치

In [None]:
!git clone https://github.com/google-research/bert.git


In [None]:
!sh /content/bert/run.sh

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/absl/flags/_flagvalues.py", line 528, in _assert_validators
    validator.verify(self)
  File "/usr/local/lib/python3.6/dist-packages/absl/flags/_validators.py", line 82, in verify
    raise _exceptions.ValidationError(self.message)
absl.flags._exceptions.ValidationError: Flag --data_dir must have

In [None]:
!pip uninstall tensorflow

Uninstalling tensorflow-2.3.0:
  Would remove:
    /usr/local/bin/estimator_ckpt_converter
    /usr/local/bin/saved_model_cli
    /usr/local/bin/tensorboard
    /usr/local/bin/tf_upgrade_v2
    /usr/local/bin/tflite_convert
    /usr/local/bin/toco
    /usr/local/bin/toco_from_protos
    /usr/local/lib/python3.6/dist-packages/tensorflow-2.3.0.dist-info/*
    /usr/local/lib/python3.6/dist-packages/tensorflow/*
Proceed (y/n)? y
  Successfully uninstalled tensorflow-2.3.0


In [None]:
!pip install tensorflow==1.14

Collecting tensorflow==1.14
[?25l  Downloading https://files.pythonhosted.org/packages/de/f0/96fb2e0412ae9692dbf400e5b04432885f677ad6241c088ccc5fe7724d69/tensorflow-1.14.0-cp36-cp36m-manylinux1_x86_64.whl (109.2MB)
[K     |████████████████████████████████| 109.2MB 84kB/s 
Collecting tensorflow-estimator<1.15.0rc0,>=1.14.0rc0
[?25l  Downloading https://files.pythonhosted.org/packages/3c/d5/21860a5b11caf0678fbc8319341b0ae21a07156911132e0e71bffed0510d/tensorflow_estimator-1.14.0-py2.py3-none-any.whl (488kB)
[K     |████████████████████████████████| 491kB 42.2MB/s 
Collecting tensorboard<1.15.0,>=1.14.0
[?25l  Downloading https://files.pythonhosted.org/packages/91/2d/2ed263449a078cd9c8a9ba50ebd50123adf1f8cfbea1492f9084169b89d9/tensorboard-1.14.0-py3-none-any.whl (3.1MB)
[K     |████████████████████████████████| 3.2MB 45.1MB/s 
[?25hCollecting keras-applications>=1.0.6
[?25l  Downloading https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a88

In [None]:
import tensorflow as tf
tf.__version__

'2.3.0'

# 3. 학습 (TPU)


In [None]:
'''
import tensorflow as tf
tf.__version__
'''

'\nimport tensorflow as tf\ntf.__version__\n'

In [None]:
# !pip uninstall tensorflow

In [None]:
# !pip install tensorflow==1.14

RESTART RUNTIME 버튼을 눌러야 함


In [None]:
import datetime
import json
import csv
import os
import pprint
import random
import string
import sys
import tensorflow as tf

assert 'COLAB_TPU_ADDR' in os.environ, 'ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!'
TPU_ADDRESS = 'grpc://' + os.environ['COLAB_TPU_ADDR']
print('TPU address is', TPU_ADDRESS)

from google.colab import auth
auth.authenticate_user()
with tf.compat.v1.Session(TPU_ADDRESS) as session:
  print('TPU devices:')
  pprint.pprint(session.list_devices())

  # Upload credentials to TPU.
  with open('/content/adc.json', 'r') as f:
    auth_info = json.load(f)
  tf.contrib.cloud.configure_gcs(session, credentials=auth_info)
  # Now credentials are set for all future sessions on this TPU.

TPU address is grpc://10.11.46.82:8470
TPU devices:
[_DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:CPU:0, CPU, -1, 1753833705938056072),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 8986590505526519860),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 15330041861167025966),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 6539390160276144647),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 10326232523896083260),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:3, TPU, 17179869184, 17179514159235774004),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:4, TPU, 17179869184, 15780599816054567215),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:5, TPU, 17179869184, 2317231723356321691),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:6, TPU, 17179869184, 1157421127798

In [None]:
import sys

!test -d bert_repo || git clone https://github.com/google-research/bert bert_repo
if not 'bert_repo' in sys.path:
  sys.path += ['bert_repo']

# import python modules defined by BERT
import modeling
import optimization
import run_classifier
import run_classifier_with_tfhub
import tokenization

# import tfhub 
import tensorflow_hub as hub

In [None]:
tokenizer = run_classifier_with_tfhub.create_tokenizer_from_hub_module(BERT_MODEL_HUB)
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

In [None]:
TASK = 'train.tsv' #@param {type:"string"}
# assert TASK in ('MRPC', 'CoLA'), 'Only (MRPC, CoLA) are demonstrated here.'

TASK_DATA_DIR = '/content/emotion_csv'
print('***** Task data directory: {} *****'.format(TASK_DATA_DIR))
!ls $TASK_DATA_DIR

BUCKET = '/content/emotion_csv' #@param {type:"string"}
assert BUCKET, 'Must specify an existing GCS bucket name'
OUTPUT_DIR = '{}/bert-tfhub/models/'.format(BUCKET)
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))

# Available pretrained model checkpoints:
#   uncased_L-12_H-768_A-12: uncased BERT base model
#   uncased_L-24_H-1024_A-16: uncased BERT large model
#   cased_L-12_H-768_A-12: cased BERT large model
BERT_MODEL = 'uncased_L-12_H-768_A-12' #@param {type:"string"}
BERT_MODEL_HUB = 'https://tfhub.dev/google/bert_' + BERT_MODEL + '/1'

***** Task data directory: /content/emotion_csv *****
bert-tfhub  test.tsv  train.tsv
***** Model output directory: /content/emotion_csv/bert-tfhub/models/ *****


train_examples = processor.get_train_examples('/content/emotion_csv/') 여기서 문제 발생. 원래 예제에서는 총 5 칼럼 Quality, id1, id2, sentence1, sentence2 형식. 이걸 고쳐줘야 함.  
get_labels도 고쳐야 함.

In [None]:
TRAIN_BATCH_SIZE = 32
EVAL_BATCH_SIZE = 8
PREDICT_BATCH_SIZE = 8
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
MAX_SEQ_LENGTH = 128
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 1000
SAVE_SUMMARY_STEPS = 500

processors = {
  "cola": run_classifier.ColaProcessor,
  "mnli": run_classifier.MnliProcessor,
  "mrpc": run_classifier.MrpcProcessor,
}
processor = processors['cola']()
label_list = processor.get_labels()

# Compute number of train and warmup steps from batch size
train = data_train
train_examples = processor.get_train_examples(TASK_DATA_DIR)
num_train_steps = int(len(train_examples) / TRAIN_BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

# Setup TPU related config
tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)
NUM_TPU_CORES = 8
ITERATIONS_PER_LOOP = 1000
      
def get_run_config(output_dir):
  return tf.contrib.tpu.RunConfig(
    cluster=tpu_cluster_resolver,
    model_dir=output_dir,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS,
    tpu_config=tf.contrib.tpu.TPUConfig(
        iterations_per_loop=ITERATIONS_PER_LOOP,
        num_shards=NUM_TPU_CORES,
        per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2))


여러가지 시도해봄 

tsv 파일을 읽어와서 진행하는 건데 왜인지 slicing이 문자단위로 걸려서 이상해짐. 아예 처음부터 그냥 dataframe을 나눠서 입력해주려 했는데 다른 함수 부분에서 다 걸려서 형식을 바꾸기 힘들듯.   
그냥 tsv로 바꾸는 부분에서 먼저 형식을 맞춰놓으면 될 듯. id 추가하고 label 부분 추가하고 하면 될듯?

# 4. 학습

In [None]:
# Force TF Hub writes to the GS bucket we provide.
os.environ['TFHUB_CACHE_DIR'] = OUTPUT_DIR

model_fn = run_classifier_with_tfhub.model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps,
  use_tpu=True,
  bert_hub_module_handle=BERT_MODEL_HUB
)

estimator_from_tfhub = tf.contrib.tpu.TPUEstimator(
  use_tpu=True,
  model_fn=model_fn,
  config=get_run_config(OUTPUT_DIR),
  train_batch_size=TRAIN_BATCH_SIZE,
  eval_batch_size=EVAL_BATCH_SIZE,
  predict_batch_size=PREDICT_BATCH_SIZE,
)






INFO:tensorflow:Using config: {'_model_dir': '/content/emotion_csv/bert-tfhub/models/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 1000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.11.46.82:8470"
    }
  }
}
isolate_session_state: true
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f900ed54860>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://10.11.46.82:8470', '_evaluation_master': 'grpc://10.11.46.82:8470', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_

INFO:tensorflow:Using config: {'_model_dir': '/content/emotion_csv/bert-tfhub/models/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 1000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.11.46.82:8470"
    }
  }
}
isolate_session_state: true
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f900ed54860>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://10.11.46.82:8470', '_evaluation_master': 'grpc://10.11.46.82:8470', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_

INFO:tensorflow:_TPUContext: eval_on_tpu True


INFO:tensorflow:_TPUContext: eval_on_tpu True


In [None]:
# Train the model
def model_train(estimator):
  print('MRPC/CoLA on BERT base model normally takes about 2-3 minutes. Please wait...')
  # We'll set sequences to be at most 128 tokens long.
  train_features = run_classifier.convert_examples_to_features(
      train_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  print('***** Started training at {} *****'.format(datetime.datetime.now()))
  print('  Num examples = {}'.format(len(train_examples)))
  print('  Batch size = {}'.format(TRAIN_BATCH_SIZE))
  tf.logging.info("  Num steps = %d", num_train_steps)
  train_input_fn = run_classifier.input_fn_builder(
      features=train_features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=True,
      drop_remainder=True)
  estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
  print('***** Finished training at {} *****'.format(datetime.datetime.now()))



In [None]:
model_train(estimator_from_tfhub)

MRPC/CoLA on BERT base model normally takes about 2-3 minutes. Please wait...
INFO:tensorflow:Writing example 0 of 3960


INFO:tensorflow:Writing example 0 of 3960


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: train-0


INFO:tensorflow:guid: train-0


INFO:tensorflow:tokens: [CLS] how the fu * k ! who the heck ! moved my fridge ! . . . should i knock the landlord door . # angry # mad # # [SEP]


INFO:tensorflow:tokens: [CLS] how the fu * k ! who the heck ! moved my fridge ! . . . should i knock the landlord door . # angry # mad # # [SEP]


INFO:tensorflow:input_ids: 101 2129 1996 11865 1008 1047 999 2040 1996 17752 999 2333 2026 16716 999 1012 1012 1012 2323 1045 7324 1996 18196 2341 1012 1001 4854 1001 5506 1001 1001 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 2129 1996 11865 1008 1047 999 2040 1996 17752 999 2333 2026 16716 999 1012 1012 1012 2323 1045 7324 1996 18196 2341 1012 1001 4854 1001 5506 1001 1001 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: train-1


INFO:tensorflow:guid: train-1


INFO:tensorflow:tokens: [CLS] so my indian uber driver just called someone the n word . if i wasn ' t in a moving vehicle i ' d have jumped out # disgusted [SEP]


INFO:tensorflow:tokens: [CLS] so my indian uber driver just called someone the n word . if i wasn ' t in a moving vehicle i ' d have jumped out # disgusted [SEP]


INFO:tensorflow:input_ids: 101 2061 2026 2796 19169 4062 2074 2170 2619 1996 1050 2773 1012 2065 1045 2347 1005 1056 1999 1037 3048 4316 1045 1005 1040 2031 5598 2041 1001 17733 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 2061 2026 2796 19169 4062 2074 2170 2619 1996 1050 2773 1012 2065 1045 2347 1005 1056 1999 1037 3048 4316 1045 1005 1040 2031 5598 2041 1001 17733 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


KeyError: ignored


KeyError: '2' 문제 발생.  
아무래도 문장 1대1 비교 task 모델을 다중 감성 분류 모델로 전환하는 과정에서 오류가 발생한 것으로 보임. [0,1] 다음에 2 에서 걸리는 듯.   
아무래도 프로젝트 마감 기한이 다가오는데 그 안에 이 방식을 완성하긴 어려울 듯.

포기

In [None]:
def model_eval(estimator):
  # Eval the model.
  eval_examples = processor.get_dev_examples(TASK_DATA_DIR)
  eval_features = run_classifier.convert_examples_to_features(
      eval_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  print('***** Started evaluation at {} *****'.format(datetime.datetime.now()))
  print('  Num examples = {}'.format(len(eval_examples)))
  print('  Batch size = {}'.format(EVAL_BATCH_SIZE))

  # Eval will be slightly WRONG on the TPU because it will truncate
  # the last batch.
  eval_steps = int(len(eval_examples) / EVAL_BATCH_SIZE)
  eval_input_fn = run_classifier.input_fn_builder(
      features=eval_features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=False,
      drop_remainder=True)
  result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
  print('***** Finished evaluation at {} *****'.format(datetime.datetime.now()))
  output_eval_file = os.path.join(OUTPUT_DIR, "eval_results.txt")
  with tf.gfile.GFile(output_eval_file, "w") as writer:
    print("***** Eval results *****")
    for key in sorted(result.keys()):
      print('  {} = {}'.format(key, str(result[key])))
      writer.write("%s = %s\n" % (key, str(result[key])))
