In [None]:
from google.colab import drive
drive.mount('/content/drive')

!python -m pip install -U "tensorflow-text==2.9.*"
!python -m pip install tensorflow-hub
!python -m pip install tensorflow_datasets

%cd /content/drive/MyDrive/2022_2_machine_learning_hw4/TFNLP_prac
import sys
sys.path.append('/content/drive/MyDrive/2022_2_machine_learning_hw4/TFNLP_prac')
print(sys.path)

import tensorflow as tf
print(tf.config.list_physical_devices('GPU'))

In [None]:
import argparse
from dataset import build as dataset_build
from model import build as model_build

In [None]:
def parse_args(argv=None):
    """Build the experiment configuration as an ``argparse.Namespace``.

    The notebook kernel's own ``sys.argv`` is never consulted; options come
    only from ``argv`` and otherwise fall back to the defaults declared below.

    Args:
        argv: Optional sequence of command-line style strings, for example
            ``['--dataset', 'imdb', '--epochs', '3']``, used to override
            individual defaults. ``None`` (the default) applies no overrides.

    Returns:
        argparse.Namespace holding one attribute per option defined below.

    Raises:
        SystemExit: raised by argparse when ``argv`` contains an unknown
            option or a value that cannot be converted to the option's type.
    """
    parser = argparse.ArgumentParser(
        description='Machine Learning Homework 4 : NLP')

    # model setting
    # The defaults select the following components:
    #   word_embed : bert
    #   encoder    : bert
    #   decoder    : transformer
    #   head       : sentiment_head
    parser.add_argument('--word_embed', default='bert', type=str)
    parser.add_argument('--encoder', default='bert', type=str)
    parser.add_argument('--decoder', default='transformer', type=str)
    parser.add_argument('--head', default='sentiment_head', type=str)

    # dataset setting
    # imdb or raw_imdb
    parser.add_argument('--dataset', default='raw_imdb', type=str)

    # imdb dataset setting
    parser.add_argument('--n_words', default=10000, type=int)
    parser.add_argument('--max_len', default=128, type=int)
    parser.add_argument('--dim_embedding', default=256, type=int)

    # encoder
    parser.add_argument('--encoder_n_layer', default=2, type=int)

    # cnn encoder
    parser.add_argument('--cnn_kernel_size', default=3, type=int)

    # transformer
    parser.add_argument('--encoder_n_head', default=8, type=int)

    # decoder
    parser.add_argument('--decoder_n_layer', default=2, type=int)
    parser.add_argument('--decoder_n_head', default=4, type=int)

    # bert
    parser.add_argument('--bert_model_name', default='small_bert/bert_en_uncased_L-4_H-256_A-4', type=str)

    # hyper parameter
    parser.add_argument('--drop_out', default=0.05, type=float)
    parser.add_argument('--optimizer', default='adam', type=str)
    parser.add_argument('--loss', default='binary_crossentropy', type=str)
    parser.add_argument('--epochs', default=5, type=int)
    parser.add_argument('--batch_size', default=500, type=int)
    parser.add_argument('--seed', default=42, type=int)

    # An explicit list (empty when no overrides are given) keeps argparse from
    # reading the kernel's sys.argv, which holds Jupyter's own launch arguments.
    return parser.parse_args(args=[] if argv is None else list(argv))

In [None]:
if __name__ == '__main__':
    # Driver cell: build the model, train it on the selected dataset, and
    # report the final-epoch metrics.
    args = parse_args()

    # Validate the dataset choice before building the model, so a typo fails
    # immediately. Raising (instead of calling exit()) reports a real error
    # and leaves the notebook kernel alive.
    supported_datasets = ('imdb', 'raw_imdb')
    if args.dataset not in supported_datasets:
        raise ValueError(
            "args.dataset error: unsupported dataset {!r}, expected one of {}".format(
                args.dataset, supported_datasets))

    # NOTE(review): args.seed is parsed but not applied anywhere in this cell;
    # confirm whether build_model / build_dataset consume it.
    model = model_build.build_model(args)
    model.summary()
    model.compile(optimizer=args.optimizer, loss=args.loss, metrics=["accuracy"])

    if args.dataset == 'imdb':
        (x_train, y_train), (x_test, y_test) = dataset_build.build_dataset(args)
        # The test split doubles as validation data here, so the final-epoch
        # validation metrics are the test metrics.
        score = model.fit(x_train, y_train,
                          epochs=args.epochs,
                          batch_size=args.batch_size,
                          validation_data=(x_test, y_test))
        print("\nTest loss:", score.history['val_loss'][-1])
        print('Test accuracy:', score.history['val_accuracy'][-1])

    else:  # args.dataset == 'raw_imdb'
        train_ds, val_ds, test_ds = dataset_build.build_dataset(args)
        score = model.fit(x=train_ds,
                          validation_data=val_ds,
                          epochs=args.epochs)
        # These numbers come from val_ds, so they are labelled as validation
        # metrics rather than test metrics.
        print("\nValidation loss:", score.history['val_loss'][-1])
        print('Validation accuracy:', score.history['val_accuracy'][-1])

        # Report the real test metrics on the otherwise unused test split.
        # Assumes test_ds yields (inputs, labels) batches like val_ds —
        # TODO confirm against dataset_build.build_dataset.
        test_metrics = model.evaluate(test_ds, return_dict=True)
        print("\nTest loss:", test_metrics['loss'])
        print('Test accuracy:', test_metrics['accuracy'])