In [1]:
import torch.backends.cudnn

import models.args
import os

def get_arg_parser():
    parser = models.args.get_args()

    parser.add_argument('--bidirectional', action='store_true')
    parser.add_argument('--bottleneck-layer', action='store_true')
    parser.add_argument('--num-layers', type=int, default=2)
    parser.add_argument('--hidden-dim', type=int, default=256)
    parser.add_argument('--mode', type=str, default='static', choices=['rand', 'static', 'non-static'])
    parser.add_argument('--dataset', type=str, default='Reuters', choices=['Reuters', 'AAPD', 'IMDB', 'Yelp2014'])
    parser.add_argument('--words-dim', type=int, default=300)
    parser.add_argument('--embed-dim', type=int, default=300)
    parser.add_argument('--epoch-decay', type=int, default=15)
    parser.add_argument('--weight-decay', type=float, default=0)

    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--wdrop', type=float, default=0.0, help="weight drop")
    parser.add_argument('--beta-ema', type=float, default=0, help="temporal averaging")
    parser.add_argument('--embed-droprate', type=float, default=0.0, help="embedding dropout")
    parser.add_argument('--tar', type=float, default=0.0, help="temporal activation regularization")
    parser.add_argument('--ar', type=float, default=0.0, help="activation regularization")

    parser.add_argument('--word-vectors-dir', default=os.path.join(os.pardir, 'hedwig-data', 'embeddings', 'word2vec'))
    parser.add_argument('--word-vectors-file', default='GoogleNews-vectors-negative300.txt')
    parser.add_argument('--save-path', type=str, default=os.path.join('model_checkpoints', 'reg_lstm'))
    parser.add_argument('--resume-snapshot', type=str)
    parser.add_argument('--trained-model', type=str)

    return parser

parser = get_arg_parser()
args = parser.parse_args(
    ["--mode=static", "--batch-size=32", "--lr=0.01", "--epochs=30", "--bidirectional", "--num-layers=1", "--hidden-dim=512",  "--wdrop=0.1", "--embed-droprate=0.2", "--dropout=0.5", "--beta-ema=0.99", "--seed=3435"]
)
# __main__.py adds some things to args
args.gpu = -1
args.cuda = False  # I don't have a GPU
args.dataset = "AG_NEWS"

  _dtype_to_storage = {data_type(0).dtype: data_type for data_type in _storages}


Process the Dataset

In [3]:
from copy import deepcopy
from models.reg_lstm.model import RegLSTM
import logging
import numpy as np
import random
import torch

from common.train import TrainerFactory
from common.evaluate import ClassificationEvaluator
from torchtext.datasets import AG_NEWS
# from torchtext.data import Iterator

# copy function from __main__.py because __main__ is not compatible
#  with the version of torchtext I use

def get_logger():
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    return logger

logger = get_logger()

DATASET_NAME = "AG_NEWS"
NUM_CLASSES = 4
IS_MULTILABEL = False

# set random seed
torch.manual_seed(args.seed)
torch.backends.cudnn.deterministic = True
np.random.seed(args.seed)
random.seed(args.seed)

train_iter, test_iter = AG_NEWS(split=('train', 'test'))

# train_data_loader = Iterator(train_iter, batch_size=args.batch_size)
# test_data_loader = Iterator(test_iter, batch_size=args.batch_size)

config = deepcopy(args)
config.dataset = train_iter
config.target_class = NUM_CLASSES
config.words_num = len(train_iter.get_vocab())

save_path = os.path.join(args.save_path, DATASET_NAME)
os.makedirs(save_path, exist_ok=True)

AttributeError: 

Set up and train the model

In [None]:
model = RegLSTM(config)
parameter = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(parameter, lr=args.lr, weight_decay=args.weight_decay)

trainer_config = {
        'optimizer': optimizer,
        'batch_size': args.batch_size,
        'log_interval': args.log_every,
        'patience': args.patience,
        'model_outfile': args.save_path,
        'logger': logger,
        'is_multilabel': IS_MULTILABEL
}

train_evaluator = ClassificationEvaluator(AG_NEWS, model, None, train_data_loader, args.batch_size, args.gpu)
test_evaluator = ClassificationEvaluator(AG_NEWS, model, None, test_data_loader, args.batch_size, args.gpu)

if hasattr(train_evaluator, 'is_multilabel'):
        train_evaluator.is_multilabel = IS_MULTILABEL
if hasattr(test_evaluator, 'is_multilabel'):
    test_evaluator.is_multilabel = IS_MULTILABEL

trainer = TrainerFactory.get_trainer(args.dataset, model, None, test_iter, trainer_config, train_evaluator, test_evaluator)

# train the model
trainer.train(args.epochs)