In [1]:
import torch
from torch import nn
from torch.autograd import Variable

In [1]:
class ConvolutionalBlock(nn.Module):
    """Two stacked ``Conv1d -> Dropout -> BatchNorm1d -> ReLU`` stages.

    Args:
        in_channels: number of channels of the input passed to the first convolution.
        out_channels: number of channels produced by both convolutions.
        kernel_size: kernel width shared by both convolutions.
        first_stride: stride of the first convolution; the second one always
            uses stride 1.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, first_stride=1):
        super(ConvolutionalBlock, self).__init__()
        # For odd kernel sizes this keeps the temporal length unchanged at
        # stride 1; it equals the previously hard-coded padding of 1 for the
        # default kernel_size=3.
        padding = kernel_size // 2
        self.sequential = nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=first_stride, padding=padding),
            nn.Dropout(p=0.5),
            nn.BatchNorm1d(num_features=out_channels),
            nn.ReLU(),
            nn.Conv1d(out_channels, out_channels, kernel_size=kernel_size, stride=1, padding=padding),
            nn.Dropout(p=0.5),
            nn.BatchNorm1d(num_features=out_channels),
            nn.ReLU())

    def forward(self, x):
        """Run ``x`` through both convolutional stages and return the result."""
        return self.sequential(x)


class KMaxPool(nn.Module):
    """Keep the ``k`` largest activations along the last (time) dimension.

    Args:
        k: number of values to keep per channel, or the string ``'half'`` to
            keep ``time_steps // 2`` values, recomputed for every input.
    """

    def __init__(self, k='half'):
        super(KMaxPool, self).__init__()

        self.k = k

    def forward(self, x):
        """Return the top-``k`` values of ``x`` along dim 2.

        ``x`` is indexed as (batch_size, channel, time_steps). The values come
        back in the order produced by ``topk`` (largest first), not in their
        original temporal order.
        """
        # Resolve 'half' into a local value instead of overwriting self.k, so
        # the module keeps adapting to inputs of different lengths.
        if self.k == 'half':
            k = x.size(2) // 2
        else:
            k = self.k
        kmax, _ = x.topk(k, dim=2)
        return kmax


class ResidualBlock(nn.Module):
    """A ``ConvolutionalBlock`` with an optional additive shortcut connection.

    Args:
        in_channels: channels of the block input.
        out_channels: channels of the block output.
        downsample: when True, the temporal length is reduced using the
            strategy selected by ``downsample_type``.
        downsample_type: ``'resnet'`` (stride-2 first convolution),
            ``'kmaxpool'`` (``KMaxPool(k='half')`` before the convolutions) or
            ``'vgg'`` (``MaxPool1d(kernel_size=3, stride=2, padding=1)`` before
            the convolutions). Only consulted when ``downsample`` is True.
        optional_shortcut: when True, the (possibly projected) input is added
            to the convolutional output.

    Raises:
        NotImplementedError: if ``downsample`` is True and ``downsample_type``
            is not one of the supported values.
    """

    def __init__(self, in_channels, out_channels, downsample=False, downsample_type='resnet', optional_shortcut=True):
        super(ResidualBlock, self).__init__()
        self.optional_shortcut = optional_shortcut
        self.downsample = downsample

        pool = None
        first_stride = 1
        if self.downsample:
            if downsample_type == 'resnet':
                first_stride = 2
            elif downsample_type == 'kmaxpool':
                pool = KMaxPool(k='half')
            elif downsample_type == 'vgg':
                pool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
            else:
                raise NotImplementedError('unsupported downsample_type: {!r}'.format(downsample_type))
        # Always defined, so forward() can test it even when downsample=False.
        self.pool = pool

        self.convolutional_block = ConvolutionalBlock(in_channels, out_channels, first_stride=first_stride)

        # The shortcut is created after the convolutional block so parameter
        # creation order matches the previous implementation.
        shortcut = None
        if self.optional_shortcut:
            if self.downsample:
                # NOTE(review): a stride-2, kernel-1 convolution yields
                # ceil(L / 2) steps while 'kmaxpool' keeps L // 2, so the two
                # paths disagree for odd L with that downsample type.
                shortcut = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=2)
            elif in_channels != out_channels:
                # Project the identity path so the residual addition has
                # matching channel counts.
                shortcut = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1)
        self.shortcut = shortcut

    def forward(self, x):
        """Apply the optional pooling, the convolutions and the optional shortcut."""
        residual = x
        if self.pool is not None:
            x = self.pool(x)
        x = self.convolutional_block(x)

        if self.optional_shortcut:
            if self.shortcut is not None:
                residual = self.shortcut(residual)
            x = x + residual

        return x


class VDCNN_feat(nn.Module):
    """VDCNN-style 1-D convolutional regressor that also consumes extra features.

    Token ids are embedded, passed through a stack of ``ResidualBlock`` layers,
    reduced with k-max pooling, projected to 128 dimensions by three linear
    layers, concatenated with the caller-supplied feature vector and mapped to
    a single output value per example.

    Args:
        config: object exposing ``vocabulary_size`` and ``embedding_size``.
        n_features: width of the feature tensor concatenated in ``forward``.

    Raises:
        ValueError: if the configured depth has no layer table.
    """

    def __init__(self, config, n_features):
        super(VDCNN_feat, self).__init__()

        vocabulary_size = config.vocabulary_size

        depth = 9 # config.depth  # 29
        embed_size = config.embedding_size # config.embed_size  # 16
        optional_shortcut = True # config.optional_shortcut  # True
        k = 8 # config.k  # 8

        # Layer-count tables per depth. The depth-17 row previously duplicated
        # the depth-9 row.
        if depth == 9:
            n_conv_layers = {'conv_block_512': 2, 'conv_block_256': 2, 'conv_block_128': 2, 'conv_block_64': 2}
        elif depth == 17:
            n_conv_layers = {'conv_block_512': 4, 'conv_block_256': 4, 'conv_block_128': 4, 'conv_block_64': 4}
        elif depth == 29:
            n_conv_layers = {'conv_block_512': 4, 'conv_block_256': 4, 'conv_block_128': 10, 'conv_block_64': 10}
        elif depth == 49:
            n_conv_layers = {'conv_block_512': 6, 'conv_block_256': 10, 'conv_block_128': 16, 'conv_block_64': 16}
        else:
            raise ValueError('unsupported depth: {!r}'.format(depth))

        # quantization
        self.embedding = nn.Embedding(num_embeddings=vocabulary_size, embedding_dim=embed_size, padding_idx=0)

        conv_layers = []
        # The first convolution must accept the embedding width; it was
        # hard-coded to 16 channels, which breaks for any other embedding size.
        conv_layers.append(nn.Conv1d(embed_size, 64, kernel_size=3, padding=1))

        for i in range(n_conv_layers['conv_block_64']):
            conv_layers.append(ResidualBlock(64, 64, optional_shortcut=optional_shortcut))

        # Each wider stage starts with one downsampling block that changes the
        # channel count, followed by the configured number of same-width blocks.
        for i in range(n_conv_layers['conv_block_128']):
            if i == 0:
                conv_layers.append(ResidualBlock(64, 128, downsample=True, optional_shortcut=optional_shortcut))
            conv_layers.append(ResidualBlock(128, 128, optional_shortcut=optional_shortcut))

        for i in range(n_conv_layers['conv_block_256']):
            if i == 0:
                conv_layers.append(ResidualBlock(128, 256, downsample=True, optional_shortcut=optional_shortcut))
            conv_layers.append(ResidualBlock(256, 256, optional_shortcut=optional_shortcut))

        for i in range(n_conv_layers['conv_block_512']):
            if i == 0:
                conv_layers.append(ResidualBlock(256, 512, downsample=True, optional_shortcut=optional_shortcut))
            conv_layers.append(ResidualBlock(512, 512, optional_shortcut=optional_shortcut))

        self.conv_layers = nn.Sequential(*conv_layers)
        self.kmax_pooling = KMaxPool(k=k)

        linear_layers = []

        # 512 channels with k values each are flattened before the first linear layer.
        linear_layers.append(nn.Linear(512 * k, 2048))
        linear_layers.append(nn.Linear(2048, 2048))
        linear_layers.append(nn.Linear(2048, 128))

        self.linear_layers = nn.Sequential(*linear_layers)

        self.final_layer = nn.Linear(128 + n_features, 1)
        self.final_bn = nn.BatchNorm1d(num_features=128 + n_features)

    def forward(self, sentences, features):
        """Return one prediction per example.

        Args:
            sentences: token-id tensor fed to the embedding layer.
            features: extra per-example features concatenated along dim 1 with
                the 128-dimensional text representation.

        Returns:
            Tensor with the trailing singleton dimension removed, i.e. of
            shape (batch_size,).
        """
        x = self.embedding(sentences)
        x = x.transpose(1, 2)  # (batch_size, sequence_length, embed_size) -> (batch_size, embed_size, sequence_length)
        x = self.conv_layers(x)
        x = self.kmax_pooling(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        x_features = torch.cat([x, features], dim=1)
        final_output = self.final_layer(self.final_bn(x_features))
        # Squeeze only the output dimension; a bare squeeze() would also drop
        # the batch dimension when the batch holds a single example.
        return final_output.squeeze(1)


# Script entry-point guard; currently a no-op.
if __name__ == '__main__':
    pass

_StoreAction(option_strings=['--save_every'], dest='save_every', nargs=None, const=None, default=1, type=<class 'int'>, choices=None, help=None, metavar=None)

In [2]:
# Parse an empty argument list instead of the process command line.
# NOTE(review): `args` is presumably an argparse.ArgumentParser built in an
# earlier cell, in which case every option takes its default — confirm.
config = args.parse_args([])

In [3]:
# Create the run logger and record the parsed arguments.
logger = utils.get_logger('MovieReview')
logger.info('Arguments: {}'.format(config))

# Point at a local dataset directory when neither HAS_DATASET nor IS_ON_NSML is set.
if not HAS_DATASET and not IS_ON_NSML:  # It is not running on nsml
    DATASET_PATH = 'data/movie_review_phase1/'

# DONOTCHANGE: They are reserved for nsml
# The current namespace is handed to nsml via locals(), so names defined
# above this point are what it receives.
if config.pause:
    nsml.paused(scope=locals())

[INFO] 04-04 00:20:27 > Arguments: Namespace(batch_size=64, embedding_size=100, epochs=10, iteration='0', learning_rate=0.01, max_vocab_size=10000, min_count=3, mode='train', output=1, pause=0, print_every=1, save_every=1, sentence_length=20, use_gpu=True)


In [4]:
# Training / local-test driver: in 'train' mode build the preprocessing
# pipeline, datasets, model and trainer and run training; in 'test_local'
# mode run inference on the training file and print the result.
if config.mode == 'train':
    # Load the data.
    logger.info("Loading data...")
    train_data, val_data = load_data(DATASET_PATH, val_size=0.3)

    logger.info("Building preprocessor...")
    tokenizer = DummyTokenizer(config)
    feature_extractor1 = LengthFeatureExtractor(config)
    feature_extractors = [feature_extractor1]
    dictionary = RandomWordDictionary(tokenizer, config)
    dictionary.build_dictionary(train_data)

    preprocessor = Preprocessor(tokenizer, feature_extractors, dictionary)

    logger.info("Making dataset & dataloader...")
    # min_length == max_length, so both datasets are built with a single fixed sentence length.
    train_dataset = MovieReviewDataset(train_data, preprocessor, sort=False, min_length=config.sentence_length, max_length=config.sentence_length)
    val_dataset = MovieReviewDataset(val_data, preprocessor, sort=False, min_length=config.sentence_length, max_length=config.sentence_length)

    train_dataloader = DataLoader(dataset=train_dataset, batch_size=config.batch_size, shuffle=True, collate_fn=collate_fn,
                              num_workers=2)
    val_dataloader = DataLoader(dataset=val_dataset, batch_size=config.batch_size, shuffle=True,
                                  collate_fn=collate_fn, num_workers=2)

    model = WordCNN(dictionary, config)
    if config.use_gpu:
        model = model.cuda()

    # DONOTCHANGE: Reserved for nsml use
    bind_model(model, config)

    # size_average=False sums the squared errors over the batch instead of averaging them.
    criterion = nn.MSELoss(size_average=False)
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    # Honour the configured learning rate instead of a hard-coded 0.01, which
    # silently ignored the `learning_rate` argument.
    optimizer = optim.Adam(params=trainable_params, lr=config.learning_rate)

    trainer = Trainer(model, train_dataloader, val_dataloader, criterion=criterion, optimizer=optimizer,
                      lr_schedule=False, lr_scheduler=None, use_gpu=config.use_gpu, logger=logger)
    trainer.run(epochs=config.epochs)

# Used in local test mode.
# If the result looks like the following, it can be submitted with `nsml submit`.
# [(0.0, 9.045), (0.0, 5.91), ... ]
elif config.mode == 'test_local':
    with open(os.path.join(DATASET_PATH, 'train/train_data'), 'rt', encoding='utf-8') as f:
        reviews = f.readlines()
    res = nsml.infer(reviews)
    print(res)

[INFO] 04-04 00:20:28 > Loading data...
[INFO] 04-04 00:20:28 > Building preprocessor...
[INFO] 04-04 00:20:29 > Making dataset & dataloader...


  0%|          | 0/5692 [00:00<?, ?it/s]


RuntimeError: dimension out of range (expected to be in range of [-1, 0], but got 1)

In [5]:
%debug

> [0;32m/home/dreamgonfly/ToMuchInfo/code/trainers.py[0m(128)[0;36maccuracy[0;34m()[0m
[0;32m    126 [0;31m    [0;32mdef[0m [0maccuracy[0m[0;34m([0m[0mself[0m[0;34m,[0m [0moutputs[0m[0;34m,[0m [0mlabels[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m    127 [0;31m[0;34m[0m[0m
[0m[0;32m--> 128 [0;31m        [0mmaximum[0m[0;34m,[0m [0margmax[0m [0;34m=[0m [0moutputs[0m[0;34m.[0m[0mmax[0m[0;34m([0m[0mdim[0m[0;34m=[0m[0;36m1[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    129 [0;31m        [0mcorrects[0m [0;34m=[0m [0margmax[0m [0;34m==[0m [0mlabels[0m  [0;31m# ByteTensor[0m[0;34m[0m[0m
[0m[0;32m    130 [0;31m        [0mn_corrects[0m [0;34m=[0m [0mcorrects[0m[0;34m.[0m[0mfloat[0m[0;34m([0m[0;34m)[0m[0;34m.[0m[0msum[0m[0;34m([0m[0;34m)[0m  [0;31m# FloatTensor[0m[0;34m[0m[0m
[0m
ipdb> outputs
Variable containing:
 0.1273
-0.5819
-0.6532
-0.1320
-0.5961
-0.7171
-0.2539
-0.3998
 0.2566
-0.1505