In [1]:
from __future__ import division

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
from torchvision import transforms, datasets

import bcolz
import cPickle as pickle

In [5]:
def save_array(fname, arr):
    """Store `arr` on disk as a bcolz carray.

    fname -- passed to bcolz as `rootdir`, the on-disk location of the carray.
    arr   -- the array-like data to store.

    The carray is created with mode='w' and flushed before returning.
    """
    bcolz.carray(arr, rootdir=fname, mode='w').flush()
    
def load_array(fname):
    """Open the bcolz container stored at `fname` and return it.

    The object returned is whatever `bcolz.open` yields; no copy into a
    plain in-memory array is made here.
    """
    stored = bcolz.open(fname)
    return stored

In [2]:
# CNN classifier: two conv stages followed by a single linear layer
class CNN(nn.Module):
    """Small convolutional network with 10 outputs.

    Each stage is conv(5x5, padding 2) -> batch-norm -> ReLU -> 2x2 max-pool,
    so every stage keeps the spatial size through the convolution and halves
    it in the pool. The linear layer expects 7*7*32 flattened values, which is
    what a 1x28x28 input yields after the two stages.
    """

    def __init__(self):
        super(CNN, self).__init__()
        # Attribute names and creation order are significant: they determine
        # the state_dict keys and the order returned by children().
        self.layer1 = self._conv_stage(1, 16)
        self.layer2 = self._conv_stage(16, 32)
        self.fc = nn.Linear(7*7*32, 10)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one conv -> batch-norm -> ReLU -> max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2))

    def forward(self, x):
        """Run both conv stages, flatten per sample, and apply the linear layer."""
        feature_maps = self.layer2(self.layer1(x))
        # Keep the batch dimension, collapse everything else into one axis.
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.fc(flattened)
        

In [3]:
# Load the pre-trained classifier weights.
model_path = "cnn.pkl"
cnn = CNN()
# map_location keeps every storage on the CPU, so a checkpoint that was
# written on a GPU machine still deserializes on a CPU-only one.
cnn.load_state_dict(
    torch.load(model_path, map_location=lambda storage, loc: storage))

# Drop the last child (the `fc` linear layer) so the network returns the
# convolutional feature maps instead of class scores.
layers = list(cnn.children())
layers.pop()
# The truncated network is used for inference only: eval() makes BatchNorm
# normalise with its stored running statistics, so a sample's features do not
# depend on which other samples happen to share its batch.
cnn = torch.nn.Sequential(*layers).eval()

In [6]:
# Open the stored train / test sequence arrays.
train_inputs = load_array(fname="data/train_inputs.bc")
test_inputs = load_array(fname="data/test_inputs.bc")


In [7]:
# Turn the loaded arrays into autograd Variables viewed as (-1, 1, 28, 140),
# i.e. one input channel as expected by the first convolution.
def _as_sequence_batch(raw):
    """Wrap `raw` in a Tensor-backed Variable and view it as (-1, 1, 28, 140)."""
    return Variable(torch.Tensor(raw)).view(-1, 1, 28, 140)

train_inputs = _as_sequence_batch(train_inputs)
test_inputs = _as_sequence_batch(test_inputs)

In [8]:
# Run the truncated network over the training inputs in mini-batches and
# stack the resulting feature maps into one tensor.

# Feature extraction is inference: make sure BatchNorm uses its stored running
# statistics rather than the statistics of each mini-batch.
cnn.eval()

features = []
batch_size = 100
# Round up so a final, shorter batch is still processed when the number of
# samples is not a multiple of batch_size (slicing clips at the end).
n_iters = (len(train_inputs) + batch_size - 1) // batch_size
for i in range(n_iters):
    batch = train_inputs[i*batch_size: (i+1)*batch_size]
    f = cnn(batch)
    # Keep only the tensor values, not the autograd history.
    features.append(f.data)

features = torch.cat(features)

In [9]:
# Display the shape of the concatenated train feature tensor.
features.size()

torch.Size([500, 32, 7, 35])

In [10]:
# Persist the train features. The with-block closes the file handle even if
# pickling raises, so the data is flushed and the descriptor is not leaked.
with open("data/train_features.pkl", "wb") as features_file:
    pickle.dump(features, features_file)

In [11]:
# Run the truncated network over the test inputs in mini-batches and stack
# the resulting feature maps into one tensor.

# Feature extraction is inference: make sure BatchNorm uses its stored running
# statistics rather than the statistics of each mini-batch.
cnn.eval()

features = []
batch_size = 100
# Round up so a final, shorter batch is still processed when the number of
# samples is not a multiple of batch_size (slicing clips at the end).
n_iters = (len(test_inputs) + batch_size - 1) // batch_size
for i in range(n_iters):
    batch = test_inputs[i*batch_size: (i+1)*batch_size]
    f = cnn(batch)
    # Keep only the tensor values, not the autograd history.
    features.append(f.data)

features = torch.cat(features)

In [12]:
# Display the shape of the concatenated test feature tensor.
features.size()

torch.Size([200, 32, 7, 35])

In [13]:
# Persist the test features. The with-block closes the file handle even if
# pickling raises, so the data is flushed and the descriptor is not leaked.
with open("data/test_features.pkl", "wb") as features_file:
    pickle.dump(features, features_file)