In [1]:
import os
import argparse
import time
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
import torch
import torch.nn as nn
import torch.nn.init as init
from torch.nn.utils import clip_grad_norm
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torchnet.meter import AverageValueMeter

# Load data

In [3]:
name = 'helpdesk'
parser = {
    'inputdir': '../input/{}/'.format(name),   
    'outputdir': './output_files/{0}/'.format(name),
}

dirs = argparse.Namespace(**parser)

if not os.path.isdir(dirs.outputdir):
    os.makedirs(dirs.outputdir)

In [4]:
# load parameters
with open(dirs.inputdir + 'parameters.pkl', "rb") as f:
    maxlen = pickle.load(f)
    num_features = pickle.load(f)
    chartoindice = pickle.load(f)
    targetchartoindice = pickle.load(f)
    divisor = pickle.load(f)
    divisor2 = pickle.load(f)

# load data
with open(dirs.inputdir + 'preprocessed_data.pkl', "rb") as f:
    X = pickle.load(f)
    y_a = pickle.load(f)
    y_t = pickle.load(f)
    X_test = pickle.load(f)
    y_a_test = pickle.load(f)
    y_t_test = pickle.load(f)

In [5]:
#X = np.reshape(X, (X.shape[1],X.shape[0],X.shape[2]))
#X.shape

In [17]:
args = {
    'epochs': 100,
    'batch_size': 64,
    'lr': 0.0002,
    'optim': 'adam',
    'beta1': 0.5,
    'weight_decay': 1e-4,
    'features': X.shape[2],
    'h_dim1': 100,
    'h_dim2a': 100,
    'h_dim2t': 100,
    'num_layer1': 1,
    'num_layer2a': 1,
    'num_layer2t': 1,
    'outdim_a': len(targetchartoindice), 
    'outdim_t': 1,
    'clip': 3,
    'cuda': False,
    'seed': 7,
    'workers': 2,
}

args = argparse.Namespace(**args)

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

In [7]:
data_loader_x = DataLoader(dataset=X, batch_size=args.batch_size, shuffle=False, num_workers=args.workers)
data_loader_y_a = DataLoader(dataset=y_a, batch_size=args.batch_size, shuffle=False, num_workers=args.workers)
data_loader_y_t = DataLoader(dataset=y_t, batch_size=args.batch_size, shuffle=False, num_workers=args.workers)

# Model

In [8]:
class LSTMModel(nn.Module):
    
    def __init__(self, features, h_dim1, h_dim2a, h_dim2t, num_layer1, 
                 num_layer2a, num_layer2t, outdim_a, outdim_t,
                 batch_size=64, seq_length=15, cuda=args.cuda):
        super(LSTMModel, self).__init__()
        # no seq length when init LSTM 
        self.lstm1 = nn.LSTM(features, h_dim1, num_layer1)
        self.lstm2a = nn.LSTM(h_dim1, h_dim2a, num_layer2a)
        self.lstm2t = nn.LSTM(h_dim1, h_dim2t, num_layer2t)
        self.linear3a = nn.Linear(h_dim2a, outdim_a)
        self.linear3t = nn.Linear(h_dim2t, outdim_t)
        
        self.num_layer1 = num_layer1
        self.num_layer2a = num_layer2a
        self.num_layer2t = num_layer2t
        self.h_dim1 = h_dim1
        self.h_dim2a = h_dim2a
        self.h_dim2t = h_dim2t
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.cuda = cuda

    def forward(self, x):
        assert (x.size(1) == self.batch_size)
        
        tt = torch.cuda if self.cuda else torch
        
        # shared layer
        h1 = Variable(tt.FloatTensor(self.num_layer1, x.size(1), self.h_dim1).zero_(), requires_grad=False)
        c1 = Variable(tt.FloatTensor(self.num_layer1, x.size(1), self.h_dim1).zero_(), requires_grad=False)
        out1, _ = self.lstm1(x, (h1, c1)) # out1 dim : seq x batch x h_dim
        
        # activity layer
        h2a = Variable(tt.FloatTensor(self.num_layer2a, out1.size(1), self.h_dim2a).zero_(), requires_grad=False)
        c2a = Variable(tt.FloatTensor(self.num_layer2a, out1.size(1), self.h_dim2a).zero_(), requires_grad=False)
        _, (h_t2a, _) = self.lstm2a(out1, (h2a, c2a))
        h_t2a = h_t2a.squeeze(0) # remove size 0: h_t2a dim: numlayer x batch x hidden --> batch x hidden
        
        # time layer
        h2t = Variable(tt.FloatTensor(self.num_layer2t, out1.size(1), self.h_dim2t).zero_(), requires_grad=False)
        c2t = Variable(tt.FloatTensor(self.num_layer2t, out1.size(1), self.h_dim2t).zero_(), requires_grad=False)
        _, (h_t2t, _) = self.lstm2t(out1, (h2t, c2t))
        h_t2t = h_t2t.squeeze(0) # remove size 0
        
        # output
        out_a = self.linear3a(h_t2a)
        out_t = self.linear3t(h_t2t)
        return (out_a, out_t)

In [9]:
model = LSTMModel(features=args.features, h_dim1=args.h_dim1, h_dim2a=args.h_dim2a, h_dim2t=args.h_dim2t, 
                  num_layer1=args.num_layer1, num_layer2a=args.num_layer2a, num_layer2t=args.num_layer2t, 
                  outdim_a=args.outdim_a, outdim_t=args.outdim_t)

In [10]:
model.parameters

<bound method Module.parameters of LSTMModel (
  (lstm1): LSTM(14, 100)
  (lstm2a): LSTM(100, 100)
  (lstm2t): LSTM(100, 100)
  (linear3a): Linear (100 -> 10)
  (linear3t): Linear (100 -> 1)
)>

# Utils

In [11]:
def adjust_learning_rate(optimizer, lr_decay):
    for param_group in optimizer.param_groups:
        param_group['lr'] *= lr_decay

In [12]:
def save_checkpoint(state, is_best, filename="checkpoint.pth.tar"):
    checkpoint_filepath = os.path.join(dirs.outputdir, filename)
    torch.save(state, checkpoint_filepath)
    if is_best:
        shutil.copyfile(checkpoint_filepath, model_best_filepath)

In [13]:
def accuracy(outputs, targets):
    _, predicted = torch.max(outputs.data, 1)
    total += targets.size(0)
    #correct += predicted.eq(targets.data).cpu().sum()
    correct += (predicted == labels).sum()
    return correct*100/total

In [14]:
class Results():
    def __init__(self, results_path):
        self.losses_a = []
        self.losses_t = []
        self.losses_model = []
        self.accuracy_a = []
        self.results_path = results_path
    
    def save_losses(self, loss_a, loss_t, loss_model, accuracy):
        self.losses_a.append(loss_a)
        self.losses_t.append(loss_t)
        self.losses_model.append(loss_model)
        self.accuracy_a.append(accuracy)
        
    def save_to_disk(self):
        f = open(self.results_path + "losses_a.pkl", "wb")
        pickle.dump(self.D_losses, f)
        f= open(self.results_path + "losses_t.pkl", "wb")
        pickle.dump(self.G_losses, f)
        f = open(self.results_path + "losses_model.pkl", "wb")
        pickle.dump(self.D_reals, f)
        f = open(self.results_path + "accuracy_a.pkl", "wb")
        pickle.dump(self.D_fakes, f)

# Define train/validate function

In [30]:
def train(epoch, data_loader_x, data_loader_y_a, data_loader_y_t, model, optimizer, criterion_a, criterion_t, args, result_losses):
    print("=> EPOCH {}".format(epoch))
    losses_a = AverageValueMeter()
    losses_t = AverageValueMeter()
    losses_model = AverageValueMeter()
    accuracy_a = AverageValueMeter()
    
    tt = torch.cuda if args.cuda else torch
    model.train()
    start = time.time()
    
    for inputs, y_a, y_t in zip(data_loader_x, data_loader_y_a, data_loader_y_t): # shape: (batch_size, seq len, features)
        # input
        batch_size = inputs.size(0)
        inputs = np.reshape(X, (X.shape[1],X.shape[0],X.shape[2])) # shape: (seq len, batch_size, features)
        inputs = tt.FloatTensor(inputs)
        y_a = tt.FloatTensor(y_a)
        y_t = tt.FloatTensor(y_t)
        
        if args.cuda:
            inputs = inputs.cuda()
            y_a = y_a.cuda()
            y_t = y_t.cuda()
            
        inputs = Variable(inputs)
        y_a = Variable(y_a)
        y_t = Variable(y_t)
        
        # output
        output_a, output_t = model(inputs)
        
        # loss
        loss_a = criterion_a(output_a, y_a)
        loss_t = criterion_t(output_t, y_t)
        loss_model = loss_a + loss_t
        accuracy = accuracy(output_a, y_a)
        
        # update
        model.zero_grad()
        loss_model.backward()
        clip_grad_norm(model.parameters(), args.clip, 'inf')
        optimizer.step()
        
        losses_a.add(loss_a.data.cpu()[0] * batch_size, batch_size)
        losses_t.add(loss_t.data.cpu()[0] * batch_size, batch_size)
        losses_model.add(loss_model.data.cpu()[0] * batch_size, batch_size)
      
    # Show and save result after each epoch
    print("=> EPOCH {} | Time: {}s | Activity loss: {:.4f} | Time loss: {:.4f}"
          " | Model loss: {:.4f} | Activity accuracy: {:.4f}"
          .format(epoch, round(time.time()-start), losses_a.value()[0],
                  losses_t.value()[0], losses_model.value()[0], accuracy_a.value()[0]))
    
    result_losses.save_losses(losses_a.value()[0], losses_t.value()[0], 
                              losses_model.value()[0], accuracy_a.value()[0])

In [None]:
def validate

In [None]:
def test

# Loss function and optimizer

In [31]:
# define loss function
criterion_a = nn.CrossEntropyLoss()
criterion_t = nn.MSELoss()
criterion = [criterion_a, criterion_t]

# define optimizer
if args.cuda:
    criterion.cuda()

# define optimizer
if args.optim == "sgd":
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    
elif args.optim == "adam":
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    
elif args.optim == "rmsprop":
    optimizer = optim.RMSprop(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.weight_decay)

# Train model

In [32]:
result_losses = Results(dirs.outputdir)
for epoch in range(1, args.epochs+1):
    train(epoch, data_loader_x, data_loader_y_a, data_loader_y_t, model, optimizer, criterion_a, criterion_t, args, result_losses)
    break

=> EPOCH 1


TypeError: torch.FloatTensor constructor received an invalid combination of arguments - got (torch.DoubleTensor), but expected one of:
 * no arguments
 * (int ...)
      didn't match because some of the arguments have invalid types: ([31;1mtorch.DoubleTensor[0m)
 * (torch.FloatTensor viewed_tensor)
      didn't match because some of the arguments have invalid types: ([31;1mtorch.DoubleTensor[0m)
 * (torch.Size size)
      didn't match because some of the arguments have invalid types: ([31;1mtorch.DoubleTensor[0m)
 * (torch.FloatStorage data)
      didn't match because some of the arguments have invalid types: ([31;1mtorch.DoubleTensor[0m)
 * (Sequence data)
      didn't match because some of the arguments have invalid types: ([31;1mtorch.DoubleTensor[0m)


In [28]:
y_t

array([  7.11185218e-05,   8.24970111e-01,   0.00000000e+00, ...,
         7.11185218e-05,   4.12487426e-04,   0.00000000e+00])

In [29]:
torch.FloatTensor(y_t)


  0.0001
  0.8250
  0.0000
   ⋮    
  0.0001
  0.0004
  0.0000
[torch.FloatTensor of size 9181]

In [33]:
y_t

array([  7.11185218e-05,   8.24970111e-01,   0.00000000e+00, ...,
         7.11185218e-05,   4.12487426e-04,   0.00000000e+00])