In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

import syft as sy
from syft.execution.plan import Plan

hook = sy.TorchHook(torch)

# force protobuf serialization for tensors
hook.local_worker.framework = None

Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/Users/santteegt/anaconda3/envs/pysyft-dev/lib/python3.7/site-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.15.2.so'





In [2]:
class LSTMCell(nn.Module):
    """
    Python implementation of LSTMCell for MPC
    This class overrides the torch.nn.LSTMCell
    """

    def __init__(self, input_size, hidden_size, bias=True, nonlinearity=None):
        super(LSTMCell, self).__init__()
    
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.nonlinearity = nonlinearity


        # Input Gate
        self.fc_xi = nn.Linear(input_size, hidden_size, bias=bias)
        self.fc_hi = nn.Linear(hidden_size, hidden_size, bias=bias)
        
        # Forget Gate
        self.fc_xf = nn.Linear(input_size, hidden_size, bias=bias)
        self.fc_hf = nn.Linear(hidden_size, hidden_size, bias=bias)

        # Cell Gate
        self.fc_xc = nn.Linear(input_size, hidden_size, bias=bias)
        self.fc_hc = nn.Linear(hidden_size, hidden_size, bias=bias)

        # Output Gate
        self.fc_xo = nn.Linear(input_size, hidden_size, bias=bias)
        self.fc_ho = nn.Linear(hidden_size, hidden_size, bias=bias)
        
        self.init_parameters()
        
    def init_parameters(self):
        std = 1.0 / np.sqrt(self.hidden_size)
        for w in self.parameters():
            w.data.uniform_(-std, std)
        
#     def init_hidden(self, input):
#         """
#         TODO: Not being used 
#         This method initializes a hidden state when no hidden state is provided
#         in the forward method. It creates a hidden state with zero values.
#         """
# #         h = torch.zeros(input.shape[0], self.hidden_size, dtype=input.dtype, device=input.device)
#         h = torch.zeros(input.shape[0], self.hidden_size)
#         if input.has_child() and isinstance(input.child, PointerTensor):
#             h = h.send(input.child.location)
#         if input.has_child() and isinstance(input.child, precision.FixedPrecisionTensor):
#             h = h.fix_precision()
#             child = input.child
#             if isinstance(child.child, AdditiveSharingTensor):
#                 crypto_provider = child.child.crypto_provider
#                 owners = child.child.locations
#                 h = h.share(*owners, crypto_provider=crypto_provider)
#         return h

    def init_hidden(self, batch_size):
        return torch.zeros(batch_size, self.hidden_size)

    def forward(self, x, hc=None):

        if hc is None:
            batch_size = x.shape[1]
            hc = (self.init_hidden(batch_size), self.init_hidden(batch_size))
        h, c = hc
        
#         print('LSTMCell', type(x), x.shape)
#         print('Hidden', h, h.shape)
#         print('C t-1', c, c.shape) 
        x_i = self.fc_xi(x)
        h_i = self.fc_hi(h)
        x_f = self.fc_xf(x)
        h_f = self.fc_hf(h)
        x_c = self.fc_xc(x)
        h_c = self.fc_hc(h)
        x_o = self.fc_xo(x)
        h_o = self.fc_ho(h)
        
        inputgate = (x_i + h_i).sigmoid()
        forgetgate = (x_f + h_f).sigmoid()
        cellgate = (x_c + h_c).tanh()
        outputgate = (x_o + h_o).sigmoid()

#         c_ = torch.mul(forgetgate, c) + torch.mul(inputgate, cellgate)
        c_ = (forgetgate * c) + (inputgate * cellgate)

#         h_ = torch.mul(outputgate, torch.tanh(c_))
        h_ = outputgate * c_.tanh()
#         print('h', h_)

        return h_, c_


class LSTM(nn.Module):
    """
    V2
    Python implementation of LSTM for MPC
    This class overrides the torch.nn.LSTM
    """

    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers=1,
        bias=True,
        batch_first=False,
        dropout=0,
        bidirectional=False,
        nonlinearity=None,
    ):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
#         self.dropout = float(dropout)
        self.bidirectional = bidirectional
#         self.num_directions = 2 if bidirectional else 1
#         self.is_lstm = base_cell is LSTMCell
        self.nonlinearity = nonlinearity
    
        # Dropout layers
        # TODO: implement a nn.Dropout class for PySyft
        # Link to issue: https://github.com/OpenMined/PySyft/issues/2500

        # Build RNN forward layers
        sizes = [input_size, *(hidden_size for _ in range(self.num_layers - 1))]
        print('sizes', sizes)
        self.rnn_forward = nn.ModuleList(
            (LSTMCell(sz, hidden_size, bias, nonlinearity) for sz in sizes)
        )
        
        self.lstm_cell = LSTMCell(self.input_size, self.hidden_size, self.bias, self.nonlinearity)

#         # Build RNN backward layers, if needed
#         if self.bidirectional:
#             self.rnn_backward = nn.ModuleList(
#                 (base_cell(sz, hidden_size, bias, nonlinearity) for sz in sizes)
#             )

    def init_hidden(self, batch_size):
        return torch.zeros(batch_size, self.hidden_size)

    def forward(self, x, hc=None):
        
        batch_size = x.shape[1]
        seq_len = x.shape[0]
        
        if hc is None:
            print('Init hc...')
            hc = (self.init_hidden(batch_size), self.init_hidden(batch_size))
            
        # Run through rnn in the forward direction
        for t in range(seq_len):
            input_ = x.select(0, t).view(1, -1)
            hc = self.lstm_cell(input_, hc)
                
        return hc

    
class WesadLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim=3, lstm_layers=1, dropout=0.2):
        # super(WesadLSTM, self).__init__(id="encrypted-model")
        super(WesadLSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.classes = output_dim
        self.lstm = LSTM(input_size=input_dim, hidden_size=input_dim, num_layers=lstm_layers, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)
#         self.dropout = nn.Dropout(dropout)
        
    def init_hidden(self, batch_size):
        return self.lstm.init_hidden(batch_size), self.lstm.init_hidden(batch_size)

    def forward(self, x, h):
        lstm_out, _ = self.lstm(x, h)
        out = self.fc(lstm_out.view(-1, self.hidden_dim))
#         out = F.softmax(out.view(-1, self.classes), dim=1)
        return out

In [3]:
def set_model_params(module, params_list, start_param_idx=0):
    """
    Set params list into model recursively.
    """
    param_idx = start_param_idx

    for name, param in module._parameters.items():
        print('param name', name)
        # A param can be None if it is not trainable.
        if param is not None:
            module._parameters[name] = params_list[param_idx]
            param_idx += 1

    for name, child in module._modules.items():
        print('module name', name)
        if child is not None:
            param_idx = set_model_params(child, params_list, param_idx)

    return param_idx

In [4]:
def softmax_cross_entropy_with_logits(logits, targets, batch_size):
    """ Calculates softmax entropy
        Args:
            * logits: (NxC) outputs of dense layer
            * targets: (NxC) one-hot encoded labels
            * batch_size: value of N, temporarily required because Plan cannot trace .shape
    """
    # numstable logsoftmax
    norm_logits = logits - logits.max()
    log_probs = norm_logits - norm_logits.exp().sum(dim=1, keepdim=True).log()
    # NLL, reduction = mean
    return -(targets * log_probs).sum() / batch_size

In [5]:
def naive_sgd(param, **kwargs):
    print('param', param, param.shape)
    print(kwargs['lr'])
    print(param.requires_grad, param.grad)
#     return param - (kwargs['lr'] * param.grad if param.grad is not None else torch.zeros_like(param))
    return param - (kwargs['lr'] * param.grad)

In [6]:
train_batch_size = 25
val_batch_size = 5
input_dim = 77
output_dim = 3
lstm_layers = 1
dropout = 0.5
lr = 1e-4

In [7]:
model = WesadLSTM(input_dim=input_dim, hidden_dim=input_dim, output_dim=output_dim, lstm_layers=lstm_layers,
                  dropout=dropout)

@sy.func2plan()
def train(data, target, h, c, batch_size, lr, model_parameters):
    set_model_params(model, model_parameters)
    out = model(data, (h, c))
    
#     batch_size = out.shape[0]
    # loss = ((out - target)**2).sum().refresh()/batch_size
#     loss = ((out - target)**2).sum()/batch_size
    loss = softmax_cross_entropy_with_logits(out, target, batch_size)
    print('loss', loss)
    loss.backward()
    
    print('post backprop')
    
    # step
    updated_params = [
        naive_sgd(param, lr=lr)
        for param in model_parameters
    ]

    return (loss, *updated_params)

sizes [77]


In [8]:
# p = model_state[0]
# p.uniform_??

In [9]:
# # model_state = list(model.parameters())
model_state = [param.data for param in model.parameters()] # raw tensors instead of nn.Parameter
[(m.shape, m.requires_grad) for m in model_state]

[(torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([77, 77]), False),
 (torch.Size([77]), False),
 (torch.Size([3, 77]), False),
 (torch.S

In [10]:
len(model_state)

34

In [11]:
# model_state = list(model.parameters())
# model_state = [param.data for param in model.parameters()] # raw tensors instead of nn.Parameter

data = torch.randn((train_batch_size, input_dim), dtype=torch.float32)
print('data', data.shape)

target = torch.randint(0, output_dim - 1, (train_batch_size,))
target = nn.functional.one_hot(target, output_dim)
print('target', target.shape)

h, c = model.init_hidden(train_batch_size)
print('hc', h.shape, c.shape)

data torch.Size([25, 77])
target torch.Size([25, 3])
hc torch.Size([25, 77]) torch.Size([25, 77])


In [12]:
# # model_state = list(model.parameters())
# model_state = [param.data for param in model.parameters()] # raw tensors instead of nn.Parameter

# data = torch.randn((train_batch_size, input_dim), dtype=torch.float32)

# target = torch.randint(0, output_dim - 1, (train_batch_size,))
# target = nn.functional.one_hot(target, output_dim)
# target.shape

# h, c = model.init_hidden(train_batch_size)

In [13]:
# Plan._build_translators = []

train.build(data, 
            target, 
            h, c,
            torch.tensor([train_batch_size]),
            torch.tensor([lr]),
            model_state, 
            trace_autograd=True)

module name lstm
module name rnn_forward
module name 0
module name fc_xi
param name weight
param name bias
module name fc_hi
param name weight
param name bias
module name fc_xf
param name weight
param name bias
module name fc_hf
param name weight
param name bias
module name fc_xc
param name weight
param name bias
module name fc_hc
param name weight
param name bias
module name fc_xo
param name weight
param name bias
module name fc_ho
param name weight
param name bias
module name lstm_cell
module name fc_xi
param name weight
param name bias
module name fc_hi
param name weight
param name bias
module name fc_xf
param name weight
param name bias
module name fc_hf
param name weight
param name bias
module name fc_xc
param name weight
param name bias
module name fc_hc
param name weight
param name bias
module name fc_xo
param name weight
param name bias
module name fc_ho
param name weight
param name bias
module name fc
param name weight
param name bias
loss AutogradTensor>PlaceHolder[Id:6779832

TypeError: mul(): argument 'other' (position 1) must be Tensor, not NoneType