In [1]:
import numpy as np  
import torch  
from torch import nn    

#### Guideline

![](example_guideline.jpg)

#### Reference
[https://discuss.pytorch.org/t/append-for-nn-sequential-or-directly-converting-nn-modulelist-to-nn-sequential/7104/4](https://discuss.pytorch.org/t/append-for-nn-sequential-or-directly-converting-nn-modulelist-to-nn-sequential/7104/4)    

[https://discuss.pytorch.org/t/custom-connections-in-neural-network-layers/3027/15](https://discuss.pytorch.org/t/custom-connections-in-neural-network-layers/3027/15)

### input layer랑 output layer만 있는 경우

In [2]:
#아래 코드를 좀 변형하면 될듯? 아래 코드는 hidden layer가 없어서.. 
class MaskedLinear(nn.Module):
    def __init__(self, in_dim, out_dim, indices_mask):
        """
       :param in_features: number of input features
       :param out_features: number of output features
       :param indices_mask: list of two lists containing indices for dimensions 0 and 1, used to create the mask  
       """
        super(MaskedLinear, self).__init__()    
        
 
        def backward_hook(grad): 
            # Clone due to not being allowed to modify in-place gradients
            out = grad.clone()  
            out[self.mask] = 0    
            return out 
 
        self.linear = nn.Linear(in_dim, out_dim)#.cuda()    
        self.mask = torch.ones([out_dim, in_dim]).byte()#.cuda()
        self.mask[indices_mask] = 0 # create mask
        self.linear.weight.data[self.mask] = 0 # zero out bad weights
        self.linear.weight.register_hook(backward_hook) # hook to zero out bad gradients
 
    def forward(self, input):
        return self.linear(input)
    
    


In [3]:
indices_mask = torch.ones([2,4]).byte() #[output_dim, input_dim] 
indices_mask[0,0] = 0  # [output_index, input_index]: gradient 0으로 해주고 싶은 부분 
indices_mask[1,1] = 0 
indices_mask[0,2] = 0
indices_mask[0,3] = 0
indices_mask[1,3] = 0
indices_mask

tensor([[0, 1, 0, 0],
        [1, 0, 1, 0]], dtype=torch.uint8)

In [4]:
model = MaskedLinear(4, 2, indices_mask) 

In [5]:
model.linear.weight.data

tensor([[ 0.0000,  0.0397,  0.0000,  0.0000],
        [-0.0415,  0.0000,  0.3291,  0.0000]])

### hidden layer까지 있는 경우  
- adjacency matrix의 transpose 버전인 mat_mask를 통해서 hidden layer들의 dimension을 구한다 
- mat_mask의 row가 hidden과 output, column이 input과 hidden 
- 나머지 부분 바꿔서 구현

In [6]:
# mat_mask 행렬 바로 생성 
mat_mask = np.array([[0,2,0,0,2,0,0,0,0,0],[2,0,2,0,0,0,0,0,0,0],[0,2,0,2,0,0,0,0,0,0],[0,0,0,0,0,1,1,0,0,0],[0,0,0,0,0,0,1,1,0,0],[0,0,0,0,0,0,0,0,0,3],[0,0,0,0,0,0,0,0,3,0]])
print(mat_mask)
print(mat_mask.shape) 

[[0 2 0 0 2 0 0 0 0 0]
 [2 0 2 0 0 0 0 0 0 0]
 [0 2 0 2 0 0 0 0 0 0]
 [0 0 0 0 0 1 1 0 0 0]
 [0 0 0 0 0 0 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 3]
 [0 0 0 0 0 0 0 0 3 0]]
(7, 10)


In [7]:
mat_mask = torch.from_numpy(mat_mask)
print(mat_mask)
print(mat_mask.shape)

tensor([[0, 2, 0, 0, 2, 0, 0, 0, 0, 0],
        [2, 0, 2, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 2, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
        [0, 0, 0, 0, 0, 0, 0, 0, 3, 0]], dtype=torch.int32)
torch.Size([7, 10])


In [8]:
mat_mask[0:3, 0:5] #index (0~2, 0~4) 표현할라면 이렇게.. 

tensor([[0, 2, 0, 0, 2],
        [2, 0, 2, 0, 0],
        [0, 2, 0, 2, 0]], dtype=torch.int32)

In [9]:
mat_mask[3,0:5].sum() == 0
#이때 3이 첫번째 hidden layer의 dimension 

tensor(1, dtype=torch.uint8)

In [10]:
# hidden layer 추가한 버전
#아래 코드를 좀 변형하면 될듯? 아래 코드는 hidden layer가 없어서.. 
class MaskedLinear(nn.Module):
    def __init__(self, in_dim, out_dim, mat_mask):
        """
       :param in_features: number of input features
       :param out_features: number of output features
       :param indices_mask: list of two lists containing indices for dimensions 0 and 1, used to create the mask  
       """
        super(MaskedLinear, self).__init__()    
        
        def calculate_hidden_dim(in_dim, out_dim, mat_mask):
            #hidden dim의 정보를 담은 list를 출력한다..?
            hidden_dim_list = []
            start_col_idx = 0 
            finish_col_idx = in_dim - 1
            
            while(True):
                if finish_col_idx >= mat_mask.shape[1]:
                    break 
        
                for i in range(start_col_idx, len(mat_mask)): 
                    if (mat_mask[i,start_col_idx:(finish_col_idx + 1)].sum() == 0):
                        hidden_dim = i - sum(hidden_dim_list)
                        hidden_dim_list += [hidden_dim]  
                        start_col_idx = finish_col_idx + 1
                        finish_col_idx += i 
                        break 
                        
            return hidden_dim_list
 
        def backward_hook(grad): 
            # Clone due to not being allowed to modify in-place gradients     
            out = grad.clone()    
            out[self.mask] = 0    
            return out  
        
        
        def mask_grad(from_dim, to_dim, indices_mask): #두 layer에 대해서 gradient masking 해주고 생성한 layer를 return
            
            if (indices_mask == 1).sum() != 0: #indices_mask가 1을 가지고 있으면#왜냐하면 layer사이에서의 activation function은 같으니까
                self.layer = nn.Linear(from_dim, to_dim)

            elif (indices_mask == 2).sum() != 0: #indices_mask가 2를 가지고 있으면  
                self.layer = nn.Sequential(nn.Linear(from_dim, to_dim), nn.ReLU(True))    
            
            elif (indices_mask == 3).sum() != 0: #indices_mask가 3을 가지고 있으면    
                self.layer = nn.Sequential(nn.Linear(from_dim, to_dim), nn.Sigmoid(True)) 
            
            self.mask = torch.ones([to_dim, from_dim]).byte()
            self.mask[indices_mask] = 0
            self.layer.weight.data[self.mask] = 0 
            self.layer.weight.register_hook(backward_hook)   
            
            return self.layer  
            
        #이부분 바꿔야함     
        hidden_dim_list = calculate_hidden_dim(in_dim, out_dim, mat_mask)    
        
        #input이랑 첫번째 hidden layer
        self.input = mask_grad(in_dim, hidden_dim_list[0], mat_mask[0:hidden_dim_list[0], 0:in_dim])  
        self.hiddens = nn.Sequential()  
        for i in range(len(hidden_dim_list)-1): # repetition by (# of layers -1)  
            self.layer = mask_grad(hidden_dim_list[i], hidden_dim_list[i+1], mat_mask[hidden_dim_list[0:i-1].sum()-1:hidden_dim_list[0:i].sum()-1,in_dim + hidden_dim_list[0:i-1].sum()-1:in_dim + hidden_dim_list[0:i].sum()-1])    
            self.hiddens.add_module("hidden{}".format(i+1), self.layer)   
                
        self.output = mask_grad(hidden_dim_list[len(hidden_dim_list)-1], out_dim, mat_mask[(mat_mask.shape[0]-out_dim):,(mat_mask.shape[1]-hidden_dim_list[len(hidden_dim_list)-1]):])  
                
            
            
    
    
    #이부분 바꿔야함    
    def forward(self, input):   
       
        o1 = self.layer1(input)     
        o2 = self.hiddens(o1)       
        out = self.output(o2)      
        
        return out

    
    
    
    


In [11]:
model = MaskedLinear(5, 2, mat_mask.byte())       

AttributeError: 'Sequential' object has no attribute 'weight'

#### test 용 코드

In [12]:
temp = []  
temp += [1]   
temp

[1]

In [13]:
temp += [2]
temp

[1, 2]

In [14]:
len(temp)

2

In [15]:
print(mat_mask.shape[0])
print(mat_mask.shape[1])

7
10


In [16]:
hidden_dim_list = []
start_col_idx = 0 
finish_col_idx = 4 # 5 - 1
            
while(True):   
    
    if finish_col_idx >= mat_mask.shape[1]:   
        break    
        
    for i in range(start_col_idx, len(mat_mask)):       
        if (mat_mask[i,start_col_idx:(finish_col_idx + 1)].sum() == 0):      
            hidden_dim = i - sum(hidden_dim_list)    
            hidden_dim_list += [hidden_dim]   
            start_col_idx = finish_col_idx + 1  
            finish_col_idx += i   
            break    

In [17]:
print(hidden_dim_list)
print(start_col_idx)
print(finish_col_idx)

[3, 2]
8
12


In [18]:
hidden_dim_list = []
sum(hidden_dim_list) 

0

In [19]:
(mat_mask[0:3, 0:5] == 2).sum() != 0

tensor(1, dtype=torch.uint8)

In [20]:
print("hidden{}".format(1))

hidden1


In [21]:
print(hidden_dim_list[0:2])
print(sum(hidden_dim_list[0:2]))

[]
0
