# Conversion from `EdgeAttibute` to `EdgeEmbeddings` via `GammaVAE`

In [1]:
import glob
import pandas as pd
import numpy as np
import random

## Paramters Setting

In [77]:
from collections import namedtuple

ModelConfig = namedtuple('ModelConfig', ['input_dims', 'latent_dims', 'hidden_dims', 'gamma_shape', 'prior_shape', 'prior_rate', 'prior_weight'])
model_config = ModelConfig(
    input_dims = 8,
    latent_dims = 3,
    hidden_dims = [16, 8, 5],
    gamma_shape = 8.,
    prior_shape = 2.0,
    prior_rate = 1.,
    prior_weight = 0.001,
)

TrainConfig = namedtuple('TrainConfig', ['training_epochs', 'batch_size', 'learning_rate'])
train_config = TrainConfig(
    training_epochs = 200,
    batch_size = 1000,
    learning_rate = 1e-5,
)

## Definition of `GammaVAE`

In [78]:
import torch
from torch import nn
from torch.distributions import Gamma
from torch.nn import functional as F
import torch.tensor as Tensor
import torch.nn.init as init


class GammaVAE(nn.Module):

    def __init__(self,
                 in_channels: int,
                 latent_dim: int,
                 hidden_dims: list = None,
                 gamma_shape: float = 8.,
                 prior_shape: float = 2.0,
                 prior_rate: float = 1.,
                 prior_weight: float = 0.1,
                 **kwargs) -> None:
        super(GammaVAE, self).__init__()
        
        #
        # Parameters setting
        # --------------------------------------------------------------------------------------------------------------
        self.input_dim = in_channels
        self.latent_dim = latent_dim
        self.B = gamma_shape
        self.prior_alpha = torch.tensor([prior_shape])
        self.prior_beta = torch.tensor([prior_rate])
        self.prior_weight = prior_weight
        modules = []
        if hidden_dims is None:
            hidden_dims = [32, 64, 128, 256, 512]
        
        #
        # Build Encoder
        # --------------------------------------------------------------------------------------------------------------
        for h_dim in hidden_dims:
            modules.append(
                nn.Sequential(
                    nn.Linear(in_channels, out_features=h_dim),
                    nn.BatchNorm1d(h_dim),
                    nn.ELU(),
                ))
            in_channels = h_dim
        self.encoder = nn.Sequential(*modules)
        self.fc_mu = nn.Sequential(nn.Linear(hidden_dims[-1], latent_dim),
                                   nn.Softmax())
        self.fc_var = nn.Sequential(nn.Linear(hidden_dims[-1], latent_dim),
                                    nn.Softmax())

        #
        # Build Decoder
        # --------------------------------------------------------------------------------------------------------------
        modules = []
        self.decoder_input = nn.Sequential(nn.Linear(latent_dim, hidden_dims[-1]))
        hidden_dims = hidden_dims[::-1]
        for i in range(len(hidden_dims) - 1):
            modules.append(
                nn.Sequential(
                    nn.Linear(in_features=hidden_dims[i], out_features=hidden_dims[i + 1]),
                    nn.BatchNorm1d(hidden_dims[i + 1]),
                    nn.ELU(),
                ))
        self.decoder = nn.Sequential(*modules)
        self.final_layer = nn.Sequential(
            nn.Linear(in_features=hidden_dims[-1], out_features=self.input_dim),
            nn.BatchNorm1d(self.input_dim),
            nn.ELU(),
        )
        self.weight_init()

    def weight_init(self):
        for block in self._modules:
            for m in self._modules[block]:
                init_(m)

    def encode(self, input: Tensor):
        """
        Encodes the input by passing through the encoder network
        and returns the latent codes.
        :param input: (Tensor) Input tensor to encoder [N x C x H x W]
        :return: (Tensor) List of latent codes
        """
        result = self.encoder(input)

        # Split the result into mu and var components of the latent Gaussian distribution
        alpha = self.fc_mu(result)
        beta = self.fc_var(result)

        return [alpha, beta]

    def decode(self, z: Tensor) -> Tensor:
        result = self.decoder_input(z)
        result = self.decoder(result)
        result = self.final_layer(result)
        return result

    def reparameterize(self, alpha: Tensor, beta: Tensor) -> Tensor:
        """
        Reparameterize the Gamma distribution by the shape augmentation trick.
        Reference:
        [1] https://arxiv.org/pdf/1610.05683.pdf

        :param alpha: (Tensor) Shape parameter of the latent Gamma
        :param beta: (Tensor) Rate parameter of the latent Gamma
        :return:
        """
        # Sample from Gamma to guarantee acceptance
        alpha_ = alpha.clone().detach()
        z_hat = Gamma(alpha_ + self.B, torch.ones_like(alpha_)).sample()

        # Compute the eps ~ N(0,1) that produces z_hat
        eps = self.inv_h_func(alpha + self.B , z_hat)
        z = self.h_func(alpha + self.B, eps)

        # When beta != 1, scale by beta
        return z / beta

    @staticmethod
    def h_func(alpha: Tensor, eps: Tensor) -> Tensor:
        """
        Reparameterize a sample eps ~ N(0, 1) so that h(z) ~ Gamma(alpha, 1)
        :param alpha: (Tensor) Shape parameter
        :param eps: (Tensor) Random sample to reparameterize
        :return: (Tensor)
        """

        z = (alpha - 1./3.) * (1 + eps / torch.sqrt(9. * alpha - 3.))**3
        return z

    @staticmethod
    def inv_h_func(alpha: Tensor, z: Tensor) -> Tensor:
        """
        Inverse reparameterize the given z into eps.
        :param alpha: (Tensor)
        :param z: (Tensor)
        :return: (Tensor)
        """
        eps = torch.sqrt(9. * alpha - 3.) * ((z / (alpha - 1./3.))**(1. / 3.) - 1.)
        return eps

    @staticmethod
    def I_function(a, b, c, d):
        return - c * d / a - b * torch.log(a) - torch.lgamma(b) + (b - 1) * (torch.digamma(d) + torch.log(c))

    def forward(self, input: Tensor, **kwargs) -> Tensor:
        alpha, beta = self.encode(input)
        z = self.reparameterize(alpha, beta)
        return [self.decode(z), input, alpha, beta]

    def vae_gamma_kl_loss(self, a, b, c, d):
        """
        https://stats.stackexchange.com/questions/11646/kullback-leibler-divergence-between-two-gamma-distributions
        b and d are Gamma shape parameters and
        a and c are scale parameters.
        (All, therefore, must be positive.)
        """

        a = 1 / a
        c = 1 / c
        losses = self.I_function(c, d, c, d) - self.I_function(a, b, c, d)
        return torch.sum(losses, dim=1)

    def loss_function(self, *args, **kwargs) -> dict:
        recons = args[0]
        input = args[1]
        alpha = args[2]
        beta = args[3]

        curr_device = input.device
        recons_loss = torch.mean(F.mse_loss(recons, input, reduction='none'), dim=(1,))

        self.prior_alpha = self.prior_alpha.to(curr_device)
        self.prior_beta = self.prior_beta.to(curr_device)

        kld_loss = self.vae_gamma_kl_loss(alpha, beta, self.prior_alpha, self.prior_beta)

        loss = (1 - self.prior_weight) * recons_loss + self.prior_weight * kld_loss
        loss = torch.mean(loss, dim = 0)
        # print(loss, recons_loss, kld_loss)
        return {'loss': loss}

    def sample(self, num_samples:int, current_device: int, **kwargs) -> Tensor:
        """
        Samples from the latent space and return the corresponding
        image space map.
        :param num_samples: (Int) Number of samples
        :param current_device: (Int) Device to run the modelSay
        :return: (Tensor)
        """
        z = Gamma(self.prior_alpha, self.prior_beta).sample((num_samples, self.latent_dim))
        z = z.squeeze().to(current_device)

        samples = self.decode(z)
        return samples

    def generate(self, x: Tensor, **kwargs) -> Tensor:
        """
        Given an input image x, returns the reconstructed image
        :param x: (Tensor) [B x C x H x W]
        :return: (Tensor) [B x C x H x W]
        """

        return self.forward(x)[0]


def init_(m):
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        init.orthogonal_(m.weight)
        if m.bias is not None:
            m.bias.data.fill_(0)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
        m.weight.data.fill_(1)
        if m.bias is not None:
            m.bias.data.fill_(0)

## Data Loading

In [6]:
torch.cuda.set_device(0)

In [53]:
urls = glob.glob('Edge_Attribute/*.csv')
dfs = [pd.read_csv(url, encoding = "ISO-8859-1", engine='python') for url in urls]
df_train = pd.concat(dfs[:16])
df_test  = pd.concat(dfs[16:])

In [54]:
edge_attr_train = df_train.iloc[:,2:]
edge_attr_test  = df_test.iloc[:,2:]
edge_attr_train.head()

Unnamed: 0,Large_TransOut_Count,TransOut_Count,Total_Large_TransIn,Total_WireTrans,Total_WireTrans_Times,Average_WireTrans,WireTransIn_8000,WireTrans_Out_9mon
0,0.0,0.0,1261999.04,0.0,0.0,0.0,0.0,0.0
1,1.0,1.0,0.0,515242.0,1.0,515242.0,0.0,675792.0
2,0.0,1.0,0.0,150.0,1.0,150.0,0.0,0.0
3,1.0,1.0,0.0,2347140.0,1.0,2347140.0,0.0,10128640.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1258224.0


In [55]:
edge_attr_train = np.log(edge_attr_train + 1.)
edge_attr_test = np.log(edge_attr_test + 1.)
edge_attr_train.head()

Unnamed: 0,Large_TransOut_Count,TransOut_Count,Total_Large_TransIn,Total_WireTrans,Total_WireTrans_Times,Average_WireTrans,WireTransIn_8000,WireTrans_Out_9mon
0,0.0,0.0,14.048208,0.0,0.0,0.0,0.0,0.0
1,0.693147,0.693147,0.0,13.152394,0.693147,13.152394,0.0,13.423642
2,0.0,0.693147,0.0,5.01728,0.693147,5.01728,0.0,0.0
3,0.693147,0.693147,0.0,14.668709,0.693147,14.668709,0.0,16.130878
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.045213


In [56]:
from sklearn import preprocessing

scaler = preprocessing.MinMaxScaler()
edge_attr_train_scaled = scaler.fit_transform(edge_attr_train)
edge_attr_test_scaled  = scaler.transform(edge_attr_test)
# AML_dataset = pd.DataFrame(features, columns=AML_dataset.columns, index=AML_dataset.index)
# AML_dataset.head()

In [57]:
edge_attr_train_scaled = pd.DataFrame(edge_attr_train_scaled)
edge_attr_train_scaled = torch.tensor(edge_attr_train_scaled.values, dtype=torch.float32).cuda()

edge_attr_test_scaled = pd.DataFrame(edge_attr_test_scaled)
edge_attr_test_scaled = torch.tensor(edge_attr_test_scaled.values, dtype=torch.float32).cuda()

## Model & Optimizer Initialization

In [79]:
vae = GammaVAE(in_channels=model_config.input_dims,
               latent_dim=model_config.latent_dims,
               hidden_dims=model_config.hidden_dims,
               gamma_shape=model_config.gamma_shape,
               prior_shape=model_config.prior_shape,
               prior_rate=model_config.prior_rate,
               prior_weight=model_config.prior_weight).cuda()

In [80]:
optimizer = torch.optim.Adam(vae.parameters(), lr=train_config.learning_rate)

## Training

In [81]:
indice = torch.tensor(random.sample(range(edge_attr_train_scaled.shape[0]), train_config.batch_size))
batch_xs = edge_attr_train_scaled[indice]
x_output, x_input, x_alpha, x_beta = vae(batch_xs)
loss_dict = vae.loss_function(x_output, x_input, x_alpha, x_beta)
train_loss_ini = loss_dict['loss']

indice = torch.tensor(random.sample(range(edge_attr_test_scaled.shape[0]), train_config.batch_size))
batch_xs = edge_attr_test_scaled[indice]
x_output, x_input, x_alpha, x_beta = vae(batch_xs)
loss_dict = vae.loss_function(x_output, x_input, x_alpha, x_beta)
test_loss_ini = loss_dict['loss']

print("Epoch:", '%04d' % (0), 
      "Training loss=", "{:.9f}".format(train_loss_ini), 
      "Testing loss=", "{:.9f}".format(test_loss_ini))

Epoch: 0000 Training loss= 0.732802212 Testing loss= 0.711290300


  input = module(input)


In [82]:
for epoch in range(train_config.training_epochs):
    avg_cost = 0.
    total_batch = int(edge_attr_train_scaled.shape[0] / train_config.batch_size)
    
    train_loss = []
    test_loss = []
    avg_train_cost = 0.
    avg_test_cost = 0.
    for i in range(total_batch):
        
        ## train
        indice = torch.tensor(random.sample(range(edge_attr_train_scaled.shape[0]), train_config.batch_size))
        batch_xs = edge_attr_train_scaled[indice]

        x_output, x_input, x_alpha, x_beta = vae(batch_xs)
        loss_dict = vae.loss_function(x_output, x_input, x_alpha, x_beta)
        train_loss.append(loss_dict['loss'] / edge_attr_train_scaled.shape[0] * train_config.batch_size)
        avg_train_cost += train_loss[-1]
        
        optimizer.zero_grad()
        train_loss[-1].backward()
        optimizer.step()
        
        ## valid
        indice = torch.tensor(random.sample(range(edge_attr_test_scaled.shape[0]), train_config.batch_size))
        batch_xs = edge_attr_test_scaled[indice]

        x_output, x_input, x_alpha, x_beta = vae(batch_xs)
        loss_dict = vae.loss_function(x_output, x_input, x_alpha, x_beta)
        test_loss.append(loss_dict['loss'] / edge_attr_train_scaled.shape[0] * train_config.batch_size)
        avg_test_cost += test_loss[-1]
     
    # Display logs per epoch step    
    print("Epoch:", '%04d' % (epoch+1), 
          "Training loss=", "{:.9f}".format(avg_train_cost), "Testing loss=", "{:.9f}".format(test_loss[-1]))
    

Epoch: 0001 Training loss= 0.630568266 Testing loss= 0.000511591
Epoch: 0002 Training loss= 0.514774740 Testing loss= 0.000431419
Epoch: 0003 Training loss= 0.441184103 Testing loss= 0.000372224
Epoch: 0004 Training loss= 0.387676656 Testing loss= 0.000346573
Epoch: 0005 Training loss= 0.346536309 Testing loss= 0.000291239
Epoch: 0006 Training loss= 0.304940552 Testing loss= 0.000262089
Epoch: 0007 Training loss= 0.265059531 Testing loss= 0.000213156
Epoch: 0008 Training loss= 0.217041194 Testing loss= 0.000183434
Epoch: 0009 Training loss= 0.194039866 Testing loss= 0.000163344
Epoch: 0010 Training loss= 0.176967680 Testing loss= 0.000150220
Epoch: 0011 Training loss= 0.167441070 Testing loss= 0.000140745
Epoch: 0012 Training loss= 0.160921603 Testing loss= 0.000143355
Epoch: 0013 Training loss= 0.155339211 Testing loss= 0.000137041
Epoch: 0014 Training loss= 0.149634436 Testing loss= 0.000128531
Epoch: 0015 Training loss= 0.143482685 Testing loss= 0.000122143
Epoch: 0016 Training loss

KeyboardInterrupt: 

## Generating `EdgeEmbeddings`

In [91]:
train_x_output, train_x_input, train_x_alpha, train_x_beta = vae(edge_attr_train_scaled)
train_mean = train_x_alpha / train_x_beta
train_mean[:10,:]

tensor([[0.8616, 1.3121, 4.1235],
        [1.3659, 1.4379, 0.9330],
        [1.8430, 0.8570, 0.9022],
        [1.2423, 1.5387, 0.9604],
        [0.9004, 0.9434, 1.3315],
        [0.9027, 0.9403, 1.3310],
        [2.5470, 1.1560, 0.7780],
        [2.7016, 1.1012, 0.6829],
        [0.8825, 0.9573, 1.3329],
        [1.9542, 0.8930, 0.8496]], device='cuda:0', grad_fn=<SliceBackward>)

In [92]:
test_x_output, test_x_input, test_x_alpha, test_x_beta = vae(edge_attr_test_scaled)
test_mean = test_x_alpha / test_x_beta
test_mean[:10,:]

tensor([[0.9653, 1.0041, 3.7995],
        [0.9655, 1.0037, 3.7990],
        [0.9424, 0.8919, 1.2775],
        [0.8230, 1.0190, 1.3399],
        [1.3687, 1.4174, 0.9472],
        [2.7459, 1.1319, 0.7801],
        [2.5276, 1.1070, 0.7316],
        [2.8289, 1.2349, 0.6895],
        [1.4041, 1.4209, 0.9409],
        [0.9572, 1.0245, 3.8274]], device='cuda:0', grad_fn=<SliceBackward>)

In [112]:
train_mean.cpu().data.numpy()
test_mean.cpu().data.numpy()

array([[0.9653172 , 1.0041116 , 3.7995434 ],
       [0.965473  , 1.0037236 , 3.7989998 ],
       [0.9424194 , 0.8918789 , 1.2775328 ],
       ...,
       [0.9577436 , 1.0230571 , 3.825455  ],
       [0.959069  , 1.0197277 , 3.8209865 ],
       [0.9877101 , 0.94934356, 3.7180855 ]], dtype=float32)

In [94]:
train_mean.shape

torch.Size([1080709, 3])

In [1]:
# df_train
# df_test 

In [149]:
train_mean.cpu().data.numpy().shape
# test_mean.cpu().data.numpy()

(1080709, 3)

In [154]:
VAE_res = np.concatenate((train_mean.cpu().data.numpy(), test_mean.cpu().data.numpy()), 0)

In [156]:
VAE_res.shape

(1520631, 3)

In [166]:
count = 0
index = 0
for url in urls:
    date = url.split('_')[-1].split('.')[0]
    print (date, dfs[index].shape[0])
    tmp_df =  pd.concat([pd.DataFrame(dfs[index].iloc[:,:2].values), 
                         pd.DataFrame(VAE_res[count:count+dfs[index].shape[0],:])], axis=1)
    tmp_df.to_csv('Edge_Embedd_{}.csv'.format(date), header=False)
    count += dfs[index].shape[0]
    index += 1

2017-8 54014
2017-9 58190
2017-10 62001
2017-11 63763
2017-12 65844
2018-1 68002
2018-2 67346
2018-3 68397
2018-4 70560
2018-5 72217
2018-6 72674
2018-7 72923
2018-8 73160
2018-9 71512
2018-10 70925
2018-11 69181
2018-12 68906
2019-1 68598
2019-2 64474
2019-3 61642
2019-4 60343
2019-5 58842
2019-6 57117
