# Process to make truncated dataset

In [1]:
import numpy as np
import numpy.random as npr
import random
import pickle

import torch

import matplotlib
import matplotlib.pyplot as plt

import os
import sys
os.chdir("/".join(os.getcwd().split('/')[:-1]))
%matplotlib inline

In [2]:
import easydict
import yaml

args = easydict.EasyDict({
})

with open('configs/rmsn_propensity.yaml') as f:
    config = yaml.safe_load(f)
for k, v in config.items():
    args.__setattr__(k, v)

In [3]:
args

{'mode': {'test': False},
 'model': {'name': 'rmsn',
  'phase': 3,
  'cs_hidden_dim': 64,
  'st_hidden_dim': 64,
  'intervention_dim': 2,
  'observation_dim': 2,
  'latent_dim': 16,
  'encoder_dim': 64,
  'layer': 1,
  'optimizer': 'ADAM',
  'decoder_hidden_dim': 64,
  'encoder_lr': 0.0001,
  'encoder_epoch': 1000,
  'decoder_lr': 0.0001,
  'decoder_epoch': 1000},
 'dataset': {'type': 'synthetic',
  'path': './datasets/csv/synthetic_changed_itv_v_seed.csv',
  'batch': 64,
  'projection_horizon': 5,
  'trunc_path': './rmsn_decoder_dataset/1224_test_3/',
  'truncated_batch': 64,
  'truncated_one_series': 47},
 'gpus': '3',
 'debug': False,
 'exid': '1224_test_3'}

In [4]:
import pandas as pd
path = args.dataset['path']
dataset = pd.read_csv(path)

In [5]:
dataset.head()

Unnamed: 0,ID,Time,pos_x,pos_y,vel_x,vel_y,itv_x,itv_y,seed_x,seed_y
0,0.0,0.0,-1.066326,-1.687203,-1.59949,4.218008,0.0,0.0,0.0,0.0
1,0.0,0.01,-1.082321,-1.645023,-1.623482,4.112558,0.0,0.0,0.0,0.0
2,0.0,0.02,-1.098556,-1.603898,-1.647834,4.009745,0.0,0.0,0.0,0.0
3,0.0,0.03,-1.115035,-1.5638,-1.672552,3.909501,0.0,0.0,0.0,0.0
4,0.0,0.04,-1.13176,-1.524705,-1.69764,3.811763,0.0,0.0,0.0,0.0


In [6]:
# from utils.dataset_uitls.get_dataloader import synthetic_regular_time_series
from datasets.synthetic import *
valid_data = Synthetic(path, idx=np.arange(1000))

In [7]:
from torch.utils.data import DataLoader, Dataset

data_loader = DataLoader(dataset=valid_data, batch_size=1000,
                                  shuffle=False,collate_fn=synthetic_regular_time_series)

In [8]:
whole_data = next(iter(data_loader))

In [9]:
obs = whole_data['obs']

truncate observations

In [12]:
from copy import copy, deepcopy
obs_list = []
mask_list = []
projection_horizon = 5
idx_list = []

for i in range(obs.size(0)):
    for j in range(3, 50):
        hidden_idx = torch.zeros(projection_horizon, 2) # hidden index -> {id, t}
        hidden_idx[:,0]=i
        hidden_idx[:,1]=j

        idx_list.append(hidden_idx)
        max_projection = min(projection_horizon, 50-j)
        trun_obs = obs[i,j:j+max_projection]
        mask = torch.zeros(projection_horizon, 2)
        mask[:max_projection,:] = 1
        
        if max_projection!=5:
            tmp = torch.zeros(mask.size())
            tmp[:max_projection,:] = trun_obs
            trun_obs = tmp
            
        obs_list.append(trun_obs)
        mask_list.append(mask)
        
print(torch.stack(obs_list).size())
print(torch.stack(mask_list).size())

torch.Size([47000, 5, 2])
torch.Size([47000, 5, 2])


In [13]:
hidden_idx = torch.stack(idx_list)

truncate intervention

In [14]:
itv = whole_data['itv']

In [15]:
from copy import copy
itv_list = []
projection_horizon = 5

for i in range(itv.size(0)):
    for j in range(2, 49):
        max_projection = min(projection_horizon, 49-j)
        trun_itv = itv[i, j:j+max_projection]
        mask = torch.zeros(projection_horizon, 3)  # considering intervention mask
        mask[:max_projection,:] = 1

        if max_projection!=5:
            tmp = torch.zeros(mask.size())
            tmp[:max_projection,:] = trun_itv
            trun_itv = tmp
        itv_list.append(trun_itv)

In [16]:
print(torch.stack(itv_list).size())

torch.Size([47000, 5, 3])


In [17]:
b = torch.stack(mask_list)
b = b.reshape(-1)
for k in b:
    if k <0:
        print('h')

In [18]:
truncated_dataset = torch.cat([hidden_idx, torch.stack(obs_list),torch.stack(mask_list), torch.stack(itv_list)], dim=2)

In [19]:
truncated_dataset.size()

torch.Size([47000, 5, 9])

In [20]:
truncated_dataset = truncated_dataset.numpy()

In [21]:
base_path = './rmsn_decoder_dataset/'+args.exid+'/'
base_path

'./rmsn_decoder_dataset/1224_test_3/'

In [22]:
np.save(base_path+'obs_mask_itv', truncated_dataset)

Generate Stabilized Weight File

In [11]:
from utils import Parser, get_dataloader, get_model, get_runner
args.lr = 0.0001
model, optim = get_model(args)

Succeed in Loading Stabilized Weights Nominator
Succeed in Loading Stabilized Weights Denominator
Succeed in Loading Censoring Nominator
Succeed in Loading Censoring Denominator
model: rmsn, number of params: 178186


In [91]:
itv = whole_data['itv']
epsilon=0.000001

def gaussian_dist(obs, mean, var):
    """
    :param obs:
    :param mean:
    :param var:
    :return: p1 * p2
    """
    return torch.exp((-0.5)*torch.pow((obs-mean)/var,2))/(var+epsilon)

propensity = model
obs = obs.float().permute(1,0,2)
itv = itv.float()[:,:,:-1].permute(1,0,2)

H = torch.cat([obs, itv], dim=2)
censor_numerator = propensity.censor_numer.infer(obs) # update hidden
censor_denominator = propensity.censor_denom.infer(H)
censor_coeff = (censor_numerator/(censor_denominator+epsilon))

# predict SW~
_,_,_, sw_numerator = propensity.st_numer(obs)
A_mean = torch.stack(sw_numerator[0])
A_std = torch.stack(sw_numerator[1])
sw_numerator = gaussian_dist(obs[1:,:,:], A_mean, A_std)

H=torch.cat([obs[1:,:,:], itv[:itv.size(0)-1,:,:]],dim=2)
_,_,_, sw_demoninator = propensity.st_denom(x=H, gt=obs)
A_mean = torch.stack(sw_demoninator[0])
A_std = torch.stack(sw_demoninator[1])
sw_demoninator = gaussian_dist(obs[1:,:,:], A_mean, A_std)

SW=1
for i in range(sw_numerator.size(2)):
    SW*=sw_numerator[:,:,i]/(sw_demoninator[:,:,i]+epsilon)
SW = SW.unsqueeze(2)

print(censor_coeff.size(), SW.size())

torch.Size([49, 1000, 1]) torch.Size([49, 1000, 1])


In [92]:
CW = censor_coeff.permute(1,0,2)
SW = SW.permute(1,0,2)

make a CW matrix

In [113]:
from copy import copy
CW_list = []
mask_list = []
projection_horizon = 6

for i in range(CW.size(0)):
    for j in range(0, 48):
        max_projection = min(projection_horizon, 48-j)
        trun_CW = CW[i,j:j+max_projection]
        mask = torch.zeros(projection_horizon, 1)
        mask[:max_projection,:] = 1
        
        if max_projection!=6:
            if max_projection ==1:
                continue
            tmp = copy(mask)
            tmp[:max_projection,:] = trun_CW
            trun_CW = tmp
            
        CW_list.append(trun_CW)
        mask_list.append(mask)
print(torch.stack(CW_list).size())
# print(torch.stack(mask_list).size())

torch.Size([47000, 6, 1])


In [115]:
CW_mat = torch.stack(CW_list)

In [120]:
proc_CW = torch.ones(CW_mat.size(0),CW_mat.size(1)-1, CW_mat.size(2))
for i in range(1,6):
    CW_mat[:,i,:] =CW_mat[:,i-1,:]*CW_mat[:,i,:]
proc_CW = CW_mat[:,1:,:]

In [121]:
proc_CW.size()

torch.Size([47000, 5, 1])

make a SW matrix

In [122]:
from copy import copy
SW_list = []
mask_list = []
projection_horizon = 6

for i in range(SW.size(0)):
    for j in range(0, 48):
        max_projection = min(projection_horizon, 48-j)
        trun_SW = SW[i,j:j+max_projection]
        mask = torch.zeros(projection_horizon, 1)
        mask[:max_projection,:] = 1
        
        if max_projection!=6:
            if max_projection ==1:
                continue
            tmp = copy(mask)
            tmp[:max_projection,:] = trun_SW
            trun_SW = tmp
            
        SW_list.append(trun_SW)
        mask_list.append(mask)
        
print(torch.stack(SW_list).size())
# print(torch.stack(mask_list).size())

torch.Size([47000, 6, 1])


In [123]:
SW_mat = torch.stack(SW_list)
proc_SW = torch.ones(SW_mat.size(0),SW_mat.size(1)-1, SW_mat.size(2))
for i in range(1,6):
    SW_mat[:,i,:] =SW_mat[:,i-1,:]*SW_mat[:,i,:]
proc_CW = CW_mat[:,1:,:]

In [125]:
proc_CW.size()

torch.Size([47000, 5, 1])

In [11]:
truncated_dataset = torch.cat([proc_SW, proc_CW], dim=2)
truncated_dataset = truncated_dataset.detach().numpy()
base_path = './rmsn_decoder_dataset/'+args.exid+'/'
np.save(base_path+'SW_CW', truncated_dataset)

NameError: name 'proc_SW' is not defined

hidden vector

In [12]:
from models.rmsn.encoder_net import EncoderNet
input_dim = args.model['intervention_dim'] + \
            args.model['observation_dim']
hidden_dim = args.model['encoder_dim']
obs_dim = args.model['observation_dim']
encoder = EncoderNet(input_dim, hidden_dim, obs_dim, propensity_net=model)

[Encoder Net] Encoder is initialized


In [13]:
state_dict = torch.load('./save/{}/Encoder'.format(args.exid))['model_state_dict']
encoder.load_state_dict(state_dict)
print('Succeed in Loading Encoder')

Succeed in Loading Encoder


In [15]:
_, _,hidden_v = encoder(torch.cat([obs.float(), itv[:,:,:-1].float()], dim=2).permute(1,0,2), gt=obs.float().permute(1,0,2), valid=True, jupyter=True)

In [16]:
hidden_v.size()

torch.Size([49, 1000, 64])

In [24]:
import os
base_path+='hidden'
os.mkdir(base_path)

FileExistsError: [Errno 17] File exists: './rmsn_decoder_dataset/1224_test_3/hidden'

In [25]:
base_path

'./rmsn_decoder_dataset/1224_test_3/hidden'

In [26]:
for i in range(hidden_v.size(0)):
    for j in range(hidden_v.size(1)):
        v = hidden_v[i,j]
        np.save(base_path+'/{}_{}'.format(j,i), v.detach().numpy())