Creator: Dhanajit Brahma

Adapted from the original TensorFlow implementation: https://github.com/jsyoon0823/GAIN

Generative Adversarial Imputation Networks (GAIN) Implementation on the Letter and Spam Datasets
Reference: J. Yoon, J. Jordon, M. van der Schaar, "GAIN: Missing Data Imputation using Generative Adversarial Nets," ICML, 2018.

In [1]:
#%% Packages
import torch
import numpy as np
from tqdm import tqdm
import torch.nn.functional as F

In [2]:
dataset_file = 'Letter.csv'  # 'Letter.csv' for the Letter dataset and 'Spam.csv' for the Spam dataset
use_gpu = True  # set it to True to use GPU and False to use CPU

# Seed NumPy's global RNG so a fresh "Restart & Run All" reproduces the same
# run: every random draw in this notebook (np.random.uniform / normal /
# permutation) goes through that generator.
random_seed = 0
np.random.seed(random_seed)

In [3]:
# Select CUDA device 0 only when GPU execution was requested; with
# use_gpu = False this cell must not touch CUDA at all, otherwise the
# notebook cannot run on a machine without a usable GPU.
if use_gpu is True:
    torch.cuda.set_device(0)

In [4]:
#%% System Parameters
mb_size = 128      # 1. Mini batch size
p_miss = 0.2       # 2. Missing rate
p_hint = 0.9       # 3. Hint rate
alpha = 10         # 4. Loss Hyperparameters (weight of the reconstruction term in the generator loss)
train_rate = 0.8   # 5. Train Rate (fraction of rows used for training)

#%% Data

# Data generation: comma-separated numeric file, first (header) row skipped
Data = np.loadtxt(dataset_file, delimiter=",", skiprows=1)

# Parameters
No = len(Data)         # number of rows
Dim = Data.shape[1]    # number of columns

# Hidden state dimensions
H_Dim1 = Dim
H_Dim2 = Dim

# Normalization (0 to 1), applied to every column at once and in place.
# Min_Val keeps each column's original minimum; Max_Val keeps each column's
# maximum *after* the minimum has been subtracted (i.e. the column's range).
Min_Val = np.min(Data, axis=0)
Data -= Min_Val
Max_Val = np.max(Data, axis=0)
Data /= (Max_Val + 1e-6)   # 1e-6 guards against division by zero for constant columns

#%% Missing introducing
# Per-column missing rate, kept as a (Dim, 1) column vector
p_miss_vec = p_miss * np.ones((Dim,1))

# One uniform draw per entry. The draws are laid out as (Dim, No) so that the
# global RNG stream is consumed column by column, then thresholded row-wise
# against p_miss_vec and transposed into the (No, Dim) mask.
# Missing[r, c] is 1. when the draw exceeds the column's missing rate, else 0.
column_draws = np.random.uniform(0., 1., size = [Dim, len(Data)])
Missing = np.ascontiguousarray((1. * (column_draws > p_miss_vec)).T)

    
#%% Train Test Division

# Random row order; the first Train_No shuffled rows form the training set,
# the remaining Test_No rows form the test set.
idx = np.random.permutation(No)

Train_No = int(No * train_rate)
Test_No = No - Train_No

train_rows = idx[:Train_No]
test_rows = idx[Train_No:]

# Train / Test Features and Missing Indicators (same rows for both)
trainX, trainM = Data[train_rows,:], Missing[train_rows,:]
testX, testM = Data[test_rows,:], Missing[test_rows,:]

#%% Necessary Functions

# 1. Xavier Initialization Definition
# def xavier_init(size):
#     in_dim = size[0]
#     xavier_stddev = 1. / tf.sqrt(in_dim / 2.)
#     return tf.random_normal(shape = size, stddev = xavier_stddev)
def xavier_init(size):
    """Return a NumPy array of shape `size` with zero-mean normal entries.

    The standard deviation is 1 / sqrt(size[0] / 2), i.e. it depends only on
    the first entry of `size` (the input dimension of the layer).
    """
    fan_in = size[0]
    stddev = 1. / np.sqrt(fan_in / 2.)
    return np.random.normal(0.0, stddev, size)
    
# Hint Vector Generation
def sample_M(m, n, p):
    """Return an (m, n) float array of independent 0./1. entries.

    Each entry is 1. when a uniform [0, 1) draw is greater than `p`, so an
    entry is 1. with probability 1 - p.
    """
    draws = np.random.uniform(0., 1., size = [m, n])
    return (draws > p).astype(np.float64)
   

In [5]:
'''
GAIN Consists of 3 Components
- Generator
- Discriminator
- Hint Mechanism
'''   
   
#%% GAIN Architecture   
   
# #%% 1. Input Placeholders
# # 1.1. Data Vector
# X = tf.placeholder(tf.float32, shape = [None, Dim])
# # 1.2. Mask Vector 
# M = tf.placeholder(tf.float32, shape = [None, Dim])
# # 1.3. Hint vector
# H = tf.placeholder(tf.float32, shape = [None, Dim])
# # 1.4. X with missing values
# New_X = tf.placeholder(tf.float32, shape = [None, Dim])

#%% 2. Discriminator
# device=None is torch.tensor's default (same as omitting the argument), so
# one code path serves both the GPU and the non-GPU configuration.
d_device = "cuda" if use_gpu is True else None

# Weights, created in W1 -> W2 -> W3 order. The first layer takes
# Data + Hint as inputs, hence Dim*2 input rows.
D_W1 = torch.tensor(xavier_init([Dim*2, H_Dim1]), requires_grad=True, device=d_device)
D_W2 = torch.tensor(xavier_init([H_Dim1, H_Dim2]), requires_grad=True, device=d_device)
D_W3 = torch.tensor(xavier_init([H_Dim2, Dim]), requires_grad=True, device=d_device)

# Biases start at zero. Output is multi-variate (one value per feature).
D_b1 = torch.tensor(np.zeros(shape = [H_Dim1]), requires_grad=True, device=d_device)
D_b2 = torch.tensor(np.zeros(shape = [H_Dim2]), requires_grad=True, device=d_device)
D_b3 = torch.tensor(np.zeros(shape = [Dim]), requires_grad=True, device=d_device)

theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

#%% 3. Generator
# device=None is torch.tensor's default (same as omitting the argument), so
# one code path serves both the GPU and the non-GPU configuration.
g_device = "cuda" if use_gpu is True else None

# Weights, created in W1 -> W2 -> W3 order. The first layer takes
# Data + Mask as inputs (Random Noises are in Missing Components), hence
# Dim*2 input rows.
G_W1 = torch.tensor(xavier_init([Dim*2, H_Dim1]), requires_grad=True, device=g_device)
G_W2 = torch.tensor(xavier_init([H_Dim1, H_Dim2]), requires_grad=True, device=g_device)
G_W3 = torch.tensor(xavier_init([H_Dim2, Dim]), requires_grad=True, device=g_device)

# Biases start at zero.
G_b1 = torch.tensor(np.zeros(shape = [H_Dim1]), requires_grad=True, device=g_device)
G_b2 = torch.tensor(np.zeros(shape = [H_Dim2]), requires_grad=True, device=g_device)
G_b3 = torch.tensor(np.zeros(shape = [Dim]), requires_grad=True, device=g_device)

theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

In [6]:
#%% GAIN Function

#%% 1. Generator
def generator(new_x,m):
    """Run the generator network on `new_x` concatenated with `m` along dim 1.

    Uses the module-level parameters G_W1..G_W3 / G_b1..G_b3: two ReLU layers
    followed by a sigmoid layer, so every output entry lies in [0, 1].
    """
    # Mask + Data Concatenate
    hidden = torch.cat([new_x, m], dim=1)
    for weight, bias in ((G_W1, G_b1), (G_W2, G_b2)):
        hidden = F.relu(torch.matmul(hidden, weight) + bias)

    # [0,1] normalized Output
    return torch.sigmoid(torch.matmul(hidden, G_W3) + G_b3)

#%% 2. Discriminator
def discriminator(new_x, h):
    """Run the discriminator network on `new_x` concatenated with `h` along dim 1.

    Uses the module-level parameters D_W1..D_W3 / D_b1..D_b3: two ReLU layers
    followed by a linear layer whose logits are squashed by a sigmoid, giving
    one probability in [0, 1] per output entry.
    """
    # Hint + Data Concatenate
    stacked = torch.cat((new_x, h), dim=1)
    first_hidden = F.relu(stacked @ D_W1 + D_b1)
    second_hidden = F.relu(first_hidden @ D_W2 + D_b2)
    logits = second_hidden @ D_W3 + D_b3

    # [0,1] Probability Output
    return torch.sigmoid(logits)

#%% 3. Other functions
# Random sample generator for Z
def sample_Z(m, n):
    """Return an (m, n) array of uniform noise drawn from [0, 0.01)."""
    noise_shape = [m, n]
    return np.random.uniform(low = 0., high = 0.01, size = noise_shape)

# Mini-batch generation
def sample_idx(m, n):
    """Return the first `n` entries of a random permutation of range(m).

    The result therefore holds distinct indices (at most `m` of them).
    """
    return np.random.permutation(m)[:n]

In [7]:
def discriminator_loss(M, New_X, H):
    """Cross-entropy loss of the discriminator for one batch.

    Args:
        M: mask tensor; entries are used as 0/1 weights (M weights the
            log(D) term, 1-M weights the log(1-D) term).
        New_X: data tensor passed to the generator together with M.
        H: hint tensor passed to the discriminator.

    Returns:
        Scalar tensor: the negated mean of
        M * log(D_prob) + (1-M) * log(1 - D_prob).
    """
    # Generator. Its output is detached: this loss is only used to update the
    # discriminator, so backward() should stop at the imputed values instead
    # of also back-propagating through the generator and filling gradients
    # that are never applied. The loss value and the discriminator gradients
    # are unaffected.
    G_sample = generator(New_X,M).detach()
    # Combine with original data: keep New_X where M is 1, generator output elsewhere
    Hat_New_X = New_X * M + G_sample * (1-M)

    # Discriminator
    D_prob = discriminator(Hat_New_X, H)

    #%% Loss
    # 1e-8 keeps log() finite when a probability saturates at 0 or 1
    D_loss = -torch.mean(M * torch.log(D_prob + 1e-8) + (1-M) * torch.log(1. - D_prob + 1e-8))
    return D_loss

def generator_loss(X, M, New_X, H):
    """Generator loss for one batch, plus two reconstruction metrics.

    Args:
        X: reference data tensor, used only for the MSE_test_loss metric.
        M: mask tensor; entries are used as 0/1 weights.
        New_X: data tensor fed to the generator together with M.
        H: hint tensor passed to the discriminator.

    Returns:
        (G_loss, MSE_train_loss, MSE_test_loss) where
        G_loss = adversarial term + alpha * MSE_train_loss,
        MSE_train_loss is the squared error on M-weighted entries divided by mean(M),
        MSE_test_loss is the squared error against X on (1-M)-weighted entries
        divided by mean(1-M).
    """
    inv_mask = 1-M

    #%% Structure
    # Generator output, merged with New_X (New_X is kept wherever M is 1)
    G_sample = generator(New_X,M)
    Hat_New_X = New_X * M + G_sample * inv_mask

    # Discriminator's view of the merged sample
    D_prob = discriminator(Hat_New_X, H)

    #%% Loss
    # Adversarial term on the (1-M) entries; 1e-8 keeps log() finite
    adversarial_term = -torch.mean(inv_mask * torch.log(D_prob + 1e-8))
    # Reconstruction term on the M entries
    MSE_train_loss = torch.mean((M * New_X - M * G_sample)**2) / torch.mean(M)

    #%% MSE Performance metric
    MSE_test_loss = torch.mean((inv_mask * X - inv_mask*G_sample)**2) / torch.mean(inv_mask)

    return adversarial_term + alpha * MSE_train_loss, MSE_train_loss, MSE_test_loss
    
def test_loss(X, M, New_X):
    """Evaluate the generator on a batch without tracking gradients.

    Args:
        X: reference data tensor.
        M: mask tensor; entries are used as 0/1 weights.
        New_X: data tensor fed to the generator together with M.

    Returns:
        (MSE_test_loss, G_sample): the squared error between X and the
        generator output on (1-M)-weighted entries divided by mean(1-M), and
        the raw generator output.
    """
    # Evaluation only: nothing here is back-propagated, so skip building the
    # autograd graph (saves memory on large evaluation batches). The returned
    # values are numerically the same; they just do not require grad.
    with torch.no_grad():
        #%% Structure
        # Generator
        G_sample = generator(New_X,M)

        #%% MSE Performance metric
        MSE_test_loss = torch.mean(((1-M) * X - (1-M)*G_sample)**2) / torch.mean(1-M)
    return MSE_test_loss, G_sample

In [8]:
#%% Optimizers
# One Adam optimizer per network, both with PyTorch's default settings
optimizer_D = torch.optim.Adam(params=theta_D)
optimizer_G = torch.optim.Adam(params=theta_G)

#%% Iterations
# device=None is torch.tensor's default, i.e. the same as omitting the argument
batch_device = "cuda" if use_gpu is True else None

#%% Start Iterations
for it in tqdm(range(5000)):

    #%% Inputs
    # Draw order (row indices -> noise -> hint) is fixed so the NumPy RNG
    # stream is consumed in a well-defined sequence.
    mb_idx = sample_idx(Train_No, mb_size)
    X_mb = trainX[mb_idx,:]

    Z_mb = sample_Z(mb_size, Dim)
    M_mb = trainM[mb_idx,:]
    # Hint: the mask with each entry independently kept with probability p_hint
    H_mb1 = sample_M(mb_size, Dim, 1-p_hint)
    H_mb = M_mb * H_mb1

    # Missing Data Introduce: noise replaces the entries where the mask is 0
    New_X_mb = M_mb * X_mb + (1-M_mb) * Z_mb

    # Move the whole batch to torch tensors on the selected device
    X_mb, M_mb, H_mb, New_X_mb = (
        torch.tensor(batch_array, device=batch_device)
        for batch_array in (X_mb, M_mb, H_mb, New_X_mb)
    )

    # Discriminator step
    optimizer_D.zero_grad()
    D_loss_curr = discriminator_loss(M=M_mb, New_X=New_X_mb, H=H_mb)
    D_loss_curr.backward()
    optimizer_D.step()

    # Generator step
    optimizer_G.zero_grad()
    G_loss_curr, MSE_train_loss_curr, MSE_test_loss_curr = generator_loss(X=X_mb, M=M_mb, New_X=New_X_mb, H=H_mb)
    G_loss_curr.backward()
    optimizer_G.step()

    #%% Intermediate Losses
    # Report RMSE (square root of the MSE metrics) every 100 iterations
    if it % 100 != 0:
        continue
    train_rmse = np.sqrt(MSE_train_loss_curr.item())
    test_rmse = np.sqrt(MSE_test_loss_curr.item())
    print('Iter: {}'.format(it))
    print('Train_loss: {:.4}'.format(train_rmse))
    print('Test_loss: {:.4}'.format(test_rmse))
    print()

  0%|          | 10/5000 [00:00<28:40,  2.90it/s]

Iter: 0
Train_loss: 0.3342
Test_loss: 0.3383



  2%|▏         | 114/5000 [00:01<01:23, 58.35it/s]

Iter: 100
Train_loss: 0.1941
Test_loss: 0.204



  4%|▍         | 213/5000 [00:02<00:52, 91.17it/s]

Iter: 200
Train_loss: 0.1566
Test_loss: 0.1648



  6%|▋         | 313/5000 [00:04<00:51, 91.15it/s]

Iter: 300
Train_loss: 0.1518
Test_loss: 0.1504



  8%|▊         | 413/5000 [00:05<00:49, 91.83it/s]

Iter: 400
Train_loss: 0.1436
Test_loss: 0.1466



 10%|█         | 513/5000 [00:06<00:49, 91.57it/s]

Iter: 500
Train_loss: 0.1314
Test_loss: 0.1495



 12%|█▏        | 613/5000 [00:07<00:47, 92.96it/s]

Iter: 600
Train_loss: 0.1312
Test_loss: 0.1401



 14%|█▍        | 713/5000 [00:08<00:46, 91.69it/s]

Iter: 700
Train_loss: 0.1193
Test_loss: 0.1394



 16%|█▋        | 813/5000 [00:09<00:45, 92.38it/s]

Iter: 800
Train_loss: 0.1164
Test_loss: 0.1321



 18%|█▊        | 913/5000 [00:10<00:44, 92.25it/s]

Iter: 900
Train_loss: 0.1097
Test_loss: 0.1509



 20%|██        | 1013/5000 [00:11<00:43, 91.47it/s]

Iter: 1000
Train_loss: 0.1071
Test_loss: 0.1342



 22%|██▏       | 1113/5000 [00:12<00:42, 91.11it/s]

Iter: 1100
Train_loss: 0.1003
Test_loss: 0.1343



 24%|██▍       | 1213/5000 [00:13<00:41, 92.14it/s]

Iter: 1200
Train_loss: 0.09141
Test_loss: 0.1265



 26%|██▋       | 1313/5000 [00:14<00:38, 95.18it/s]

Iter: 1300
Train_loss: 0.09714
Test_loss: 0.1375



 28%|██▊       | 1413/5000 [00:16<00:39, 91.42it/s]

Iter: 1400
Train_loss: 0.09017
Test_loss: 0.1362



 30%|███       | 1513/5000 [00:17<00:38, 90.94it/s]

Iter: 1500
Train_loss: 0.08389
Test_loss: 0.1319



 32%|███▏      | 1613/5000 [00:18<00:36, 92.16it/s]

Iter: 1600
Train_loss: 0.08213
Test_loss: 0.1454



 34%|███▍      | 1713/5000 [00:19<00:36, 90.67it/s]

Iter: 1700
Train_loss: 0.08541
Test_loss: 0.1202



 36%|███▋      | 1813/5000 [00:20<00:34, 92.12it/s]

Iter: 1800
Train_loss: 0.08326
Test_loss: 0.1381



 38%|███▊      | 1913/5000 [00:21<00:33, 92.02it/s]

Iter: 1900
Train_loss: 0.07923
Test_loss: 0.1249



 40%|████      | 2013/5000 [00:22<00:32, 92.08it/s]

Iter: 2000
Train_loss: 0.07596
Test_loss: 0.1167



 42%|████▏     | 2113/5000 [00:23<00:31, 92.16it/s]

Iter: 2100
Train_loss: 0.07772
Test_loss: 0.1249



 44%|████▍     | 2213/5000 [00:24<00:30, 91.93it/s]

Iter: 2200
Train_loss: 0.07377
Test_loss: 0.1291



 46%|████▋     | 2313/5000 [00:25<00:28, 93.43it/s]

Iter: 2300
Train_loss: 0.077
Test_loss: 0.1381



 48%|████▊     | 2413/5000 [00:26<00:27, 93.65it/s]

Iter: 2400
Train_loss: 0.07289
Test_loss: 0.1358



 50%|█████     | 2513/5000 [00:27<00:27, 91.60it/s]

Iter: 2500
Train_loss: 0.07455
Test_loss: 0.1097



 52%|█████▏    | 2613/5000 [00:29<00:26, 91.57it/s]

Iter: 2600
Train_loss: 0.07166
Test_loss: 0.1192



 54%|█████▍    | 2713/5000 [00:30<00:24, 92.48it/s]

Iter: 2700
Train_loss: 0.07104
Test_loss: 0.1341



 56%|█████▋    | 2813/5000 [00:31<00:23, 91.30it/s]

Iter: 2800
Train_loss: 0.07514
Test_loss: 0.1253



 58%|█████▊    | 2913/5000 [00:32<00:22, 91.19it/s]

Iter: 2900
Train_loss: 0.07385
Test_loss: 0.128



 60%|██████    | 3013/5000 [00:33<00:21, 92.35it/s]

Iter: 3000
Train_loss: 0.07921
Test_loss: 0.1259



 62%|██████▏   | 3113/5000 [00:34<00:20, 92.66it/s]

Iter: 3100
Train_loss: 0.07544
Test_loss: 0.1226



 64%|██████▍   | 3213/5000 [00:35<00:19, 91.79it/s]

Iter: 3200
Train_loss: 0.07133
Test_loss: 0.1232



 66%|██████▋   | 3313/5000 [00:36<00:18, 91.15it/s]

Iter: 3300
Train_loss: 0.07411
Test_loss: 0.1236



 68%|██████▊   | 3413/5000 [00:37<00:17, 91.89it/s]

Iter: 3400
Train_loss: 0.07162
Test_loss: 0.1219



 70%|███████   | 3513/5000 [00:38<00:16, 91.91it/s]

Iter: 3500
Train_loss: 0.07753
Test_loss: 0.13



 72%|███████▏  | 3613/5000 [00:39<00:14, 92.49it/s]

Iter: 3600
Train_loss: 0.07649
Test_loss: 0.1207



 74%|███████▍  | 3713/5000 [00:41<00:13, 92.07it/s]

Iter: 3700
Train_loss: 0.07395
Test_loss: 0.1342



 76%|███████▋  | 3813/5000 [00:42<00:13, 90.06it/s]

Iter: 3800
Train_loss: 0.07453
Test_loss: 0.1296



 78%|███████▊  | 3913/5000 [00:43<00:11, 91.69it/s]

Iter: 3900
Train_loss: 0.07407
Test_loss: 0.1282



 80%|████████  | 4013/5000 [00:44<00:10, 91.52it/s]

Iter: 4000
Train_loss: 0.07186
Test_loss: 0.1292



 82%|████████▏ | 4113/5000 [00:45<00:09, 91.55it/s]

Iter: 4100
Train_loss: 0.06995
Test_loss: 0.1297



 84%|████████▍ | 4213/5000 [00:46<00:08, 88.90it/s]

Iter: 4200
Train_loss: 0.06693
Test_loss: 0.1224



 86%|████████▌ | 4310/5000 [00:47<00:08, 79.72it/s]

Iter: 4300
Train_loss: 0.07387
Test_loss: 0.1214



 88%|████████▊ | 4417/5000 [00:49<00:07, 79.85it/s]

Iter: 4400
Train_loss: 0.07252
Test_loss: 0.1273



 90%|█████████ | 4516/5000 [00:50<00:05, 88.91it/s]

Iter: 4500
Train_loss: 0.07322
Test_loss: 0.1164



 92%|█████████▏| 4616/5000 [00:51<00:04, 91.33it/s]

Iter: 4600
Train_loss: 0.06921
Test_loss: 0.1161



 94%|█████████▍| 4715/5000 [00:52<00:03, 86.68it/s]

Iter: 4700
Train_loss: 0.07135
Test_loss: 0.1336



 96%|█████████▋| 4813/5000 [00:53<00:02, 78.71it/s]

Iter: 4800
Train_loss: 0.06967
Test_loss: 0.1255



 98%|█████████▊| 4911/5000 [00:54<00:01, 80.14it/s]

Iter: 4900
Train_loss: 0.06921
Test_loss: 0.1228



100%|██████████| 5000/5000 [00:55<00:00, 89.45it/s]


In [9]:
#%% Final Loss
# device=None is torch.tensor's default, i.e. the same as omitting the argument
eval_device = "cuda" if use_gpu is True else None

# Whole test split in a single batch
Z_mb = sample_Z(Test_No, Dim)
M_mb = testM
X_mb = testX

# Missing Data Introduce: noise replaces the entries where the mask is 0
New_X_mb = M_mb * X_mb + (1-M_mb) * Z_mb

X_mb, M_mb, New_X_mb = (
    torch.tensor(eval_array, device=eval_device)
    for eval_array in (X_mb, M_mb, New_X_mb)
)

MSE_final, Sample = test_loss(X=X_mb, M=M_mb, New_X=New_X_mb)

# Reported as RMSE, i.e. the square root of the MSE metric
final_rmse = np.sqrt(MSE_final.item())
print('Final Test RMSE: ' + str(final_rmse))

Final Test RMSE: 0.12446774813546198


In [10]:
# Keep the test values where the mask is 1 and take the generator output elsewhere
imputed_data = M_mb * X_mb + (1-M_mb) * Sample
print("Imputed test data:")
# Optional fixed-width float formatting:
# np.set_printoptions(formatter={'float': lambda x: "{0:0.8f}".format(x)})

# detach() drops any autograd history and cpu() is a no-op for tensors that
# already live on the CPU, so this one call chain covers both device settings.
print(imputed_data.detach().cpu().numpy())

Imputed test data:
[[0.13333332 0.26666665 0.26666665 ... 0.39999997 0.16881414 0.5333333 ]
 [0.26666665 0.39999997 0.26306941 ... 0.59999996 0.16587397 0.46666664]
 [0.19999999 0.13333332 0.33333331 ... 0.39999997 0.13333332 0.58901419]
 ...
 [0.13333332 0.36883384 0.26666665 ... 0.5333333  0.27967424 0.5333333 ]
 [0.19999999 0.39999997 0.39999997 ... 0.46666664 0.13333332 0.5333333 ]
 [0.33333331 0.56728029 0.41322088 ... 0.5333333  0.33333331 0.59999996]]
