In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.utils import save_image

from PIL import Image
import torchvision
from torchvision import datasets, models, transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from sklearn.metrics import *
import time
import os
from torch.utils import data
import random

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Root directory under the andreasabo home folder; the split CSV is read from here.
andrea_dir = "/home/andreasabo/Documents/HNProject/"

# Data directory on the machine running this notebook
# (one of: abhishekmoturu, andreasabo, denizjafari, navidkorhani).
data_dir = "/home/navidkorhani/Documents/HNProject/"

# Load the target table, keeping only the columns used for labels and splits.
csv_path = os.path.join(andrea_dir, "all_splits_1000000.csv")
wanted_columns = ['subj_id', 'image_ids', 'view_label', 'view_train']
data_df = pd.read_csv(csv_path, usecols=wanted_columns)

In [4]:
# Mini-batch size; passed to the DataLoaders through the `params` dict built in a later cell.
batch_size = 128

In [5]:
# Integer class index assigned to each view name.
label_mapping = {'Other':0, 'Saggital_Right':1, 'Transverse_Right':2, 
                 'Saggital_Left':3, 'Transverse_Left':4, 'Bladder':5}

# Fixed seed for the subject shuffle so the train/valid split is identical on every run
# (an unseeded shuffle would move subjects between train and valid across kernel restarts).
SPLIT_SEED = 0

# Encode view names as integers. The guard makes this cell safe to re-run: mapping an
# already-encoded column a second time would turn every label into NaN.
known_codes = list(label_mapping.values())
if not data_df['view_label'].dropna().isin(known_codes).all():
    data_df['view_label'] = data_df['view_label'].map(label_mapping)

# view_train == 1 -> train/validation pool, view_train == 0 -> held-out test rows.
train_df = data_df[data_df.view_train == 1]
test_df = data_df[data_df.view_train == 0]

train_and_valid_subj_ids = train_df['subj_id'].tolist()
train_and_valid_image_ids = train_df['image_ids'].tolist()
test_ids = test_df['image_ids'].tolist()

# image id -> encoded label, for every train/valid/test image (test entries win on duplicates).
labels = dict(zip(train_and_valid_image_ids, train_df['view_label'].tolist()))
labels.update(zip(test_ids, test_df['view_label'].tolist()))

# Split train vs. valid at the subject level so that no subject contributes images to both:
# shuffle whole per-subject groups, then cut after the first 80% of subjects.
t_v_ids = pd.DataFrame({'subj_ids': train_and_valid_subj_ids,
                        'image_ids': train_and_valid_image_ids})
id_groups = [subject_rows for _, subject_rows in t_v_ids.groupby('subj_ids')]
random.Random(SPLIT_SEED).shuffle(id_groups)
id_groups = pd.concat(id_groups).reset_index(drop=True)

# Unique subject ids in shuffled order (dict.fromkeys keeps first-seen order).
train_val_set = list(dict.fromkeys(id_groups['subj_ids'].tolist()))
train_val_split = int(0.8*len(train_val_set))
# First subject that belongs to the validation part, and the row where its images begin.
cutoff = train_val_set[train_val_split]
train_portion = (id_groups['subj_ids'].values == cutoff).argmax()

train_ids = id_groups[:train_portion]['image_ids'].tolist()
valid_ids = id_groups[train_portion:]['image_ids'].tolist()

partition = {'train':train_ids, 'valid':valid_ids, 'test':test_ids}

In [6]:
class VAE(nn.Module):
    """Fully connected variational autoencoder with one hidden layer on each side.

    Args:
        input_dim: Length of the flattened input vector. The default 65536
            presumably corresponds to a 256x256 single-channel image.
        hidden_dim: Width of the encoder and decoder hidden layers.
        latent_dim: Size of the latent vector.

    The defaults reproduce the original fixed architecture, and the layer names
    (fc1, fc21, fc22, fc3, fc4) are unchanged, so existing state dicts still load.
    """

    def __init__(self, input_dim=65536, hidden_dim=2000, latent_dim=400):
        super(VAE, self).__init__()

        self.input_dim = input_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc21 = nn.Linear(hidden_dim, latent_dim)  # mean head
        self.fc22 = nn.Linear(hidden_dim, latent_dim)  # log-variance head
        self.fc3 = nn.Linear(latent_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, input_dim)

    def encode(self, x):
        """Return (mu, logvar) for a batch of flattened inputs."""
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparameterize(self, mu, logvar):
        """Sample z = mu + eps * std with eps drawn from a standard normal.

        Sampling happens on every call; there is no train/eval switch here.
        """
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        return mu + eps*std

    def decode(self, z):
        """Map latent vectors back to input space; the sigmoid keeps outputs in [0, 1]."""
        h3 = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        """Flatten x to (-1, input_dim) and return (reconstruction, mu, logvar)."""
        mu, logvar = self.encode(x.view(-1, self.input_dim))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

# Instantiate the VAE on the selected device and restore the trained weights.
vae_model = VAE().to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved from a GPU
# also loads on a CPU-only machine (the device cell falls back to 'cpu').
checkpoint = torch.load('results/h2000_l400_e100/vae_model.pt', map_location=device)
vae_model.load_state_dict(checkpoint)
vae_model.eval()

# Freeze all weights: the model is only used for inference from here on.
for param in vae_model.parameters():
    param.requires_grad = False

In [7]:
class Dataset(data.Dataset):
    """Map-style PyTorch dataset yielding (grayscale image tensor, class label) pairs.

    Args:
        list_IDs: Sequence of image ids; each id is the image file name without '.jpg'.
        labels: Mapping from image id to its numeric class label.
        img_dir: Optional directory holding the '.jpg' files. When None (default) the
            notebook-level `data_dir` is used, as before.

    NOTE(review): the export loop later in this notebook reads images from
    data_dir + 'all_label_img/'. If that is where the files live, pass
    img_dir=os.path.join(data_dir, 'all_label_img') -- TODO confirm.
    """

    def __init__(self, list_IDs, labels, img_dir=None):
        """Store the id list, the label lookup and the optional image directory."""
        self.labels = labels
        self.list_IDs = list_IDs
        self.img_dir = img_dir

    def __len__(self):
        """Return the total number of samples."""
        return len(self.list_IDs)

    def _load_image(self, ID):
        """Read '<dir>/<ID>.jpg' as a single-channel image and convert it to a tensor."""
        root = data_dir if self.img_dir is None else self.img_dir
        img_path = os.path.join(root, ID + '.jpg')
        # The context manager closes the file handle once the pixels have been read.
        with Image.open(img_path) as img_file:
            return ToTensor()(img_file.convert('L'))

    def _load_label(self, ID):
        """Return the label as a 0-dim int64 tensor.

        Labels can arrive as floats (e.g. 2.0) when the label column contains NaN;
        they are cast to int so the target dtype is always torch.long.

        Raises:
            ValueError: if the stored label is NaN.
        """
        return torch.tensor(int(self.labels[ID]), dtype=torch.long)

    def __getitem__(self, index):
        """Return (image, label) for the sample at position `index`."""
        ID = self.list_IDs[index]
        image = self._load_image(ID)
        y = self._load_label(ID)
        return image, y

In [8]:
# Build one Dataset/DataLoader pair per split (no augmentation or normalization is applied here).

# DataLoader settings shared by all three splits.
params = {'batch_size': batch_size,
          'shuffle': True,
          'num_workers': 6}

# Generators
train_set = Dataset(partition['train'], labels)
train_loader = DataLoader(train_set, **params)

validation_set = Dataset(partition['valid'], labels)
validation_loader = DataLoader(validation_set, **params)

# Fixed: this used partition['valid'], so the "test" loader silently re-served the
# validation images and the held-out test split was never evaluated.
test_set = Dataset(partition['test'], labels)
test_loader = DataLoader(test_set, **params)


In [4]:
# Number of rows in the metadata table.
data_df.shape[0]

72459

In [5]:
# Load every image listed in the CSV and move it to the model's device. The encoding and
# reconstruction steps below are commented out; enable whichever export is needed.
with torch.no_grad():
    # Only the image id is needed, so iterate that column (index label, value) directly.
    for ind, image_id in data_df['image_ids'].items():
        if ind % 1000 == 0:
            print(ind)  # coarse progress indicator
        img_path = data_dir + 'all_label_img/' + image_id + '.jpg'
        # Close the file handle as soon as the pixels have been read.
        with Image.open(img_path) as img_file:
            image = ToTensor()(img_file.convert('L'))
        image = image.to(device)

        # Optional: save the sampled latent vector for this image.
        # NOTE(review): the folder is named 'latent100_images' while the model above uses a
        # 400-dim latent -- TODO confirm the intended output folder.
        #mu, logvar = vae_model.encode(image.view(-1, 65536))
        #z = vae_model.reparameterize(mu, logvar)
        #output_file = 'latent100_images/'+image_id+'.npy'
        #print(output_file)
        #np.save(output_file, z.detach().cpu().numpy())

        # Optional: save the VAE reconstruction of this image.
        #recon_batch, mu, logvar = vae_model(image.view(-1, 65536))
        #recon_img = recon_batch.view(1, 256, 256)
        #save_image(recon_img.cpu(), data_dir + 'all_label_img_recon400/' + image_id + '.jpg')
    

0
1000


KeyboardInterrupt: 

In [None]:
# NOTE(review): `targets` is not assigned in any cell visible in this notebook, so this cell
# raises NameError on a fresh "Restart & Run All". Presumably a leftover dtype check from a
# training loop -- define `targets` first or remove the cell. TODO confirm.
targets.dtype


In [12]:
# Preview the first five rows of the metadata table.
# NOTE(review): the saved output shows NaN in view_label and view_train for these rows --
# possibly unlabeled rows, or the label-mapping cell having been run twice. TODO confirm.
data_df.head()

Unnamed: 0,image_ids,view_label,subj_id,view_train
0,1323_2_1,,1323,
1,1323_2_2,,1323,
2,1323_2_3,,1323,
3,1323_2_4,,1323,
4,1323_2_5,,1323,
