In [11]:
import pandas as pd
import os
import os.path as path
import numpy as np
import argparse
import json
import matplotlib.pyplot as plt
import random
from math import comb

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
from torchvision.utils import save_image
from torchvision.utils import make_grid
import torchvision.transforms as transforms

import dataloaders
import networks
import utils

In [12]:
################################################################################
def _str2bool(value):
    """Convert a command-line token such as 'true', '0' or 'no' into a bool.

    Without an explicit converter argparse stores the raw string, and any
    non-empty string (including 'False') is truthy.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('1', 'true', 't', 'yes', 'y'):
        return True
    if token in ('0', 'false', 'f', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got {!r}'.format(value))


# Command-line interface of the script version of this notebook.
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str)
parser.add_argument('--model', type=str)

parser.add_argument('--N', type=int, default=1000)
parser.add_argument('--T', type=int, default=50)

parser.add_argument('--nepochs', type=int)
parser.add_argument('--batch_size', type=int, default=250)
parser.add_argument('--initial_lr', type=float, default=0.001)
parser.add_argument('--test', type=int, default=0)
parser.add_argument('--test_method', default='1')
# '1'  : graph cut, using G(i, j) as the score (see graph_cut)
# '2a' : representation-based score computed by option2a
# '2b' : representation-based score computed by option2b
# '2c' : accepted by test() but has no implementation yet

parser.add_argument('--log_file', default='log.txt')
# Parsed explicitly so that `--continue_saved False` really means False.
parser.add_argument('--continue_saved', type=_str2bool, default=False)

# Image geometry; typed as int so values given on the command line are not
# left as strings when they are combined into a shape tuple.
parser.add_argument('--channels', type=int, default=3)
parser.add_argument('--dim_x', type=int, default=64)
parser.add_argument('--dim_y', type=int, default=64)

# args = parser.parse_known_args()
class Args:
    """Notebook stand-in for the namespace returned by ``parser.parse_args()``.

    The class attributes below are the default settings.  ``__init__`` copies
    them onto the instance so that ``vars(args)`` / ``args.__dict__`` actually
    contains the settings (with class attributes only, the instance dict is
    empty and dumping it to JSON writes ``{}``).  Keyword arguments override
    individual defaults, e.g. ``Args(test=0)``.
    """
    dataset = 'cifar10'
    model = 'contrastive'
    N = 1000
    T = 50
    nepochs = 100
    batch_size = 96
    initial_lr = 0.001
    test = 1
    test_method = '1'

    log_file = 'log.txt'
    continue_saved = False
    channels = 3
    dim_x = 64
    dim_y = 64

    def __init__(self, **overrides):
        defaults = {name: value for name, value in vars(type(self)).items()
                    if not name.startswith('_')}
        unknown = set(overrides) - set(defaults)
        if unknown:
            raise TypeError('unknown setting(s): {}'.format(', '.join(sorted(unknown))))
        defaults.update(overrides)
        for name, value in defaults.items():
            setattr(self, name, value)


args = Args()

# (channels, dim_x, dim_y) of one image
img_shape = (args.channels, args.dim_x, args.dim_y)
################################################################################

In [13]:
def graph_cut(gs, eta):
    """Score a candidate change point ``eta`` from a pairwise score matrix.

    Indices ``0 .. eta-1`` form the first segment and ``eta .. T-1`` the
    second, where ``T`` is the side length of ``gs``.  The score is the mean of
    ``gs`` over pairs that straddle the cut minus the mean over pairs lying on
    the same side of it (diagonal self-pairs included).

    The sequence length is taken from ``gs`` itself, so the function no longer
    depends on a notebook-level ``ds`` variable.

    Args:
        gs: square (T, T) array-like of pairwise scores.
        eta: candidate change point, ``0 < eta < T``.

    Returns:
        The cut score as a float.

    Raises:
        ValueError: if ``gs`` is not square or ``eta`` leaves one side empty
            (the cross-segment mean would be undefined).
    """
    gs = np.asarray(gs, dtype=float)
    if gs.ndim != 2 or gs.shape[0] != gs.shape[1]:
        raise ValueError('gs must be a square matrix, got shape {}'.format(gs.shape))
    T = gs.shape[0]
    if not 0 < eta < T:
        raise ValueError('eta must satisfy 0 < eta < {}, got {}'.format(T, eta))

    in_first = np.arange(T) < eta
    # True where both indices fall on the same side of the cut.
    same_side = in_first[:, None] == in_first[None, :]

    E_rc = gs[same_side].mean()   # within-segment average
    E_r = gs[~same_side].mean()   # cross-segment average
    return E_r - E_rc


def compute_gs(model, ds, X):
    """Evaluate the pair model on every ordered pair of frames in ``X``.

    For each ``(d1, d2)`` with ``0 <= d1, d2 < ds.T`` the model is called on
    ``X[d1]`` and ``X[d2]`` (each given a leading batch dimension of 1), the
    output is passed through a sigmoid to get ``p`` and the odds
    ``g = p / (1 - p)`` are derived from it.

    Args:
        model: object whose ``forward(x1, x2)`` returns a single-element tensor.
        ds: object exposing the sequence length as ``ds.T``.
        X: tensor indexed by time step along its first dimension.

    Returns:
        ``(ps, gs)``: two ``(ds.T, ds.T)`` NumPy arrays holding the sigmoid
        outputs and the corresponding odds.
    """
    ps = np.zeros((ds.T, ds.T))
    gs = np.zeros((ds.T, ds.T))
    # Pure inference: disable autograd so the T*T forward passes do not build
    # computation graphs.
    with torch.no_grad():
        for d1 in range(ds.T):
            # Non in-place unsqueeze: the slices of X are never modified.
            x1 = X[d1].unsqueeze(0)
            for d2 in range(ds.T):
                p = model.forward(x1, X[d2].unsqueeze(0))
                p = torch.sigmoid(p).item()
                if p == 1:
                    # A saturated sigmoid would make the odds divide by zero.
                    p -= 0.0001
                ps[d1][d2] = p
                gs[d1][d2] = p / (1 - p)
    return ps, gs


def representation(model, ds, X, M):
    """Describe every frame of ``X`` by its model response to ``M`` landmarks.

    ``M`` landmark samples are drawn from ``ds`` with a fixed seed (so each
    call uses the same landmarks; note that this reseeds Python's global
    ``random`` generator).  Row ``i`` of the result holds the sigmoid of
    ``model.forward(X[i], landmark_m)`` for ``m = 0 .. M-1``.

    The landmarks are moved to ``X.device`` so the function does not rely on a
    notebook-level ``device`` variable.

    Args:
        model: object whose ``forward(x1, x2)`` returns a single-element tensor.
        ds: dataset exposing ``n``, ``T``, ``data_dim`` (a tuple) and
            ``get_normal(index)``, whose first return item is one sample.
        X: tensor indexed by frame along its first dimension.
        M: number of landmark samples.

    Returns:
        CPU float tensor of shape ``(X.size(0), M)``.
    """
    random.seed(7)
    L = torch.zeros((M,) + ds.data_dim)
    for i in range(M):
        index = random.randrange(ds.n * ds.T)
        L[i] = torch.as_tensor(ds.get_normal(index)[0], dtype=torch.float32)
    L = L.to(X.device)

    R = torch.zeros(X.size(0), M)
    # Inference only; also avoids in-place unsqueeze_ on slices of X and L.
    with torch.no_grad():
        for i in range(X.size(0)):
            x = X[i].unsqueeze(0)
            logits = [float(model.forward(x, L[m].unsqueeze(0))) for m in range(M)]
            R[i] = torch.sigmoid(torch.tensor(logits, dtype=torch.float32))

    return R


def option2a(R, eta):
    """Within-segment sum of squared deviations for a split at ``eta``.

    ``R[:eta]`` and ``R[eta:]`` are each centred on their own row-wise mean
    and the squared deviations of both segments are added up.  (The second
    term previously centred the *first* segment on the second segment's mean.)
    The split uses the length of ``R`` itself instead of a global ``ds.T``.

    Args:
        R: tensor whose first dimension indexes time steps.
        eta: candidate change point; rows ``0 .. eta-1`` form the first segment.

    Returns:
        The summed squared deviation as a Python float.
    """
    r1, r2 = R[:eta], R[eta:]
    diff1 = r1 - torch.mean(r1, 0)
    diff2 = r2 - torch.mean(r2, 0)

    return torch.sum(diff1.pow(2)).detach().item() + \
            torch.sum(diff2.pow(2)).detach().item()


def option2b(R, eta, alpha):
    """Energy-distance style divergence between the two sides of a split.

    With ``A = R[:eta]`` (``m`` rows) and ``B = R[eta:]`` (``n`` rows) and
    ``d(x, y) = sum_k |x_k - y_k| ** alpha`` this returns

        m*n/(m+n) * ( 2/(m*n) * sum_{a,b} d(a, b)
                      - sum_{a<a'} d(a, a') / C(m, 2)
                      - sum_{b<b'} d(b, b') / C(n, 2) )

    Fixes relative to the previous loop implementation: the final scale used
    the loop variable ``t`` instead of ``T``; cross pairs visited in reversed
    order fell through to the third term; the first segment was taken as
    ``0 .. eta`` although the normalisers assume ``eta`` and ``T - eta`` rows;
    and within-segment sums counted every unordered pair twice while dividing
    by ``C(., 2)``.

    Args:
        R: tensor whose first dimension indexes time steps.
        eta: candidate change point; rows ``0 .. eta-1`` form the first segment.
        alpha: exponent applied to the absolute coordinate differences.

    Returns:
        0-dim tensor holding the scaled divergence.

    Raises:
        ValueError: if either segment has fewer than two rows (``C(., 2)``
            would be zero).
    """
    T = R.size(0)
    m, n = eta, T - eta
    if m < 2 or n < 2:
        raise ValueError(
            'both segments need at least 2 rows, got {} and {}'.format(m, n))

    flat = R.reshape(T, -1)
    first, second = flat[:eta], flat[eta:]

    def pair_dist(a, b):
        # (len(a), len(b)) matrix of sum_k |a_i[k] - b_j[k]| ** alpha
        return (a.unsqueeze(1) - b.unsqueeze(0)).abs().pow(alpha).sum(dim=-1)

    first_term = pair_dist(first, second).sum() * (2 / (m * n))
    # The strict upper triangle keeps each unordered pair exactly once.
    second_term = torch.triu(pair_dist(first, first), diagonal=1).sum() / comb(m, 2)
    third_term = torch.triu(pair_dist(second, second), diagonal=1).sum() / comb(n, 2)

    return m * n / T * (first_term - second_term - third_term)


def validation_accuracy(loader, model):
    """Return the fraction of pairs in ``loader`` that ``model`` labels correctly.

    Each batch is ``((X1, X2), y)``.  The model output is reshaped to one value
    per pair, passed through a sigmoid and thresholded at 0.5 before being
    compared with ``y``.  The model is put in eval mode for the pass and
    switched back to train mode before returning.
    """
    hits, seen = 0, 0
    model.eval()

    with torch.no_grad():
        for pair, labels in loader:
            left = pair[0].to(device=device)
            right = pair[1].to(device=device)
            labels = labels.to(device=device)

            logits = model.forward(left, right)
            probs = torch.sigmoid(logits.view(logits.size(0)))

            guessed = probs >= 0.5
            hits += (guessed == labels).sum().item()
            seen += guessed.size(0)

    model.train()
    return hits / seen


def ts_sample_precision(ps, eta):
    """Summarise how ``ps`` splits around the change point ``eta``.

    Pairs ``(d1, d2)`` are "same side" when both indices are ``< eta`` or both
    are ``>= eta``; all other pairs straddle the change point.

    The sequence length is read from ``ps`` rather than from a global ``ds``.

    Args:
        ps: square (T, T) array-like of sigmoid outputs.
        eta: change point of the sequence.

    Returns:
        ``(positive, negative)`` where ``positive`` is the fraction of
        same-side pairs with ``ps < 0.5`` and ``negative`` is the fraction of
        straddling pairs with ``ps >= 0.5``.
    """
    ps = np.asarray(ps)
    T = ps.shape[0]

    in_first = np.arange(T) < int(eta)
    same_side = in_first[:, None] == in_first[None, :]

    positive_count = int(np.count_nonzero(same_side & (ps < 0.5)))
    negative_count = int(np.count_nonzero(~same_side & (ps >= 0.5)))

    negative_n = 2 * eta * (T - eta)
    positive_n = T * T - negative_n
    return positive_count / positive_n, negative_count / negative_n


def train(model, ds, root_dir):
    """Train ``model`` on ``ds`` and checkpoint it in ``root_dir`` after every epoch.

    Uses the notebook-level names ``args`` (hyper-parameters), ``device`` and
    ``ds_test`` (held-out data used for the periodic evaluation).

    Per batch the loss is appended to ``root_dir/args.log_file`` and written to
    TensorBoard.  Every 50th batch and on the last batch of an epoch the
    validation accuracy and the pair statistics of test sequence 0 are printed.

    Args:
        model: network whose ``forward(X1, X2)`` returns one logit per pair.
        ds: training dataset yielding ``((X1, X2), y)``.
        root_dir: directory receiving the log file and the ``model`` checkpoint.
    """
    # should not shuffle here
    train_loader = DataLoader(ds, args.batch_size, shuffle=False, drop_last=False)
    test_loader = DataLoader(ds_test, args.batch_size, shuffle=False, drop_last=False)
    num_batches = len(train_loader)
    log_path = path.join(root_dir, args.log_file)

    if args.continue_saved:
        # Resume from the checkpoint; map_location lets a checkpoint written
        # on a GPU be restored on a CPU-only machine.
        model.load_state_dict(
            torch.load(path.join(root_dir, 'model'), map_location=device))
    else:
        # Fresh run: start a new log file with a header line.
        with open(log_path, 'w') as log:
            log.write('Epoch\tIteration\tLoss\n')

    optimizer = optim.Adam(
        model.parameters(),
        lr=args.initial_lr
    )
    # Stateless loss, built once instead of once per batch.
    criterion = nn.BCEWithLogitsLoss()

    writer = SummaryWriter()
    try:
        for epoch in range(0, args.nepochs):
            print('Epoch {}'.format(epoch))

            correct = 0  # correctly classified training pairs in this epoch

            for batch_index, (X, y) in enumerate(train_loader):
                optimizer.zero_grad()

                X1 = X[0].to(device=device)
                X2 = X[1].to(device=device)
                y = y.to(device=device)

                p = model.forward(X1, X2)
                p = p.view(p.size(0))

                prediction = torch.sigmoid(p) > 0.5
                correct += (prediction == y).sum().item()  # training accuracy

                loss = criterion(p, y.float())
                loss.backward()
                optimizer.step()
                loss_value = loss.item()

                # Periodic report.  The "last batch" test compares against the
                # number of batches (it used to compare against batch_size).
                if batch_index % 50 == 0 or batch_index == num_batches - 1:
                    val_acc = validation_accuracy(test_loader, model)
                    print('\n[%d/%d][%d/%d]\tLoss: %.2E\tVal Acc: %.2E' %
                          (epoch, args.nepochs, batch_index, num_batches,
                           loss_value, val_acc))
                    # pair statistics on one test sequence
                    ith = 0
                    X_test = ds_test.get_ts_sample(ith).to(device)

                    with torch.no_grad():
                        model.eval()
                        # The sample comes from ds_test, so its length and its
                        # change point are read from ds_test as well.
                        ps, gs = compute_gs(model, ds_test, X_test)
                        positive_precision, negative_precision = ts_sample_precision(
                            ps, ds_test.cps[ith])
                        print("positive, negative samples precision %.3f, %.3f" % (positive_precision, negative_precision))
                        print(ps)
                        model.train()

                # write to log
                with open(log_path, 'a') as log:
                    log.write('{0}\t{1}\t{2}\n'.format(
                        epoch,
                        batch_index,
                        loss_value
                    ))

                # write to tensorboard; global step = batches seen so far
                itr = epoch * num_batches + batch_index
                writer.add_scalar('Loss', loss_value, itr)

            print('Training accuracy %.4E' % (correct / len(ds)))
            # save the model at every epoch
            torch.save(model.state_dict(), path.join(root_dir, 'model'))
    finally:
        # Flush pending TensorBoard events even if training is interrupted.
        writer.close()

In [14]:
def test(model, ds, dir):
    """Estimate the change point of every test sequence and save diagnostics.

    For each sequence a score is computed for every candidate ``eta`` in
    ``[2, ds.T - 2]`` with the method selected by ``args.test_method`` and the
    candidate with the largest score becomes ``eta_hat``.  Written to
    ``dir/errors_<method>``: per-sequence score plots, (method '1' only) the
    pairwise probability tables and an image grid, ``test_stats.txt`` with the
    mean/std of ``|eta - eta_hat|`` and ``cps.txt`` with predicted and true
    change points.

    Uses the notebook-level names ``args`` and ``device``.

    Args:
        model: trained pair model (the caller puts it in eval mode).
        ds: test dataset exposing ``n``, ``T``, ``cps`` and ``get_ts_sample``.
        dir: run directory under which the results folder is created.

    Raises:
        NotImplementedError: for test method '2c', which has no scoring code.
        ValueError: for any other unknown test method.
    """
    print("Running tests...")
    method = args.test_method
    # Fail early instead of hitting an unbound `score` inside the eta loop.
    if method == '2c':
        raise NotImplementedError("test method '2c' is not implemented")
    if method not in ('1', '2a', '2b'):
        raise ValueError('unknown test method {!r}'.format(method))

    if method == '1':
        test_dir = 'errors_graph_cut'
    else:
        test_dir = 'errors_' + method
    test_dir_path = path.join(dir, test_dir)
    os.makedirs(test_dir_path, exist_ok=True)

    num_landmarks = 20  # landmark count for the representation-based methods

    eta_hats = []  # save predicted change points
    etas = []
    # iterate over ts test samples X_1, X_2, etc...
    all_i = range(ds.n) if not args.dataset == 'clevr' else [args.T*6*(i-1)+j for i in range(1, 7) for j in range(5)]
    for i in all_i:
        print("Running", i)
        etas.append(ds.cps[i])
        # load test sample X_i
        X = ds.get_ts_sample(i).to(device)

        scores = {}  # save scores for all candidate etas
        min_eta = 2
        max_eta = ds.T - 2
        max_score = -float('inf')
        eta_hat = -1

        # Per-sequence quantities are computed once, outside the eta loop.
        with torch.no_grad():
            if method == '1':
                ps, gs = compute_gs(model, ds, X)
                ps_df = pd.DataFrame(ps)
                ps_tf_df = pd.DataFrame(ps > 0.5, dtype=int)
                ps_tf_df.to_csv(path.join(test_dir_path, 'X_{}_binary.csv'.format(i)), index=True)
                ps_df.to_csv(path.join(test_dir_path, 'X_{}.csv'.format(i)), index=True)
                grid = make_grid(X, nrow=args.T)
                save_image(grid, path.join(test_dir_path, 'X_{}.png'.format(i)))
            else:
                # '2a' / '2b' score a landmark representation of the sequence
                # (this call used to be commented out, leaving R undefined).
                R = representation(model, ds, X, num_landmarks)

        for eta in range(min_eta, max_eta + 1):
            if method == '1':
                score = graph_cut(gs, eta)
            elif method == '2a':
                # NOTE(review): option2a is a within-segment squared error,
                # which one would normally minimise; this loop keeps the
                # maximum for every method - confirm the intended direction.
                score = option2a(R, eta)
            else:  # '2b'
                score = option2b(R, eta, 1)
                print(score)

            # Plain floats keep comparison and plotting uniform across methods.
            score = float(score)
            scores[eta] = score
            if score > max_score:
                max_score = score
                eta_hat = eta
        eta_hats.append(eta_hat)

        # save score plot
        plt.scatter(list(scores.keys()), list(scores.values()))
        plt.axvline(x=ds.cps[i])
        plt.axvline(x=eta_hat, color='r')
        plt.xlabel('etas (red: eta_hat, blue: true eta)')
        plt.ylabel('errors')
        plt.savefig(path.join(test_dir_path, 'X_{}_errors.png'.format(i)))
        plt.close()

    # compute mean of |eta-eta_hat| among all test samples
    diff = np.abs(np.asarray(etas) - np.asarray(eta_hats))
    score_mean = float(np.mean(diff))
    score_std = float(np.std(diff))
    with open(path.join(test_dir_path, 'test_stats.txt'), 'w') as f:
        json.dump({'mean': score_mean, 'std': score_std}, f, indent=2)
    # save eta_hats (first line) and etas (second line)
    with open(path.join(test_dir_path, 'cps.txt'), 'w') as cps_r:
        for tmp in eta_hats:
            cps_r.write('{} '.format(tmp))
        cps_r.write('\n')
        for tmp in etas:
            cps_r.write('{} '.format(tmp))

In [15]:
if __name__ == '__main__':
    # create parent directories, like 'experiments/cifar10/linearmlvae_50'
    # and 'experiments/cifar10/dfcmlvae_128'
    dir0 = 'experiments'
    dir1 = path.join(dir0, args.dataset)
    dir2 = path.join(dir1, args.model)
    os.makedirs(dir2, exist_ok=True)  # also creates dir0 and dir1

    # use cpu or gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print('Creating training and testing datasets...')
    trans = transforms.Compose([transforms.Resize([args.dim_x, args.dim_y]),
                                transforms.ToTensor()
                                ])
    if args.dataset == 'mnist':
        ds = dataloaders.mnist_contrastive_explicit(1000, args.T, 50, train=True, transform=trans)
        ds_test = dataloaders.mnist_contrastive_explicit(50, args.T, 50, train=False, seed=7, transform=trans)
    elif args.dataset == 'cifar10':
        ds = dataloaders.cifar10_contrastive(1000, args.T, train=True, transform=trans)
        ds_test = dataloaders.cifar10_contrastive_explicit(50, args.T, 50, train=False, seed=7, transform=trans)
    elif args.dataset == 'celeba':
        ds = dataloaders.celeba_contrastive_explicit(1000, args.T, 50, train=True, transform=trans)
        ds_test = dataloaders.celeba_contrastive_explicit(50, args.T, 50, train=False, seed=7, transform=trans)
    elif args.dataset == 'clevr':
        # NOTE(review): no dataset is built here, so `ds` / `ds_test` must
        # already exist in the kernel for the code below to work - confirm.
        pass
    else:
        raise Exception("invalid dataset name")

    print('Creating models...')
    if args.model == 'contrastive':
        encoder = networks.resnet18()
        model = networks.TwoPathNetwork(3*64*64, predefined_encoder=encoder).to(device)
    else:
        raise Exception("invalid model name")

    # Run directories are named 1, 2, 3, ... inside dir2.
    existing_dirs = [int(f) for f in os.listdir(dir2) if f.isdigit()]
    # Latest run, or None when nothing has been trained yet (taking max() of
    # an empty list used to abort even the very first training run).
    dir_test = max(existing_dirs) if existing_dirs else None
    if args.test == 0:
        # create new directory for this training run
        new = '1' if not existing_dirs else str(max(existing_dirs) + 1)
        # root dir is the directory of this particular run of experiment
        # all data produced by training and testing will be saved in this root dir
        root_dir = path.join(dir2, new)
        os.makedirs(root_dir, exist_ok=True)
        # save args: gather class-level defaults as well as instance
        # attributes, because `args.__dict__` alone is empty when the
        # settings are declared as class attributes.
        args_dict = {name: value for name, value in vars(type(args)).items()
                     if not name.startswith('_') and not callable(value)}
        args_dict.update(vars(args))
        with open(path.join(root_dir, 'args.txt'), 'w') as f:
            json.dump(args_dict, f, indent=2)
        train(model, ds, root_dir)
    else:
        if dir_test is None:
            raise FileNotFoundError(
                'no trained run found in {!r}; train a model first (test=0)'.format(dir2))
        root_dir = path.join(dir2, str(dir_test))
        # map_location lets a GPU-trained checkpoint load on a CPU-only machine
        model.load_state_dict(
            torch.load(path.join(root_dir, 'model'), map_location=device))
        model.eval()
        test(model, ds_test, root_dir)

Creating training and testing datasets...
Files already downloaded and verified
Files already downloaded and verified
Creating models...
Running tests...
Running 0
Running 1
Running 2
Running 3
Running 4
Running 5
Running 6
Running 7
Running 8
Running 9
Running 10
Running 11
Running 12
Running 13
Running 14
Running 15
Running 16
Running 17
Running 18
Running 19
Running 20
Running 21
Running 22
Running 23
Running 24
Running 25
Running 26
Running 27
Running 28
Running 29
Running 30
Running 31
Running 32
Running 33
Running 34
Running 35
Running 36
Running 37
Running 38
Running 39
Running 40
Running 41
Running 42
Running 43
Running 44
Running 45
Running 46
Running 47
Running 48
Running 49
