# Imports + Path Specification

In [1]:
import sys
import os
import torch
from torch import optim
from time import time

In [2]:
# tile2vec_dir = '/home/agupta21/gcloud/231n_gitproject'
tile2vec_dir = '/home/shailimonchik/project'
sys.path.append('../')
sys.path.append(tile2vec_dir)

In [3]:
from src.datasets import TileTripletsDataset, GetBands, RandomFlipAndRotate, ClipAndScale, ToFloatTensor, triplet_dataloader
from src.tilenet import make_tilenet

In [4]:
from src.training import prep_triplets, train_triplet_epoch

In [5]:
import numpy as np
from torch.autograd import Variable
import sys
from src.resnet import ResNet18

In [6]:
#Random Forest Classification

from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Dataloader + TileNet Setup

In [7]:
# Environment stuff
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
cuda = torch.cuda.is_available()
print(cuda)

True


In [8]:
# Defining Parameters
img_type = 'naip'
tile_dir = '/home/shailimonchik/project/data_temp/data/triplets/'
bands = 4
augment = False
batch_size = 50
shuffle = True
num_workers = 4
n_triplets = 108700 #modification: adding 8700 more for semi-supervised learning purposes

In [9]:
dataloader = triplet_dataloader(img_type, tile_dir, bands=bands, augment=augment,
                                batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, 
                                n_triplets=n_triplets, pairs_only=False)
print('Dataloader set up complete.')

Dataloader set up complete.


In [10]:
in_channels = bands
z_dim = 512

In [11]:
TileNet = make_tilenet(in_channels=in_channels, z_dim=z_dim)
TileNet.train()
if cuda: TileNet.cuda()
print('TileNet set up complete.')

TileNet set up complete.


In [12]:
#Hyperparameters to tune 
learning_rates = [1e-2, 3e-2, 1e-3, 3e-3, 1e-4, 3e-4]
betas = [0.5, 0.6, 0.7, 0.8, 0.9]
margins = [10, 15, 20, 25, 30]


# Training Model + Writing Each Epoch to File

In [13]:
epochs = 5
# margin = 10
l2 = 0.01
print_every = 10000
save_models = True

In [14]:
model_dir = '/home/shailimonchik/project/models/'
if not os.path.exists(model_dir): os.makedirs(model_dir)

In [None]:
#### best_params = None
all_plots = []
best_avg_loss = None
best_params = None
print('Begin learning.................')
for margin in margins:
    for lr in learning_rates:
        for beta1 in betas:
            print('lr: {}, beta1: {}, margin: {}'.format(lr, beta1, margin))
            params = (lr, beta1, margin)
            optimizer = optim.Adam(TileNet.parameters(), lr=lr, betas=(beta1, 0.999))
            t0 = time()
            #with open(results_fn, 'w') as file:
            plot_list = []
            for epoch in range(0, epochs):
                (avg_loss, avg_l_n, avg_l_d, avg_l_nd) = train_triplet_epoch(
                    TileNet, cuda, dataloader, optimizer, epoch+1, margin=margin, l2=l2,
                    print_every=print_every, t0=t0, hyper_tuning=True)
                plot_list.append((epoch+1,avg_loss))
                append_name = "hyper_tuning_ep" + str(epoch+1) + "lr_" + str(lr) + "beta1_" + str(beta1) + "margin_"+ str(margin) + ".ckpt" # change test1_ep
                if save_models:
                    model_fn = os.path.join(model_dir,append_name)
                    torch.save(TileNet.state_dict(),model_fn)
            if best_params == None or plot_list[len(plot_list) - 1][1] < best_avg_loss:
                best_params = params
                best_avg_loss = plot_list[len(plot_list) - 1][1]
            all_plots.append((params, plot_list))

Begin learning.................
lr: 0.01, beta1: 0.5, margin: 10
yy
breaking
100
Finished epoch 1: 106.269s
  Average loss: 0.2382
  Average l_n: 0.2218
  Average l_d: -0.6929
  Average l_nd: -0.4712

yy
breaking
100
Finished epoch 2: 213.969s
  Average loss: 0.2420
  Average l_n: 0.2256
  Average l_d: -0.6923
  Average l_nd: -0.4667

yy
breaking
100
Finished epoch 3: 321.713s
  Average loss: 0.2361
  Average l_n: 0.2229
  Average l_d: -0.6962
  Average l_nd: -0.4733

yy
breaking
100
Finished epoch 4: 429.127s
  Average loss: 0.2364
  Average l_n: 0.2203
  Average l_d: -0.6876
  Average l_nd: -0.4673

yy
breaking
100
Finished epoch 1: 106.542s
  Average loss: 0.2345
  Average l_n: 0.2168
  Average l_d: -0.6897
  Average l_nd: -0.4729

yy
breaking
100
Finished epoch 2: 213.256s
  Average loss: 0.2304
  Average l_n: 0.2157
  Average l_d: -0.6938
  Average l_nd: -0.4781

yy
breaking
100
Finished epoch 3: 319.591s
  Average loss: 0.2340
  Average l_n: 0.2187
  Average l_d: -0.6916
  Averag

breaking
100
Finished epoch 3: 312.033s
  Average loss: 0.2113
  Average l_n: 0.2038
  Average l_d: -0.6837
  Average l_nd: -0.4799

yy
breaking
100
Finished epoch 4: 416.001s
  Average loss: 0.2097
  Average l_n: 0.2000
  Average l_d: -0.6806
  Average l_nd: -0.4806

yy
breaking
100
Finished epoch 5: 519.742s
  Average loss: 0.2124
  Average l_n: 0.2000
  Average l_d: -0.6746
  Average l_nd: -0.4747

lr: 0.001, beta1: 0.8, margin: 10
yy
breaking
100
Finished epoch 1: 103.866s
  Average loss: 0.2094
  Average l_n: 0.1994
  Average l_d: -0.6838
  Average l_nd: -0.4845

yy
breaking
100
Finished epoch 2: 208.189s
  Average loss: 0.2078
  Average l_n: 0.1988
  Average l_d: -0.6845
  Average l_nd: -0.4858

yy
breaking
100
Finished epoch 3: 312.085s
  Average loss: 0.2084
  Average l_n: 0.1973
  Average l_d: -0.6823
  Average l_nd: -0.4849

yy
breaking
100
Finished epoch 4: 415.743s
  Average loss: 0.2095
  Average l_n: 0.1999
  Average l_d: -0.6801
  Average l_nd: -0.4802

yy
breaking
100
F

breaking
100
Finished epoch 3: 652.784s
  Average loss: 0.2029
  Average l_n: 0.1927
  Average l_d: -0.6791
  Average l_nd: -0.4864

yy
breaking
100
Finished epoch 4: 871.425s
  Average loss: 0.2070
  Average l_n: 0.1945
  Average l_d: -0.6736
  Average l_nd: -0.4791

yy
breaking
100
Finished epoch 5: 1088.116s
  Average loss: 0.2052
  Average l_n: 0.1951
  Average l_d: -0.6776
  Average l_nd: -0.4825

lr: 0.0003, beta1: 0.5, margin: 10
yy
breaking
100
Finished epoch 1: 220.066s
  Average loss: 0.2035
  Average l_n: 0.1942
  Average l_d: -0.6795
  Average l_nd: -0.4854

yy
breaking
100
Finished epoch 2: 439.021s
  Average loss: 0.2054
  Average l_n: 0.1948
  Average l_d: -0.6769
  Average l_nd: -0.4821

yy
breaking
100
Finished epoch 3: 657.368s
  Average loss: 0.2052
  Average l_n: 0.1924
  Average l_d: -0.6724
  Average l_nd: -0.4799

yy
breaking
100
Finished epoch 4: 876.725s
  Average loss: 0.2009
  Average l_n: 0.1917
  Average l_d: -0.6796
  Average l_nd: -0.4879

yy
breaking
100

yy
breaking
100
Finished epoch 3: 704.209s
  Average loss: 0.3147
  Average l_n: 0.2892
  Average l_d: -1.0210
  Average l_nd: -0.7318

yy
breaking
100
Finished epoch 4: 929.101s
  Average loss: 0.3202
  Average l_n: 0.2965
  Average l_d: -1.0272
  Average l_nd: -0.7306

yy
breaking
100
Finished epoch 5: 1151.725s
  Average loss: 0.3166
  Average l_n: 0.2953
  Average l_d: -1.0293
  Average l_nd: -0.7340

lr: 0.03, beta1: 0.7, margin: 15
yy
breaking
100
Finished epoch 1: 229.729s
  Average loss: 0.3137
  Average l_n: 0.2935
  Average l_d: -1.0291
  Average l_nd: -0.7356

yy
breaking
100
Finished epoch 2: 454.544s
  Average loss: 0.3148
  Average l_n: 0.2979
  Average l_d: -1.0396
  Average l_nd: -0.7416

yy
breaking
100
Finished epoch 3: 682.031s
  Average loss: 0.3158
  Average l_n: 0.2968
  Average l_d: -1.0301
  Average l_nd: -0.7333

yy
breaking
100
Finished epoch 4: 905.951s
  Average loss: 0.3127
  Average l_n: 0.3002
  Average l_d: -1.0410
  Average l_nd: -0.7408

yy
breaking
10

breaking
100
Finished epoch 1: 221.293s
  Average loss: 0.2962
  Average l_n: 0.2676
  Average l_d: -0.9920
  Average l_nd: -0.7244

yy
breaking
100
Finished epoch 2: 441.100s
  Average loss: 0.2919
  Average l_n: 0.2681
  Average l_d: -0.9978
  Average l_nd: -0.7297

yy
breaking
100
Finished epoch 3: 662.905s
  Average loss: 0.2990
  Average l_n: 0.2757
  Average l_d: -1.0011
  Average l_nd: -0.7254

yy
breaking
100
Finished epoch 4: 882.344s
  Average loss: 0.2956
  Average l_n: 0.2738
  Average l_d: -1.0023
  Average l_nd: -0.7285

yy
breaking
100
Finished epoch 5: 1101.968s
  Average loss: 0.2959
  Average l_n: 0.2691
  Average l_d: -0.9904
  Average l_nd: -0.7213

lr: 0.003, beta1: 0.9, margin: 15
yy
breaking
100
Finished epoch 1: 218.408s
  Average loss: 0.2930
  Average l_n: 0.2706
  Average l_d: -1.0018
  Average l_nd: -0.7313

yy
breaking
100
Finished epoch 2: 439.763s
  Average loss: 0.2978
  Average l_n: 0.2734
  Average l_d: -0.9983
  Average l_nd: -0.7249

yy
breaking
100


breaking
100
Finished epoch 4: 890.204s
  Average loss: 0.2920
  Average l_n: 0.2688
  Average l_d: -1.0000
  Average l_nd: -0.7311

yy
breaking
100
Finished epoch 5: 1113.852s
  Average loss: 0.2880
  Average l_n: 0.2650
  Average l_d: -1.0004
  Average l_nd: -0.7355

lr: 0.01, beta1: 0.5, margin: 20
yy
breaking
100
Finished epoch 1: 220.126s
  Average loss: 0.3956
  Average l_n: 0.3301
  Average l_d: -1.2694
  Average l_nd: -0.9394

yy
breaking
100
Finished epoch 2: 441.915s
  Average loss: 0.3972
  Average l_n: 0.3551
  Average l_d: -1.3198
  Average l_nd: -0.9647

yy
breaking
100
Finished epoch 3: 666.270s
  Average loss: 0.3913
  Average l_n: 0.3504
  Average l_d: -1.3303
  Average l_nd: -0.9799

yy
breaking
100
Finished epoch 4: 890.409s
  Average loss: 0.4003
  Average l_n: 0.3537
  Average l_d: -1.3170
  Average l_nd: -0.9633

yy
breaking
100
Finished epoch 5: 1113.386s
  Average loss: 0.3917
  Average l_n: 0.3546
  Average l_d: -1.3304
  Average l_nd: -0.9758

lr: 0.01, beta1:

breaking
100
Finished epoch 2: 444.708s
  Average loss: 0.3884
  Average l_n: 0.3568
  Average l_d: -1.3412
  Average l_nd: -0.9844

yy
breaking
100
Finished epoch 3: 666.483s
  Average loss: 0.3895
  Average l_n: 0.3548
  Average l_d: -1.3441
  Average l_nd: -0.9893

yy
breaking
100
Finished epoch 4: 889.577s
  Average loss: 0.3872
  Average l_n: 0.3487
  Average l_d: -1.3315
  Average l_nd: -0.9828

yy
breaking
100
Finished epoch 5: 1110.532s
  Average loss: 0.3788
  Average l_n: 0.3444
  Average l_d: -1.3339
  Average l_nd: -0.9895

lr: 0.001, beta1: 0.7, margin: 20
yy
breaking
100
Finished epoch 1: 224.338s
  Average loss: 0.3901
  Average l_n: 0.3526
  Average l_d: -1.3308
  Average l_nd: -0.9783

yy
breaking
100
Finished epoch 2: 444.146s
  Average loss: 0.3859
  Average l_n: 0.3586
  Average l_d: -1.3407
  Average l_nd: -0.9821

yy
breaking
100
Finished epoch 3: 664.608s
  Average loss: 0.3893
  Average l_n: 0.3555
  Average l_d: -1.3330
  Average l_nd: -0.9775

yy
breaking
100


In [None]:
for params, plot_list in all_plots:
    print('lr: {}, beta1: {}, margin: {}'.format(params[0], params[1], params[2]), plot_list)


# Pre Process Y Values

In [None]:
#note to self: need to embed tiles first and then figure out loop for reading in the saved model and plotting
#the classification accuracies per epoch
tile_dir = '../data/tiles'
n_tiles = 1000
y = np.load(os.path.join(tile_dir, 'y.npy'))
print(y.shape)
#print(set(y))

In [None]:
# Check CDL classes
print(set(y))

In [None]:
y = LabelEncoder().fit_transform(y)
print(set(y))

# Load in Epochs' Weights + Run each on tile embeddings + Random Forest 

In [None]:
#set up model
in_channels = 4
z_dim = 512
cuda = torch.cuda.is_available()

In [None]:
def getTileEmbeddings(tilenet):
    X = np.zeros((n_tiles, z_dim))
    for idx in range(n_tiles):
        tile = np.load(os.path.join(tile_dir, '{}tile.npy'.format(idx+1)))
        # Get first 4 NAIP channels (5th is CDL mask)
        tile = tile[:,:,:4]
        # Rearrange to PyTorch order
        tile = np.moveaxis(tile, -1, 0)
        tile = np.expand_dims(tile, axis=0)
        # Scale to [0, 1]
        tile = tile / 255
        # Embed tile
        tile = torch.from_numpy(tile).float()
        tile = Variable(tile)
        if cuda: tile = tile.cuda()
        z = tilenet.encode(tile)
        if cuda: z = z.cpu()
        z = z.data.numpy() #1 by 512
        X[idx,:] = z
    return X

In [None]:
epochsToAccuracy = []
epochsToSTD = []
for i in range(0,epochs): #iterator
    curEpoch = i + 1
    # Setting up model
    tilenet = ResNet18()
    if cuda: tilenet.cuda()
    model_fn = "../models/test1_ep"+str(curEpoch)+".ckpt" #open file
    #checkpoint = torch.load(model_fn)
    tilenet.load_state_dict(torch.load(model_fn), strict=False)
    #tilenet.load_state_dict(checkpoint)
    tilenet.eval()
    
    X = getTileEmbeddings(tilenet) #function above
    
    #train random forest classifier
    n_trials = 100
    accs = np.zeros((n_trials,))
    for i in range(n_trials):
        # Splitting data and training RF classifer
        X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2)
        rf = RandomForestClassifier()
        rf.fit(X_tr, y_tr) #X-tr is 512 by 1
        accs[i] = rf.score(X_te, y_te)
    print("Results for Epoch Number: ", str(curEpoch))
    print('Mean accuracy: {:0.4f}'.format(accs.mean()))
    print('Standard deviation: {:0.4f}'.format(accs.std()))
    epochsToAccuracy.append((curEpoch,accs.mean()))
    epochsToSTD.append((curEpoch,accs.std()))
print(epochsToAccuracy)
print(epochsToSTD)
    #save value
    
    
    
    

In [None]:
print(epochsToSTD)

In [None]:
import matplotlib.pyplot as plt
plt.plot(*zip(*epochsToAccuracy))
plt.title('Triplet Augmentation Average Test Performance')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.show()
print(epochsToAccuracy[9][1], "was the Max Accuracy")

In [None]:
plt.plot(*zip(*plot_list))
plt.title('Triplet Augmentation Loss Performance')
plt.xlabel('Epochs')
plt.ylabel('Loss')
print("The min loss was",str(plot_list[9][1]))
plt.show()

In [None]:
plt.plot(*zip(*epochsToSTD))
plt.title('STD on Random Forest Test Prediction')
plt.xlabel('Epochs')
plt.ylabel('Standard Deviation')
plt.show()