In [1]:
from dataset import *
from train import *

In [2]:
import os
import pandas as pd

import collections

import sys

import torch
import torch.optim as optim

In [3]:
# Set parameters
EuroSat_Type = 'ALL'    

In [4]:
args = str(sys.argv)
target_task = args[1]
algorithm = args[2]
algorithm = "bandit"


In [5]:
target_task = "France"

In [6]:
from pathlib import Path
output_path = Path("derived_data")
output_path.mkdir(parents = True, exist_ok = True)

In [7]:
if EuroSat_Type == 'RGB':
  data_folder = '/content/sample_data/'
  #root = os.path.join(data_folder, '2750/')
  root = '2750/'
  download_ON = os.path.exists(root)

  if not download_ON:
    # This can be long...
    #os.chdir(data_folder)
    os.system('wget http://madm.dfki.de/files/sentinel/EuroSAT.zip') #Just RGB Bands
    !unzip EuroSAT.zip
    download_ON = True
elif EuroSat_Type == 'ALL':
    root = 'ds/images/remote_sensing/otherDatasets/sentinel_2/tif/'
    download_ON = os.path.exists(root)
    if not download_ON:
      os.system('wget http://madm.dfki.de/files/sentinel/EuroSATallBands.zip') #All bands
      !unzip EuroSATallBands.zip
      download_ON = True

In [8]:
geo_df = pd.read_csv("metadata.csv")

In [9]:
data = torchvision.datasets.DatasetFolder(root=root,loader = iloader, transform=None, extensions = 'tif')

In [10]:
input_data = prepare_input_data(geo_df, target_task)

In [11]:
np.random.seed(0)
torch.cuda.manual_seed(0)
random.seed(0)

In [12]:
def train(net, train_, val_, criterion, optimizer, epochs=None, scheduler=None, weights=None, save_epoch = 10,
plot = False):
    losses=[]; acc=[]; mean_losses=[]; val_acc=[]
    iter_ = t0 =0
    t0 = time.time()
    for e in range(1, epochs + 1):
        print('e=',e,'{} seconds'.format(time.time() - t0))
        net.train()
        for batch_idx, (data, target) in enumerate(train_):
            if torch.cuda.is_available():
                data, target =  cus_aug(Variable(data.cuda())), Variable(target.cuda())
            else:
                data, target =  cus_aug(Variable(data)), Variable(target)
            optimizer.zero_grad()
            output = net(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            losses = np.append(losses,loss.item())
            mean_losses = np.append(mean_losses, np.mean(losses[max(0,iter_-100):iter_]))
            if iter_ % 500 == 0: #printing after 600 epochs
                clear_output()
                print('Iteration Number',iter_,'{} seconds'.format(time.time() - t0))
                t0 = time.time()
                pred = output.data.cpu().numpy()#[0]
                pred=sigmoid(pred)
                gt = target.data.cpu().numpy()#[0]
                acc = np.append(acc,accuracy(gt,pred))
                val_acc = np.append(val_acc,validation(net, val_))
                print('Train (epoch {}/{}) [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {}\tLearning Rate:{}'.format(
                    e, epochs, batch_idx, len(train_),
                    100. * batch_idx / len(train_), loss.item(), acc[-1],optimizer.param_groups[0]['lr']))
                if plot is True:
                    plt.plot(mean_losses) and plt.show()
                    plt.plot( range(len(acc)) ,acc,'b',label = 'training')
                    plt.plot( range(len(val_acc)), val_acc,'r--',label = 'validation')
                    plt.legend() and plt.show()
                    print('validation accuracy : {}'.format(val_acc[-1]))
                
                #print(mylabels[np.where(gt[1,:])[0]])
            iter_ += 1
            
            del(data, target, loss)
        if scheduler is not None:
           scheduler.step()
        if e % save_epoch == 0:
            
            torch.save(net.state_dict(), '.\Eurosat{}'.format(e))
    print('validation accuracy : {}'.format(val_acc[-1]))
    return net, val_acc[-1]


In [13]:
def bandit_selection(data, input_data, n_epochs = 3, n_it = 2, algorithm = "bandit",iter_samples = 160,
                     lr = .01, milestones = milestones,
                     criteria = criteria, output_path = "."):
    # prepare data ---
    
    target_val_loader =  torch.utils.data.DataLoader(torch.utils.data.Subset(data, input_data["idx_val"]), 
                                                  batch_size = 16, shuffle = True, num_workers = 0)
    target_train_loader =  torch.utils.data.DataLoader(torch.utils.data.Subset(data, input_data["idx_train"]), 
                                                      batch_size = 16, shuffle = True, num_workers = 0)
    target_test_loader =  torch.utils.data.DataLoader(torch.utils.data.Subset(data, input_data["idx_test"]), 
                                                      batch_size = 16, shuffle = True, num_workers = 0)
    

    
    # initialize hyperparameters ---
    
    bandit_selects = [None]
    alpha = dict.fromkeys(input_data["source_task"], [1])
    beta = dict.fromkeys(input_data["source_task"], [1])
    pi = dict.fromkeys(input_data["source_task"], [0])
    
    
    # initialize model ---
   
    net = Load_model()
    optimizer = optim.Adam(net.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones = milestones, gamma=0.1)
    if torch.cuda.is_available():
        net=net.cuda()

    net, acc = train(net, target_train_loader , target_test_loader , criteria, optimizer, n_epochs, scheduler)
    print("Model initiated with acc ", acc)
    accs = [acc]
    
    # train ---
    
    for t in range(n_it):
        if algorithm == "bandit":
            bandit_current, pi = get_bandit(input_data, alpha, beta,t, pi)
            bandit_selects.append(bandit_current)
            current_id = [input_data["source_dict"]["id"][i] for (i, v) in enumerate(input_data["source_dict"]['country']) if v == bandit_current]
            current_id = random.choices(current_id, k = iter_samples)
        else:
            current_id = random.sample(input_data["idx_source"], k = iter_samples)
        current_loader = torch.utils.data.DataLoader(torch.utils.data.Subset(data, input_data["idx_test"]), 
                                                          batch_size = 16, shuffle = True, num_workers = 0)
        net, acc = train(net, current_loader, target_test_loader , criteria, optimizer, n_epochs, scheduler)

        print("At iteration ", t, ", source country is ", bandit_current, ", acc is ", acc, "-------\n")

        accs += [acc]
        if algorithm == "bandit":
            alpha, beta = update_hyper_para(alpha, beta, t, accs,
                                            bandit_current
                                           )
        if not output_path is None:
            if t % 1 == 0:
                torch.save(net.state_dict(), output_path / Path(input_data["target_task"] + "_" + algorithm + ".pt" ))
    if not output_path is None:
        save_output(output_path / Path(input_data["target_task"] + "_" + algorithm + "_evaluation.csv" ), accs, accs)
    return net, bandit_selects, accs, alpha, beta, pi


In [14]:
_, bandit_selects, accs, alpha, beta, pi = bandit_selection(data, input_data, 
                                                            n_epochs = 1, n_it = 2,
                                                            algorithm = algorithm, iter_samples = 160,
                                                           output_path = output_path)

  return 1/(1+np.exp(-z))


Iteration Number 0 1.8680260181427002 seconds
validation accuracy : 0.25
At iteration  1 , source country is  Moldova , acc is  0.25 -------

