### Init ray

In [None]:
# Attach this notebook to a Ray cluster; address="auto" asks Ray to locate an
# already-running cluster instead of starting a new local one.
# ignore_reinit_error=True lets this cell be re-run in the same kernel without
# raising because Ray was already initialised.
import ray
ray.init(address="auto", ignore_reinit_error=True)

### Load default config

In [1]:
# Load the default config
import yaml
import os
import time
import pandas as pd
import random

# Location of the YAML file holding the default experiment configuration.
config_path = './configs/Active_v0.yml'

with open(config_path) as file:
    # FullLoader converts the YAML document into plain Python objects
    # (nested dicts, lists and scalars).
    # NOTE(review): the config is a local, trusted file; switch to
    # yaml.safe_load if it can ever come from an external source.
    config = yaml.load(file, Loader=yaml.FullLoader)

# Create the project directory and the group directory used by the runs.
# makedirs(..., exist_ok=True) also creates missing parent directories and is
# a no-op when the directory already exists, so the cell can be re-run safely
# and does not race between an existence check and the creation.
os.makedirs(config["PROJECT"]["project_dir"], exist_ok=True)
os.makedirs(config["PROJECT"]["group_dir"], exist_ok=True)

### Get the data to annotate

For DataNet we should pass the list of the annotated images

In [2]:
# Get the data to annotate

#############################################################################################
# LOAD DATA
#############################################################################################
from data_utils import CIFAR10Data
# Build the project's CIFAR10 wrapper and read the train/test arrays from it.
cifar10_data = CIFAR10Data()
# Number of target classes, taken from the wrapper's list of class names.
num_classes = len(cifar10_data.classes)
# get_data returns four arrays: train images, train labels, test images, test labels.
# subtract_mean=True presumably returns mean-centred images — TODO confirm in data_utils.
x_train, y_train, x_test, y_test = cifar10_data.get_data(subtract_mean=True)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


CIFAR10 Training data shape: (50000, 32, 32, 3)
CIFAR10 Training label shape (50000, 1)
CIFAR10 Test data shape (10000, 32, 32, 3)
CIFAR10 Test label shape (10000, 1)


In [3]:
# Build the initial labeled / unlabeled split over the training images.
#
# The shuffle uses its own seeded generator so that the split is identical
# after every kernel restart; an unseeded shuffle would silently hand a
# different labeled set to the training stages each time the notebook is re-run.
SPLIT_SEED = 0  # change to draw a different (but still reproducible) split

indices = list(range(len(x_train)))
random.Random(SPLIT_SEED).shuffle(indices)

# The first ADDENDUM shuffled indices form the initial labeled set,
# everything else stays in the unlabeled pool.
initial_labeled = config["RUNS"]["ADDENDUM"]
labeled_set = indices[:initial_labeled]
unlabeled_set = indices[initial_labeled:]

In [4]:
# Evaluate on every test image.
NUM_IMAGES_TEST = len(x_test)
# The test set is simply the list of all test indices, in order (no sampling, no shuffling).
test_set = list(range(NUM_IMAGES_TEST))

In [5]:
# Fill the data-dependent network settings into the loaded config.
# INPUT_SIZE is the first dimension of a single training image.
config["NETWORK"]["INPUT_SIZE"] =  x_train[0].shape[0]
# CLASSES is the list of class names exposed by the dataset wrapper.
config["NETWORK"]["CLASSES"] = cifar10_data.classes

In [6]:
# Display the effective configuration (YAML defaults plus the values set above).
config

{'ACTIVE_ALGO': {'LOSSLEARNING': 1.0},
 'DATASET': {'height_shift_range': 4,
  'horizontal_flip': True,
  'width_shift_range': 4},
 'NETWORK': {'CLASSES': ['plane',
   'car',
   'bird',
   'cat',
   'deer',
   'dog',
   'frog',
   'horse',
   'ship',
   'truck'],
  'INPUT_SIZE': 32,
  'MARGIN': 1.0,
  'embedding_size': 128},
 'PROJECT': {'Backbone': 'resnet18',
  'dataset_name': 'CIFAR',
  'group': 'Active_Learning_v1',
  'group_dir': '/mnt/Ressources/Andres/Temp_active/runs/Active_Learning_v1',
  'project': 'Active_Learning_CIFAR',
  'project_dir': '/mnt/Ressources/Andres/Temp_active/runs',
  'source': 'CIFAR'},
 'RUNS': {'ADDENDUM': 1000,
  'CYCLES': 10,
  'SUBSET': -1,
  'TRIALS': 1,
  'test_each': 5},
 'TEST': {'batch_size': 128},
 'TRAIN': {'Data_augementation': True,
  'EPOCH_SLIT': 80,
  'EPOCH_WARMUP': 2,
  'EPOCH_WHOLE': 120,
  'MILESTONES': [160],
  'batch_size': 128,
  'gamma': 0.1,
  'lr': 0.001,
  'start_epoch': 0,
  'transfer_weight_path': False,
  'w_c_loss': 1.0,
  'w_l

In [55]:
# Find the stages that already exist on disk: every sub-directory of the group
# directory whose name starts with 'Stage' contributes the integer found after
# its last underscore.
group_dir = config['PROJECT']['group_dir']

stage_numbers = []
for entry in os.listdir(group_dir):
    if os.path.isdir(os.path.join(group_dir, entry)) and entry.startswith('Stage'):
        stage_numbers.append(int(entry.split('_')[-1]))

# Map each stage number (ascending) to a list that a later cell fills with
# the epochs checkpointed for that stage.
stages = {number: [] for number in sorted(stage_numbers)}

In [62]:
# For every discovered stage, read the 'checkpoint' index file stored in its
# checkpoint directory and record which epochs have a saved checkpoint.
for stage in stages:
    path = os.path.join(
        config['PROJECT']['group_dir'],
        "Stage_" + str(stage),
        'checkpoint',
    )
    path_check_point_file = os.path.join(path, 'checkpoint')

    # Splitting each line on the double quote puts the quoted checkpoint name
    # in the 'paths' column.
    # NOTE(review): presumably a TensorFlow checkpoint index whose lines look
    # like `key: "name-step"` — confirm against a real file.
    temp = pd.read_csv(
        path_check_point_file,
        sep="\"",
        header=None,
        names=['1', 'paths', 'n'],
    )

    # The epoch is the integer after the last '-' of each checkpoint name;
    # the set removes names that are listed more than once.
    epochs_checked = sorted({int(name.split('-')[-1]) for name in temp.paths.values})
    stages[stage] = epochs_checked
        

In [63]:
# List every (stage, epoch) pair for which a checkpoint was found.
for num_run, epochs in stages.items():
    for epoch in epochs:
        print(num_run, epoch)


0 5
0 10
0 15
0 20
0 25
0 30
0 35
0 40
0 45
0 50
0 55
0 60
0 65
0 70
0 75
0 80
0 85
0 90
0 95
0 100
0 105
0 110
0 115
0 120
0 125
0 130
0 135
0 140
0 145
0 150
0 155
0 160
0 165
0 170
0 175
0 180
0 185
0 190
0 195
0 200
1 5
1 10
1 15
1 20
1 25
1 30
1 35
1 40
1 45
1 50
1 55
1 60
1 65
1 70
1 75
1 80
1 85
1 90
1 95
1 100
1 105
1 110
1 115
1 120
1 125
1 130
1 135
1 140
1 145
1 150
1 155
1 160
1 165
1 170
1 175
1 180
1 185
1 190
1 195
1 200
2 5
2 10
2 15
2 20
2 25
2 30
2 35
2 40
2 45
2 50
2 55
2 60
2 65
2 70
2 75
2 80
2 85
2 90
2 95
2 100
2 105
2 110
2 115
2 120
2 125
2 130
2 135
2 140
2 145
2 150
2 155
2 160
2 165
2 170
2 175
2 180
2 185
2 190
2 195
2 200
3 5
3 10
3 15
3 20
3 25
3 30
3 35
3 40
3 45
3 50
3 55
3 60
3 65
3 70
3 75
3 80
3 85
3 90
3 95
3 100
3 105
3 110
3 115
3 120
3 125
3 130
3 135
3 140
3 145
3 150
3 155
3 160
3 165
3 170
3 175
3 180
3 185
3 190
3 195
3 200
4 5
4 10
4 15
4 20
4 25
4 30
4 35
4 40
4 45
4 50
4 55
4 60
4 65
4 70
4 75
4 80
4 85
4 90
4 95
4 100
4 105
4 110
4 115
4 

In [40]:

# Inspect the sorted epoch list left over from the last stage processed by the
# checkpoint-scanning loop.
epochs_checked

[5,
 10,
 15,
 20,
 25,
 30,
 35,
 40,
 45,
 50,
 55,
 60,
 65,
 70,
 75,
 80,
 85,
 90,
 95,
 100,
 105,
 110,
 115,
 120,
 125,
 130,
 135,
 140,
 145,
 150,
 155,
 160,
 165,
 170,
 175,
 180,
 185,
 190,
 195,
 200]

### Set Configuration from the data

In [None]:
from train_agent_cifar import Active_Learning_train
from inference_agent_cifar import Active_Learning_inference

In [None]:
# Stage index used by the scratch cells below; the training loop treats 0 as
# the stage that starts without initial weights.
num_run =0

In [None]:
# Weights to start stage `num_run` from: the final checkpoint of the previous
# stage. Stage 0 has no predecessor, so it gets False (no initial weights),
# the same rule the training loop applies; without this guard the cell would
# build a path into a non-existent 'Stage_-1' directory when num_run is 0.
if num_run == 0:
    initial_weight_path = False
else:
    initial_weight_path = os.path.join(
        config['PROJECT']['group_dir'],
        'Stage_' + str(num_run - 1),
        'checkpoint',
        'epoch200.ckpt-200',
    )

In [None]:
# Inspect the resolved initial-weights value before launching any training.
initial_weight_path

In [None]:
# Active-learning driver. For each stage:
#   1. train a model on the current labeled set inside a Ray actor,
#   2. if config["TRAIN"]["w_l_loss"] > 0, run the inference actor on the
#      unlabeled pool and move the first ADDENDUM rows of the file it writes
#      into the labeled set,
#   3. otherwise grow the labeled set with the next ADDENDUM shuffled indices.
#
# NOTE(review): the range resumes at stage 6 and therefore needs the stage-5
# checkpoint on disk; `labeled_set` / `unlabeled_set` must also already reflect
# the earlier stages — confirm before running on a fresh kernel.
FIRST_RUN, LAST_RUN = 6, 10             # stages to run: FIRST_RUN .. LAST_RUN - 1
POLL_SECONDS = 10                       # pause between two status checks
FINAL_CHECKPOINT = 'epoch200.ckpt-200'  # checkpoint written at the end of a stage


def _stage_dir(stage):
    """Return the directory of stage number `stage` inside the group directory."""
    return os.path.join(config["PROJECT"]["group_dir"], "Stage_" + str(stage))


def _final_checkpoint(stage):
    """Return the path of the final checkpoint of stage number `stage`."""
    return os.path.join(_stage_dir(stage), 'checkpoint', FINAL_CHECKPOINT)


for num_run in range(FIRST_RUN, LAST_RUN):
    # Stage 0 starts without initial weights; later stages start from the
    # final checkpoint of the previous stage.
    initial_weight_path = False if num_run == 0 else _final_checkpoint(num_run - 1)

    NetworkActor = Active_Learning_train.remote(config, labeled_set, test_set, num_run, initial_weight_path)
    NetworkActor.start_training.remote()

    # Wait until the actor answers its first status query.
    while True:
        time.sleep(POLL_SECONDS)
        try:
            response = ray.get(NetworkActor.isTraining.remote())
            break
        except ray.exceptions.RayActorError:
            # The actor process is gone: polling again can never succeed, so
            # surface the failure instead of looping forever.
            raise
        except Exception:
            # The status call failed (e.g. the actor is not ready yet): retry.
            # Catching Exception rather than everything keeps Ctrl-C /
            # kernel interrupts working while waiting.
            continue

    # Wait until the actor reports that training has finished.
    while True:
        time.sleep(POLL_SECONDS)
        response = ray.get(NetworkActor.isTraining.remote())
        if not response:
            break

    # Ask the training actor to exit and drop the handle before the next step.
    NetworkActor.__ray_terminate__.remote()
    del NetworkActor

    if config["TRAIN"]["w_l_loss"] > 0:
        weight_file = _final_checkpoint(num_run)

        AL_inference = Active_Learning_inference.remote(config, unlabeled_set, num_run, weight_file)
        AL_inference.evaluate.remote()

        run_dir = _stage_dir(num_run)
        ordered_indexes = os.path.join(run_dir, "ordered_indexes.csv")

        # Wait until the file with the ordered indexes has been generated.
        # NOTE(review): this only checks that the file exists; a file left
        # over from an earlier run of the same stage would be picked up too.
        while not os.path.isfile(ordered_indexes):
            time.sleep(POLL_SECONDS)

        # Read the ordered indexes: the first ADDENDUM rows join the labeled
        # set, the remaining rows become the new unlabeled pool.
        pd_ordered_indexes = pd.read_csv(ordered_indexes)
        addendum = config["RUNS"]["ADDENDUM"]
        new_annotated_data = list(pd_ordered_indexes.iloc[:addendum]['indexes'].to_numpy())
        labeled_set += new_annotated_data
        unlabeled_set = list(pd_ordered_indexes.iloc[addendum:]['indexes'].to_numpy())
    else:
        # Without the ordered-indexes file, extend the labeled set with the
        # next ADDENDUM entries of the shuffled index list.
        num_images = (num_run + 2) * config["RUNS"]["ADDENDUM"]
        labeled_set = indices[:num_images]
        unlabeled_set = indices[num_images:]