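Misc changes: drop train_split_ratio from makeTVSets, nest output dirs by model type via os.makedirs, document CUDA_VISIBLE_DEVICES usage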

acl21 · acl21 · commit ac4e063d4360 · 2021-03-27T19:12:31.000+05:30
diff --git a/README.md b/README.md
@@ -31,7 +31,7 @@ Please see [`GETTING_STARTED`](docs/GETTING_STARTED.md) for brief installation i
 * [CIFAR10/100](https://www.cs.toronto.edu/~kriz/cifar.html)
 * [MNIST](http://yann.lecun.com/exdb/mnist/)
 * [SVHN](http://ufldl.stanford.edu/housenumbers/)
-* [TinyImageNet](https://www.kaggle.com/c/tiny-imagenet) (Download the zip file [here](http://cs231n.stanford.edu/tiny-imagenet-200.zip))
+* [Tiny ImageNet](https://www.kaggle.com/c/tiny-imagenet) (Download the zip file [here](http://cs231n.stanford.edu/tiny-imagenet-200.zip))
 
 
 ## Model Zoo
diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md
@@ -21,7 +21,7 @@ EXP_NAME: 'SOME_RANDOM_NAME'
 # Note that non-determinism may still be present due to non-deterministic
 # operator implementations in GPU operator libraries
 RNG_SEED: 1
-# GPU ID you want to execute the process on
+# GPU ID you want to execute the process on (this option does not work at the moment; select the GPU with CUDA_VISIBLE_DEVICES as shown in the commands later in this file)
 GPU_ID: '3'
 DATASET:
   NAME: CIFAR10 # or CIFAR100, MNIST, SVHN, TinyImageNet
@@ -97,7 +97,7 @@ Please refer to `pycls/core/config.py` to configure your experiments at a deeper
 Once the config file is configured appropriately, perform active learning with the following command. 
 
 ```
-python tools/train_al.py \
+CUDA_VISIBLE_DEVICES=0 python tools/train_al.py \
     --cfg configs/cifar10/al/RESNET18_DBAL.yaml
 ```
 
@@ -106,14 +106,14 @@ python tools/train_al.py \
 Watch out for the ensemble options in the config file.
 
 ```
-python tools/ensemble_al.py \
+CUDA_VISIBLE_DEVICES=0 python tools/ensemble_al.py \
     --cfg configs/cifar10/al/RESNET18_ENSEMBLE.yaml
 ```
 
 ### Passive Learning
 
 ```
-python tools/train.py \
+CUDA_VISIBLE_DEVICES=0 python tools/train.py \
     --cfg configs/cifar10/train/RESNET18.yaml
 ```
 
@@ -122,7 +122,7 @@ python tools/train.py \
 Watch out for the ensemble options in the config file.
 
 ```
-python tools/ensemble_train.py \
+CUDA_VISIBLE_DEVICES=0 python tools/ensemble_train.py \
     --cfg configs/cifar10/train/RESNET18_ENSEMBLE.yaml
 ```
 
@@ -131,7 +131,7 @@ python tools/ensemble_train.py \
 This is useful if you want to evaluate a particular saved model. 
 
 ```
-python tools/test_model.py \
+CUDA_VISIBLE_DEVICES=0 python tools/test_model.py \
     --cfg configs/cifar10/evaluate/RESNET18.yaml
 ```
 
diff --git a/pycls/datasets/data.py b/pycls/datasets/data.py
@@ -243,7 +243,7 @@ def makeLUVSets(self, train_split_ratio, val_split_ratio, data, seed_id, save_di
         
         return f'{save_dir}/lSet.npy', f'{save_dir}/uSet.npy', f'{save_dir}/valSet.npy'
 
-    def makeTVSets(self, train_split_ratio, val_split_ratio, data, seed_id, save_dir):
+    def makeTVSets(self, val_split_ratio, data, seed_id, save_dir):
         """
         Initialize the train and validation sets by splitting the train data according to split_ratios arguments.
 
@@ -252,9 +252,6 @@ def makeTVSets(self, train_split_ratio, val_split_ratio, data, seed_id, save_dir
         |<------------- Train -------------><--- Validation --->
 
         INPUT:
-        train_split_ratio: Float, Specifies the proportion of data in train set.
-        For example: 0.8 means beginning 80% of data is training data.
-
         val_split_ratio: Float, Specifies the proportion of data in validation set.
         For example: 0.1 means ending 10% of data is validation data.
 
@@ -268,7 +265,6 @@ def makeTVSets(self, train_split_ratio, val_split_ratio, data, seed_id, save_dir
         torch.manual_seed(seed_id)
         np.random.seed(seed_id)
 
-        assert isinstance(train_split_ratio, float),"Train split ratio is of {} datatype instead of float".format(type(train_split_ratio))
         assert isinstance(val_split_ratio, float),"Val split ratio is of {} datatype instead of float".format(type(val_split_ratio))
         assert self.dataset in ["MNIST","CIFAR10","CIFAR100", "SVHN", "TINYIMAGENET"], "Sorry the dataset {} is not supported. Currently we support ['MNIST','CIFAR10', 'CIFAR100', 'SVHN', 'TINYIMAGENET']".format(self.dataset)
 
@@ -278,14 +274,9 @@ def makeTVSets(self, train_split_ratio, val_split_ratio, data, seed_id, save_dir
         n_dataPoints = len(data)
         all_idx = [i for i in range(n_dataPoints)]
         np.random.shuffle(all_idx)
-
-        train_splitIdx = int(train_split_ratio*n_dataPoints)
+        
         # To get the validation index from end we multiply n_datapoints with 1-val_ratio 
         val_splitIdx = int((1-val_split_ratio)*n_dataPoints)
-        #Check there should be no overlap with train and val data
-        assert train_split_ratio + val_split_ratio < 1.0, "Validation data over laps with train data as last train index is {} and last val index is {}. \
-            The program expects val index > train index. Please satisfy the constraint: train_split_ratio + val_split_ratio < 1.0; currently it is {} + {} is not < 1.0 => {} is not < 1.0"\
-                .format(train_splitIdx, val_splitIdx, train_split_ratio, val_split_ratio, train_split_ratio + val_split_ratio)
         
         trainSet = all_idx[:val_splitIdx]
         valSet = all_idx[val_splitIdx:]
diff --git a/pycls/datasets/tiny_imagenet.py b/pycls/datasets/tiny_imagenet.py
@@ -29,7 +29,7 @@ class TinyImageNet(datasets.ImageFolder):
     """
     def __init__(self, root: str, split: str = 'train', **kwargs: Any) -> None:
         self.root = root
-        assert self.check_root(), "Something is wrong with the Tiny ImageNet dataset. Download the official dataset zip from http://cs231n.stanford.edu/tiny-imagenet-200.zip and unzip it inside {}.".format(self.root)
+        assert self.check_root(), "Something is wrong with the Tiny ImageNet dataset path. Download the official dataset zip from http://cs231n.stanford.edu/tiny-imagenet-200.zip and unzip it inside {}.".format(self.root)
         self.split = datasets.utils.verify_str_arg(split, "split", ("train", "val"))
 
         wnid_to_classes = self.load_wnid_to_classes()
diff --git a/tools/ensemble_al.py b/tools/ensemble_al.py
@@ -109,10 +109,10 @@ def main(cfg):
     cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
     if not os.path.exists(cfg.OUT_DIR):
         os.mkdir(cfg.OUT_DIR)
-    # Create "DATASET" specific directory
+    # Create "DATASET/MODEL TYPE" specific directory
     dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME, cfg.MODEL.TYPE)
     if not os.path.exists(dataset_out_dir):
-        os.mkdir(dataset_out_dir)
+        os.makedirs(dataset_out_dir)
     # Creating the experiment directory inside the dataset specific directory 
     # all logs, labeled, unlabeled, validation sets are stroed here 
     # E.g., output/CIFAR10/resnet18/{timestamp or cfg.EXP_NAME based on arguments passed}
diff --git a/tools/ensemble_train.py b/tools/ensemble_train.py
@@ -106,13 +106,13 @@ def main(cfg):
     cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
     if not os.path.exists(cfg.OUT_DIR):
         os.mkdir(cfg.OUT_DIR)
-    # Create "DATASET" specific directory
-    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME)
+    # Create "DATASET/MODEL TYPE" specific directory
+    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME, cfg.MODEL.TYPE)
     if not os.path.exists(dataset_out_dir):
-        os.mkdir(dataset_out_dir)
+        os.makedirs(dataset_out_dir)
     # Creating the experiment directory inside the dataset specific directory 
     # all logs, labeled, unlabeled, validation sets are stroed here 
-    # E.g., output/CIFAR10/{timestamp or cfg.EXP_NAME based on arguments passed}
+    # E.g., output/CIFAR10/resnet18/{timestamp or cfg.EXP_NAME based on arguments passed}
     if cfg.EXP_NAME == 'auto':
         now = datetime.now()
         exp_dir = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
@@ -140,8 +140,8 @@ def main(cfg):
     print("\nDataset {} Loaded Sucessfully.\nTotal Train Size: {} and Total Test Size: {}\n".format(cfg.DATASET.NAME, train_size, test_size))
     logger.info("Dataset {} Loaded Sucessfully. Total Train Size: {} and Total Test Size: {}\n".format(cfg.DATASET.NAME, train_size, test_size))
     
-    trainSet_path, valSet_path = data_obj.makeTVSets(train_split_ratio=cfg.ACTIVE_LEARNING.INIT_L_RATIO, \
-        val_split_ratio=cfg.DATASET.VAL_RATIO, data=train_data, seed_id=cfg.RNG_SEED, save_dir=cfg.EXP_DIR)
+    trainSet_path, valSet_path = data_obj.makeTVSets(val_split_ratio=cfg.DATASET.VAL_RATIO, data=train_data, \
+                                seed_id=cfg.RNG_SEED, save_dir=cfg.EXP_DIR)
 
     trainSet, valSet = data_obj.loadTVPartitions(trainSetPath=trainSet_path, valSetPath=valSet_path)
 
diff --git a/tools/train.py b/tools/train.py
@@ -105,13 +105,13 @@ def main(cfg):
     cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
     if not os.path.exists(cfg.OUT_DIR):
         os.mkdir(cfg.OUT_DIR)
-    # Create "DATASET" specific directory
-    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME)
+    # Create "DATASET/MODEL TYPE" specific directory
+    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME, cfg.MODEL.TYPE)
     if not os.path.exists(dataset_out_dir):
-        os.mkdir(dataset_out_dir)
+        os.makedirs(dataset_out_dir)
     # Creating the experiment directory inside the dataset specific directory 
     # all logs, labeled, unlabeled, validation sets are stroed here 
-    # E.g., output/CIFAR10/{timestamp or cfg.EXP_NAME based on arguments passed}
+    # E.g., output/CIFAR10/resnet18/{timestamp or cfg.EXP_NAME based on arguments passed}
     if cfg.EXP_NAME == 'auto':
         now = datetime.now()
         exp_dir = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
@@ -139,8 +139,8 @@ def main(cfg):
     print("\nDataset {} Loaded Sucessfully.\nTotal Train Size: {} and Total Test Size: {}\n".format(cfg.DATASET.NAME, train_size, test_size))
     logger.info("Dataset {} Loaded Sucessfully. Total Train Size: {} and Total Test Size: {}\n".format(cfg.DATASET.NAME, train_size, test_size))
     
-    trainSet_path, valSet_path = data_obj.makeTVSets(train_split_ratio=cfg.ACTIVE_LEARNING.INIT_L_RATIO, \
-        val_split_ratio=cfg.DATASET.VAL_RATIO, data=train_data, seed_id=cfg.RNG_SEED, save_dir=cfg.EXP_DIR)
+    trainSet_path, valSet_path = data_obj.makeTVSets(val_split_ratio=cfg.DATASET.VAL_RATIO, data=train_data, \
+                                seed_id=cfg.RNG_SEED, save_dir=cfg.EXP_DIR)
 
     trainSet, valSet = data_obj.loadTVPartitions(trainSetPath=trainSet_path, valSetPath=valSet_path)
 
diff --git a/tools/train_al.py b/tools/train_al.py
@@ -110,7 +110,7 @@ def main(cfg):
     cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
     if not os.path.exists(cfg.OUT_DIR):
         os.mkdir(cfg.OUT_DIR)
-    # Create "DATASET" specific directory
+    # Create "DATASET/MODEL TYPE" specific directory
     dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME, cfg.MODEL.TYPE)
     if not os.path.exists(dataset_out_dir):
         os.makedirs(dataset_out_dir)