In [5]:
import os
import shutil
import random
# Set the seed for reproducibility
random.seed(42)

# Prep dataset

In [None]:
data_path = 'dataset/caltech.zip'

if os.path.isfile(data_path):
  print('Dataset zip already exists')
else:
  print('Downloading N-Caltech101 dataset...')
  !mkdir dataset
  !cd dataset && wget -O caltech.zip https://prod-dcd-datasets-cache-zipfiles.s3.eu-west-1.amazonaws.com/cy6cvx3ryv-1.zip

In [None]:
### Go to `dataset` folder
%cd dataset/

In [None]:
### Unzip main zip
!unzip caltech.zip

In [None]:
### Unzip the actual dataset Caltech101:
!unzip cy6cvx3ryv-1/Caltech101.zip

In [None]:
### Unzip the annotations of Caltech101:
!unzip cy6cvx3ryv-1/Caltech101_annotations.zip

In [10]:
### Make the new ncaltech101 main directory
import os
import shutil
import random
# Set the seed for reproducibility
random.seed(42)

!rm -rf ncaltech101

original_dir = 'Caltech101'
new_dir = 'ncaltech101'

if os.path.exists(new_dir):
    print('The directory `ncaltech101` already exists. Please delete it and try again.')
else:
  os.mkdir(new_dir)

In [11]:
### Make the train, validation, test, raw, annotations directories.
train_dir = os.path.join(new_dir, 'training')
val_dir = os.path.join(new_dir, 'validation')
test_dir = os.path.join(new_dir, 'test')
raw_dir = os.path.join(new_dir, 'raw')
ann_dir = os.path.join(new_dir, 'annotations')

os.mkdir(train_dir)
os.mkdir(val_dir)
os.mkdir(test_dir)
os.mkdir(raw_dir)
os.mkdir(ann_dir)

In [12]:
### Copy annotations into ncaltech101/annotations dir.
!cp -a 'Caltech101_annotations/.' 'ncaltech101/annotations'

In [13]:
### Make a directory for each class in the train, validation, test and raw directories.
### At the same time, get the number of images in each class.
class_counts = {}

class_folders = os.listdir(original_dir)[:3]

for class_dir in class_folders:
    os.mkdir(os.path.join(train_dir, class_dir))
    os.mkdir(os.path.join(val_dir, class_dir))
    os.mkdir(os.path.join(test_dir, class_dir))
    os.mkdir(os.path.join(raw_dir, class_dir))

    class_counts[class_dir] = len(os.listdir(os.path.join(original_dir, class_dir)))

print (class_counts)

{'crocodile': 50, 'lotus': 66, 'menorah': 87}


In [14]:
### Copy the data into the train, validation and test directories (80%, 10%, 10%)
for class_dir in class_folders:
    images = os.listdir(os.path.join(original_dir, class_dir))
    random.shuffle(images)

    train_images = images[:int(0.4 * class_counts[class_dir])]
    val_images = images[int(0.4 * class_counts[class_dir]):int(0.45 * class_counts[class_dir])]
    test_images = images[int(0.45 * class_counts[class_dir]):int(0.5 * class_counts[class_dir])]

    for image in train_images:
        shutil.copyfile(os.path.join(original_dir, class_dir, image), os.path.join(train_dir, class_dir, image))

    for image in val_images:
        shutil.copyfile(os.path.join(original_dir, class_dir, image), os.path.join(val_dir, class_dir, image))

    for image in test_images:
        shutil.copyfile(os.path.join(original_dir, class_dir, image), os.path.join(test_dir, class_dir, image))

In [15]:
### Check that the data has been split correctly
for split in [train_dir, val_dir, test_dir]:
    total = 0
    for class_dir in os.listdir(split):
        total += len(os.listdir(os.path.join(split, class_dir)))
    print(split, total)

ncaltech101/training 80
ncaltech101/validation 10
ncaltech101/test 11


In [None]:
%cd ..

In [None]:
%ls dataset/ncaltech101

# Pre-processing

In [22]:
!pip install -e .

Defaulting to user installation because normal site-packages is not writeable
Obtaining file:///home/andrei-carlo/aegnn
Installing collected packages: aegnn
  Attempting uninstall: aegnn
    Found existing installation: aegnn 0.0.0
    Uninstalling aegnn-0.0.0:
      Successfully uninstalled aegnn-0.0.0
  Running setup.py develop for aegnn
Successfully installed aegnn-0.0.0


In [16]:
! CUDA_VISIBLE_DEVICES=0 python scripts/preprocessing.py --dataset ncaltech101 --num-workers 0

Global seed set to 12345
100%|███████████████████████████████████████████| 80/80 [00:08<00:00,  9.61it/s]
100%|███████████████████████████████████████████| 10/10 [00:00<00:00, 17.71it/s]


In [None]:
! python scripts/train.py graph_res --task recognition --dataset ncaltech101 --gpu 0 --batch-size 8 --dim 3 --num-workers 0

In [23]:
! CUDA_VISIBLE_DEVICES=0 python evaluation/flops.py --device cuda

  0%|                                                   | 0/100 [00:00<?, ?it/s]Data pos shape:  torch.Size([25000, 3])
Data x   shape:  torch.Size([25000, 1])
  self.pool7 = MaxPoolingX(input_shape[:2] // 4, size=16)
  0%|                                                   | 0/100 [00:06<?, ?it/s]
Traceback (most recent call last):
  File "evaluation/flops.py", line 131, in <module>
    run_experiments(data_module, arguments, experiments=event_counts, num_trials=100,
  File "evaluation/flops.py", line 106, in run_experiments
    model = create_and_run_model(dm, num_events, index=exp_id, args=args, device=device, **model_kwargs)
  File "evaluation/flops.py", line 61, in create_and_run_model
    model = aegnn.asyncronous.make_model_asynchronous(model, args.radius, list(dm.dims), edge_attr, **kwargs)
  File "/home/andrei-carlo/aegnn/aegnn/asyncronous/__init__.py", line 54, in make_model_asynchronous
    module._modules[key] = make_conv_asynchronous(nn, r=r, edge_attributes=edge_attributes