In [1]:
"""
In this notebook we will convert the CIFAR10 dataset into Pixano format
and augment it with one more class (guns) using the Gun Detection Dataset.

Note: For running, activate the pixano env
"""

'\nIn this notebook we will convert the MNIST dataset into Pixano Format\n\nNote: For running, activate the pixano env\n'

### NOTE : Before running this notebook, set the value for the following variables

In [2]:
# Name of the root directory of the current repo (e.g. "pixano" or "pixano-main").
ROOTDIR = "pixano"
# Name of the dataset.
DATASET_NAME = "CIFAR11_guns_pixano_v1"
# Directory where the raw CIFAR11 dataset is saved: the images are transformed
# later, and the labels are also used by the active-learning auto-annotator.
datasets_dir = "/home/melissap/Desktop/LAGO/3.githubs/integration/datasets"
# Directory in which the transformed CIFAR11 dataset is saved for use by Pixano.
library_dir = "/home/melissap/_pixano_datasets_"

## ... the rest of the notebook should run without any code adjustments/modifications.

In [3]:
import os
import sys
from pathlib import Path
import shutil
import random
from PIL import Image
import numpy as np
import pandas as pd
from torchvision import datasets
from dowload_GunDataset import getGunDataset

In [4]:
def insertRootDir(ROOTDIR='pixano'):
    """Find the repository root at or above the working directory and add it to sys.path.

    The search starts at the current working directory and walks upwards until
    a directory whose name equals ROOTDIR is found. The match is inserted at
    the front of ``sys.path`` so that packages inside the repo can be imported.

    Args:
        ROOTDIR: Name of the repository root directory. A path is accepted as
            well, in which case only its last component is used; this keeps
            the cell re-runnable after ROOTDIR has been rebound to the path
            returned by a previous call.

    Returns:
        The path of the matching directory, or the string "_NOT_FOUND_" when
        neither the working directory nor any of its ancestors matches.
    """
    # Plain names are used untouched; paths are reduced to their last component.
    if os.sep in ROOTDIR:
        target = os.path.basename(os.path.normpath(ROOTDIR))
    else:
        target = ROOTDIR

    # Notebooks do not define __file__, so the literal string '__file__' is
    # resolved against the working directory; its dirname is that directory.
    start_dir = os.path.dirname(os.path.realpath('__file__'))

    # Suggestion printed when the lookup fails: the name of the directory
    # three levels above the working directory.
    potential_root_dir = os.path.basename(
        os.path.dirname(os.path.dirname(os.path.dirname(start_dir)))
    )

    found = False
    pardir = start_dir
    while True:
        # Test the current directory first so that running the notebook from
        # the root directory itself is recognised.
        if os.path.basename(pardir) == target:
            found = True
            break
        parent = os.path.dirname(pardir)
        # dirname() of a filesystem root returns the root itself on every
        # platform, so this terminates on Windows drives as well as on "/".
        if parent == pardir:
            break
        pardir = parent

    if found:
        print("Inserting parent dir : ",pardir)
        sys.path.insert(0,pardir)
        return pardir
    else:
        print(f"ROOTDIR NOT FOUND. You may have to change ROOTDIR variable from : '{ROOTDIR}' to '{potential_root_dir}'")
        return "_NOT_FOUND_"

# Rebinds ROOTDIR from the directory name to the value returned by
# insertRootDir: the resolved root path, or "_NOT_FOUND_" if the lookup failed.
ROOTDIR = insertRootDir(ROOTDIR)

Inserting parent dir :  /home/melissap/Desktop/LAGO_43integrationDemo/pixano


In [5]:
from pixano.apps import Explorer
from pixano.data import ImageImporter

/home/melissap/miniconda3/envs/pixano_env/lib/python3.10/site-packages/pixano/apps/explorer/dist/assets


In [6]:
# Seed Python's RNG; get_CIFAR11 draws its subsample and shuffles from it.
random.seed(1)

# Local directory in which the raw dataset is stored.
cifar_dir = Path(datasets_dir) / "CIFAR11"

def _load_gun_images(image_dir, size=(32, 32)):
    """Read every file in `image_dir` as an RGB image resized to `size`; return a list of arrays."""
    arrays = []
    # sorted(): os.listdir returns entries in arbitrary order, which would make
    # the seeded subsampling below pick different images from run to run.
    for fname in sorted(os.listdir(image_dir)):
        # Context manager closes the underlying file handle once decoded.
        with Image.open(os.path.join(image_dir, fname)) as img:
            # Force three channels so grayscale / palette / RGBA files end up
            # with the same layout as the CIFAR frames and can be saved as JPEG.
            resized = img.convert("RGB").resize(size, Image.Resampling.LANCZOS)
            arrays.append(np.array(resized))
    return arrays


def _save_frames(frames, out_dir, first_index):
    """Write `frames` to `out_dir` as '<index>.jpeg', numbering from `first_index`; return the next free index."""
    index = first_index
    for frame in frames:
        Image.fromarray(np.array(frame)).save(os.path.join(out_dir, str(index) + ".jpeg"))
        index += 1
    return index


def get_CIFAR11(data_dir):
    """Build the raw "CIFAR11" dataset (CIFAR10 plus a gun class) under `data_dir`.

    Downloads CIFAR10 and the gun dataset, resizes the gun images to 32x32,
    subsamples the gun training images down to the CIFAR per-class count,
    merges and shuffles both sources, and then writes:

      * ``images/train/<n>.jpeg`` and ``images/test/<n>.jpeg``; the frame
        counter is shared, so test numbering continues after the last train frame;
      * ``annotations/train.csv`` and ``annotations/test.csv``, one label per
        row, in the same order as the frames of that split.

    Gun images receive label 10 (CIFAR10 uses 0..9). The downloaded raw data
    is removed at the end.

    Nothing is done when a previous run already produced the image directory
    and both annotation files. The annotation files are written last, so a
    run that was interrupted half-way is detected and rebuilt.

    Args:
        data_dir: Directory in which the dataset is (or will be) stored.

    Returns:
        None.
    """
    image_dir = os.path.join(data_dir,"images")
    annotation_dir = os.path.join(data_dir,"annotations")
    train_imdir = os.path.join(image_dir,"train")
    val_imdir = os.path.join(image_dir,"val")
    test_imdir = os.path.join(image_dir,"test")

    raw_downloaDir = os.path.join(data_dir,"raw_dataset")

    train_anfile = os.path.join(annotation_dir,"train.csv")
    test_anfile = os.path.join(annotation_dir,"test.csv")

    # Existing, complete copy: keep it.
    if (os.path.isdir(image_dir)
            and os.path.isfile(train_anfile)
            and os.path.isfile(test_anfile)):
        return

    # exist_ok tolerates leftovers of an interrupted run while still surfacing
    # real failures (permissions, a file in the way, ...).
    for directory in (annotation_dir, train_imdir, val_imdir, test_imdir):
        os.makedirs(directory, exist_ok=True)

    # GET CIFAR10
    raw_tr = datasets.CIFAR10(raw_downloaDir, train=True, download=True)
    raw_te = datasets.CIFAR10(raw_downloaDir, train=False, download=True)
    CF_X_tr, CF_Y_tr = raw_tr.data, raw_tr.targets
    CF_X_te, CF_Y_te = raw_te.data, raw_te.targets

    # Download the guns dataset.
    destination_tar = os.path.join(data_dir,"gun-dataset.tar.gz")
    GN_dataset_dir = getGunDataset(destination_tar)

    # Convert guns dataset - read, resize, to_numpy.
    GN_X_tr = _load_gun_images(os.path.join(GN_dataset_dir,"train"))
    GN_X_te = _load_gun_images(os.path.join(GN_dataset_dir,"test"))

    # 10 classes in CIFAR (0 .. 9); the new class takes the next label, 10.
    cf_num_classes = 10
    gun_label = cf_num_classes

    # Subsample the gun training images so the new class has (at most) as many
    # samples as each CIFAR class. min() avoids a ValueError from random.sample
    # when fewer gun images are available than the per-class quota.
    tr_num_samples = len(CF_Y_tr) // cf_num_classes
    keep = min(tr_num_samples, len(GN_X_tr))
    # range(len(...)) covers every index, including the last one.
    tr_subsampling_idxs = random.sample(range(len(GN_X_tr)), keep)
    GN_X_tr = [GN_X_tr[i] for i in tr_subsampling_idxs]
    GN_Y_tr = [gun_label] * len(GN_X_tr)

    # The testing split is not subsampled; every gun test image is kept.
    GN_Y_te = [gun_label] * len(GN_X_te)

    # Merge the two datasets.
    X_tr = [*CF_X_tr , *GN_X_tr]
    Y_tr = [*CF_Y_tr , *GN_Y_tr]
    X_te = [*CF_X_te , *GN_X_te]
    Y_te = [*CF_Y_te , *GN_Y_te]

    # Shuffle before saving; frames and labels use the same permutation.
    tr_shuffled_idxs = list(range(len(Y_tr)))
    te_shuffled_idxs = list(range(len(Y_te)))
    random.shuffle(tr_shuffled_idxs)
    random.shuffle(te_shuffled_idxs)
    X_tr = [X_tr[i] for i in tr_shuffled_idxs]
    Y_tr = [Y_tr[i] for i in tr_shuffled_idxs]
    X_te = [X_te[i] for i in te_shuffled_idxs]
    Y_te = [Y_te[i] for i in te_shuffled_idxs]

    # Store frames; the counter is shared across both splits.
    framecounter = _save_frames(X_tr, train_imdir, 0)
    framecounter = _save_frames(X_te, test_imdir, framecounter)

    # Store annotations last: their presence marks the dataset as complete.
    pd.DataFrame(np.array(Y_tr)).to_csv(train_anfile,index=False)
    pd.DataFrame(np.array(Y_te)).to_csv(test_anfile,index=False)

    # Remove raw data.
    shutil.rmtree(raw_downloaDir)
    shutil.rmtree(GN_dataset_dir)
    print(f'Dataset succesfull downoladed within {data_dir}, framecounter = {framecounter}')

In [7]:

# Dataset information passed to the importer.
name = "CIFAR11 dataset"
description = (
    "CIFAR10 : https://www.cs.toronto.edu/~kriz/cifar.html , "
    "Gun Detection Dataset : https://www.linksprite.com/gun-detection-datasets/"
)
splits = ["train", "test"]  # "val" is not imported

# Input information: only the image directory is supplied.
# (An "objects": library_dir / "annotations" entry is intentionally left out.)
input_dirs = {"image": cifar_dir / "images"}

# Destination of the converted dataset.
library_dir = Path(library_dir)
import_dir = library_dir / DATASET_NAME

# Build the raw dataset on disk; get_CIFAR11 skips the work if a copy already exists.
get_CIFAR11(cifar_dir)

In [8]:
# Print the signature and docstring of ImageImporter.import_dataset for reference.
help(ImageImporter.import_dataset)

Help on function import_dataset in module pixano.data.importers.importer:

import_dataset(self, input_dirs: dict[str, pathlib.Path], import_dir: pathlib.Path, portable: bool = False) -> pixano.data.dataset.Dataset
    Import dataset to Pixano format
    
    Args:
        input_dirs (dict[str, Path]): Input directories
        import_dir (Path): Import directory
        portable (bool, optional): True to copy or download files to import directory and use relative paths. Defaults to False.
    
    Returns:
        Dataset: Imported dataset



In [9]:
# Create the importer from the dataset name, description and splits.
importer = ImageImporter(name, description, splits)
# Convert the raw images into a Pixano dataset under import_dir. Per the help
# text, portable=True copies or downloads the files into the import directory
# and uses relative paths. The returned Dataset is displayed as the cell output.
importer.import_dataset(input_dirs, import_dir, portable=True)

[2023-12-01T12:26:34Z WARN  lance::dataset] No existing dataset at /home/melissap/_pixano_datasets_/CIFAR11_guns_pixano_v1/db.lance, it will be created
[2023-12-01T12:26:34Z WARN  lance::dataset] No existing dataset at /home/melissap/_pixano_datasets_/CIFAR11_guns_pixano_v1/image.lance, it will be created


Importing dataset: 0it [00:00, ?it/s]

Copying media directories:   0%|          | 0/1 [00:00<?, ?it/s]

Creating dataset info file:   0%|          | 0/1 [00:00<?, ?it/s]

Creating dataset thumbnail:   0%|          | 0/1 [00:00<?, ?it/s]

<pixano.data.dataset.Dataset at 0x7f5447f60c10>

### !ERROR: Here we found an issue: Explorer doesn't return a localhost port for opening the Pixano GUI. A fix is required.

In [10]:
# explorer = Explorer(library_dir)
# explorer.display()