<a href="https://colab.research.google.com/github/buganart/BUGAN/blob/master/notebook_util/pre_data_augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Before starting, please save the notebook to your own Drive by clicking `File -> Save a copy in Drive`.

In [None]:
#@markdown Mount google drive.
from pathlib import Path

# `output` is not used in this cell; a later cell calls output.clear()
# after installing the BUGAN package, so the import must stay.
from google.colab import output
from google.colab import drive

# Make the user's Google Drive available under /content/drive.
drive.mount('/content/drive')

# Check if we have linked the folder. The default dataset paths offered in
# the configuration form live under this shared folder, so warn (without
# aborting) when the shortcut is missing.
if not Path("/content/drive/My Drive/IRCMS_GAN_collaborative_database").exists():
    print(
        "Shortcut to our shared drive folder doesn't exist.\n\n"
        "\t1. Go to the google drive web UI\n"
        "\t2. Right click shared folder IRCMS_GAN_collaborative_database and click \"Add shortcut to Drive\""
    )

In [None]:
#@title Configure dataset
#@markdown Enter dataset location.  
#@markdown - For example, use the file browser on the left to locate the file, then right click to copy the path.
#@markdown - zipfile example: `/content/drive/My Drive/h/k/a.zip`
#@markdown - file folder example: `/content/drive/My Drive/h/k`
#@markdown - if data_location_option is not empty, data_location_option will override data_location_default
data_location_default = "/content/drive/My Drive/IRCMS_GAN_collaborative_database/Research/Peter/Tree_3D_models_obj_auto_generated/sessions/simplified/tree-session-2020-09-14_23-23-Friedrich_2-target-face-num-1000.zip" #@param ["/content/drive/My Drive/IRCMS_GAN_collaborative_database/Research/Peter/Tree_3D_models_obj_auto_generated/sessions/simplified/tree-session-2020-09-14_23-23-Friedrich_2-target-face-num-1000.zip", "/content/drive/My Drive/IRCMS_GAN_collaborative_database/Research/Peter/Tree_3D_models_obj_auto_generated/sessions/simplified/tree-sessions-2020-09-10-simplified-26k-target-face-num-1000.zip", "/content/drive/My Drive/Hand-Tool-Data-Set/turbosquid_thingiverse_dataset/dataset_ply_out_zipped.zip", "/content/drive/My Drive/IRCMS_GAN_collaborative_database/Research/Peter/Chairs_Princeton/chair_train.zip", "/content/drive/My Drive/IRCMS_GAN_collaborative_database/Research/Peter/Tree_3D_models_obj_auto_generated/sessions/simplified/tree-sessions-2020-09-10-simplified-26k-target-face-num-1000-class-label.zip"] 
data_location_option = "" #@param {type:"string"}
#@markdown Enter save location and file name of the processed npy file.
#@markdown - if save_location is empty, will save to the same folder specified by the data_location  
save_location = "" #@param {type:"string"}

#@markdown Data augmentation Config
#@markdown - enter the scale of data augmentation
#@markdown - if dataset_size_scale = 2, the final dataset size = len(dataset) * dataset_size_scale
dataset_size_scale = 2    #@param {type:"integer"}
#@markdown - choose rotation augmentation
aug_rotation_type = "random rotation"  #@param ["random rotation", "axis rotation"]
#@markdown - specify the rotation axis [x,y,z] (only for aug_rotation_type = "axis rotation")
rotation_axis_x = 0    #@param {type:"number"}
rotation_axis_y = 1    #@param {type:"number"}
rotation_axis_z = 0    #@param {type:"number"}

#@markdown - resolution of the voxelized array (shape resolution**3)
resolution = "32"    #@param [32, 64]

#@markdown - For conditional dataset
#@markdown - maximum number of classes to extract based on the data_location path
#@markdown - If the dataset to be processed is unconditional, please manually set this to 0
num_classes = 0 #@param {type:"integer"}


# ---- Derived settings: nothing below this line is a form field ----

# Imported here so this cell does not depend on another cell having run first.
from pathlib import Path

#adjust parameter datatype
# The form assigns `resolution` as a string above; the config needs an int.
resolution = int(resolution)

# Fail early: the collection cell iterates range(dataset_size_scale) and then
# concatenates the collected batches, so a scale below 1 would only surface
# later as an obscure "nothing to concatenate" error.
if dataset_size_scale < 1:
    raise ValueError(
        f"dataset_size_scale must be >= 1, got {dataset_size_scale}"
    )

# A non-blank data_location_option wins over the dropdown default. Surrounding
# whitespace (easy to pick up when pasting a path) is ignored.
data_location = data_location_option.strip() or data_location_default
source_path = Path(data_location)

# Zip archives lend their file name (without ".zip") to the dataset name;
# anything else gets a generic name.
if data_location.endswith(".zip"):
    dataset = source_path.stem
else:
    dataset = "dataset_array_custom"
data_augmentation = True

# Without an explicit save location, write next to the source data using a
# name that records dataset, resolution, augmentation scale and, for
# conditional data, the class count (".npz" instead of ".npy" in that case).
save_location = save_location.strip()
if not save_location:
    filename = f"{dataset}_res{resolution}_aug{dataset_size_scale}"
    if num_classes > 0:
        filename = filename + f"_c{num_classes}.npz"
    else:
        filename = filename + ".npy"
    save_location = source_path.parent / filename

# Settings handed to the BUGAN data module in the following cells.
colab_config = {
    "aug_rotation_type": aug_rotation_type,
    "data_augmentation": data_augmentation,
    "aug_rotation_axis": (rotation_axis_x, rotation_axis_y, rotation_axis_z),
    "data_location": data_location,
    "dataset": dataset,
    "resolution": resolution,
    "num_classes": num_classes,
    "save_location": save_location,
}

# Echo the effective configuration so it can be checked before processing.
for k, v in colab_config.items():
    print(f"=> {k:20}: {v}")


# To just process a dataset, no edits should be required in any cells below.

In [None]:
import numpy as np
from pathlib import Path
dataset_path = Path(data_location)

from argparse import Namespace
config = Namespace(**colab_config)
config.seed = 1234
config.batch_size = 32

print("loading BUGAN package latest")
%pip install --upgrade git+https://github.com/buganart/BUGAN.git#egg=bugan
output.clear()

from bugan.trainPL import setup_datamodule

In [None]:
# Build the BUGAN data module from the notebook config; "/tmp/" is passed as
# its tmp_folder.
dataModule = setup_datamodule(config, tmp_folder="/tmp/")
# prepare_data() and setup() are invoked explicitly before asking for the
# loader (presumably the usual Lightning data-module lifecycle -- TODO confirm
# against the bugan package).
dataModule.prepare_data()
dataModule.setup()
# Loader iterated by the collection cell below, once per augmentation pass.
dataloader = dataModule.train_dataloader()
# NOTE: this rebinds the form variable `num_classes` to the data module's
# value. The collection/save cell compares it against None to decide whether
# batches carry class indices and whether to write .npy or .npz output.
num_classes = dataModule.num_classes

In [None]:
def _to_numpy(batch):
    """Return `batch` as a numpy array.

    Objects exposing `detach()` (assumed to be torch tensors -- the data
    batches were already handled that way here) are detached and moved to the
    CPU first; anything else goes through `np.asarray`.
    """
    if hasattr(batch, "detach"):
        return batch.detach().cpu().numpy()
    return np.asarray(batch)


def _as_5d_array(batch):
    """Convert one data batch to a 5-D numpy array.

    The number of dimensions is expected to be between 4 and 6:
    - more than 5 dims: keep only entry 0 along axis 1;
    - fewer than 5 dims: insert a new axis at position 1;
    - exactly 5 dims: returned unchanged.
    """
    array = _to_numpy(batch)
    if array.ndim > 5:
        array = array[:, 0]
    elif array.ndim < 5:
        array = array[:, np.newaxis, :, :, :]
    return array


final_data_array = []
final_index_array = []

# When num_classes is None the loader yields plain data batches; otherwise it
# yields (data, index) pairs and the indices are collected as well.
has_class_index = num_classes is not None

# Each pass over the loader contributes one full copy of the dataset, so the
# result holds dataset_size_scale * len(dataset) samples.
for _ in range(dataset_size_scale):
    for loader_item in dataloader:
        if has_class_index:
            dataset_batch, dataset_index = loader_item
            # Converted like the data batch so concatenation below does not
            # depend on how numpy coerces the loader's native index type.
            final_index_array.append(_to_numpy(dataset_index))
        else:
            dataset_batch = loader_item
        final_data_array.append(_as_5d_array(dataset_batch))

# np.concatenate rejects an empty list with a message that does not point at
# the cause, so report the likely reasons explicitly.
if not final_data_array:
    raise ValueError(
        "No data batches were collected: check that dataset_size_scale >= 1 "
        "and that the dataset at data_location is not empty."
    )

# concatenate all the data samples
final_data_array = np.concatenate(final_data_array, axis=0)
print("final_data_array.shape:", final_data_array.shape)

#save to save_location
if num_classes is None:
    # Unconditional dataset: a single array.
    np.save(save_location, final_data_array)
else:
    # Conditional dataset: data, per-sample class index and the class list
    # are stored together in one archive.
    final_index_array = np.concatenate(final_index_array, axis=0)
    print("final_index_array.shape:", final_index_array.shape)
    np.savez(
        save_location,
        data=final_data_array,
        index=final_index_array,
        class_list=dataModule.class_list,
    )