# Notebook configurations

In [2]:
# When True, the repository is cloned from GitHub and the DVC `origin` remote is configured.
CLONE = True #@param {type:"boolean"}


In [3]:
# When True, `git pull` and `dvc pull` are run to refresh the code and the data.
PULL = False #@param {type:"boolean"}


In [4]:
# When True, Google Drive is mounted at /content/drive and the notebook works from /content/drive/MyDrive.
GDRIVE = False #@param {type:"boolean"}


In [5]:
# When True, MLflow is installed and its tracking URI is pointed at the DAGsHub repository.
MLFLOW = False #@param {type:"boolean"}


# Configure DAGsHub, GitHub and Git

In [6]:
import requests
import getpass
import datetime
import os

**Set Environment Variables - DAGsHub**


In [7]:
#@title Enter the DAGsHub repository owner name:

# Owner segment of the dagshub.com URLs used for the DVC remote and the MLflow tracking URI.
DAGSHUB_REPO_OWNER= "jinensetpal" #@param {type:"string"}

In [8]:
#@title Enter the DAGsHub repository name:

# Repository segment of the dagshub.com URLs used for the DVC remote and the MLflow tracking URI.
DAGSHUB_REPO_NAME= "panoptic-reproducibility" #@param {type:"string"}

In [9]:
#@title Enter the username of your DAGsHub account:

# Used as the DVC remote user and as MLFLOW_TRACKING_USERNAME; empty by default, fill in before running.
DAGSHUB_USER_NAME = "" #@param {type:"string"}

**Set Environment Variables - GitHub**


In [10]:
#@title Enter the GitHub repository owner name:

# Owner segment of the github.com URL that is cloned.
GITHUB_REPO_OWNER= "jinensetpal" #@param {type:"string"}

In [11]:
#@title Enter the GitHub repository name:

# Repository segment of the clone URL; also the directory the notebook changes into after cloning.
GITHUB_REPO_NAME= "panoptic-reproducibility" #@param {type:"string"}

In [12]:
#@title Enter the GitHub branch name:

# Branch checked out by `git clone -b`.
BRANCH= "nir/pipeline" #@param {type:"string"}

In [13]:
#@title Enter the username of your GitHub account:

# Used for `git config user.name` and as the user part of the authenticated clone URL; empty by default.
GITHUB_USER_NAME = "" #@param {type:"string"}

In [14]:
#@title Enter the email for your GitHub account:

# Used for `git config user.email`; empty by default, fill in before running.
GITHUB_EMAIL = "" #@param {type:"string"}

In [16]:
# Ask for both secrets interactively (GitHub first, then DAGsHub) so they are
# never written into the notebook; getpass does not echo the typed value.
_token_prompts = (
    'Please enter your GitHub token or password: ',
    'Please enter your DAGsHub token or password: ',
)
GITHUB_TOKEN, DAGSHUB_TOKEN = map(getpass.getpass, _token_prompts)

In [None]:
from google.colab import drive
if GDRIVE:
  drive.mount('/content/drive')
  %cd /content/drive/MyDrive

**Configure Git**

In [None]:
!git config --global user.email {GITHUB_EMAIL}
!git config --global user.name {GITHUB_USER_NAME}

**Clone the Repository**

In [None]:
if CLONE:
  !git clone -b {BRANCH} https://{GITHUB_USER_NAME}:{GITHUB_TOKEN}@github.com/{GITHUB_REPO_OWNER}/{GITHUB_REPO_NAME}.git
  %cd {GITHUB_REPO_NAME}
if PULL:
  !git pull

Cloning into 'panoptic-reproducibility'...
remote: Enumerating objects: 191, done.[K
remote: Counting objects: 100% (191/191), done.[K
remote: Compressing objects: 100% (148/148), done.[K
remote: Total 191 (delta 52), reused 162 (delta 34), pack-reused 0[K
Receiving objects: 100% (191/191), 27.63 MiB | 37.47 MiB/s, done.
Resolving deltas: 100% (52/52), done.
/content/panoptic-reproducibility


**Install Requirements**

In [None]:
!pip install --upgrade pip --quiet

!pip install -r requirements.txt --quiet

[K     |████████████████████████████████| 1.6MB 5.0MB/s 
[K     |████████████████████████████████| 472 kB 6.7 MB/s 
[K     |████████████████████████████████| 637 kB 42.2 MB/s 
[K     |████████████████████████████████| 46 kB 3.2 MB/s 
[K     |████████████████████████████████| 78 kB 6.5 MB/s 
[K     |████████████████████████████████| 170 kB 60.8 MB/s 
[K     |████████████████████████████████| 44 kB 2.2 MB/s 
[K     |████████████████████████████████| 207 kB 54.2 MB/s 
[K     |████████████████████████████████| 40 kB 4.7 MB/s 
[K     |████████████████████████████████| 296 kB 69.1 MB/s 
[K     |████████████████████████████████| 76 kB 4.5 MB/s 
[K     |████████████████████████████████| 108 kB 70.2 MB/s 
[K     |████████████████████████████████| 115 kB 67.1 MB/s 
[K     |████████████████████████████████| 49 kB 4.9 MB/s 
[K     |████████████████████████████████| 529 kB 48.7 MB/s 
[K     |████████████████████████████████| 4.6 MB 31.6 MB/s 
[K     |███████████████████████████████

**Configure DVC**

In [None]:
# Import DVC package - relevant only when working in a Colab environment
import dvc

if CLONE:
  # configure dvc
  # Set DVC remote storage as 'DAGsHub storage'
  !dvc remote add origin --local https://dagshub.com/{DAGSHUB_REPO_OWNER}/{DAGSHUB_REPO_NAME}.dvc

  # General DVC configuration
  !dvc remote modify --local origin auth basic
  !dvc remote modify --local origin user {DAGSHUB_USER_NAME}
  !dvc remote modify --local origin password {DAGSHUB_TOKEN}

if PULL:
  !dvc pull -r origin #&> /dev/null

  #Make sure that all files were pulled
  !dvc pull

**Configure MLflow**

In [None]:
if MLFLOW:
  !pip install mlflow --quiet

  import mlflow

  os.environ['MLFLOW_TRACKING_USERNAME'] = DAGSHUB_USER_NAME
  os.environ['MLFLOW_TRACKING_PASSWORD'] = DAGSHUB_TOKEN

  mlflow.set_tracking_uri(f'https://dagshub.com/{DAGSHUB_REPO_OWNER}/{DAGSHUB_REPO_NAME}.mlflow')

# Playground

### Imports

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
# pandas display settings: show up to 500 rows and 500 columns, allow
# 1000-character-wide output and do not wrap wide frames across several blocks.
for _option, _value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
    ('display.expand_frame_repr', False),
):
  pd.set_option(_option, _value)

import glob
import os

### Constants

In [None]:
# Paths are resolved from the directory the notebook is running in.
BASE_PATH = os.getcwd()
BASE_DATA_PATH = os.path.join(BASE_PATH, "data/example/cityscapes")

# IMG_SIZE is passed to the generators as `target_size`; IMG_SHAPE is IMG_SIZE
# with a trailing 3 appended (presumably the channel count - confirm).
IMG_SIZE = (1024, 2048)
IMG_SHAPE = (*IMG_SIZE, 3)

# One seed per subset; all generators of a subset are created with the same seed.
SEED_TRAIN, SEED_TEST, SEED_VAL = 1, 2, 3

### Loading images from directory

**Assumptions**:
- The dataset is too large to load into memory all at once. Therefore, we will use a generator that yields the data from the directory to the model in batches.

**Implementation:**
- The implementation is based on the [TensorFlow docs](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator) (under "Example of transforming images and masks together." section)
- We can't use the `ImageDataGenerator.flow_from_directory` method due to the following reasons:
  - For every input image we fit the model on, the target is made up of three images (`gtFine_color`, `gtFine_instanceIds`, `gtFine_labelIds`). However, this method can read only one target image at a time.

  - The structure of the dataset does not fit what this method expects:
    - It's not separated into categories
    - The target is composed of three different images. The method cannot batch multiple targets for the same input image.

- Due to the above, we will use the `flow_from_dataframe` method. We will create a pandas DataFrame with the paths to the input images (`*_X_path_df`). Based on the names of the images, we will create three additional DataFrames (`*_gtFine_color_path_df`, `*_gtFine_instanceIds_path_df`, `*_gtFine_labelIds_path_df`) whose rows line up, index by index, with those of `*_X_path_df`.

<br>

**Additional implementation options:**
- [Customize the data generator function](https://medium.com/analytics-vidhya/write-your-own-custom-data-generator-for-tensorflow-keras-1252b64e41c3)

1. Get all the paths to images

In [None]:
# Row i of every GT frame has to describe the same scene as row i of the
# matching *_X_path_df, because the generators built from these frames are
# later zipped together purely by position.


def _paths_df(pattern):
  """Return a one-column ("filename") DataFrame of the files matching `pattern`.

  `pattern` is a glob pattern relative to BASE_DATA_PATH. The matches are
  sorted because glob.glob returns them in a filesystem-dependent order, which
  would otherwise leave the input and GT frames misaligned. Sorting aligns them
  on the assumption that an input image and its GT files share a filename
  prefix - TODO confirm for the full dataset.
  """
  return pd.DataFrame({"filename": sorted(glob.glob(os.path.join(BASE_DATA_PATH, pattern)))})


# Input
train_X_path_df = _paths_df("leftImg8bit/train/*/*")
test_X_path_df = _paths_df("leftImg8bit/test/*/*")
val_X_path_df = _paths_df("leftImg8bit/val/*/*")

# GT Train
train_gtFine_color_path_df = _paths_df("gtFine/train/*/*color*")
train_gtFine_instanceIds_path_df = _paths_df("gtFine/train/*/*instanceIds*")
train_gtFine_labelIds_path_df = _paths_df("gtFine/train/*/*labelIds*")

# GT Test
test_gtFine_color_path_df = _paths_df("gtFine/test/*/*color*")
test_gtFine_instanceIds_path_df = _paths_df("gtFine/test/*/*instanceIds*")
test_gtFine_labelIds_path_df = _paths_df("gtFine/test/*/*labelIds*")

# GT Validation
val_gtFine_color_path_df = _paths_df("gtFine/val/*/*color*")
val_gtFine_instanceIds_path_df = _paths_df("gtFine/val/*/*instanceIds*")
val_gtFine_labelIds_path_df = _paths_df("gtFine/val/*/*labelIds*")

# Every GT frame must list exactly as many files as its input frame; a mismatch
# would silently pair images with the wrong targets once the generators are zipped.
# TODO: also check that the filenames (not only the counts) correspond.
for _split, _x_df, _gt_dfs in (
    ("train", train_X_path_df, (train_gtFine_color_path_df, train_gtFine_instanceIds_path_df, train_gtFine_labelIds_path_df)),
    ("test", test_X_path_df, (test_gtFine_color_path_df, test_gtFine_instanceIds_path_df, test_gtFine_labelIds_path_df)),
    ("val", val_X_path_df, (val_gtFine_color_path_df, val_gtFine_instanceIds_path_df, val_gtFine_labelIds_path_df)),
):
  for _gt_df in _gt_dfs:
    assert len(_gt_df) == len(_x_df), (
        f"{_split}: found {len(_x_df)} input images but {len(_gt_df)} files in a GT frame")

2. Create an `ImageDataGenerator` instance for every subset

In [None]:
# Every ImageDataGenerator below is built from the same augmentation arguments.

# TODO: Edit the augmentation based on the paper

data_gen_args = {
    # "featurewise_center": True,
    # "featurewise_std_normalization": True,
    "rotation_range": 90,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "zoom_range": 0.2,
}


def _new_datagens(count):
  """Return `count` independent ImageDataGenerator instances built from data_gen_args."""
  return [ImageDataGenerator(**data_gen_args) for _ in range(count)]


# Input
train_X_datagen, test_X_datagen, val_X_datagen = _new_datagens(3)

# GT Train
(train_gtFine_color_datagen,
 train_gtFine_instanceIds_datagen,
 train_gtFine_labelIds_datagen) = _new_datagens(3)

# GT Test
(test_gtFine_color_datagen,
 test_gtFine_instanceIds_datagen,
 test_gtFine_labelIds_datagen) = _new_datagens(3)

# GT Validation
(val_gtFine_color_datagen,
 val_gtFine_instanceIds_datagen,
 val_gtFine_labelIds_datagen) = _new_datagens(3)

3. Apply the `flow_from_dataframe` method for every subset with its corresponding paths DataFrame

In [None]:
# All generators of one subset receive the same seed and keyword arguments.


def _flow_paths(datagen, path_df, seed):
  """Build an iterator over the files listed in `path_df`.

  Every call uses the same settings: images resized to IMG_SIZE, batches of
  two, no label column, no class mode and no sample weights. Only the
  ImageDataGenerator, the paths frame and the seed differ between calls.
  """
  return datagen.flow_from_dataframe(
      path_df,
      y_col=None,
      target_size=IMG_SIZE,
      class_mode=None,
      batch_size=2,
      seed=seed,
      weight_col=None,
  )


# Input
train_X_generator = _flow_paths(train_X_datagen, train_X_path_df, SEED_TRAIN)
test_X_generator = _flow_paths(test_X_datagen, test_X_path_df, SEED_TEST)
val_X_generator = _flow_paths(val_X_datagen, val_X_path_df, SEED_VAL)

# GT Train
train_gtFine_color_generator = _flow_paths(
    train_gtFine_color_datagen, train_gtFine_color_path_df, SEED_TRAIN)
train_gtFine_instanceIds_generator = _flow_paths(
    train_gtFine_instanceIds_datagen, train_gtFine_instanceIds_path_df, SEED_TRAIN)
train_gtFine_labelIds_generator = _flow_paths(
    train_gtFine_labelIds_datagen, train_gtFine_labelIds_path_df, SEED_TRAIN)

# GT Test
test_gtFine_color_generator = _flow_paths(
    test_gtFine_color_datagen, test_gtFine_color_path_df, SEED_TEST)
test_gtFine_instanceIds_generator = _flow_paths(
    test_gtFine_instanceIds_datagen, test_gtFine_instanceIds_path_df, SEED_TEST)
test_gtFine_labelIds_generator = _flow_paths(
    test_gtFine_labelIds_datagen, test_gtFine_labelIds_path_df, SEED_TEST)

# GT Validation
val_gtFine_color_generator = _flow_paths(
    val_gtFine_color_datagen, val_gtFine_color_path_df, SEED_VAL)
val_gtFine_instanceIds_generator = _flow_paths(
    val_gtFine_instanceIds_datagen, val_gtFine_instanceIds_path_df, SEED_VAL)
val_gtFine_labelIds_generator = _flow_paths(
    val_gtFine_labelIds_datagen, val_gtFine_labelIds_path_df, SEED_VAL)

Found 4 validated image filenames.
Found 4 validated image filenames.
Found 4 validated image filenames.
Found 4 validated image filenames.
Found 4 validated image filenames.
Found 4 validated image filenames.
Found 4 validated image filenames.
Found 4 validated image filenames.
Found 4 validated image filenames.
Found 4 validated image filenames.
Found 4 validated image filenames.
Found 4 validated image filenames.


4. Zip the GT (target) generators into a single generator that the model will iterate over, e.g.:

- train_gtFine_color_generator = [1,2,3]
- train_gtFine_instanceIds_generator = [1,2,3] 
- train_gtFine_labelIds_generator = [1,2,3]

generators will yield - (1,1,1), (2,2,2), (3,3,3)


In [None]:
# combine generators of GT into one which yields 3 target images
train_gt_generator = zip(train_gtFine_color_generator, train_gtFine_instanceIds_generator, train_gtFine_labelIds_generator)
test_gt_generator = zip(test_gtFine_color_generator, test_gtFine_instanceIds_generator, test_gtFine_labelIds_generator)
val_gt_generator = zip(val_gtFine_color_generator, val_gtFine_instanceIds_generator, val_gtFine_labelIds_generator)

5. Zip the X set with the target.

generator will yield - (x1,(1,1,1)), (x2,(2,2,2)), (x3,(3,3,3))

In [None]:
# combine generators into one which yields image and masks
train_generator = zip(train_X_generator, train_gt_generator)
test_generator = zip(test_X_generator, test_gt_generator)
val_generator = zip(val_X_generator, val_gt_generator)

### Build the model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Conv2D,Flatten,MaxPooling2D, Conv2DTranspose

In [None]:
# model = Sequential([])
# model.summary()
# model.compile()
# model.fit()

# Commit Files

In [None]:
# Show the state of the working tree (modified / untracked files) before committing.
!git status

In [None]:
# !dvc add

# !git add

# !git commit

# !git status

# Push Files

In [None]:
# !git push https://{GITHUB_USER_NAME}:{GITHUB_TOKEN}@github.com/{GITHUB_REPO_OWNER}/{GITHUB_REPO_NAME}.git {BRANCH}

In [None]:
# !dvc push -r origin