In [None]:
pip install patchify

In [None]:
pip install -U segmentation-models

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'cvproject:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4610753%2F7860162%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240408%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240408T141934Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D6a491ae7dada1238235e5a33e81a57cc1e141e597ff67f2de3022cb86b24454f78bbf6e628301dd12ae80354746e8f3ff54cabdff76828e0c936725f0899768c595a7b7f4430a724e3d0c34fcf940dafa6c8ca5595096f916e8fbc5d42da80963bf8a99b8a29abd52037ba256073c4ab75222e733fea6ac2114156bf33b5a78892bbd3426f2dc82d9df150331ad6721902f5bbe76b7b8a60ef7aef5f36a18298e96f67950423deeae5307fe7b09577c114fe1c4a92e3536642ad377ae83ff31407b38e6331a8bca97635e415238c0331ccbc35b57ee8e0063126e918bb912519f4f03763e9bcc7296cc9b597a6b42ab4474504042a9ba64f3ad3c42615d44f3a'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
# Start from an empty /kaggle/input, then make sure both Kaggle folders exist.
shutil.rmtree('/kaggle/input', ignore_errors=True)
for kaggle_dir in (KAGGLE_INPUT_PATH, KAGGLE_WORKING_PATH):
    os.makedirs(kaggle_dir, 0o777, exist_ok=True)

# Expose the folders as ../input and ../working; a link that already exists is left alone.
for link_target, link_name in ((KAGGLE_INPUT_PATH, 'input'), (KAGGLE_WORKING_PATH, 'working')):
    try:
        os.symlink(link_target, os.path.join("..", link_name), target_is_directory=True)
    except FileExistsError:
        pass

# Download every "<directory>:<url-encoded URL>" entry and unpack it under KAGGLE_INPUT_PATH.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split only on the first ':' so the directory name is separated from the rest verbatim.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length is optional; without it only the running byte count is shown.
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            # `chunk` (not `data`) so the notebook-level `data` DataFrame is never overwritten.
            for chunk in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(chunk)
                tfile.write(chunk)
                if total_length:
                    done = min(50, int(50 * dl / total_length))
                    sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                else:
                    sys.stdout.write(f"\r{dl} bytes downloaded")
                sys.stdout.flush()
            # Push buffered bytes to disk: the tar branch reopens the file by name.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` would replace the
                # module and break the next loop iteration (and any later cell).
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # Must come first: HTTPError is a subclass of OSError.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')


In [None]:
import glob
import os
import random

import cv2
import numpy as np
import segmentation_models as sm
import tensorflow as tf
import tifffile as tiff
from matplotlib import pyplot as plt
from patchify import patchify
from PIL import Image
from tensorflow import keras
from tensorflow.keras.metrics import MeanIoU

In [None]:
# Quick look at one training pair to see how images and masks are stored.
temp_img = cv2.imread("/kaggle/input/cvproject/augmented_dataset/images/train/12_13.jpg")  # loaded as a 3-channel array
temp_mask = cv2.imread("/kaggle/input/cvproject/augmented_dataset/masks/train/12_13.png")  # also loaded with 3 channels
plt.imshow(temp_img[..., 2])  # view a single channel; change the index to see the others
# Tally the distinct values in the first mask channel.
# NOTE(review): the original comment says all mask channels are identical — confirm on the data.
labels, count = np.unique(temp_mask[..., 0], return_counts=True)
print("Labels are: ", labels, " and the counts are: ", count)

In [None]:
# Folders inside the downloaded Kaggle dataset that hold the training images and their masks.
images_path = "/kaggle/input/cvproject/augmented_dataset/images/train"
masks_path = "/kaggle/input/cvproject/augmented_dataset/masks/train"

In [None]:
from pathlib import Path
# Total Images
# Gather every *.jpg in the image folder in sorted (stable) order.
IMAGE_PATH = Path(images_path)
IMAGE_PATH_LIST = sorted(IMAGE_PATH.glob("*.jpg"))

print(f'Total Images = {len(IMAGE_PATH_LIST)}')

In [None]:
# Total Masks
# Gather every *.png in the mask folder in sorted (stable) order.
MASK_PATH = Path(masks_path)
MASK_PATH_LIST = sorted(MASK_PATH.glob("*.png"))

print(f'Total Masks = {len(MASK_PATH_LIST)}')

In [None]:
import pandas as pd
# Pair each image with the mask that shares its file stem (e.g. 12_13.jpg with 12_13.png).
# Matching by stem instead of by list position keeps the pairs correct even when one
# folder contains files that the other lacks.
masks_by_stem = {mask_path.stem: mask_path for mask_path in MASK_PATH_LIST}
images_paths = [img_path for img_path in IMAGE_PATH_LIST if img_path.stem in masks_by_stem]
masks_paths = [masks_by_stem[img_path.stem] for img_path in images_paths]

unpaired = len(IMAGE_PATH_LIST) - len(images_paths)
if unpaired:
    print(f'Skipping {unpaired} image(s) without a matching mask')

# One row per image/mask pair; the tiling loops iterate over these two columns.
data = pd.DataFrame({'Image':images_paths, 'Mask':masks_paths})
data

In [None]:
# Mount Google Drive at /content/drive; every output path used below lives under this mount.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Base folder for all generated patches; the trailing slash matters because paths are built by string concatenation.
root_directory = '/content/drive/MyDrive/'

In [None]:
# Output folders for the image tiles and the mask tiles.
for patch_subdir in ("256_patches/images/", "256_patches/masks/"):
    os.makedirs(root_directory + patch_subdir, exist_ok=True)

In [None]:
# Side length in pixels used by the tiling loops to crop each image to a whole number of patches.
patch_size = 256

In [None]:
# Cut the first 1000 images into non-overlapping patch_size x patch_size tiles and save them.
for image_path in data['Image'].head(1000):
    image_path = str(image_path)
    image = cv2.imread(image_path, 1)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None rather than raising.
        print("Skipping unreadable image:", image_path)
        continue

    # Largest width/height divisible by the patch size; the crop starts at the top-left corner.
    SIZE_X = (image.shape[1] // patch_size) * patch_size
    SIZE_Y = (image.shape[0] // patch_size) * patch_size
    if SIZE_X == 0 or SIZE_Y == 0:
        print("Skipping image smaller than one patch:", image_path)
        continue
    image = np.ascontiguousarray(image[:SIZE_Y, :SIZE_X])

    print("Now patchifying image:", image_path)
    patches_img = patchify(image, (patch_size, patch_size, 3), step=patch_size)

    # splitext on the basename keeps dots elsewhere in the path from truncating the name.
    image_stem = os.path.splitext(os.path.basename(image_path))[0]
    for i in range(patches_img.shape[0]):
        for j in range(patches_img.shape[1]):
            # Index 0 drops the extra singleton dimension that patchify adds for 3-D input.
            single_patch_img = patches_img[i, j, 0]

            # Save the image patch to the drive (array is still BGR, which cv2.imwrite expects).
            cv2.imwrite(root_directory+"256_patches/images/"+image_stem+"_patch_"+str(i)+str(j)+".jpg", single_patch_img)

In [None]:
# Cut the first 1000 masks into non-overlapping patch_size x patch_size tiles and save them.
for mask_path in data['Mask'].head(1000):
    mask_path = str(mask_path)  # Ensure the path is converted to a string
    # Keep OpenCV's native BGR order: cv2.imwrite below expects BGR, so converting to RGB
    # first would store every mask tile with its red and blue channels swapped.
    mask = cv2.imread(mask_path, 1)
    if mask is None:
        # cv2.imread reports a missing/unreadable file by returning None rather than raising.
        print("Skipping unreadable mask:", mask_path)
        continue

    # Largest width/height divisible by the patch size; the crop starts at the top-left corner.
    SIZE_X = (mask.shape[1] // patch_size) * patch_size
    SIZE_Y = (mask.shape[0] // patch_size) * patch_size
    if SIZE_X == 0 or SIZE_Y == 0:
        print("Skipping mask smaller than one patch:", mask_path)
        continue
    mask = np.ascontiguousarray(mask[:SIZE_Y, :SIZE_X])

    print("Now patchifying mask:", mask_path)
    patches_img = patchify(mask, (patch_size, patch_size, 3), step=patch_size)

    # splitext on the basename keeps dots elsewhere in the path from truncating the name.
    mask_stem = os.path.splitext(os.path.basename(mask_path))[0]
    for i in range(patches_img.shape[0]):
        for j in range(patches_img.shape[1]):
            # Index 0 drops the extra singleton dimension that patchify adds for 3-D input.
            single_patch_img = patches_img[i, j, 0]

            # Save the mask patch to the drive as PNG (lossless, so class colours stay exact).
            cv2.imwrite(root_directory+"256_patches/masks/"+mask_stem+"_patch_"+str(i)+str(j)+".png", single_patch_img)

In [None]:
# Folders that the tiling loops wrote the image and mask patches to.
train_img_dir = "/content/drive/MyDrive/256_patches/images/"
train_mask_dir = "/content/drive/MyDrive/256_patches/masks/"

In [None]:
# os.listdir returns names in arbitrary order. Sort both lists so that index k refers to
# the same patch in the image list and in the mask list (they are paired by index later).
img_list = sorted(os.listdir(train_img_dir))
msk_list = sorted(os.listdir(train_mask_dir))

In [None]:
# Count the image patches on disk (the folder is listed again here) and display the number.
num_images = len(os.listdir(train_img_dir))
num_images

In [None]:
# Random index for the preview; randint bounds are inclusive, hence num_images-1.
# NOTE(review): no random.seed(...) call is visible, so the pick presumably changes per run.
img_num = random.randint(0, num_images-1)

In [None]:
# Load the chosen patch in colour; OpenCV returns BGR, so convert to RGB before plotting.
img_for_plot = cv2.imread(train_img_dir+img_list[img_num], 1)
img_for_plot = cv2.cvtColor(img_for_plot, cv2.COLOR_BGR2RGB)

In [None]:
# Read the mask at the same index as a single-channel (grayscale) image for display.
mask_for_plot = cv2.imread(train_mask_dir + msk_list[img_num], 0)

# Image on the left, mask on the right.
fig, (ax_image, ax_mask) = plt.subplots(1, 2, figsize=(12, 8))
ax_image.imshow(img_for_plot)
ax_image.set_title('Image')
ax_mask.imshow(mask_for_plot, cmap='gray')
ax_mask.set_title('Mask')
plt.show()

In [None]:
# Output folders for the patches that pass the "useful area" filter.
for useful_subdir in ("images/", "masks/"):
    os.makedirs(root_directory + "256_patches/images_with_useful_info/" + useful_subdir, exist_ok=True)

In [None]:
# Keep only the patches whose mask has more than 5% labelled (non-black) pixels.
useless = 0  # counter of rejected patches
for img, (img_name, mask_name) in enumerate(zip(img_list, msk_list)):
    print("Now preparing image and masks number: ", img)

    temp_image = cv2.imread(train_img_dir + img_name, 1)
    # Read the mask in colour: each class is encoded by its RGB value, so saving a
    # grayscale copy would destroy the labels that rgb_to_2D_label needs later.
    temp_mask = cv2.imread(train_mask_dir + mask_name, 1)
    if temp_image is None or temp_mask is None:
        # cv2.imread returns None for unreadable files; count the pair as rejected.
        print("Skipping unreadable pair: ", img_name, mask_name)
        useless += 1
        continue

    # Fraction of pixels that are non-zero in at least one channel. Counting non-zero
    # pixels directly avoids assuming that the smallest value present in the mask is 0.
    labelled_pixels = np.count_nonzero(temp_mask.any(axis=-1))
    labelled_fraction = labelled_pixels / (temp_mask.shape[0] * temp_mask.shape[1])

    if labelled_fraction > 0.05:  # At least 5% useful area with labels that are not 0
        print("Save Me")
        cv2.imwrite('/content/drive/MyDrive/256_patches/images_with_useful_info/images/'+img_name, temp_image)
        cv2.imwrite('/content/drive/MyDrive/256_patches/images_with_useful_info/masks/'+mask_name, temp_mask)
    else:
        print("I am useless")
        useless += 1

In [None]:
# Summary of the filtering step.
useful_count = len(img_list) - useless
print(f"Total useful images are:  {useful_count}")
print(f"Total useless images are:  {useless}")

In [None]:
# Folder that receives the train/val split of the filtered patches.
os.makedirs(root_directory + "256_patches/finaldata/", exist_ok=True)

In [None]:
pip install split-folders

In [None]:
import splitfolders  # or import split_folders

# Split the filtered patches 75% / 25% into two subsets; the fixed seed makes the split repeatable.
input_folder = '/content/drive/MyDrive/256_patches/images_with_useful_info/'
output_folder = '/content/drive/MyDrive/256_patches/finaldata/'
splitfolders.ratio(
    input_folder,
    output=output_folder,
    ratio=(.75, .25),  # a two-element tuple gives training and validation sets only
    seed=42,
    group_prefix=None,
)

In [None]:
# Preview the training part of the split written by splitfolders. The earlier
# ".../data_train_final/..." paths pointed at a folder this notebook never creates,
# so listing them failed on a fresh run.
train_img_dir = "/content/drive/MyDrive/256_patches/finaldata/train/images/"
train_mask_dir = "/content/drive/MyDrive/256_patches/finaldata/train/masks/"

In [None]:
# os.listdir returns names in arbitrary order. Sort both lists so that index k refers to
# the same patch in the image list and in the mask list (they are paired by index below).
img_list = sorted(os.listdir(train_img_dir))
msk_list = sorted(os.listdir(train_mask_dir))

In [None]:
# Number of files in the training image folder; used as the upper bound for the random preview index.
num_images = len(os.listdir(train_img_dir))


In [None]:
# Pick one random training patch and show it next to its mask.
img_num = random.randint(0, num_images-1)

# OpenCV loads BGR; convert to RGB so matplotlib shows the true colours.
img_for_plot = cv2.cvtColor(cv2.imread(train_img_dir + img_list[img_num], 1), cv2.COLOR_BGR2RGB)
# The mask at the same index is read as a single-channel image.
mask_for_plot = cv2.imread(train_mask_dir + msk_list[img_num], 0)

fig, (ax_image, ax_mask) = plt.subplots(1, 2, figsize=(12, 8))
ax_image.imshow(img_for_plot)
ax_image.set_title('Image')
ax_mask.imshow(mask_for_plot, cmap='gray')
ax_mask.set_title('Mask')
plt.show()

In [None]:
# Root of the folder layout that the Keras directory generators read from.
os.makedirs(root_directory + "256_patches/data_train_fi/", exist_ok=True)

In [None]:
# One generator root per (split, kind) combination.
for generator_root in ("train_images", "train_masks", "val_images", "val_masks"):
    os.makedirs(root_directory + "256_patches/data_train_fi/" + generator_root, exist_ok=True)

In [None]:
# Each generator root gets a single sub-folder that holds the actual files.
for generator_leaf in ("train_images/train/", "train_masks/train/", "val_images/val/", "val_masks/val"):
    os.makedirs(root_directory + "256_patches/data_train_fi/" + generator_leaf, exist_ok=True)

In [None]:
import os
import shutil

def copy_files(source_folder, destination_folder):
    """Copy every regular file directly inside ``source_folder`` to ``destination_folder``.

    Parameters
    ----------
    source_folder : str
        Folder to copy from. If it is missing (or is not a folder) a message is
        printed and nothing is copied.
    destination_folder : str
        Folder to copy into; created, including parents, when it does not exist.

    Returns
    -------
    None

    Notes
    -----
    Sub-folders of ``source_folder`` are skipped rather than copied, because
    ``shutil.copy2`` only handles files. File metadata is preserved by ``copy2``.
    """
    # isdir (not exists) so that a plain file passed as the source is rejected up front.
    if not os.path.isdir(source_folder):
        print(f"Source folder '{source_folder}' does not exist.")
        return

    os.makedirs(destination_folder, exist_ok=True)

    # Sorted for a deterministic copy order and log.
    for file in sorted(os.listdir(source_folder)):
        source_file = os.path.join(source_folder, file)
        if not os.path.isfile(source_file):
            # Skip directories instead of aborting the whole copy on the first one.
            continue
        destination_file = os.path.join(destination_folder, file)
        shutil.copy2(source_file, destination_file)
        print(f"File '{file}' copied to '{destination_folder}'.")

In [None]:
# Fill every folder the generators read from. Previously only the validation masks were
# copied, leaving the other three folders empty on a fresh run.
# Source layout:      finaldata/<split>/<images|masks>
# Destination layout: data_train_fi/<split>_<images|masks>/<split>
split_root = "/content/drive/MyDrive/256_patches/finaldata"
generator_root = "/content/drive/MyDrive/256_patches/data_train_fi"
for split_name in ("train", "val"):
    for kind in ("images", "masks"):
        source_folder = f"{split_root}/{split_name}/{kind}"
        destination_folder = f"{generator_root}/{split_name}_{kind}/{split_name}"
        copy_files(source_folder, destination_folder)

In [None]:
# Leaf folders holding the training image and mask patches (one level below the generator roots).
root_directory_img = '/content/drive/MyDrive/256_patches/data_train_fi/train_images/train'
root_directory_msk = '/content/drive/MyDrive/256_patches/data_train_fi/train_masks/train'

In [None]:
root_directory_img

In [None]:
import segmentation_models as sm

In [None]:
# Shared training configuration.
seed=24  # passed to both flow_from_directory calls inside trainGenerator
batch_size= 16  # batch size of the generators; also used to derive steps per epoch
n_classes=6  # number of output classes handed to sm.Unet

In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
from keras.utils import to_categorical


In [None]:
# Encoder backbone for the U-Net, plus the input preprocessing function that
# segmentation_models provides for that backbone.
BACKBONE = 'resnet50'
preprocess_input = sm.get_preprocessing(BACKBONE)

In [None]:
import numpy as np
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import MinMaxScaler


def _hex_to_rgb(hex_color):
    """Convert a '#RRGGBB' string into a length-3 integer array [R, G, B]."""
    digits = hex_color.lstrip('#')
    return np.array(tuple(int(digits[k:k+2], 16) for k in (0, 2, 4)))


# RGB colour that encodes each class in the mask images.
divider = _hex_to_rgb('#6E4B26')     # 110, 75, 38
lane = _hex_to_rgb('#87CEEB')        # 135, 206, 235
road = _hex_to_rgb('#808000')        # 128, 128, 0
zebra = _hex_to_rgb('#FFFFFF')       # 255, 255, 255
dlane = _hex_to_rgb('#F1700A')       # 241, 112, 10
background = _hex_to_rgb('#000000')  # 0, 0, 0

def rgb_to_2D_label(label):
    """Map an RGB mask to integer class labels.

    Parameters
    ----------
    label : array-like
        Mask whose last axis holds the three RGB values, e.g. (H, W, 3) for one
        mask or (N, H, W, 3) for a batch.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the shape of ``label`` minus its last axis. Classes:
        0 divider, 1 road, 2 lane, 3 zebra, 4 dlane, 5 background.

    Notes
    -----
    Pixels matching none of the six colours keep the initial value 0, i.e. they
    are reported as class 0 (divider).
    """
    label = np.asarray(label)
    # One label per pixel: drop the channel axis so the result can be one-hot
    # encoded into (..., num_classes) rather than (..., 3, num_classes).
    label_seg = np.zeros(label.shape[:-1], dtype=np.uint8)
    class_colors = (divider, road, lane, zebra, dlane, background)
    for class_id, color in enumerate(class_colors):
        label_seg[np.all(label == color, axis=-1)] = class_id
    return label_seg

def preprocess_data(img, mask, num_class):
    """Prepare one batch for training.

    The image batch is min-max scaled per channel and then passed through the
    backbone's ``preprocess_input``; the RGB mask batch is converted to class
    labels and one-hot encoded with ``num_class`` classes.

    Returns a tuple ``(img, mask)`` of the processed arrays.
    """
    # Flatten to (pixels, channels) so the scaler fits one min/max per channel,
    # then restore the original shape.
    # NOTE(review): scaling to [0, 1] before the backbone preprocessing — confirm
    # that this is the input range the chosen preprocess_input expects.
    original_shape = img.shape
    flat_pixels = img.reshape(-1, original_shape[-1])
    scaled_pixels = MinMaxScaler().fit_transform(flat_pixels)
    img = preprocess_input(scaled_pixels.reshape(original_shape))

    # RGB colours -> integer labels -> one-hot vectors.
    label_map = rgb_to_2D_label(mask)
    one_hot_mask = to_categorical(label_map, num_class)
    return (img, one_hot_mask)





In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
def trainGenerator(train_img_path, train_mask_path, num_class, augment=False):
    """Yield endless ``(image_batch, mask_batch)`` pairs read from two directory trees.

    Parameters
    ----------
    train_img_path, train_mask_path : str
        Root folders handed to ``flow_from_directory`` for images and masks.
    num_class : int
        Number of classes used when one-hot encoding the masks.
    augment : bool, optional
        When True, random horizontal/vertical flips are applied to images and
        masks. Defaults to False, which matches the previous behaviour where the
        flip settings were defined but never passed to the generators.

    Yields
    ------
    tuple
        The result of ``preprocess_data`` for each (image batch, mask batch).

    Notes
    -----
    Reads the notebook-level ``batch_size`` and ``seed``.
    """
    img_data_gen_args = dict(horizontal_flip=True,
                             vertical_flip=True,
                             fill_mode='reflect') if augment else {}

    # Same arguments and same seed for both generators, so shuffling (and flips,
    # when enabled) stay in step between an image and its mask.
    image_datagen = ImageDataGenerator(**img_data_gen_args)
    mask_datagen = ImageDataGenerator(**img_data_gen_args)

    image_generator = image_datagen.flow_from_directory(
        train_img_path,
        class_mode=None,  # Set to None to return only the images without labels
        batch_size=batch_size,
        seed=seed)

    mask_generator = mask_datagen.flow_from_directory(
        train_mask_path,
        class_mode=None,  # Set to None to return only the masks without labels
        color_mode='rgb',
        batch_size=batch_size,
        seed=seed)

    for (img, mask) in zip(image_generator, mask_generator):
        img, mask = preprocess_data(img, mask, num_class)
        yield img, mask


In [None]:
# Generator roots for training: the files themselves sit one level down, in the "train" sub-folder.
train_img_path = "/content/drive/MyDrive/256_patches/data_train_fi/train_images/"
train_mask_path = "/content/drive/MyDrive/256_patches/data_train_fi/train_masks/"

In [None]:
# Generator of (image batch, one-hot mask batch) pairs for training.
train_img_gen = trainGenerator(train_img_path, train_mask_path, num_class=6)

In [None]:
# Validation data goes through the same generator function as training, pointed at the val folders.
val_img_path = "/content/drive/MyDrive/256_patches/data_train_fi/val_images/"
val_mask_path = "/content/drive/MyDrive/256_patches/data_train_fi/val_masks/"
val_img_gen = trainGenerator(val_img_path, val_mask_path, num_class=6)


In [None]:
# Pull one batch from the training generator for inspection.
x, y = next(train_img_gen)

In [None]:
# Pull one batch from the validation generator for inspection.
x_val, y_val = next(val_img_gen)

In [None]:
# Show the first three image/mask pairs of the training batch side by side.
for sample_idx in range(3):
    sample_image = x[sample_idx]
    sample_mask = np.argmax(y[sample_idx], axis=2)  # one-hot -> class index per pixel
    fig, (ax_image, ax_mask) = plt.subplots(1, 2)
    ax_image.imshow(sample_image)
    ax_mask.imshow(sample_mask, cmap='gray')
    plt.show()

In [None]:
# Count the files on disk and derive how many full batches fit into one epoch.
train_leaf_dir = '/content/drive/MyDrive/256_patches/data_train_fi/train_images/train/'
val_leaf_dir = '/content/drive/MyDrive/256_patches/data_train_fi/val_images/val/'
num_train_imgs = len(os.listdir(train_leaf_dir))
num_val_images = len(os.listdir(val_leaf_dir))
# Floor division: a trailing partial batch is not counted.
steps_per_epoch, val_steps_per_epoch = num_train_imgs//batch_size, num_val_images//batch_size

In [None]:
# Model input size, taken from axes 1-3 of the sample batch.
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = x.shape[1:4]

In [None]:
# Training is done by the `model1.fit(...)` call further down, after the network has been
# built and compiled. This cell used to call `model.fit(...)`, but no object named `model`
# is ever created in this notebook, so it raised NameError on a fresh kernel.

In [None]:
# Build a U-Net on the chosen backbone with ImageNet-pretrained encoder weights and a
# softmax output over n_classes, then compile it with Adam, categorical cross-entropy
# and the IoU score as the tracked metric.
model1 = sm.Unet(BACKBONE, encoder_weights='imagenet',
                input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS),
                classes=n_classes, activation='softmax')
model1.compile('Adam', loss='categorical_crossentropy', metrics=[sm.metrics.iou_score])

In [None]:
# summary() prints the table itself and returns None, so it is not wrapped in print()
# (that would append a stray "None" to the output).
model1.summary()
print(model1.input_shape)

In [None]:
# Train for 5 epochs; both generators are endless, so the step counts define an epoch.
history = model1.fit(
    train_img_gen,
    epochs=5,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_img_gen,
    validation_steps=val_steps_per_epoch,
    verbose=1,
)

In [None]:
# Loss curves; `epochs` is reused by the IoU plot in the next cell.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)

fig, ax = plt.subplots()
ax.plot(epochs, loss, 'y', label='Training loss')
ax.plot(epochs, val_loss, 'r', label='Validation loss')
ax.set_title('Training and validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# IoU curves over the same epoch axis as the loss plot.
acc = history.history['iou_score']
val_acc = history.history['val_iou_score']

fig, ax = plt.subplots()
ax.plot(epochs, acc, 'y', label='Training IoU')
ax.plot(epochs, val_acc, 'r', label='Validation IoU')
ax.set_title('Training and validation IoU')
ax.set_xlabel('Epochs')
ax.set_ylabel('IoU')
ax.legend()
plt.show()

In [None]:
# Take the next validation batch as the test batch.
test_image_batch, test_mask_batch = next(val_img_gen)

In [None]:
# Collapse the one-hot / probability axis (axis 3) into a class index per pixel.
test_mask_batch_argmax = np.argmax(test_mask_batch, axis=3)
# Predict with `model1`, the network that was built and trained above; the name `model`
# is never defined in this notebook.
test_pred_batch = model1.predict(test_image_batch)
test_pred_batch_argmax = np.argmax(test_pred_batch, axis=3)


In [None]:
# Compare input, ground truth and prediction for one random sample of the test batch.
img_num = random.randint(0, test_image_batch.shape[0]-1)

panels = (
    (231, 'Testing Image', test_image_batch[img_num]),
    (232, 'Testing Label', test_mask_batch_argmax[img_num]),
    (233, 'Prediction on test image', test_pred_batch_argmax[img_num]),
)
plt.figure(figsize=(12, 8))
for position, panel_title, panel_data in panels:
    plt.subplot(position)
    plt.title(panel_title)
    plt.imshow(panel_data)
plt.show()

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'cvproject:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4610753%2F7860162%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240408%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240408T195953Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D3ec4ba340ec4665069b52e82f71b28f3d2eec944c1115d45416f58d504eb0898a23461944a6e3a057150492d7943804eba1d5d7afdd3172a98a22221d1cf71f0c401e3be7bb43beed4198d3c71c7029e812e70d44205f5acfc473fe063dfff7da9fcea48be64f12f68fcea0c09d102f79043674b603633a3dac8c38cbe9b7d192bf8afdabfd457093de1d8c3f40dc9398c07a8db0dff2181702db1bd8728bacdb97baab7951c6d3ade19629fec582916708e44c5e8b5d615d1f2c00b81cc58573b4e943ada27b2ea8ba152e538a01a6d17cae6e20573ecc48a464d14de22ae5fa5ee1e8e69c96e4993f500629432c4758e287a58ddceb70c0dc0c57675483a95'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
# Start from an empty /kaggle/input, then make sure both Kaggle folders exist.
shutil.rmtree('/kaggle/input', ignore_errors=True)
for kaggle_dir in (KAGGLE_INPUT_PATH, KAGGLE_WORKING_PATH):
    os.makedirs(kaggle_dir, 0o777, exist_ok=True)

# Expose the folders as ../input and ../working; a link that already exists is left alone.
for link_target, link_name in ((KAGGLE_INPUT_PATH, 'input'), (KAGGLE_WORKING_PATH, 'working')):
    try:
        os.symlink(link_target, os.path.join("..", link_name), target_is_directory=True)
    except FileExistsError:
        pass

# Download every "<directory>:<url-encoded URL>" entry and unpack it under KAGGLE_INPUT_PATH.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split only on the first ':' so the directory name is separated from the rest verbatim.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length is optional; without it only the running byte count is shown.
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            # `chunk` (not `data`) so the notebook-level `data` DataFrame is never overwritten.
            for chunk in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(chunk)
                tfile.write(chunk)
                if total_length:
                    done = min(50, int(50 * dl / total_length))
                    sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                else:
                    sys.stdout.write(f"\r{dl} bytes downloaded")
                sys.stdout.flush()
            # Push buffered bytes to disk: the tar branch reopens the file by name.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` would replace the
                # module and break the next loop iteration (and any later cell).
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # Must come first: HTTPError is a subclass of OSError.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')
