# 1. Import Libraries

In [None]:
import os
import sys
import io

import numpy as np
import pandas as pd
import datetime

import requests
import zipfile
import shutil

import cv2

path_prefix = "../"
sys.path.insert(0, os.path.abspath(path_prefix))

import config.path_config as env

# 2. Define required helper functions

In [None]:
def generate_processed_data(labels):
    """Move labelled raw images into the combined ``empty``/``occupied`` folders.

    Args:
        labels: pandas DataFrame with the columns ``ImagePath`` (path relative
            to the dataset's raw directory), ``ImageLabel`` (``0`` means empty,
            any other value means occupied) and ``ImageSource`` (``'CNRPark'``,
            ``'CNR-EXT'``; every other value is treated as PKLot).

    Side effects:
        Creates the two destination folders when missing, moves every image
        that exists on disk and prints one status line per row. Rows whose
        image is missing, or whose file name already exists in the destination
        folder, are reported and skipped so a single bad row does not abort
        the whole batch.
    """
    combined_root = path_prefix + env.PATH_DATA_PROCESSED + env.PATH_DATA_COMBINED
    path_empty = combined_root + "empty"
    path_occupied = combined_root + "occupied"

    os.makedirs(path_empty, exist_ok=True)
    os.makedirs(path_occupied, exist_ok=True)

    raw_root = path_prefix + env.PATH_DATA_RAW
    # Raw sub-directory per known source; unknown sources fall back to PKLot.
    source_dirs = {
        'CNRPark': env.PATH_DATA_CNRPARK,
        'CNR-EXT': env.PATH_DATA_CNREXT,
    }

    # Iterating the three columns directly avoids building a Series per row.
    for relative_path, image_label, image_source in zip(
            labels['ImagePath'], labels['ImageLabel'], labels['ImageSource']):
        source_dir = source_dirs.get(image_source, env.PATH_DATA_PKLOT)
        image_path = raw_root + source_dir + relative_path
        image_name = image_path.split('/')[-1]

        if not os.path.exists(image_path):
            print("File Not Found: %s" % image_name)
            continue

        destination = path_empty if image_label == 0 else path_occupied
        try:
            shutil.move(image_path, destination)
        except shutil.Error as error:
            # Raised e.g. when a file with the same name is already present
            # in the destination folder; keep the source file and carry on.
            print("File Not Moved: %s (%s)" % (image_name, error))
            continue

        print("File Moved Successfully: " + image_name)
    
def remove_extra_images(min_width, min_height):
    """Delete processed images that are smaller than the requested size.

    Every file inside each label folder of the combined processed-data
    directory is loaded with OpenCV; it is removed when its width is below
    ``min_width`` or its height is below ``min_height``.

    Args:
        min_width: minimum number of pixel columns an image must have.
        min_height: minimum number of pixel rows an image must have.

    Side effects:
        Deletes files from disk and prints one line per removed or
        unreadable file.
    """
    data_combined_path = path_prefix + env.PATH_DATA_PROCESSED + env.PATH_DATA_COMBINED

    for label_folder in os.listdir(data_combined_path):
        label_folder_path = data_combined_path + label_folder + '/'

        for file_name in os.listdir(label_folder_path):
            file_path = label_folder_path + file_name

            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread signals an unreadable / non-image file by
                # returning None; leave such files untouched.
                print('Skipped unreadable file ' + file_path)
                continue

            # OpenCV arrays are indexed (rows, columns[, channels]),
            # i.e. (height, width[, channels]).
            height, width = image.shape[:2]

            if width < min_width or height < min_height:
                os.remove(file_path)
                print('Removed ' + file_path)

# 3. Import CNRPark+EXT dataset

### Documentation available here: http://cnrpark.it/

#### CNRPark - http://cnrpark.it/dataset/CNRPark-Patches-150x150.zip
#### CNR-EXT - http://cnrpark.it/dataset/CNR-EXT-Patches-150x150.zip

In [None]:
# Archive file names of the two CNR patch datasets.
cnr_ext_dataset_name = "CNR-EXT-Patches-150x150.zip"
cnr_park_dataset_name = "CNRPark-Patches-150x150.zip"

# Base URL the archive names above are appended to when downloading.
cnr_dataset_url = "http://cnrpark.it/dataset/"

## 3.1. Download CNRPark dataset

In [None]:
%%time

# Reuse a previously downloaded archive when present; otherwise download it
# once, cache it in the raw-data directory and open the cached copy.
cnr_park_zip_path = path_prefix + env.PATH_DATA_RAW + cnr_park_dataset_name

if not os.path.exists(cnr_park_zip_path):
    # The raw-data directory may not exist yet on a fresh checkout.
    os.makedirs(path_prefix + env.PATH_DATA_RAW, exist_ok=True)

    r = requests.get(cnr_dataset_url + cnr_park_dataset_name)
    # Fail loudly on an HTTP error instead of caching an error page as a .zip.
    r.raise_for_status()

    with open(cnr_park_zip_path, 'wb') as f:
        f.write(r.content)

cnr_park_dataset_zip = zipfile.ZipFile(cnr_park_zip_path, 'r')

## 3.2. Extract the zip file for CNRPark dataset

In [None]:
%%time

# Unpack every member of the CNRPark archive into its own raw-data folder.
cnr_park_dataset_zip.extractall(
    path=path_prefix + env.PATH_DATA_RAW + "CNRPark-Patches-150x150"
)

## 3.3. Download CNR-Ext dataset

In [None]:
%%time

# Reuse a previously downloaded archive when present; otherwise download it
# once, cache it in the raw-data directory and open the cached copy.
cnr_ext_zip_path = path_prefix + env.PATH_DATA_RAW + cnr_ext_dataset_name

if not os.path.exists(cnr_ext_zip_path):
    # The raw-data directory may not exist yet on a fresh checkout.
    os.makedirs(path_prefix + env.PATH_DATA_RAW, exist_ok=True)

    r = requests.get(cnr_dataset_url + cnr_ext_dataset_name)
    # Fail loudly on an HTTP error instead of caching an error page as a .zip.
    r.raise_for_status()

    with open(cnr_ext_zip_path, 'wb') as f:
        f.write(r.content)

cnr_ext_dataset_zip = zipfile.ZipFile(cnr_ext_zip_path, 'r')

## 3.4. Extract the zip file for CNR-Ext dataset

In [None]:
%%time

# Unpack every member of the CNR-EXT archive into its own raw-data folder.
cnr_ext_dataset_zip.extractall(
    path=path_prefix + env.PATH_DATA_RAW + "CNR-EXT-Patches-150x150"
)

# 4. Import PKLot dataset

### Documentation available here: https://web.inf.ufpr.br/vri/databases/parking-lot-database/

#### Manually download and extract the PKLot.tar.gz file, then place its contents inside the data/raw/PKLot/ directory
#### PKLot.tar.gz: http://www.inf.ufpr.br/vri/databases/PKLot.tar.gz

In [None]:
%%time

pklot_path = path_prefix + env.PATH_DATA_RAW + env.PATH_DATA_PKLOT
pklot_labels_path = path_prefix + env.PATH_DATA_RAW + env.PATH_LABELS_PKLOT


def _pklot_subfolders(parent_path):
    """Return the names of the directories directly inside ``parent_path``.

    ``parent_path`` must end with '/'. Plain files lying next to the folders
    are skipped so that the traversal below never calls ``os.listdir`` on a
    non-directory.
    """
    return [name for name in os.listdir(parent_path)
            if os.path.isdir(parent_path + name)]


# Walk the camera/season/date/label folder hierarchy, record every image with
# a numeric label and write the shuffled table to '<labels dir>/all.txt'.
if not os.path.exists(pklot_path):
    print('PKLot Directory does not exists')
else:
    # Label folder name -> numeric class stored in the labels file.
    # Images inside any other folder are listed but not recorded.
    pklot_classes = {'Empty': 0, 'Occupied': 1}
    labels_array = []

    for cameraFolder in _pklot_subfolders(pklot_path):
        cameraFolderPath = pklot_path + cameraFolder + '/'
        print('--Inside:', cameraFolder)

        for seasonFolder in _pklot_subfolders(cameraFolderPath):
            seasonFolderPath = cameraFolderPath + seasonFolder + '/'
            seasonRelPath = cameraFolder + '/' + seasonFolder
            print('----Inside: ', seasonRelPath)

            for dateFolder in _pklot_subfolders(seasonFolderPath):
                dateFolderPath = seasonFolderPath + dateFolder + '/'
                dateRelPath = seasonRelPath + '/' + dateFolder
                print('--------Inside: ', dateRelPath)

                for labelFolder in _pklot_subfolders(dateFolderPath):
                    labelFolderPath = dateFolderPath + labelFolder + '/'
                    labelRelPath = dateRelPath + '/' + labelFolder
                    print('----------------Inside: ', labelRelPath)

                    for image in os.listdir(labelFolderPath):
                        # Stored path is relative to the PKLot raw directory.
                        imagePath = labelRelPath + '/' + image
                        print('--------------------Image: ' + image)
                        if labelFolder in pklot_classes:
                            labels_array.append([imagePath, pklot_classes[labelFolder]])

    labels = pd.DataFrame(data=labels_array, columns=['ImagePath', 'ImageLabel'])

    os.makedirs(pklot_labels_path, exist_ok=True)

    # frac=1 returns every row in random order, i.e. a shuffled copy.
    pklot_labels = labels.sample(frac=1)

    print(pklot_labels.shape)

    pklot_labels.to_csv(pklot_labels_path + 'all.txt', sep=' ', index=False)
    
    
    # labels_train = labels.sample(frac=0.70)
    # labels.drop(index=labels_train.index, inplace=True)

    # labels_test = labels.sample(frac=0.15)
    # labels.drop(index=labels_test.index, inplace=True)

    # labels_val = labels.sample(frac=0.15)
    # labels.drop(index=labels_val.index, inplace=True)

    # labels_train.to_csv(pklot_labels_path + 'train.txt', sep=' ', index=None)
    # labels_test.to_csv(pklot_labels_path + 'test.txt', sep=' ', index=None)
    # labels_val.to_csv(pklot_labels_path + 'val.txt', sep=' ', index=None)

# 5. Merge CNR and PKLot data and move to data/processed/ directory

## 5.1. Move CNRPark dataset

In [None]:
%%time

# Build the label table for CNRPark from its <category>/<label>/<file> layout.
cnr_park_root = path_prefix + env.PATH_DATA_RAW + env.PATH_DATA_CNRPARK
image_source = 'CNRPark'

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append copied the whole frame on every call and no longer
# exists in pandas >= 2.0.
cnr_park_rows = []

for category_folder in os.listdir(cnr_park_root):
    print('--Inside: ', category_folder)
    label_folder_path = cnr_park_root + category_folder + "/"

    for label_folder in os.listdir(label_folder_path):
        print('----Inside: ' + category_folder + "/" + label_folder)
        file_folder_path = label_folder_path + label_folder + "/"

        # 'busy' patches are occupied (1); every other folder, including
        # 'free', is labelled empty (0).
        image_label = 1 if label_folder == 'busy' else 0

        for file_name in os.listdir(file_folder_path):
            print('--------Inside: ' + file_name)
            cnr_park_rows.append({
                "ImagePath": category_folder + "/" + label_folder + "/" + file_name,
                "ImageLabel": image_label,
                "ImageSource": image_source,
            })

cnr_park_labels = pd.DataFrame(cnr_park_rows,
                               columns=['ImagePath', 'ImageLabel', 'ImageSource'])

In [None]:
# Show (number of rows, number of columns) of the CNRPark label table.
print((len(cnr_park_labels.index), len(cnr_park_labels.columns)))

In [None]:
# Preview the first five rows of the CNRPark label table.
print(cnr_park_labels.head(n=5))

In [None]:
%%time 

# Move the CNRPark images into the combined empty/occupied folders.
generate_processed_data(labels=cnr_park_labels)

## 5.2. Move CNR-EXT dataset

In [None]:
%%time

# Load the CNR-EXT label file (space separated, no header row) and tag every
# row with its source so generate_processed_data can resolve the image paths.
cnr_ext_labels_name = env.PATH_LABELS_CNREXT + 'all.txt'
cnr_ext_labels_file = path_prefix + env.PATH_DATA_RAW + cnr_ext_labels_name

if os.path.exists(cnr_ext_labels_file):
    print("File Found: %s" % cnr_ext_labels_name)
    cnr_ext_labels = pd.read_csv(cnr_ext_labels_file, delimiter=' ', header=None)
    cnr_ext_labels['ImageSource'] = 'CNR-EXT'
    cnr_ext_labels.columns = ['ImagePath', 'ImageLabel', 'ImageSource']
else:
    # The original format string ended in a bare '%', which raised
    # "ValueError: incomplete format" instead of printing this message.
    # NOTE: cnr_ext_labels stays undefined on this path.
    print("File Not Found: %s" % cnr_ext_labels_name)

In [None]:
# Show (number of rows, number of columns) of the CNR-EXT label table.
print((len(cnr_ext_labels.index), len(cnr_ext_labels.columns)))

In [None]:
# Preview the first five rows of the CNR-EXT label table.
print(cnr_ext_labels.head(n=5))

In [None]:
%%time

# Move the CNR-EXT images into the combined empty/occupied folders.
generate_processed_data(labels=cnr_ext_labels)

## 5.3. Move PKLot dataset

In [None]:
%%time

# Re-read the PKLot label file written earlier (first row is the header) and
# tag every row with its source before normalising the column names.
pklot_labels_file = pklot_labels_path + 'all.txt'
pklot_labels = pd.read_csv(pklot_labels_file, sep=' ', header=0)
pklot_labels = pklot_labels.assign(ImageSource='Pklot')
pklot_labels.columns = ['ImagePath', 'ImageLabel', 'ImageSource']

In [None]:
# Show (number of rows, number of columns) of the PKLot label table.
print((len(pklot_labels.index), len(pklot_labels.columns)))

In [None]:
# Preview the first five rows of the PKLot label table.
print(pklot_labels.head(n=5))

In [None]:
%%time

# Move the PKLot images into the combined empty/occupied folders.
generate_processed_data(labels=pklot_labels)

# 6. Remove Small/Extra Images from processed dataset

In [None]:
%%time

# Drop processed images below the minimum size (arguments: min_width, min_height).
remove_extra_images(100, 50)