# BDAPPV Data Setup

### 1. Download the dataset
Access the 8.2 GB dataset of satellite images and masks from this address:

[https://zenodo.org/record/7358126](https://zenodo.org/record/7358126)

Or download to your machine using:

```
  wget 'https://zenodo.org/record/7358126/files/bdappv.zip?download=1' -O 'bdappv.zip'
```
### 2. Define dataset directory
Extract this data into a known directory, for example:

`/home/username/Datasets/bdappv`

or, if you are using the Windows operating system, for example:

`D:\Datasets\bdappv`

### 3. Adjust dataset directory
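NOTE: It may be necessary to adjust the dataset directory.
When extracting the archive, it may produce nested directories of the same name. In that case, use the directory that directly contains the `google/` and `ign/` folders shown in step 4. For example, nesting may look like: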

`/home/username/Datasets/bdappv/bdappv/bdappv`

### 4. Inspect dataset contents
Using a file manager application or terminal, ensure the directory follows this structure:
```
bdappv/
  google/
    img/
      AAAMMB2EPPRCL.png
      AAAVZ3FC7DOKFO.png
      AABAF8ABACCLYE.png
      ...
    mask/
      AABCN78E1YHCWW.png
      AABNG2261PNFOZ.png
      AAGUA14E4DLNDU.png
      ...
  ign/
    img/
      AAAMMB2EPPRCL.png
      AAAVZ3FC7DOKFO.png
      AABAF8ABACCLYE.png
      ...
    mask/
      AABCN78E1YHCWW.png
      AABNG2261PNFOZ.png
      AACFE47AACFGPC.png
      ...
  metadata.csv
```

### 5. Provide the dataset directory in the first notebook cell
Modify the first notebook code cell below with the location of your directory.
This should be the full path, and not a relative path.

For example:

`/home/username/Datasets/bdappv`

### 6. Run notebook below to move images
Once the dataset location is set, run all cells below to set up the dataset by moving images into new directories.

In [19]:
import os
import platform

# Pick a default dataset location for the current operating system.
# These are only starting points; override `bdappv_path` below if your
# copy of the dataset lives somewhere else.
system_name = platform.system()
if system_name == "Windows":
    print("The current operating system is Windows.")
    # A raw string is required here: in a normal string literal "\b" is a
    # backspace character, which would silently corrupt the path.
    bdappv_path = r"D:\Datasets\bdappv"
elif system_name == "Linux":
    print("The current operating system is Linux.")
    # expanduser("~") resolves the home directory even when $HOME is unset
    # or the home directory is not located under /home.
    bdappv_path = os.path.join(os.path.expanduser("~"), "Datasets", "bdappv")
else:
    # Any other platform (e.g. "Darwin" on macOS) also gets a home-relative
    # default so that `bdappv_path` is always defined.
    print("The current operating system is {}.".format(system_name))
    bdappv_path = os.path.join(os.path.expanduser("~"), "Datasets", "bdappv")

# YOU MUST MODIFY THIS LINE!
# bdappv_path = "/home/username/Datasets/bdappv"

bdappv_path = os.path.normpath(bdappv_path)
print("bdappv directory: ",bdappv_path)
assert os.path.isdir(bdappv_path), (
    "Dataset directory not found: {!r}. "
    "Set `bdappv_path` above to the full path of your extracted dataset."
).format(bdappv_path)
print("SUCCESS: The provided dataset directory path exists.")

# Locations of the Google images and masks directories
google_img_dir = os.path.join(bdappv_path,'google','img')
google_mask_dir = os.path.join(bdappv_path,'google','mask')
# Locations of the IGN images and masks directories
ign_img_dir = os.path.join(bdappv_path,'ign','img')
ign_mask_dir = os.path.join(bdappv_path,'ign','mask')

# Every expected sub-directory must be present before images can be moved.
for required_dir in (google_img_dir, google_mask_dir, ign_img_dir, ign_mask_dir):
    assert os.path.isdir(required_dir), (
        "Expected dataset sub-directory is missing: {!r}. "
        "The archive may have been extracted into nested 'bdappv' "
        "directories; point `bdappv_path` at the directory that directly "
        "contains 'google' and 'ign'."
    ).format(required_dir)
print("SUCCESS: The preset dataset directories all exists.")

The current operating system is Linux.
bdappv directory:  /home/david/Datasets/bdappv
SUCCESS: The provided dataset directory path exists.
SUCCESS: The preset dataset directories all exists.


In [29]:
# MOVE IMAGES WITH AN ASSOCIATED MASK
# NOTE: You only need to run this once

import os
import glob
import shutil

# Expected file counts for the dataset (images in total, and masks).
GOOGLE_IMG_COUNT = 28807
GOOGLE_MASK_COUNT = 13303
IGN_IMG_COUNT = 17325
IGN_MASK_COUNT = 7685


def _png_names(directory):
    """Return the set of ``*.png`` file names (without path) in *directory*."""
    return {os.path.basename(p) for p in glob.glob(os.path.join(directory, "*.png"))}


def _move_images_with_masks(label, img_dir, mask_dir, dest_dir, n_images, n_masks):
    """Move every image in *img_dir* that has a same-named mask into *dest_dir*.

    Parameters:
        label: provider name used in the progress message (e.g. "Google").
        img_dir: directory holding the original ``*.png`` images.
        mask_dir: directory holding the ``*.png`` masks.
        dest_dir: existing directory that receives the matched images.
        n_images: expected number of images (remaining plus already moved).
        n_masks: expected number of masks, which is also the expected
            number of matched images.

    Raises:
        AssertionError: if the file counts do not match the expected values.

    Does nothing when *dest_dir* already holds *n_masks* images, so the
    cell can be re-run safely.
    """
    already_moved = _png_names(dest_dir)
    if len(already_moved) == n_masks:
        return

    img_names = _png_names(img_dir)
    # Count previously moved files too, so an interrupted run can resume.
    assert len(img_names) + len(already_moved) == n_images, (
        "{}: expected {} images in total, found {} in {!r} and {} in {!r}."
    ).format(label, n_images, len(img_names), img_dir, len(already_moved), dest_dir)

    mask_names = _png_names(mask_dir)
    assert len(mask_names) == n_masks, (
        "{}: expected {} masks in {!r}, found {}."
    ).format(label, n_masks, mask_dir, len(mask_names))

    # Set intersection replaces the per-mask scan of the full image list.
    print("Matching {} filenames...".format(label))
    for name in sorted(mask_names & img_names):
        shutil.move(os.path.join(img_dir, name), os.path.join(dest_dir, name))


# Create directories to move images into
google_img_w_mask_dir = os.path.join(bdappv_path,'google','img_with_mask')
os.makedirs(google_img_w_mask_dir, exist_ok=True)

ign_img_w_mask_dir = os.path.join(bdappv_path,'ign','img_with_mask')
os.makedirs(ign_img_w_mask_dir, exist_ok=True)

_move_images_with_masks(
    "Google", google_img_dir, google_mask_dir, google_img_w_mask_dir,
    GOOGLE_IMG_COUNT, GOOGLE_MASK_COUNT,
)
_move_images_with_masks(
    "IGN", ign_img_dir, ign_mask_dir, ign_img_w_mask_dir,
    IGN_IMG_COUNT, IGN_MASK_COUNT,
)

# Count and report new file lists
google_img_matched = glob.glob(os.path.join(google_img_w_mask_dir,"*.png"))
assert len(google_img_matched) == GOOGLE_MASK_COUNT, (
    "Expected {} Google images with masks, found {}."
).format(GOOGLE_MASK_COUNT, len(google_img_matched))

ign_img_matched = glob.glob(os.path.join(ign_img_w_mask_dir,"*.png"))
assert len(ign_img_matched) == IGN_MASK_COUNT, (
    "Expected {} IGN images with masks, found {}."
).format(IGN_MASK_COUNT, len(ign_img_matched))

print("SUCCESS: Moved image files into new directories.")
print("NOTE: You only need to run this once.")

SUCCESS: Moved image files into new directories.
NOTE: You only need to run this once.
