# 2. Data Directory Setup #

## VERY IMPORTANT REMINDER! ##
If you have not done this already:
1. Create two folders in your directory - call one <code>Pre-v2</code> and call the other <code>Post-v2</code>
2. Move all folders containing pre-event imagery (downloaded in the <code>DownloadMoroccoData.ipynb</code> file) into the <code>Pre-v2</code> folder and all the folders containing post-event imagery into the <code>Post-v2</code> folder.

### Rename pre-event images and move all images into a new folder called <code>Pre-event-v2</code>

In [8]:
# Code reference: This work used ChatGPT-3.5 to generate the initial functions and report outline. This was done on 20/09/2023
# with the following prompts: "Generate code to move images in subfolders into a new folder and rename each image to be unique"

# Import required packages
import os
import shutil

# Specify the path to your Pre-v2 folder (main folder) that contains the pre-imagery subfolders
main_folder = 'Pre-v2'
# Specify the destination folder for renamed images to be moved to
dest_folder = 'Pre-event-v2'

# Check if the destination folder (Pre-event-v2) already exists and if it does not, then create the folder
if not os.path.exists(dest_folder):
    os.mkdir(dest_folder)
    print(f"Folder '{dest_folder}' created successfully.")
else:
    print(f"Folder '{dest_folder}' already exists.")

# List all subfolders in the main Pre-v2 folder
subfolders = [f.path for f in os.scandir(main_folder) if f.is_dir()]
print(subfolders)

# Iterate through the subfolders
for subfolder in subfolders:
    # List all files in the subfolder
    subfolder_files = os.listdir(subfolder)
    print("FOLDER",subfolder)
    
    # Iterate through the files in the subfolder
    for file in subfolder_files:
        print(file)
        
        # Get the full path to the file in the subfolder
        file_path = os.path.join(subfolder, file)

        # Create a new file name with the subfolder name as a prefix
        new_file_name = f"{os.path.basename(subfolder)}_{file}"
        
        # Get the full path to the new file in the subfolder
        new_file_path = os.path.join(subfolder, new_file_name)

        # Rename the file in the subfolder
        os.rename(file_path, new_file_path)
        
print("Images have been renamed.")

Folder 'Pre-event-v2' already exists.
['Pre-v2/10300100CBAF7D00', 'Pre-v2/1040050039DC5C00', 'Pre-v2/10400100405FEB00', 'Pre-v2/103001008244DA00', 'Pre-v2/1030010083A10700', 'Pre-v2/10400100797DAC00', 'Pre-v2/1040010083289000', 'Pre-v2/1040010045AE4B00']
FOLDER Pre-v2/10300100CBAF7D00
10300100CBAF7D00_10300100CBAF7D00_120202003312.tif
10300100CBAF7D00_10300100CBAF7D00_120202003320.tif
10300100CBAF7D00_10300100CBAF7D00_120202003301.tif
10300100CBAF7D00_10300100CBAF7D00_120202003123.tif
10300100CBAF7D00_10300100CBAF7D00_120202003330.tif
10300100CBAF7D00_10300100CBAF7D00_120202003303.tif
10300100CBAF7D00_10300100CBAF7D00_120202003300.tif
10300100CBAF7D00_10300100CBAF7D00_120202003120.tif
10300100CBAF7D00_10300100CBAF7D00_120202003132.tif
10300100CBAF7D00_10300100CBAF7D00_120202003302.tif
10300100CBAF7D00_10300100CBAF7D00_120202003310.tif
10300100CBAF7D00_10300100CBAF7D00_120202003121.tif
10300100CBAF7D00_10300100CBAF7D00_120202003130.tif
10300100CBAF7D00_10300100CBAF7D00_120202003321.tif


In [9]:
# Copy the images to the destination folder

# Iterate through the subfolders
for subfolder in subfolders:
    
    # List all files in the subfolder
    subfolder_files = os.listdir(subfolder)
    print("FOLDER",subfolder)
    
    # Iterate through the files in the subfolder
    for file in subfolder_files:
        print(file)
        
        # Get the full path to the file in the subfolder
        file_path = os.path.join(subfolder, file)
        
        # Copy the file to the main folder
        shutil.copy(file_path, dest_folder)
        
print("Images have been copied to the dest folder")

FOLDER Pre-v2/10300100CBAF7D00
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003123.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003301.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003132.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003321.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003320.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003312.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003122.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003121.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003303.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003302.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003310.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003300.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003330.tif
10300100CBAF7D00_10300100CBAF7D00_10300100CBAF7D00_120202003120.tif


### Rename all post-event images and move all images into a new folder called <code>Post-event-v2</code>

In [11]:
#Rename post-event images

# Specify the path to your main Post-v2 folder containing subfolders with post-event images
main_folder = 'Post-v2'
dest_folder = 'Post-event-v2' 

# List all subfolders in the main folder
subfolders = [f.path for f in os.scandir(main_folder) if f.is_dir()]
print(subfolders)

# Check if the Post-event-v2 folder already exists and if not, create an empty folder
if not os.path.exists(dest_folder):
    os.mkdir(dest_folder)
    print(f"Folder '{dest_folder}' created successfully.")
else:
    print(f"Folder '{dest_folder}' already exists.")

# Iterate through the subfolders
for subfolder in subfolders:
    
    # List all files in the subfolder
    subfolder_files = os.listdir(subfolder)
    print("FOLDER",subfolder)
    
    # Iterate through the files in the subfolder
    for file in subfolder_files:
        print(file)
        
        # Get the full path to the file in the subfolder
        file_path = os.path.join(subfolder, file)

        # Create a new file name with the subfolder name as a prefix
        new_file_name = f"{os.path.basename(subfolder)}_{file}"
        
        # Get the full path to the new file in the subfolder
        new_file_path = os.path.join(subfolder, new_file_name)

        # Rename the file in the subfolder
        os.rename(file_path, new_file_path)
        
        
print("Images have been renamed.")

['Post-v2/104001008A8E9800', 'Post-v2/10300100ED11EA00', 'Post-v2/10300500E4F92300', 'Post-v2/10300500E4F91700']
Folder 'Post-event-v2' already exists.
FOLDER Post-v2/104001008A8E9800
104001008A8E9800_120202003122.tif
104001008A8E9800_120202003302.tif
104001008A8E9800_120202003123.tif
104001008A8E9800_120202003120.tif
104001008A8E9800_120202003301.tif
104001008A8E9800_120202003300.tif
104001008A8E9800_120202003121.tif
FOLDER Post-v2/10300100ED11EA00
10300100ED11EA00_120202003120.tif
10300100ED11EA00_120202012022.tif
10300100ED11EA00_120202003131.tif
10300100ED11EA00_120202012202.tif
10300100ED11EA00_120202012020.tif
10300100ED11EA00_120202003330.tif
10300100ED11EA00_120202003310.tif
10300100ED11EA00_120202003320.tif
10300100ED11EA00_120202003331.tif
10300100ED11EA00_120202003303.tif
10300100ED11EA00_120202003133.tif
10300100ED11EA00_120202003321.tif
10300100ED11EA00_120202012023.tif
10300100ED11EA00_120202012200.tif
10300100ED11EA00_120202003123.tif
10300100ED11EA00_120202003313.tif
10

In [12]:
# Copy the renamed post-event images to a new folder called 'Post-event-v2'

# Iterate through the subfolders
for subfolder in subfolders:
    
    # List all files in the subfolder
    subfolder_files = os.listdir(subfolder)
    print("FOLDER",subfolder)
    
    # Iterate through the files in the subfolder
    for file in subfolder_files:
        print(file)
        
        # Get the full path to the file in the subfolder
        file_path = os.path.join(subfolder, file)
        
        # Copy the file to the main folder
        shutil.copy(file_path, dest_folder)


print("Images have been copied to the dest folder")

FOLDER Post-v2/104001008A8E9800
104001008A8E9800_104001008A8E9800_120202003120.tif
104001008A8E9800_104001008A8E9800_120202003122.tif
104001008A8E9800_104001008A8E9800_120202003121.tif
104001008A8E9800_104001008A8E9800_120202003123.tif
104001008A8E9800_104001008A8E9800_120202003302.tif
104001008A8E9800_104001008A8E9800_120202003300.tif
104001008A8E9800_104001008A8E9800_120202003301.tif
FOLDER Post-v2/10300100ED11EA00
10300100ED11EA00_10300100ED11EA00_120202003132.tif
10300100ED11EA00_10300100ED11EA00_120202003130.tif
10300100ED11EA00_10300100ED11EA00_120202003131.tif
10300100ED11EA00_10300100ED11EA00_120202012022.tif
10300100ED11EA00_10300100ED11EA00_120202003310.tif
10300100ED11EA00_10300100ED11EA00_120202012203.tif
10300100ED11EA00_10300100ED11EA00_120202012021.tif
10300100ED11EA00_10300100ED11EA00_120202003302.tif
10300100ED11EA00_10300100ED11EA00_120202003330.tif
10300100ED11EA00_10300100ED11EA00_120202003311.tif
10300100ED11EA00_10300100ED11EA00_120202012202.tif
10300100ED11EA00_1

#### Count images in the folder to check everything transferred.

In [13]:
# Cell reference: This work used ChatGPT-3.5 to generate the initial functions and report outline. This was done on 20/09/2023
# with the following prompts: "Generate code to count image files in a folder"

# Specify the path to your main folder containing images
main_folder = 'Post-event-v2'

# List all files in the main folder
all_files = os.listdir(main_folder)

# List of common image file extensions
image_extensions = ['.tif', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp']

# Initialize a counter for the number of images
image_count = 0

# Iterate through the files in the main folder
for file in all_files:
    
    # Get the file extension in lowercase
    file_extension = os.path.splitext(file)[1].lower()
    
    # Check if the file has a known image extension
    if file_extension in image_extensions:
        image_count += 1

print(f"Number of images in the main folder: {image_count}")

Number of images in the main folder: 66


#### Copy everything to the bucket

In [27]:
#Copy everything to my bucket
import os
import git
import subprocess
from google.cloud import storage

# Enter your own bucket name below:
mybucket = 'ml-bucket-tmat406'
# Make a folder in your bucket called 'BDA_Data'
gs_path = f'gs://{mybucket}/BDA_Data/'

# Define the local directory that you want to move to the bucket
temp_directory = '.'

# Run the gsutil command to copy the data to Cloud Storage
command = f'gsutil -m cp -r {temp_directory} {gs_path}'
subprocess.run(command, shell=True)

Copying file://./BDA.ipynb [Content-Type=application/octet-stream]...
Copying file://./DownloadMorrocoData.ipynb [Content-Type=application/octet-stream]...
Copying file://./test_out.tif [Content-Type=image/tiff]...                      
Copying file://./BDA_tiled.ipynb [Content-Type=application/octet-stream]...     
Copying file://./test_out.tif.msk [Content-Type=application/octet-stream]...    
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums o

CompletedProcess(args='gsutil -m cp -r . gs://ml-bucket-tmat406/BDA_Data/', returncode=0)

Now move on to the <code>MosaicAndClip.ipynb</code> file