In [1]:
# packages
from pathlib import Path
import os
import random
import shutil

In [7]:
root_folder = Path("/Users/alextsagkas/Document/Office/solar_panels/data/")

# Subfolders
clean_folder = root_folder / "download" / "clean"
soiled_folder = root_folder / "download" / "soiled"
train_clean_folder = root_folder / "download_train" / "clean"
train_soiled_folder = root_folder / "download_train" / "soiled"
test_clean_folder = root_folder / "download_test" / "clean"
test_soiled_folder = root_folder / "download_test" / "soiled"

print(clean_folder, type(clean_folder))

/Users/alextsagkas/Document/Office/solar_panels/data/download/clean <class 'pathlib.PosixPath'>


In [9]:
# Create necessary folders
os.makedirs(train_clean_folder, exist_ok=True)
os.makedirs(train_soiled_folder, exist_ok=True)
os.makedirs(test_clean_folder, exist_ok=True)
os.makedirs(test_soiled_folder, exist_ok=True)

In [10]:
# Get the list of clean and soiled image files
clean_files = os.listdir(clean_folder)
soiled_files = os.listdir(soiled_folder)

In [11]:
# Shuffle the clean and soiled files
random.shuffle(clean_files)
random.shuffle(soiled_files)

print(clean_files[25:27])

['2023-09-09_02-30-39_85.jpeg', '2023-09-09_02-30-39_150.jpeg']


In [12]:
# Split the data in train and test sets
test_percentage = 0.2

test_clean_split = int(test_percentage * len(clean_files))
test_soiled_split = int(test_percentage * len(soiled_files))

train_clean_split = int((1-test_percentage) * len(clean_files))
train_soiled_split = int((1-test_percentage) * len(soiled_files))

print(f"Clean files: {len(clean_files)}, Clean test split: {test_clean_split}, Clean train split: {train_clean_split}")
print(f"Soiled files: {len(soiled_files)}, Soiled test split: {test_soiled_split}, Soiled train split: {train_soiled_split}")

Clean files: 512, Clean test split: 102, Clean train split: 409
Soiled files: 487, Soiled test split: 97, Soiled train split: 389


In [13]:
from typing import Union

# Move files to test folders
def moveFilesTest(
    test_split: int,
    files_list: list[str],
    prev_folder: Path,
    next_folder: Path
) -> Union[None, str]:
    """
    Move test files from the prev_folder to the next_folder
    """    
    if files_list is None:
        return "Files list is empty"
        
    for i in range(test_split):
        shutil.move(prev_folder / files_list[i], next_folder)

In [14]:
moveFilesTest(
    test_clean_split,
    clean_files,
    clean_folder,
    test_clean_folder
)

moveFilesTest(
    test_soiled_split,
    soiled_files,
    soiled_folder,
    test_soiled_folder
)

In [15]:
# Visualize the results
print(f"Test clean files: {len(os.listdir(test_clean_folder))}")
print(f"Test soiled files: {len(os.listdir(test_soiled_folder))}")

Test clean files: 102
Test soiled files: 97


In [16]:
from typing import Union

# Move files to train folders
def moveFilesTrain(
    files_list: list[str],
    prev_folder: Path,
    next_folder: Path
) -> Union[None, str]:
    """
    Move train files from the prev_folder to the next_folder
    """    
    if files_list is None:
        return "Files list is empty"

    for file in files_list:
        shutil.move(prev_folder / file, next_folder)

In [17]:
# Get the list of clean and soiled image files
clean_files = os.listdir(clean_folder)
soiled_files = os.listdir(soiled_folder)

moveFilesTrain(
    clean_files,
    clean_folder,
    train_clean_folder
)

moveFilesTrain(
    soiled_files,
    soiled_folder,
    train_soiled_folder
)

In [19]:
# Visualize the results
print(f"Train clean files: {len(os.listdir(train_clean_folder))}")
print(f"Train soiled files: {len(os.listdir(train_soiled_folder))}")

Train clean files: 410
Train soiled files: 390


In [20]:
# Remove the empty folders
shutil.rmtree(clean_folder)
shutil.rmtree(soiled_folder)