## Split Images into Folders for Git Push

Our original dataset included 10,015 images in a single folder, totaling about 3 GB. This was too large for a single git push, so we wrote this code to copy the images into two smaller folders (at most 5,100 images each), which we were able to push to GitHub one at a time.

In [None]:
import os
import shutil

# Folder that holds the full, unsplit image set.
source_folder = "Resources/Skin Cancer/Skin Cancer"
# Parent folder under which the numbered split folders (folder_1, folder_2, ...) are created.
target_folder = "Resources/Skin Cancer/Split"
# Maximum number of files per split folder; 5100 divides the 10,015 images into two folders.
files_per_folder = 5100

def split_files_into_folders(source_folder, target_folder, files_per_folder):
    """
    Copy the files of a source folder into numbered sub-folders of a target folder.

    The regular files directly inside ``source_folder`` are sorted by name and
    copied, in batches of at most ``files_per_folder``, into
    ``target_folder/folder_1``, ``target_folder/folder_2``, and so on. The
    source files are left in place. Sub-directories of ``source_folder`` are
    ignored. Running the function again reuses the existing folders and
    overwrites files of the same name.

    Args:
    - source_folder (str): Path to the source folder containing the files to be split.
    - target_folder (str): Path to the target folder where the split folders will be created.
    - files_per_folder (int): Maximum number of files to be placed in each split folder.

    Returns:
    - None

    Raises:
    - ValueError: If files_per_folder is smaller than 1.
    """
    # Fail early with a clear message instead of a ZeroDivisionError (0) or a
    # silent no-op (negative values).
    if files_per_folder < 1:
        raise ValueError("files_per_folder must be a positive integer")

    # exist_ok avoids both the check-then-create race and a crash on re-runs.
    os.makedirs(target_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order and includes directories.
    # Keep regular files only (copyfile cannot copy a directory) and sort them
    # so the same file always lands in the same split folder.
    files = sorted(
        entry
        for entry in os.listdir(source_folder)
        if os.path.isfile(os.path.join(source_folder, entry))
    )

    # Walk the sorted list in steps of files_per_folder; folder numbers start at 1.
    batch_starts = range(0, len(files), files_per_folder)
    for folder_number, start_index in enumerate(batch_starts, start=1):
        batch = files[start_index:start_index + files_per_folder]
        folder_name = os.path.join(target_folder, f'folder_{folder_number}')
        os.makedirs(folder_name, exist_ok=True)

        # Copy (not move) each file so the original dataset stays intact.
        for file_name in batch:
            source_path = os.path.join(source_folder, file_name)
            target_path = os.path.join(folder_name, file_name)
            shutil.copyfile(source_path, target_path)

        print(f"Created folder '{folder_name}' with {len(batch)} files.")


# Run the split with the paths and batch size configured at the top of this cell.
split_files_into_folders(
    source_folder=source_folder,
    target_folder=target_folder,
    files_per_folder=files_per_folder,
)