# Measuring folder sizes for all projects

In [4]:
def create_proj_dict(src_drive, path_pix4d_gnrl, field_id, flight_type, project_prefix="2024"):
    """
    Build a dictionary of Pix4D project folders for one field and flight type.

    The source folder is ``src_drive/path_pix4d_gnrl/field_id/flight_type``. Every
    sub-directory of it whose name starts with ``project_prefix`` becomes one entry.
    If the source folder is not an existing directory, a message is printed and an
    empty dictionary is returned instead of raising, so callers can iterate over
    field/flight-type combinations that have no data on disk.

    Parameters:
    - src_drive (str): The root drive path where the projects are stored.
    - path_pix4d_gnrl (str): The general folder path for the Pix4D projects.
    - field_id (str): The field ID naming the folder that contains the projects.
    - flight_type (str): The flight type sub-folder (e.g., "3D").
    - project_prefix (str): Prefix a folder name must start with to be treated as a
                            project. Defaults to "2024".

    Returns:
    - proj_dict (dict): Maps each project folder name to a list. The first element is
                        the full project path; a second element, the path to
                        ``1_initial/report`` inside the project, is present only when
                        that path exists.
    - pix4d_path_src (str): The full path to the source Pix4D folder for reference.
    """
    # Print the status to indicate which field and flight type the function is processing.
    print(f"Preparing proj_dict for {field_id} flight type {flight_type}")

    # Generate the full source folder path for the Pix4D project files.
    pix4d_path_src = os.path.join(src_drive, path_pix4d_gnrl, field_id, flight_type)

    # A missing source folder means "no projects", not an error.
    if not os.path.isdir(pix4d_path_src):
        print(f"Path does not exist: {pix4d_path_src}")
        return {}, pix4d_path_src

    proj_dict = {}

    # Sorted so the dictionary order does not depend on the file system's listing order.
    for name in sorted(os.listdir(pix4d_path_src)):
        if not name.startswith(project_prefix):
            continue

        path_proj = os.path.join(pix4d_path_src, name)
        if not os.path.isdir(path_proj):
            continue  # Only folders count as projects; skip plain files.

        paths = [path_proj]

        # The report path is not used by the size/count helpers; it is kept for reference.
        report_path = os.path.join(path_proj, "1_initial", "report")
        if os.path.exists(report_path):
            paths.append(report_path)

        proj_dict[name] = paths

    # Return the project dictionary and the general source path for Pix4D projects
    return proj_dict, pix4d_path_src

In [5]:
import os

def get_folder_size(folder_path):
    """
    Return the total size, in bytes, of all files under ``folder_path``.

    The folder is walked recursively with ``os.walk``. Files whose size cannot be
    read (for example a dangling symbolic link, or a file removed while the scan is
    running) are skipped instead of aborting the whole measurement.

    Parameters:
    - folder_path (str): Folder to measure. A path that does not exist yields 0,
                         because ``os.walk`` produces no entries for it.

    Returns:
    - total_size (int): Sum of the sizes of all readable files, in bytes.
    """
    total_size = 0
    for dirpath, _dirnames, filenames in os.walk(folder_path):
        for filename in filenames:
            try:
                total_size += os.path.getsize(os.path.join(dirpath, filename))
            except OSError:
                # One unreadable entry should not discard the rest of a long scan.
                continue
    return total_size

def measure_folders_size(folders_dict):
    """
    Measure the size of every folder listed in ``folders_dict``.

    Parameters:
    - folders_dict (dict): Maps a label to a list whose first element is the folder
                           path to measure. Any further elements are ignored.

    Returns:
    - dict: Maps each label to the folder size as a float, computed as bytes
            divided by 1024 ** 3. Each result is also printed as it is measured.
    """
    bytes_per_gb = 1024 ** 3
    sizes_gb = {}
    for label in folders_dict:
        root_path = folders_dict[label][0]
        sizes_gb[label] = get_folder_size(root_path) / bytes_per_gb
        print(label, ": ", sizes_gb[label], "GB")
    return sizes_gb

# # Example usage
# folders_dict = {
#     "Documents": ["/path/to/documents"],
#     "Pictures": ["/path/to/pictures"],
#     "Music": ["/path/to/music"]
# }

# folder_sizes = measure_folders_size(folders_dict)
# import pprint

# # Prints the nicely formatted dictionary
# pprint.pprint(folder_sizes)

In [6]:
import os
# Counting files that match a list of extensions

def count_files_with_extensions(folder_path, extensions):
    """
    Count the files under ``folder_path`` that end with each given extension.

    The folder is walked recursively and matching is case-insensitive. A file is
    counted under every extension it ends with, but contributes only once to the
    "total" entry, so overlapping extensions (e.g. ".gz" and ".tar.gz") or repeated
    extensions do not inflate the total.

    Parameters:
    - folder_path (str): Folder to scan.
    - extensions (iterable of str): Suffixes to look for, e.g. [".jpg", ".tif"].

    Returns:
    - counts (dict): One key per distinct extension (as given) with its file count,
                     plus the key "total" holding the number of files that matched
                     at least one extension.
    """
    # Drop repeated extensions (keeping order) so a file is not counted twice for one key.
    unique_exts = list(dict.fromkeys(extensions))
    counts = {ext: 0 for ext in unique_exts}

    # Lower-case each extension once instead of once per file.
    lowered = [(ext, ext.lower()) for ext in unique_exts]

    total_count = 0
    for _dirpath, _dirnames, filenames in os.walk(folder_path):
        for filename in filenames:
            name = filename.lower()
            matched = False
            for ext, ext_lower in lowered:
                if name.endswith(ext_lower):
                    counts[ext] += 1
                    matched = True
            if matched:
                total_count += 1

    counts["total"] = total_count
    return counts

def count_files_in_folders(folders_dict, extensions):
    """
    Count files by extension for every folder listed in ``folders_dict``.

    Parameters:
    - folders_dict (dict): Maps a label to a list whose first element is the folder
                           path to scan. Any further elements are ignored.
    - extensions (list of str): Extensions passed on to
                                ``count_files_with_extensions``.

    Returns:
    - dict: Maps each label to the counts dictionary returned by
            ``count_files_with_extensions``. Each result is also printed.
    """
    per_folder = {}
    for label, paths in folders_dict.items():
        per_folder[label] = count_files_with_extensions(paths[0], extensions)
        print(label, ": ", per_folder[label], "Images")
    return per_folder

# # Example usage
# folders_dict = {
#     "Documents": ["/path/to/documents"],
#     "Pictures": ["/path/to/pictures"],
#     "Music": ["/path/to/music"]
# }

# extensions = [".txt", ".pdf", ".docx"]
# file_counts = count_files_in_folders(folders_dict, extensions)
# print(file_counts)

In [7]:
# from storage_functions import *
# from Pix4D_cleaning_fucnt import *
import os
import pandas as pd
import itertools

# Load the CSV logs. Only df_flight_routes is used in this cell (to derive the flight
# types); the other frames are loaded but not referenced in the code below.
df_flight_log = pd.read_csv("P:\\PhenoCrop\\0_csv\\flight_log.csv")
df_flight_routes = pd.read_csv("P:\\PhenoCrop\\0_csv\\flight_routes.csv")
df_fields = pd.read_csv("P:\\PhenoCrop\\0_csv\\fields.csv")
df_processing_status = pd.read_csv("P:\\PhenoCrop\\0_csv\\processing_status.csv")


# Flight types are the distinct non-null values of the "BaseType" column.
# Field IDs are listed by hand; the alternative below would take them from fields.csv.
# field_ids = df_fields["Field ID"].tolist()
flight_types = sorted(set(df_flight_routes["BaseType"].dropna()))
field_ids = ['PRO_BAR_VOLL', 'DIVERSITY_OATS', 'OAT_FRONTIERS', 'PILOT', 'E166', 'PRO_BAR_SØRÅS', 'PHENO_CROP']

# NOTE(review): the raw string r"P:\\" contains two backslashes, which is why the
# generated paths start with a doubled separator. Left unchanged here.
src_drive = r"P:\\"
path_pix4d_gnrl = r"PhenoCrop\2_pix4d"

# Generate a dictionary with all field / flight-type combinations
field_data_combinations = {f"{key}_{value}": (key, value) for key, value in itertools.product(field_ids, flight_types)}

for key, combination in field_data_combinations.items():
    field_id, flight_type = combination
    print(f"""
        Begining of evaluating new field
        Source Drive: {src_drive}
        Pix4D General Path: {path_pix4d_gnrl}
        Field ID: {field_id}
        Flight Type: {flight_type}
        """)

    proj_dict, pix4d_path_src = create_proj_dict(src_drive, path_pix4d_gnrl, field_id, flight_type)

    # NOTE(review): append_dict_to_csv is not defined in the visible cells; presumably it
    # comes from the commented-out `storage_functions` import - confirm before running.
    if proj_dict:
        # Every CSV entry uses the same "<field>_<flight type>_<data>" label.
        run_label = field_id + "_" + flight_type

        # Size of the Pix4D project folders
        data = "pix4d"
        folder_sizes = measure_folders_size(proj_dict)
        append_dict_to_csv(folder_sizes, run_label + "_" + data, file_name='3_Pix4d_Size.csv')

        # Size of the flight folders: same layout, under "1_flights" instead of "2_pix4d"
        data = "flights"
        updated_proj_dict = {k: [v[0].replace("2_pix4d", "1_flights")] for k, v in proj_dict.items()}
        folder_sizes_flights = measure_folders_size(updated_proj_dict)
        append_dict_to_csv(folder_sizes_flights, run_label + "_" + data, file_name='2_Flights_Size.csv')

        # Number of images in each flight folder
        data = "number-of-images"
        extension = [".jpg", ".tif"]
        file_counts = count_files_in_folders(updated_proj_dict, extension)
        append_dict_to_csv(file_counts, run_label + "_" + data, file_name='1_Counting_of_Images.csv')

    print(f"""
        Evaluation complete!
        Source Drive: {src_drive}
        Field ID: {field_id}
        Flight Type: {flight_type}
        """)


        Begining of processing new field
        Source Drive: P:\\
        Pix4D General Path: PhenoCrop\2_pix4d
        Field ID: PRO_BAR_VOLL
        Flight Type: 3D
        
Preparing proj_dict for PRO_BAR_VOLL flight type 3D
20240521 PRO_BAR_VOLL M3M 20m 3D 80 85 :  11.543089693412185 GB


KeyboardInterrupt: 

In [8]:
proj_dict 

{'20240521 PRO_BAR_VOLL M3M 20m 3D 80 85': ['P:\\\\PhenoCrop\\2_pix4d\\PRO_BAR_VOLL\\3D\\20240521 PRO_BAR_VOLL M3M 20m 3D 80 85',
  'P:\\\\PhenoCrop\\2_pix4d\\PRO_BAR_VOLL\\3D\\20240521 PRO_BAR_VOLL M3M 20m 3D 80 85\\1_initial\\report'],
 '20240527 PRO_BAR_VOLL M3M 20m 3D 80 85': ['P:\\\\PhenoCrop\\2_pix4d\\PRO_BAR_VOLL\\3D\\20240527 PRO_BAR_VOLL M3M 20m 3D 80 85',
  'P:\\\\PhenoCrop\\2_pix4d\\PRO_BAR_VOLL\\3D\\20240527 PRO_BAR_VOLL M3M 20m 3D 80 85\\1_initial\\report'],
 '20240531 PRO_BAR_VOLL M3M 20m 3D 80 85': ['P:\\\\PhenoCrop\\2_pix4d\\PRO_BAR_VOLL\\3D\\20240531 PRO_BAR_VOLL M3M 20m 3D 80 85',
  'P:\\\\PhenoCrop\\2_pix4d\\PRO_BAR_VOLL\\3D\\20240531 PRO_BAR_VOLL M3M 20m 3D 80 85\\1_initial\\report'],
 '20240607 PRO_BAR_VOLL M3M 20m 3D 80 85': ['P:\\\\PhenoCrop\\2_pix4d\\PRO_BAR_VOLL\\3D\\20240607 PRO_BAR_VOLL M3M 20m 3D 80 85',
  'P:\\\\PhenoCrop\\2_pix4d\\PRO_BAR_VOLL\\3D\\20240607 PRO_BAR_VOLL M3M 20m 3D 80 85\\1_initial\\report'],
 '20240612 PRO_BAR_VOLL M3M 20m 3D 80 85': ['P:\

# Old backup

In [None]:
import subprocess
import os
import sys
import glob
import streamlit as st


def open_folder(path):
    """
    Open ``path`` in the operating system's file explorer.

    On Windows ("win32") this calls ``os.startfile``. On macOS ("darwin") it
    launches ``open`` and on every other platform ``xdg-open``, each started with
    ``subprocess.Popen`` without waiting for the process to finish.

    Parameters:
    - path (str): The folder to open.
    """
    platform_name = sys.platform
    if platform_name == 'win32':
        os.startfile(path)
        return

    launcher = 'open' if platform_name == 'darwin' else 'xdg-open'
    subprocess.Popen([launcher, path])


def find_files_in_folder(folder_path, file_ext):
    """
    Find the files with a given extension directly inside ``folder_path``.

    Sub-folders are not searched. The folder path is escaped before it is placed in
    the glob pattern, so folder names containing glob special characters such as
    "[", "]", "*" or "?" are matched literally.

    Parameters:
    - folder_path (str): Folder to search.
    - file_ext (str): Extension without the leading dot, e.g. "tif".

    Returns:
    - list of str: Paths of the matching files, or [""] when nothing matched.
    """
    pattern = os.path.join(glob.escape(folder_path), f"*.{file_ext}")
    matches = glob.glob(pattern)

    # Callers receive a single empty string rather than an empty list.
    return matches if matches else [""]


def find_tif_files_in_subfolders(folder_path):
    """
    Find the .tif files in the immediate sub-folders of ``folder_path``.

    Only one level is searched: files directly in ``folder_path`` and files in
    deeper sub-folders are not returned. Paths are built with ``os.path.join`` so
    the search works with the platform's own path separator, and sub-folder paths
    are escaped so glob special characters in folder names are matched literally.

    Parameters:
    - folder_path (str): Folder whose sub-folders are searched.

    Returns:
    - tif_files (list of str): Paths of the .tif files found, or [""] if none.
    - tif_folders (list of str): Names of all entries (files and folders) directly
                                 inside ``folder_path``; empty if ``folder_path`` is
                                 not a directory.
    """
    tif_files = []
    tif_folders = []
    if os.path.isdir(folder_path):
        # List the folder once and reuse the listing for the loop below.
        tif_folders = os.listdir(folder_path)
        for item in tif_folders:
            subdir_path = os.path.join(folder_path, item)

            # Only directories are searched; plain files at this level are ignored.
            if os.path.isdir(subdir_path):
                pattern = os.path.join(glob.escape(subdir_path), "*.tif")
                tif_files.extend(glob.glob(pattern))

    # Callers receive a single empty string rather than an empty list.
    if not tif_files:
        tif_files = [""]
    return tif_files, tif_folders