In [1]:
import os
import shutil
from pathlib import Path, PosixPath
from datetime import datetime, timedelta

# setup logging to file reorg_log.txt
import logging


# create logger with 'spam_application'
logger = logging.getLogger('reorg')
logger.setLevel(logging.INFO)
# format
formatter = logging.Formatter('%(levelname)s: %(message)s')
# create file handler which logs even debug messages
fh = logging.FileHandler('reorg_log.txt')
fh.setFormatter(formatter)
fh.setLevel(logging.INFO)
logger.addHandler(fh)
# create console handler with a higher log level
ch = logging.StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.INFO)
logger.addHandler(ch)

RUN = True


def create_directory_structure(base_dir: Path, date: str) -> Path:
    """
    Create the directory structure for the given date.
    """
    day_dir = base_dir / date
    if not day_dir.exists():
        # pass
        if RUN:
            day_dir.mkdir(parents=True, exist_ok=False)
        logger.info(f"Created directory {day_dir}")
        # make subdirectories as well
    if RUN:
        subdirs = ['subsetted_netcdf','cloud_images/resized_images','images/resized_images']
        for subdir in subdirs:
            subdir_path = day_dir / subdir
            if not subdir_path.exists():
                logger.info(f"Created directory {subdir_path}")
                subdir_path.mkdir(parents=True, exist_ok=False)
    
    return day_dir

def move_files_to_day_directory(base_dir: Path, in_dirs: list[Path], file_pattern: str, parser) -> None:
    """
    Move files matching the file_pattern to their respective day directories.
    """
    count = 0
    for data_dir in in_dirs:
        for file_path in data_dir.glob(file_pattern):
            if file_path.is_file():
                date = parser(file_path.name)
                day_dir = create_directory_structure(base_dir, date)
                new_path = day_dir / Path(*list(file_path.parts)[1:])
                
                if not new_path.exists():
                    # check the directory exists
                    if new_path.parent.exists():
                        logger.info(f"Moving {file_path} to {new_path}")
                        if RUN:
                            shutil.move(str(file_path), str(new_path))
                    else:
                        if RUN:
                            raise FileNotFoundError(f"Directory {new_path.parent} does not exist.")
                        else:
                            logger.error(f"Directory {new_path.parent} does not exist.")
                        
                else:
                    pass
                    # logger.error(f"File already exists: original: {file_path} new: {new_path}")
                    # print(f"File already exists:\n\t original: {file_path}\n\t new: {new_path}")
                
                count += 1
    print(f"Moved {count} files.")



In [2]:
exclude_dirs = [PosixPath('sep_23'),
 PosixPath('nov_17'),
 PosixPath('nov_25k'),
 PosixPath('nov_19'),
 PosixPath('nov_14b'),
 PosixPath('jul_10a')]

In [3]:
valid_directories =  list(set(d.parent.parent for d in  Path("./").glob('dec_*/subsetted_netcdf/*.nc')) - set(exclude_dirs))
list(valid_directories)

[PosixPath('dec_20'),
 PosixPath('dec_23'),
 PosixPath('dec_22'),
 PosixPath('dec_22a')]

In [4]:

# ['subsetted_netcdf','cloud_images/resized_images','images/resized_images']
base_dir = Path("./")
netcdf_pattern = "./subsetted_netcdf/*.nc"
image_pattern = "./images/*.png"
image_resized_pattern = "./images/resized_images/*.png"
cloud_image_pattern = "./cloud_images/*.png"
cloud_image_resized_pattern = "./cloud_images/resized_images/*.png"
valid_directories = list(set(d.parent.parent for d in  Path("./").glob('dec_*/subsetted_netcdf/*.nc')) - set(exclude_dirs))
# valid_directories = [Path("may_01_onward")]

def netcdf_parser(filename: str) -> str:
    date_str = filename.split('_')[-2]
    return (datetime.strptime(date_str, "%Y%m%dT%H%M%SZ") - timedelta(hours=5)).strftime('%Y.%m.%d')
# Move NetCDF files
move_files_to_day_directory(base_dir, valid_directories, netcdf_pattern, parser=netcdf_parser)

def image_parser(filename: str) -> str:
    date_str = filename.split('_')[-1].split('.')[0]
    return (datetime.strptime(date_str, "%Y-%m-%dT%Hh%Mm")- timedelta(hours=5)).strftime('%Y.%m.%d')
# Move image files
move_files_to_day_directory(base_dir, valid_directories, image_pattern, parser=image_parser)
move_files_to_day_directory(base_dir, valid_directories, image_resized_pattern, parser=image_parser)
move_files_to_day_directory(base_dir, valid_directories, cloud_image_pattern, parser=image_parser)
move_files_to_day_directory(base_dir, valid_directories, cloud_image_resized_pattern, parser=image_parser)


INFO: Created directory 2024.12.22
INFO: Created directory 2024.12.22/subsetted_netcdf
INFO: Created directory 2024.12.22/cloud_images/resized_images
INFO: Created directory 2024.12.22/images/resized_images
INFO: Moving dec_23/subsetted_netcdf/TEMPO_NO2_L3_V03_20241222T204328Z_S011.nc to 2024.12.22/subsetted_netcdf/TEMPO_NO2_L3_V03_20241222T204328Z_S011.nc
INFO: Moving dec_23/subsetted_netcdf/TEMPO_NO2_L3_V03_20241222T214328Z_S012.nc to 2024.12.22/subsetted_netcdf/TEMPO_NO2_L3_V03_20241222T214328Z_S012.nc
INFO: Moving dec_23/subsetted_netcdf/TEMPO_NO2_L3_V03_20241222T222336Z_S013.nc to 2024.12.22/subsetted_netcdf/TEMPO_NO2_L3_V03_20241222T222336Z_S013.nc
INFO: Moving dec_23/subsetted_netcdf/TEMPO_NO2_L3_V03_20241222T230344Z_S014.nc to 2024.12.22/subsetted_netcdf/TEMPO_NO2_L3_V03_20241222T230344Z_S014.nc
INFO: Created directory 2024.12.23
INFO: Created directory 2024.12.23/subsetted_netcdf
INFO: Created directory 2024.12.23/cloud_images/resized_images
INFO: Created directory 2024.12.23/

Moved 47 files.
Moved 47 files.
Moved 47 files.
Moved 47 files.
Moved 47 files.


[PosixPath('dec_19'), PosixPath('dec_21'), PosixPath('dec_20')]

In [23]:
valid_directories.exists()

False

In [103]:
d = [] 
[d.extend(i.glob(cloud_image_resized_pattern)) for i in valid_directories];

for i in d:
    try:
        image_parser(i.name)
    except:
        print(i)

In [None]:
logging.wr