In [2]:
import os
import shutil
import re

from tqdm.notebook import tqdm

In [3]:
def getKey(filename):
    # Extract the filename from the full path
    base_filename = os.path.basename(filename)
    
    # Use regular expression to find numerical portion
    matches = re.findall(r'\d+', base_filename)
    
    if matches:
        # Join the matched digits and return as a string
        return ''.join(matches)
    else:
        return None
    
def worldFileDict(source_dir):
    
    out = {}
    for root, dirs, files in os.walk(source_dir):
        for file in files:
            if file.lower().endswith(('.jpg', '.tif', '.png')):
                file = os.path.join(root, file)
                if getKey(file) not in out:
                    out.update({getKey(file) : file})
                
    return out                

def get_world_file(image_path):
    base_path, ext = os.path.splitext(image_path)
    world_extensions = ['.jpw', '.tfw', '.pgw', '.pnw']
    for world_ext in world_extensions:
        world_file = base_path + world_ext
        if os.path.exists(world_file):
            return world_file
    return None

def move_file(image_path, destination_dir):
    file_name = os.path.basename(image_path)
    destination_path = os.path.join(destination_dir, file_name)
    shutil.move(image_path, destination_path)
    print(f"Moved {file_name} to {destination_dir}")
    return destination_path
    

def copy_world_file(source_image_path, destination_image_path):
    source_base_path, source_ext = os.path.splitext(source_image_path)
    destination_base_path, destination_ext = os.path.splitext(destination_image_path)

    world_extensions = {
        '.jpg': '.jpw',
        '.tif': '.tfw',
        '.png': '.pgw'
    }

    source_world_file = source_base_path + world_extensions.get(source_ext.lower())
    destination_world_file = destination_base_path + world_extensions.get(destination_ext.lower())

    if os.path.exists(source_world_file):
        shutil.copy(source_world_file, destination_world_file)
        print(f"Copied world file from {source_world_file} to {destination_world_file}")
    else:
        print("Source image does not have a corresponding world file.")

In [4]:
base_dir = r"C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\"

known_dir = f"{base_dir}Known"
unknown_dir = f"{base_dir}Unknown"
computed_dir = f"{base_dir}Computed"
encoded_dir  = f"{base_dir}Encoded"

known_dict = worldFileDict(known_dir)

In [33]:
image_extensions = ['.jpg', '.jpeg', '.png', '.tif', '.tiff']
    
folder_path = unknown_dir  

for file_name in tqdm(os.listdir(folder_path)):
    file_path = os.path.join(folder_path, file_name)
    if os.path.isfile(file_path) and any(file_name.lower().endswith(ext) for ext in image_extensions):
        key = getKey(file_path)
        if key in known_dict:
            out_image = move_file(file_path, computed_dir)
            copy_world_file(known_dict[key], out_image)
        

  0%|          | 0/694 [00:00<?, ?it/s]

Moved 48039C0010I.tif to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed
Copied world file from C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Known\AAA_EffectiveDownload\48039C0010K.tfw to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed\48039C0010I.tfw
Moved 48039C0020H.tif to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed
Copied world file from C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Known\AAA_EffectiveDownload\48039C0020K.tfw to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed\48039C0020H.tfw
Moved 48039C0030I.tif to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed
Copied world file from C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Known\AAA_EffectiveDownload\48039C0030K.tfw to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data

Copied world file from C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Known\AAA_EffectiveDownload\48201C0380N.tfw to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed\48201C0380G.tfw
Moved 48201C0380J.tif to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed
Copied world file from C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Known\AAA_EffectiveDownload\48201C0380N.tfw to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed\48201C0380J.tfw
Moved 48201C0385G.tif to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed
Copied world file from C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Known\AAA_EffectiveDownload\48201C0385N.tfw to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed\48201C0385G.tfw
Moved 48201C0385J.tif to C:\Users\fhacesga\Desktop\FIRMsDigitiz

Moved 48201C0880J.tif to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed
Copied world file from C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Known\AAA_EffectiveDownload\48201C0880M.tfw to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed\48201C0880J.tfw
Moved 48201C0880K.tif to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed
Copied world file from C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Known\AAA_EffectiveDownload\48201C0880M.tfw to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed\48201C0880K.tfw
Moved 48201C0885J.tif to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Computed
Copied world file from C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\\Known\AAA_EffectiveDownload\48201C0885N.pgw to C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data

In [None]:
#inst1 = strrep(strcat("for %N in (",visualdir,"Survey_",num2str(surveycount),"*.jpg) DO ",gdalwarp," -of GTiff -co tiled=yes -dstnodata ""0 0 0"" %N  """,visualdir,"%~nN_t.tif"""),"\\","\");
#inst2 = strrep(strcat(gdalvrt," """,path2,"\visual_survey",num2str(surveycount),".vrt"""," """,visualdir,"*.tif""" ),"\\","\");
#inst3 = strrep(strcat(gdaltranslate," """,path2,"\visual_survey",num2str(surveycount),".vrt"""," """,path2,"\visual",num2str(surveycount),".tif"""),"\\","\");

Instruction to encode world file into GEOTIFF

In [16]:
gdal_warp = r'C:\Program Files\QGIS 3.26.3\bin\gdalwarp.exe'
gdal_vrt  = r'C:\Program Files\QGIS 3.26.3\bin\gdalbuildvrt.exe'

gdal_warp_options= '-of GTiff -co tiled=yes -dstnodata "255 255 255" %N'
output_gdal_warp =  f'"{encoded_dir}\\%~nN_t.tif"'

inst1 = f'for %N in ("{computed_dir}\\*.tif") DO "{gdal_warp}" {gdal_warp_options} {output_gdal_warp}'

inst1 = inst1.replace("\\\\", '\\')

print(inst1)

for %N in ("C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\Computed\*.tif") DO "C:\Program Files\QGIS 3.26.3\bin\gdalwarp.exe" -of GTiff -co tiled=yes -dstnodata "255 255 255" %N "C:\Users\fhacesga\Desktop\FIRMsDigitizing\RECTDNN\data\000_WorkingFiles\Encoded\%~nN_t.tif"


In [15]:
import os
import pandas as pd



In [14]:
data


[['48029C0265G.tfw',
  '2.500203185928199',
  '0.00014730100984926576',
  '-8.7495621604628313e-005',
  '-2.5001435976703577',
  '2119434.4091506097',
  '13754026.194645211'],
 ['48029C0313F.pgw',
  '0.380996620139209540',
  '0.00000676529918001116920',
  '0.0000617134237794037370',
  '-0.380972205525804960',
  '559343.17084432940',
  '3267078.8855543220'],
 ['48029C0385G.tfw',
  '2.4997255470471984',
  '-0.00014038076871707115',
  '-0.00014038076899527903',
  '-2.4997255470472983',
  '2099650.0649210545',
  '13731209.133634955'],
 ['48029C0405G.tfw',
  '2.5001381289058999',
  '9.7606220068754967e-005',
  '-0.00022486753039311193',
  '-2.5001248194075005',
  '2119537.9493601881',
  '13731300.55342673'],
 ['48029C0410G.tfw',
  '2.499803464109883',
  '0.00046819315721959243',
  '0.00046819315679337797',
  '-2.499803464110411',
  '2139484.6473782375',
  '13731502.469413478'],
 ['48029C0440G.tfw',
  '2.5005257114978483',
  '-8.0202076464523504e-005',
  '-9.9699683072433504e-005',
  '-2.500