In [10]:
import os
import datetime
import exiftool
import pandas as pd
from datetime import timedelta
import numpy as np
import shutil

def list_files_in_directory(directory):
    """Return the full path of every file found under ``directory``.

    The tree is walked recursively with ``os.walk``; each file name is joined
    onto the folder it was found in. No filtering by file type is applied.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
    ]

####Function to read Date and Time data from Exif and use it to make new filename
def create_new_filenames(image_dir):
    """Read EXIF metadata for every file under ``image_dir`` and build a renaming table.

    Relies on names that are not defined in this cell and must already exist in
    the notebook namespace: ``Station``, ``Camera``, ``dest_drive``,
    ``convert_datetime`` and ``clean_path``.

    Parameters
    ----------
    image_dir : path of the folder to scan (walked recursively by
        ``list_files_in_directory``).

    Returns
    -------
    pandas.DataFrame sorted by Station, Camera and capture time, holding the
    columns SourceFile, FileName, Directory, FormattedDateTime,
    FileTypeExtension, diff, Make, Model, DateTimeOriginal, Station, Camera,
    plus the derived Sequence, FileNameNew, copy_dir, SourceFileNew and
    SourceFileNew_copy.
    """
    file_paths=list_files_in_directory(image_dir)
    # All paths are passed to exiftool in a single call.
    # presumably the result holds one metadata dict per file — confirm against pyexiftool
    with exiftool.ExifToolHelper() as et:
        exif = et.get_metadata(file_paths)  
    exif_info=pd.DataFrame.from_dict(exif)
    
    exif_info['Station']=Station
    exif_info['Camera']=Camera
    # The formatted timestamp is derived from the raw EXIF value, *before* the column is parsed below.
    # NOTE(review): the convert_datetime defined later in this notebook parses with
    # '%Y-%m-%d %H-%M-%S', while the next line parses the same raw value with
    # '%Y:%m:%d %H:%M:%S' — confirm which helper was in scope when this ran.
    exif_info['FormattedDateTime'] = exif_info['EXIF:DateTimeOriginal'].apply(convert_datetime)
    exif_info['EXIF:DateTimeOriginal'] = pd.to_datetime(exif_info['EXIF:DateTimeOriginal'],format='%Y:%m:%d %H:%M:%S')
    # reset_index() keeps the old index as an 'index' column; it is dropped by the column selection below.
    exif_info = exif_info.sort_values(by=['Station','Camera', 'EXIF:DateTimeOriginal']).reset_index()
    # Time elapsed since the previous image of the same Station/Camera (NaT for the first image of a group).
    exif_info['diff'] = exif_info.groupby(['Station','Camera'])['EXIF:DateTimeOriginal'].diff()

    columns_to_keep = ['SourceFile','File:FileName',
                       'File:Directory','FormattedDateTime',
                       'File:FileTypeExtension', 'diff',
                        'EXIF:Make', 'EXIF:Model',
                       'EXIF:DateTimeOriginal','Station','Camera']
    # Create a new DataFrame with only the specified columns
    exif_info = exif_info.loc[:, columns_to_keep]
    # Strip the 'Group:' prefix from column names, e.g. 'EXIF:Make' -> 'Make'.
    exif_info.columns = [col.split(':')[1] if ':' in col else col for col in exif_info.columns]

    # Number images within a burst: the counter restarts at 1 whenever the gap to the
    # previous image is missing or longer than one second, otherwise it increments.
    threshold = timedelta(seconds=1)
    Sequence= []
    for i in range(len(exif_info)):
        diff = exif_info['diff'][i]
        if pd.isna(diff) or diff > threshold  :
            sequence = 1
        else:
            sequence=Sequence[i-1] + 1
        Sequence.append(sequence)
    exif_info['Sequence'] = Sequence

    # New file name: <Station>_<Camera>_<FormattedDateTime>(<Sequence>).<FileTypeExtension>
    exif_info['FileNameNew'] = exif_info['Station'] + '_' + exif_info['Camera'] + '_' + exif_info['FormattedDateTime'] + '(' + exif_info['Sequence'].astype(str) + ')' + '.' + exif_info['FileTypeExtension']
    exif_info['Directory'] = exif_info['Directory'].apply(clean_path)
    # Mirror each source directory under dest_drive: the first backslash-separated
    # component of the directory is dropped and the remainder is joined onto dest_drive.
    copy_dir=[]
    for i in range(len(exif_info)):
        d=exif_info['Directory'][i]
        c=os.path.join(dest_drive,"\\".join(d.split("\\")[1:]))
        copy_dir.append(c)
    exif_info['copy_dir']=copy_dir
    exif_info['SourceFile'] = exif_info['SourceFile'].apply(clean_path)
    # SourceFileNew: the new name inside the original directory.
    # SourceFileNew_copy: the new name inside the mirrored directory under dest_drive.
    exif_info['SourceFileNew'] = exif_info.apply(lambda row: os.path.join(row['Directory'], row['FileNameNew']), axis=1)
    exif_info['SourceFileNew_copy'] = exif_info.apply(lambda row: os.path.join(row['copy_dir'], row['FileNameNew']), axis=1)
    
    return exif_info


def copy_images(table):
    """Copy one image described by ``table`` to its renamed destination.

    ``table`` is indexed by key: ``'SourceFile'`` is the file to copy and
    ``'SourceFileNew_copy'`` is the path it is copied to.
    """
    shutil.copy(table['SourceFile'], table['SourceFileNew_copy'])

import shutil
import datetime
from concurrent.futures import ThreadPoolExecutor

def copy_files(src='tmp', dest='tmp2'):
    """Copy every file found under ``src`` into ``dest`` using a thread pool.

    Parameters
    ----------
    src : folder to scan (recursively, via ``list_files_in_directory``).
    dest : destination passed unchanged to ``shutil.copy`` for every file.

    Raises
    ------
    Whatever ``shutil.copy`` raised for the first failing file. Every copy is
    attempted before the failure is reported.
    """
    # create full paths for all files we wish to copy
    files = list_files_in_directory(src)
    # One pool for the whole job: building a pool per file (and waiting for it to
    # shut down) would run the copies one after another.
    with ThreadPoolExecutor(10) as exe:
        tasks = [exe.submit(shutil.copy, path, dest) for path in files]
    # Leaving the ``with`` block waited for every task; asking each future for its
    # result re-raises a failed copy instead of dropping it silently.
    for task in tasks:
        task.result()

def copy_images(table):
    """Copy every path in ``table['SourceFileNew']`` to ``dest_drive`` using a thread pool.

    ``dest_drive`` is read from the notebook namespace and is passed unchanged
    to ``shutil.copy`` as the destination of every file.

    Raises
    ------
    Whatever ``shutil.copy`` raised for the first failing file. Every copy is
    attempted before the failure is reported.
    """
    with ThreadPoolExecutor(10) as exe:
        tasks = [exe.submit(shutil.copy, path, dest_drive) for path in table['SourceFileNew']]
    # The pool has finished by now; reading each result re-raises a failed copy
    # rather than letting it pass unnoticed.
    for task in tasks:
        task.result()

In [None]:
# Path to the exiftool executable (change this to match your system).
# NOTE(review): no code visible in this notebook reads `exiftool_path`;
# exiftool.ExifToolHelper() is constructed without arguments — confirm whether
# this path is meant to be passed to it.
exiftool_path = r"C:\Windows\exiftool.exe"

In [18]:
# Folder holding the images of one camera; it is scanned recursively.
camera_dir=r"G:\Guzzler_data\2023\CameraTrap\SudasariACD\A"
# Build the renaming table and report the wall-clock time the call took.
# NOTE(review): a directory path is passed here, which matches the exiftool-based
# create_new_filenames(image_dir) definition, not the later DataFrame-based one —
# confirm which definition is in scope when this cell runs.
start = datetime.datetime.now()
renaming_table = create_new_filenames(camera_dir)
end= datetime.datetime.now()
# Prints the elapsed time as a timedelta.
print(end-start)

0:12:36.417687


In [None]:
def delete_images_batch(src_list, batch_size=512):
    """Delete every file in ``src_list``, working through it ``batch_size`` paths at a time.

    Parameters
    ----------
    src_list : sequence of file paths to remove.
    batch_size : number of deletions submitted before waiting for them to finish.

    Raises
    ------
    Whatever ``os.remove`` raised for the first failing path; later batches are
    then not submitted.
    """
    src_files = list(src_list)
    # Threads are used rather than processes: os.remove is I/O-bound, and the
    # notebook already imports ThreadPoolExecutor by name (``concurrent`` itself
    # is never imported in the visible cells).
    with ThreadPoolExecutor() as exe:
        for start in range(0, len(src_files), batch_size):
            batch_tasks = [exe.submit(os.remove, src) for src in src_files[start:start + batch_size]]
            # Wait for this batch only; finished batches are not re-checked.
            for task in batch_tasks:
                task.result()

    return

def copy_images_batch(src_list, dest_list, batch_size=512):
    """Transfer each source file to its paired destination, ``batch_size`` files at a time.

    NOTE(review): despite the name, the transfer is done with ``shutil.move``
    (as in the original code), so the source files are removed — confirm
    whether a copy was intended.

    Parameters
    ----------
    src_list : sequence of source paths. A path that appears more than once is
        transferred only once, using the destination paired with its first
        occurrence.
    dest_list : sequence of destination paths, aligned with ``src_list``.
    batch_size : number of transfers submitted before waiting for them to finish.

    Raises
    ------
    Whatever ``shutil.move`` raised for the first failing pair.
    """
    # De-duplicate sources while keeping order and the source/destination pairing.
    # (A plain set cannot be sliced and would lose the alignment with dest_list.)
    seen = set()
    pairs = []
    for src, dest in zip(src_list, dest_list):
        if src in seen:
            continue
        seen.add(src)
        pairs.append((src, dest))

    with ThreadPoolExecutor(20) as exe:
        for start in range(0, len(pairs), batch_size):
            batch_tasks = [exe.submit(shutil.move, src, dest) for src, dest in pairs[start:start + batch_size]]
            # Wait for this batch to complete before proceeding to the next one
            for task in batch_tasks:
                task.result()

    return
            
def move_images_batch(src_list, dest_list, batch_size=1024):
    """Copy each source file to its paired destination, then delete the sources.

    Parameters
    ----------
    src_list : sequence of source paths.
    dest_list : sequence of destination paths, aligned with ``src_list``.
    batch_size : chunk size used when submitting tasks to the pool. All copies
        are awaited together before any source is removed.

    Raises
    ------
    Whatever ``shutil.copy`` or ``os.remove`` raised for the first failing task.
    If a copy fails, no source is removed.
    """
    src_files = list(src_list)
    dest_files = list(dest_list)
    # Only delete sources that are not themselves listed as a destination.
    unique_src_files = list(set(src_files) - set(dest_files))
    with ThreadPoolExecutor() as exe:
        # Copy images to the destination directory
        batch_tasks_copy = []
        for i in range(0, len(src_files), batch_size):
            src_batch = src_files[i:i + batch_size]
            dest_batch = dest_files[i:i + batch_size]
            batch_tasks_copy.extend([exe.submit(shutil.copy, src, dest) for src, dest in zip(src_batch, dest_batch)])
        # Wait for every copy to complete (and surface any failure) before deleting anything
        for task in batch_tasks_copy:
            task.result()

        # Now, remove the source images
        batch_tasks_remove = []
        for i in range(0, len(unique_src_files), batch_size):
            src_batch = unique_src_files[i:i + batch_size]
            batch_tasks_remove.extend([exe.submit(os.remove, src) for src in src_batch])
        # Wait for all remove tasks to complete
        for task in batch_tasks_remove:
            task.result()

        # ``datetime`` is imported as a module in this notebook, hence datetime.datetime.
        print(f"First {len(src_files)} images copied and removed at {datetime.datetime.now()}")

In [1]:
import datetime
import pandas as pd
import exifread
import os
from datetime import timedelta
import shutil
from concurrent.futures import ThreadPoolExecutor

def list_files_in_directory(directory):
    """Return the full paths of all JPEG files found under ``directory``.

    The tree is walked recursively; a file is kept when its name ends with
    ``.jpg`` or ``.jpeg``, compared case-insensitively.
    """
    jpeg_suffixes = ('.jpg', '.jpeg')
    matches = []
    for parent, _subdirs, names in os.walk(directory):
        matches.extend(
            os.path.join(parent, name)
            for name in names
            if name.lower().endswith(jpeg_suffixes)
        )
    return matches

def convert_datetime(dt):
    """Format ``dt`` as ``YYYYMMDD_HHMMSS``.

    ``dt`` is parsed with ``pd.to_datetime`` using the format
    ``'%Y-%m-%d %H-%M-%S'``. When pandas reports the value as out of bounds,
    the literal string ``'OutOfBoundsDatetime'`` is returned instead.
    """
    try:
        parsed = pd.to_datetime(dt, format='%Y-%m-%d %H-%M-%S')
    except pd.errors.OutOfBoundsDatetime:
        return 'OutOfBoundsDatetime'
    return parsed.strftime('%Y%m%d_%H%M%S')

def clean_path(path):
    """Return ``path`` normalised by ``os.path.normpath``."""
    normalised = os.path.normpath(path)
    return normalised

def process_image(image_path):
    """Read the EXIF tags of one image and return its metadata record.

    The file is opened in binary mode and handed to ``exifread.process_file``
    with ``details=False``. The returned dict holds the path, its directory,
    file name and extension, and the 'Image Make', 'Image Model' and
    'EXIF DateTimeOriginal' tags; a missing tag is replaced by ``'N/A'``.
    """
    with open(image_path, 'rb') as handle:
        exif_tags = exifread.process_file(handle, details=False)

    # Path pieces come from the path string alone.
    base_name = os.path.basename(image_path)
    record = {
        'SourceFile': image_path,
        'Directory': os.path.dirname(image_path),
        'FileName': base_name,
        'FileTypeExtension': os.path.splitext(base_name)[1],
    }
    # Tag lookups, in the same column order as before.
    for column, tag_name in (
        ('Make', 'Image Make'),
        ('Model', 'Image Model'),
        ('DateTimeOriginal', 'EXIF DateTimeOriginal'),
    ):
        record[column] = exif_tags.get(tag_name, 'N/A')
    return record

def read_exif(image_dir):
    """Collect the metadata of every image under ``image_dir`` into a DataFrame.

    The files returned by ``list_files_in_directory`` are counted (the count is
    printed) and then processed by ``process_image`` on a pool of 20 threads;
    the resulting records become the rows of the returned DataFrame, in the
    same order as the file list.
    """
    image_paths = list_files_in_directory(image_dir)
    print(len(image_paths))
    # 20 worker threads; adjust as needed
    with ThreadPoolExecutor(20) as pool:
        records = [record for record in pool.map(process_image, image_paths)]
    return pd.DataFrame(records)

def create_new_filenames(exif_info):
    """Derive the new file name and destination path for every image in ``exif_info``.

    Relies on the notebook-level names ``Station``, ``Camera`` and ``dest_dir``
    and on the helpers ``convert_datetime`` and ``clean_path``.

    Parameters
    ----------
    exif_info : DataFrame with at least the columns SourceFile, Directory,
        FileTypeExtension and DateTimeOriginal (values in the
        ``'%Y:%m:%d %H:%M:%S'`` format). It is not modified.

    Returns
    -------
    A new DataFrame sorted by Station, Camera and DateTimeOriginal with the
    extra columns Station, Camera, FormattedDateTime, diff, Sequence,
    FileNameNew and DestFile.
    """
    # ``assign`` returns a new frame, so the caller's table keeps its original
    # columns and its unparsed DateTimeOriginal values.
    captured_at = pd.to_datetime(exif_info['DateTimeOriginal'], format='%Y:%m:%d %H:%M:%S')
    table = exif_info.assign(Station=Station, Camera=Camera, DateTimeOriginal=captured_at)
    table['FormattedDateTime'] = table['DateTimeOriginal'].apply(convert_datetime)
    table = table.sort_values(by=['Station', 'Camera', 'DateTimeOriginal']).reset_index(drop=True)
    # Time since the previous image of the same Station/Camera (NaT for the first one).
    table['diff'] = table.groupby(['Station', 'Camera'])['DateTimeOriginal'].diff()

    ### Add sequence number
    # The counter restarts at 1 when the gap is missing or longer than one second;
    # otherwise the image continues the current burst.
    threshold = timedelta(seconds=1)
    sequence_numbers = []
    current = 0
    for gap in table['diff']:
        current = 1 if pd.isna(gap) or gap > threshold else current + 1
        sequence_numbers.append(current)
    table['Sequence'] = sequence_numbers

    ### Construct new filename
    # <Station>_<Camera>_<FormattedDateTime>(<Sequence>)<FileTypeExtension>
    table['FileNameNew'] = (
        table['Station'] + '_' + table['Camera'] + '_' + table['FormattedDateTime']
        + '(' + table['Sequence'].astype(str) + ')' + table['FileTypeExtension']
    )
    table['Directory'] = table['Directory'].apply(clean_path)
    table['SourceFile'] = table['SourceFile'].apply(clean_path)
    table['DestFile'] = (dest_dir + "\\" + table['FileNameNew']).apply(clean_path)
    return table

def rename_images(table):
    """Rename one image: ``table['SourceFile']`` becomes ``table['DestFile']``.

    ``table`` is indexed by key; the rename is performed with ``os.rename``.
    """
    os.rename(table['SourceFile'], table['DestFile'])

def copy_images(table):
    """Copy every ``SourceFile`` in ``table`` to its paired ``DestFile`` using a thread pool.

    Parameters
    ----------
    table : mapping or DataFrame whose ``'SourceFile'`` and ``'DestFile'``
        entries are aligned sequences of paths.

    Raises
    ------
    Whatever ``shutil.copy`` raised for the first failing pair. Every copy is
    attempted before the failure is reported.
    """
    src_files = table['SourceFile']
    dest_files = table['DestFile']
    with ThreadPoolExecutor(10) as exe:
        tasks = [exe.submit(shutil.copy, src_path, dest_path) for src_path, dest_path in zip(src_files, dest_files)]
    # The pool has finished by now; reading each result re-raises a failed copy
    # rather than letting it pass unnoticed.
    for task in tasks:
        task.result()

In [None]:
def move_images_batch2(src_list, dest_list, batch_size=512):
    """Move each source file into the folder of its paired destination, batch by batch.

    NOTE(review): every file is moved to ``os.path.dirname(dest)``, so it keeps
    its original file name and the file-name part of ``dest`` is ignored —
    confirm this is intended.
    NOTE(review): ``tqdm`` is not imported in the visible cells; it must already
    be in the notebook namespace.

    Parameters
    ----------
    src_list : sequence of source paths.
    dest_list : sequence of destination paths, aligned with ``src_list``.
    batch_size : number of moves submitted before waiting for them to finish.

    Raises
    ------
    Whatever ``shutil.move`` raised for the first failing pair; later batches
    are then not submitted.
    """
    src_files = list(src_list)
    dest_files = list(dest_list)
    # Threads are used rather than processes: moving files is I/O-bound, and the
    # notebook already imports ThreadPoolExecutor by name (``concurrent`` itself
    # is never imported in the visible cells).
    with ThreadPoolExecutor() as exe:
        for i in tqdm(range(0, len(src_files), batch_size)):
            src_batch = src_files[i:i + batch_size]
            dest_batch = dest_files[i:i + batch_size]

            batch_tasks = [exe.submit(shutil.move, src, os.path.dirname(dest)) for src, dest in zip(src_batch, dest_batch)]
            # Wait for this batch to complete before proceeding to the next one
            for task in batch_tasks:
                task.result()
    return

In [None]:
##old (working, but from direct CSV path)
# For each prediction CSV in `df_dirs`, make sure a folder per animal exists next
# to the CSV, then look for the first row whose predicted class is that animal.
# NOTE(review): `df_dirs` is not defined in the visible cells — it must already
# exist in the notebook namespace.
# NOTE(review): the three `break` statements stop everything after the first CSV
# and the first animal; `new_path` is computed but never used and no file is
# moved, so this looks like a dry run — confirm before relying on it.
animal_list = ["GIB"]
for df_dir in df_dirs:
    print(df_dir)
    # The CSV's own folder is where the images and the per-animal folders live.
    parent_path = os.path.dirname(df_dir)
    df = pd.read_csv(df_dir)
    for animal in animal_list:
        animal_folder = os.path.join(parent_path, animal)
        # print(animal_folder)
        if not os.path.exists(animal_folder):
            os.makedirs(animal_folder)
        
        for item, row in df.iterrows():
            # The CSV stores names without extension; '.jpg' is appended here.
            file_name = f"{row['Filename']}.jpg"
            img_path = os.path.join(parent_path, file_name)
            # file_dir = row["File_directory"]
            pred_class = row["Order_pred"]
            # Target location: <CSV folder>/<predicted class>/<file name>
            new_path = os.path.join(parent_path, pred_class, file_name)
            # print(img_path)
            # print(new_path)
            if pred_class == animal:  
                # Only the first matching file name is printed.
                print(file_name)
                break
        break
    break
          