In [None]:
from PIL import Image
from PIL.TiffTags import TAGS
import xml.etree.ElementTree as ET


In [None]:
def extract_creation_date(img_path, verbose=True):
    """Return the creation-date text stored in an image's ImageDescription XML.

    The image is opened with PIL, its TIFF tags are collected into a
    ``{tag name: value}`` dictionary, and the first value of the
    ``ImageDescription`` tag is parsed as XML. The text of the first child
    of the root's third element is returned.

    Parameters
    ----------
    img_path : str
        Path of the image file to read.
    verbose : bool, optional
        When True (the default, matching the previous behaviour) the whole
        metadata dictionary is printed for inspection.

    Returns
    -------
    str
        Text of the XML element described above.
        NOTE(review): callers treat this as an ISO-like timestamp; the XML
        schema is not visible here, so confirm against a sample file.

    Raises
    ------
    KeyError
        If the image carries no ``ImageDescription`` tag.
    IndexError
        If the XML does not have the expected nesting (root[2][0]).
    """
    with Image.open(img_path) as img:
        # TAGS.get(key, key): tag ids that PIL's table does not know
        # (vendor/private tags) are kept under their numeric id instead of
        # raising KeyError and aborting the whole extraction.
        meta_dict = {TAGS.get(key, key): img.tag[key] for key in img.tag.keys()}

        if verbose:
            # Debug aid: dump every tag that was read from the file
            print(meta_dict)

    if 'ImageDescription' not in meta_dict:
        raise KeyError(f"no ImageDescription tag found in {img_path!r}")

    # The tag value is a sequence; its first item holds the XML document
    xml_info = meta_dict['ImageDescription'][0]

    # Parse the XML using ElementTree
    myroot = ET.fromstring(xml_info)

    # Positional lookup: third child of the root, then its first child.
    # NOTE(review): this depends on element order in the vendor XML.
    children = list(myroot[2])

    return children[0].text

In [None]:
import glob
import os 

# Folder that holds the image files.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
directory = "/Users/isabelgibson/Desktop/test sets for scripts"

# Wildcard pattern (including its leading "/") that is appended to the folder
specifier = "/*Plate_D*d4*"

# Collect every path inside the folder that matches the wildcard pattern
image_file_list = glob.glob(f"{directory}{specifier}")

In [None]:
# Libraries needed by this cell
import re
from datetime import datetime,timedelta

# Column-oriented store: each key maps to a list with one entry per image
date_dict = {'filename':[], 'creation_date' : [],'time': [],'time_per_well':[]}

# Placeholder written to 'time_per_well' for every image
generic_time = "00:00:00.000000"

# Time of day following the "T" separator: HH:MM:SS plus optional fraction
_TIME_OF_DAY = re.compile(r"T(\d{2}:\d{2}:\d{2})(?:\.(\d+))?")


def _parse_creation_timestamp(creation_date):
    """Split a creation-date string into (date text, datetime).

    The first 10 characters are taken as the ``YYYY-MM-DD`` date. The time
    of day is located after the ``T`` separator, so the number of
    fractional-second digits and any trailing designator (e.g. ``Z``) no
    longer matter. The previous fixed-offset slicing only coped with 4-7
    fractional digits and raised ValueError for e.g. millisecond stamps.

    Raises
    ------
    ValueError
        If no ``THH:MM:SS`` part is found or the date cannot be parsed.
    """
    date_text = creation_date[:10]
    match = _TIME_OF_DAY.search(creation_date)
    if match is None:
        raise ValueError(f"unrecognised creation date format: {creation_date!r}")

    clock, fraction = match.groups()
    # Keep at most 5 fractional digits: this mirrors the 14-character
    # "HH:MM:SS.fffff" cut of the earlier implementation, so timestamps
    # that already parsed give exactly the same datetime as before.
    fraction = (fraction or "0")[:5]

    stamp = datetime.strptime(f"{date_text} {clock}.{fraction}", '%Y-%m-%d %H:%M:%S.%f')
    return date_text, stamp


# Loop through each image file in the list
for img_path in image_file_list:

    # Creation-date string taken from the image metadata
    file_creation_date = extract_creation_date(img_path)

    # File name without its directory part
    base_name = os.path.basename(img_path)

    # Date text and combined date/time as a datetime object
    date, combined_date_time = _parse_creation_timestamp(file_creation_date)

    # Append data to the dictionary
    date_dict['filename'].append(base_name)
    date_dict['time'].append(combined_date_time)
    date_dict["creation_date"].append(date)
    date_dict["time_per_well"].append(generic_time[:14])

In [None]:
# In this cell: map every filename to its datetime and order the mapping in time
from datetime import datetime,timedelta
time_stamps = []

# filename -> datetime of that image
filename_timestamp = dict(zip(date_dict["filename"], date_dict["time"]))

# Same mapping with its entries arranged by ascending datetime
filename_timestamp_sorted = dict(
    sorted(filename_timestamp.items(), key=lambda pair: pair[1])
)

In [None]:
# (filename, datetime) pairs in chronological order
ordered_frames = list(filename_timestamp_sorted.items())

# Every file starts out as NaN; only the first file of each complete group
# of four consecutive frames receives a real time difference below.
delta_dict = {name: float('nan') for name, _ in ordered_frames}

# Walk the frames in groups of four; a trailing group with fewer than four
# frames is left untouched (all NaN).
for start in range(0, len(ordered_frames) - 3, 4):
    first_name, first_time = ordered_frames[start]
    _, fourth_time = ordered_frames[start + 3]

    # Time elapsed between the first and the fourth frame of the group,
    # stored under the filename of the first frame
    delta_dict[first_name] = fourth_time - first_time


In [None]:
# Replace the placeholder in 'time_per_well' with the value computed in
# delta_dict for every file that has an entry there.
date_dict["time_per_well"] = [
    delta_dict.get(name, current)
    for name, current in zip(date_dict["filename"], date_dict["time_per_well"])
]

# Row order that sorts the records by their datetime (the 'time' column).
# sorted() is stable, so rows with equal datetimes keep their relative order.
row_order = sorted(range(len(date_dict['time'])), key=lambda row: date_dict['time'][row])

# Apply the same row order to each of the four parallel lists. Working with
# an index permutation (instead of zip(*...) unpacking) also handles the
# case where no files were found and all lists are empty.
for column in ('filename', 'creation_date', 'time', 'time_per_well'):
    date_dict[column] = [date_dict[column][row] for row in row_order]

In [None]:
# Passage number: characters -19..-16 of each filename (counted from the
# end) with any leading/trailing "_" and "p" characters removed
pass_num = [str(name[-19:-15]).strip("_p") for name in date_dict["filename"]]

# Well coordinates: characters -12..-7 of each filename (counted from the end)
coord = [str(name[-12:-6]) for name in date_dict["filename"]]

# Attach both lists to date_dict as new columns
date_dict["coordinates"] = coord
date_dict["passage_number"] = pass_num


In [None]:
# pandas is used to tabulate and export the collected data
import pandas as pd

# One row per image, one column per key of date_dict
df = pd.DataFrame(date_dict)

# Prefix for the output file name, typed in by the user
csvfilename = input("enter the specifier for the ouput file here: ")

# Write the table to "<prefix>_creation_date.csv" without the index column
df.to_csv(f"{csvfilename}_creation_date.csv", index=False)