In [7]:
import os

#function to rename files
def file_rename(directory='images', start=10):
    """
    Renames every regular file in ``directory`` to ``img_<number>.jpg``.

    Files are handled in sorted name order and numbered upwards from ``start``.
    Sub-directories are left untouched. A number whose target name is already
    taken by a different file is skipped, so an existing file is never
    overwritten and running the cell a second time cannot lose images
    (numbering may then contain gaps).

    directory: folder whose files are renamed (defaults to 'images').
    start: first number handed out (defaults to 10).

    Note: the new name always ends in ``.jpg``, whatever the original extension was.
    """
    count = start
    # sorted() makes the numbering reproducible; os.listdir order is arbitrary
    for image in sorted(os.listdir(directory)):
        source = os.path.join(directory, image)
        if not os.path.isfile(source):
            # skip sub-directories so they are not turned into "img_N.jpg"
            continue
        target = os.path.join(directory, f'img_{count}.jpg')
        # os.rename may silently replace an existing target, so move on to the
        # next free number instead of clobbering another image
        while target != source and os.path.exists(target):
            count += 1
            target = os.path.join(directory, f'img_{count}.jpg')
        if target != source:
            os.rename(source, target)
        count += 1
# Run the rename as soon as this cell executes.
# NOTE(review): this cell carries execution count 7 while the cells below show 1, 3 and 4 - confirm the intended run order.
file_rename()

In [1]:
from exif import Image
from PIL import Image
from PIL.ExifTags import TAGS
from csv import DictWriter
import pandas as pd
import os
import numpy as np

def img_filepaths(directory="./images"):
    """
    Returns a list with the path of every regular file found directly inside ``directory``.

    directory: folder to scan (defaults to "./images").

    Sub-directories are skipped and not descended into. Each returned path is
    ``directory`` joined with the file name, in the order os.listdir yields them.
    """
    filepaths = []
    for name in os.listdir(directory):
        # build the path relative to the scanned directory
        candidate = os.path.join(directory, name)
        if os.path.isfile(candidate):
            # keep files only; folders are ignored
            filepaths.append(candidate)
    return filepaths

def dict_convert():
    """
    Returns a list of dictionaries, one per readable image returned by img_filepaths().

    Each dictionary maps the human readable EXIF tag name (or the raw tag id when
    Pillow has no name for it) to the tag's value, plus a 'File Path' entry holding
    the image's path. Bytes values are decoded to text; bytes that are not valid
    text are replaced rather than causing the whole image to be dropped.

    Files that Pillow cannot open or parse are skipped with a warning; no entry
    is added to the list for them.
    """
    # local import: `warnings` is not among the notebook's imports
    import warnings

    dict_list = []
    for image in img_filepaths():
        # Loop through each file in our images directory
        try:
            # the with-block closes the underlying file handle once the tags are read
            with Image.open(image) as image_file:
                exifdata = image_file.getexif()
                file_dict = {}
                for tag_id in exifdata:
                    # convert the numeric tag id into a human readable tag name
                    tag = TAGS.get(tag_id, tag_id)
                    data = exifdata.get(tag_id)
                    if isinstance(data, bytes):
                        # binary payloads are not always valid text; replace the
                        # undecodable bytes instead of losing the whole image
                        data = data.decode(errors="replace")
                    file_dict[tag] = data
        except Exception as exc:
            # best effort: a file that is not a readable image is reported and skipped
            warnings.warn(f"Skipping {image!r}: could not read EXIF metadata ({exc})")
            continue
        file_dict['File Path'] = image
        dict_list.append(file_dict)
    return dict_list

# Gather the metadata dictionaries produced by dict_convert().
dict_list = dict_convert()

# Fixed set of columns written to the CSV. Metadata keys that are not listed
# here are dropped by the writer (extrasaction='ignore').
field_names = [
    'TileWidth', 'TileLength', 'GPSInfo', 'ResolutionUnit', 'ExifOffset',
    'Make', 'Model', 'Software', 'Orientation', 'DateTime',
    'XResolution', 'YResolution', 'HostComputer', 'File Path',
]

# newline='' leaves line-ending handling to the csv module.
with open("./data/meta_data.csv", 'w', newline='') as csvfile:
    writer = DictWriter(
        csvfile,
        fieldnames=field_names,
        extrasaction='ignore',
    )
    # header row first, then one row per image dictionary
    writer.writeheader()
    writer.writerows(dict_list)

In [3]:
import pandas as pd
import numpy as np
import hashlib
import datetime

# Load the metadata CSV (same path the earlier cell writes to) into a pandas DataFrame.
# header=0 takes the column names from the first row of the file.
meta_file = './data/meta_data.csv'
meta_df = pd.read_csv(meta_file, header=0)

def md5_hash():
    """
    Adds an 'MD5 Hash' column to meta_df and removes rows whose files have identical contents.

    The hash is computed from the file named in each row's 'File Path' value.
    When several rows share a hash only the first one is kept. The module-level
    ``meta_df`` is modified in place; nothing is returned.
    """
    def calculate_hash_val(path, block_size=65536):
        """
        Returns the hex MD5 digest of the file at ``path``, read ``block_size`` bytes at a time.
        """
        hasher = hashlib.md5()
        # the with-block closes the file even if a read fails part-way through
        with open(path, 'rb') as image:
            # read in chunks so large images are never held in memory whole;
            # iter() stops at the empty bytes object returned at end of file
            for chunk in iter(lambda: image.read(block_size), b''):
                hasher.update(chunk)
        return hasher.hexdigest()
    # run calculate_hash_val over the file path column and add the result as 'MD5 Hash'
    meta_df['MD5 Hash'] = meta_df['File Path'].map(calculate_hash_val)
    # drop duplicate rows (same file contents), keeping the first occurrence
    meta_df.drop_duplicates(keep='first', subset='MD5 Hash', inplace=True)

# Add the 'MD5 Hash' column to meta_df and drop rows whose hash repeats an earlier row.
md5_hash()

# collects the rows that have no usable metadata and saves them to their own csv file
def reject_rows():
    """
    Writes every meta_df row whose Make, Model and DateTime are all missing to data/reject.csv.

    meta_df itself is not modified and nothing is returned.
    """
    required_columns = ['Make', 'Model', 'DateTime']
    # True for rows where every one of the required columns is NaN
    all_missing = meta_df[required_columns].isna().all(axis=1)
    rejected = meta_df[all_missing]
    rejected.to_csv('data/reject.csv', encoding='utf-8', index=False)

# Save the rows that have no Make, Model or DateTime to data/reject.csv.
reject_rows()

def drop_na():
    """
    Removes, in place, the meta_df rows whose Make, Model and DateTime are all missing.

    Returns the same (now modified) meta_df object.
    """
    required_columns = ['Make', 'Model', 'DateTime']
    # how='all': a row is dropped only when every required column is NaN
    meta_df.dropna(
        subset=required_columns,
        how='all',
        axis='index',
        inplace=True,
    )
    return meta_df

# Drop the rows with no Make, Model or DateTime from meta_df in place (the returned frame is not used here).
drop_na()

def date_format():
    """
    Strips the time from meta_df's DateTime column, then sorts by date and writes data/sorted.csv.

    The DateTime column of the module-level ``meta_df`` is replaced in place.
    The sorted copy (earliest date first) is written to data/sorted.csv and returned.
    """
    def remove_time(value):
        """
        Returns the date part of an EXIF 'YYYY:MM:DD HH:MM:SS' string.

        Any value that does not match that format is returned unchanged. This
        covers missing values and values already converted to a date, so
        re-running the cell does not wipe previously converted dates.
        """
        try:
            # keep only the date; hours, minutes and seconds are dropped
            return datetime.datetime.strptime(str(value), '%Y:%m:%d %H:%M:%S').date()
        except (ValueError, TypeError):
            # only parsing failures are tolerated; anything else should surface
            return value
    # use .map to call remove_time() on every DateTime value
    meta_df["DateTime"] = meta_df["DateTime"].map(remove_time)
    # NOTE(review): unparseable strings stay as strings; mixing them with date
    # objects may make the sort below fail - confirm against the real data
    date_sorted_df = meta_df.sort_values(["DateTime"])
    # write the sorted rows to a new csv file
    date_sorted_df.to_csv("data/sorted.csv", encoding="utf-8", index=False)
    return date_sorted_df

# Last expression of the cell, so the sorted DataFrame it returns is shown as the cell output.
date_format()
    

Unnamed: 0,TileWidth,TileLength,GPSInfo,ResolutionUnit,ExifOffset,Make,Model,Software,Orientation,DateTime,XResolution,YResolution,HostComputer,File Path,MD5 Hash
65,,,3152.0,2.0,238.0,samsung,SM-G900V,G900VVRU2DQL1,6.0,2018-10-11,72.0,72.0,,./images/weird_tree.jpg,a54ef06d3c8053152829c67619bd8e91
3,,,3152.0,2.0,238.0,samsung,SM-G900V,G900VVRU2DQL1,1.0,2018-10-25,72.0,72.0,,./images/grasshopper2.jpg,dfd9868db12d2991323042b79a2f5cca
24,,,3152.0,2.0,238.0,samsung,SM-G900V,G900VVRU2DQL1,1.0,2019-01-13,72.0,72.0,,./images/staples_center.jpg,f505a5c090af0cd4fc81d0ba5ccfb37b
17,,,3152.0,2.0,238.0,samsung,SM-G900V,G900VVRU2DQL1,1.0,2019-03-23,72.0,72.0,,./images/sumo.jpg,c237c09d1d78631007f261720a3fbdf5
1,,,762.0,2.0,238.0,samsung,SM-G970U,G970USQS2BSIV,6.0,2019-11-16,72.0,72.0,,./images/tj_food.jpg,d1654cb2f80b9d713590ac1e386ea385
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34,,,,2.0,228.0,samsung,SM-G991U1,G991U1UEU2AUC8,6.0,,72.0,72.0,,./images/20210425_031811.jpg,b6cef40d6710c898883e0ed277e50a05
37,,,,2.0,226.0,samsung,SM-G970U,G970USQU6GUJ3,1.0,,72.0,72.0,,./images/duma.jpg,b4a3450d531a9e37e27592a9811e22a3
69,,,,2.0,170.0,Apple,iPhone XR,13.6.1,,,72.0,72.0,,./images/IMG_5120.jpeg,3d28caed7fa67261973cdfb9f42795df
73,,,,2.0,228.0,samsung,SM-G991U1,G991U1UES5BVA6,1.0,,72.0,72.0,,./images/20220306_134640.jpg,6e339f692ae4df79fbd13b70fec28ff5


In [4]:
import glob, os

def thumbs_n_nails():
    """
    Creates ./images/thumbnails and saves a resized copy of each .jpg in ./images into it.

    Only files matching ./images/*.jpg are processed. Each copy is named
    "thumbnail_<original name>" and is shrunk to fit within 100 x 100 pixels.
    An already existing thumbnails directory is reused, so the cell can be run
    again. An image that cannot be opened or saved is reported with a warning
    and skipped; the remaining images are still processed.
    """
    # local import: `warnings` is not among the notebook's imports
    import warnings

    size = 100, 100
    # maximum width and height handed to Image.thumbnail
    try:
        os.mkdir('./images/thumbnails')
    except FileExistsError:
        # left over from a previous run - reuse it
        pass
    for in_file in glob.glob("./images/*.jpg"):
        # split the path into (directory, file name) to build the thumbnail path
        folder, filename = os.path.split(in_file)
        new_filepath = os.path.join(folder, "thumbnails", "thumbnail_" + filename)
        try:
            with Image.open(in_file) as img:
                # resize in place, then save under the new path
                img.thumbnail(size)
                img.save(new_filepath)
        except Exception as exc:
            # one unreadable image must not stop the others from being processed
            warnings.warn(f"Skipping {in_file!r}: could not create thumbnail ({exc})")

# Build the thumbnails when this cell runs; `Image` is not imported in this cell and comes from an earlier one.
thumbs_n_nails()