In [None]:
import os

#function to rename files
def file_rename(directory='images', start=10):
    """
    Rename every regular file in `directory` to img_<number>.jpg.

    Files are numbered consecutively, beginning at `start`, in alphabetical
    order of their current names so repeated runs give the same result.
    Sub-folders inside `directory` are left untouched.

    Parameters
    ----------
    directory : str
        Folder whose files are renamed. Defaults to 'images'.
    start : int
        Number assigned to the first file. Defaults to 10.

    Returns
    -------
    None
    """
    import uuid

    # only regular files are renamed; directories keep their names
    names = sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )

    # Pass 1: move every file to a unique temporary name. Renaming straight to
    # img_<n>.jpg could land on a file that has not been processed yet and
    # replace it (POSIX) or fail (Windows).
    prefix = f'.rename_{uuid.uuid4().hex}_'
    parked = []
    for index, name in enumerate(names):
        parked_path = os.path.join(directory, f'{prefix}{index}')
        os.rename(os.path.join(directory, name), parked_path)
        parked.append(parked_path)

    # Pass 2: no img_<n>.jpg file is left in the folder, so the final names are free
    for count, parked_path in enumerate(parked, start=start):
        os.rename(parked_path, os.path.join(directory, f'img_{count}.jpg'))

In [None]:
from exif import Image
from PIL import Image
from PIL.ExifTags import TAGS
from csv import DictWriter
import pandas as pd
import os
import numpy as np

def img_filepaths(directory="./images"):
    """
    Returns a sorted list of the paths of all regular files directly inside `directory`.

    Parameters
    ----------
    directory : str
        Folder to scan. Defaults to "./images".

    Returns
    -------
    list of str
        `directory` joined with each file name, in alphabetical order of the
        file names. Sub-folders are not included.
    """
    # os.listdir gives no ordering guarantee, so sort to keep results reproducible
    candidates = (
        os.path.join(directory, entry) for entry in sorted(os.listdir(directory))
    )
    return [path for path in candidates if os.path.isfile(path)]

def dict_convert():
    """
    Returns a list of dictionaries holding the EXIF metadata of every file returned by img_filepaths().

    Each dictionary maps a metadata tag name (or the numeric tag id when
    Pillow's TAGS table has no name for it) to the value stored in the file,
    plus a 'File Path' key holding the path the data was read from.
    Files whose metadata cannot be read are reported and left out of the list.
    """
    dict_list = []
    for image in img_filepaths():
        try:
            # the context manager closes the file handle even when reading fails
            with Image.open(image) as image_file:
                exifdata = image_file.getexif()
                file_dict = {}
                for tag_id in exifdata:
                    # translate the numeric tag id into a human readable tag name
                    tag = TAGS.get(tag_id, tag_id)
                    data = exifdata.get(tag_id)
                    if isinstance(data, bytes):
                        # binary tags are not always valid text; substitute the
                        # undecodable bytes instead of losing the whole image
                        data = data.decode(errors="replace")
                    file_dict[tag] = data
        except Exception as error:
            # best effort: an unreadable file is skipped, but visibly so
            print(f"Skipping {image}: {error}")
            continue
        file_dict['File Path'] = image
        dict_list.append(file_dict)
    return dict_list

dict_list = dict_convert()
# dict_list holds one metadata dictionary per readable image; it has to be
# assigned before it is printed, otherwise a fresh kernel raises NameError
print(dict_list)

field_names = ['TileWidth', 'TileLength', 'GPSInfo','ResolutionUnit', 'ExifOffset', 'Make', 'Model', 'Software', 'Orientation', 'DateTime', 'XResolution', 'YResolution', 'HostComputer', 'File Path']
# fixed list of metadata tags that become the columns of the csv file
print(field_names)
# make sure the output folder exists before opening the file for writing
os.makedirs("./data", exist_ok=True)
# utf-8 matches the default encoding pandas.read_csv uses to read the file back
with open("./data/meta_data.csv", 'w',newline='', encoding='utf-8') as csvfile:
    # keys that are not listed in field_names are ignored instead of raising
    writer = DictWriter(csvfile, fieldnames=field_names, extrasaction='ignore')
    # header row first, then one row per image
    writer.writeheader()
    writer.writerows(dict_list)

In [None]:
import datetime

def date_format(metadata_file="./data/meta_data.csv"):
    """
    Load the metadata csv, reduce the 'DateTime' column to dates and sort by it.

    Parameters
    ----------
    metadata_file : str
        Path of the csv file to read. Defaults to "./data/meta_data.csv".

    Returns
    -------
    pandas.DataFrame
        The csv content sorted by 'DateTime'. Values in the
        '%Y:%m:%d %H:%M:%S' layout are replaced by datetime.date objects;
        every other value (including missing ones) is kept unchanged.
    """
    metadata = pd.read_csv(metadata_file, header=0)

    def remove_time(value):
        """Return the date part of a timestamp, or `value` itself if it cannot be parsed."""
        try:
            return datetime.datetime.strptime(str(value), '%Y:%m:%d %H:%M:%S').date()
        except ValueError:
            # not in the expected timestamp layout (e.g. a missing value)
            return value

    metadata["DateTime"] = metadata["DateTime"].map(remove_time)
    return metadata.sort_values(["DateTime"])

In [None]:
import glob, os

def thumbs_n_nails(size=(100, 100)):
    """
    Save a thumbnail of every .jpg file in ./images to ./images/thumbnails.

    Each thumbnail is stored as "thumbnail_<original file name>". The
    thumbnails folder is created when it does not exist yet.

    Parameters
    ----------
    size : tuple of int
        Size passed to the image's thumbnail() call. Defaults to (100, 100).

    Returns
    -------
    None
    """
    for in_file in glob.glob("./images/*.jpg"):
        folder, file_name = os.path.split(in_file)
        thumb_dir = os.path.join(folder, "thumbnails")
        # saving into a missing folder raises FileNotFoundError
        os.makedirs(thumb_dir, exist_ok=True)
        new_filepath = os.path.join(thumb_dir, "thumbnail_" + file_name)
        with Image.open(in_file) as img:
            img.thumbnail(size)
            img.save(new_filepath)

In [None]:
import pandas as pd
import numpy as np
import hashlib

# Read the metadata csv file into a pandas DataFrame, using the first row of
# the file as the column header. meta_df is a module-level variable that
# md5_hash() and reject_rows() read.
# NOTE(review): the path matches the file written by the metadata export
# cell, so that cell presumably has to run first - confirm.
meta_file = './data/meta_data.csv'
meta_df = pd.read_csv(meta_file, header=0)

def md5_hash(df=None, block_size=65536):
    """
    Add an 'MD5 Hash' column to a metadata frame and drop duplicate images.

    The hash is computed from the bytes of the file named in each row's
    'File Path' value. Rows whose hash was already seen are removed, keeping
    the first occurrence. The frame is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame with a 'File Path' column. Defaults to the module-level meta_df.
    block_size : int
        Number of bytes read from a file per step. Defaults to 65536.

    Returns
    -------
    pandas.DataFrame
        The same frame object that was modified.
    """
    def calculate_hash_val(path):
        """Return the hexadecimal MD5 digest of the file at `path`."""
        hasher = hashlib.md5()
        # the with-block closes the file even if reading fails part-way
        with open(path, 'rb') as image:
            # read fixed-size chunks until read() returns b'' at end of file,
            # so large images are never loaded into memory in one piece
            for chunk in iter(lambda: image.read(block_size), b''):
                hasher.update(chunk)
        return hasher.hexdigest()

    if df is None:
        df = meta_df
    #run calculate_hash_val func over file path column and add to df as 'md5 hash'
    df['MD5 Hash'] = df['File Path'].map(calculate_hash_val)
    #drop duplicate rows using Md5 Hash
    df.drop_duplicates(keep='first', subset='MD5 Hash', inplace=True)
    return df

In [None]:
# collects the rows that have no make, model and date metadata and saves them to their own csv file
def reject_rows():
    """
    Write the rows of meta_df whose 'Make', 'Model' and 'DateTime' values are all missing to data/reject.csv.
    """
    key_columns = meta_df[['Make', 'Model', 'DateTime']]
    # True only for rows in which every one of the three columns is missing
    nothing_recorded = key_columns.isna().all(axis=1)
    reject_df = meta_df[nothing_recorded]
    reject_df.to_csv('data/reject.csv', encoding='utf-8', index=False)