In [45]:
import os
import pandas as pd
from pprint import pprint
from pathlib import Path
from datetime import datetime

def splitall(path):
    '''Break a path into its individual components.

    Returns a list ordered from the outermost piece (mount point / root for
    absolute paths, first directory for relative ones) to the innermost
    directory or file name.
    '''
    pieces = []          # collected innermost-first, reversed before returning
    remaining = path
    while True:
        head, tail = os.path.split(remaining)
        if head == remaining:
            # Nothing more can be peeled off: reached the root of an absolute path.
            pieces.append(head)
            break
        if tail == remaining:
            # Nothing more can be peeled off: reached the first part of a relative path.
            pieces.append(tail)
            break
        pieces.append(tail)
        remaining = head
    pieces.reverse()
    return pieces


def build_file_dataframe(chosenDir, DF=None, ignoreThumbs=True):
    '''Walk chosenDir recursively and return a DataFrame with one row per file.

    Parameters:
        chosenDir: root directory of the tree to scan (passed to os.walk).
        DF: optional DataFrame with the same columns, e.g. from an earlier call.
            When given, the newly found rows are concatenated after it and rows
            whose "Filepath" already appeared are dropped (first occurrence wins).
        ignoreThumbs: when True (default), skip every file whose text before the
            first '.' is exactly "Thumbs" (e.g. "Thumbs.db").

    Returns:
        DataFrame with columns
        "Filepath"  - root joined with the file name,
        "File"      - file name including extension,
        "Name"      - file name without its last extension,
        "Extension" - last extension, lower-cased, without the leading dot
                      ("" when the file has none),
        "Filesize"  - size in bytes as a decimal string,
        "Created"   - st_ctime formatted as "MM/DD/YYYY, HH:MM:SS",
        "Modified"  - st_mtime formatted the same way.

    Raises:
        OSError: propagated from os.stat if a listed file cannot be stat'ed.
    '''
    columns = ["Filepath", "File", "Name", "Extension", "Filesize", "Created", "Modified"]

    def timestamp_to_date(timestamp):
        '''Format a POSIX timestamp as local time, "MM/DD/YYYY, HH:MM:SS".'''
        return datetime.fromtimestamp(timestamp).strftime("%m/%d/%Y, %H:%M:%S")

    def describe_file(root, file):
        '''Build the row (in `columns` order) for one file found under root.'''
        filePath = os.path.join(root, file)
        # splitext keeps dot-less names ("README") and dotfiles (".gitignore")
        # in "Name" instead of mis-reporting them as an extension.
        name, extension = os.path.splitext(file)
        fileStats = os.stat(filePath)
        return [
            filePath,
            file,
            name,
            # str methods return new strings; the result must be kept.
            extension[1:].lower(),
            str(fileStats.st_size),
            # NOTE: per the Python os.stat docs, st_ctime is the creation time on
            # Windows but the last metadata change on Unix.
            timestamp_to_date(fileStats.st_ctime),
            timestamp_to_date(fileStats.st_mtime),
        ]

    fileList = [
        describe_file(root, file)
        for root, _dirs, files in os.walk(chosenDir)
        for file in files
        if not (ignoreThumbs and file.split('.')[0] == "Thumbs")
    ]
    fileDF = pd.DataFrame(fileList, columns=columns)

    # `DF != None` would compare element-wise and make the `if` raise ValueError.
    if DF is not None:
        # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
        fileDF = pd.concat([DF, fileDF]).drop_duplicates(subset="Filepath", keep='first')
    return fileDF


def file_discrepancies(targetDF, currentDF, sizeTolerance=350):
    '''Return the rows of targetDF that have no matching file in currentDF.

    targetDF is the dataframe of a backup which should have no files that differ
    from the most current server, as represented in currentDF.

    Two rows describe the same file when their "Extension" and "Name" are equal
    and their "Filesize" values differ by less than sizeTolerance bytes in
    either direction. Each currentDF row can be matched by at most one
    targetDF row; the first candidate in currentDF order wins.

    Side effects: matched rows are dropped IN PLACE from both targetDF and
    currentDF (by index label), so after the call each frame holds only its
    unmatched rows.

    Parameters:
        targetDF: DataFrame with "Name", "Extension" and "Filesize" columns.
        currentDF: DataFrame with the same columns.
        sizeTolerance: exclusive upper bound, in bytes, on the absolute size
            difference still treated as the same file (default 350).

    Returns:
        targetDF itself (the same object), reduced to its unmatched rows.
    '''
    matchedTarget = set()
    matchedCurrent = set()

    for targetIndex, targetRow in targetDF.iterrows():
        targetSize = int(targetRow["Filesize"])
        sameFile = currentDF[
            (currentDF["Extension"] == targetRow["Extension"])
            & (currentDF["Name"] == targetRow["Name"])
        ]
        for currentIndex, currentSize in sameFile["Filesize"].items():
            if currentIndex in matchedCurrent:
                continue  # already paired with an earlier target row
            # abs(): a signed difference would treat any much-smaller current
            # file as a match, because the result is negative and thus < tolerance.
            if abs(int(currentSize) - targetSize) < sizeTolerance:
                matchedCurrent.add(currentIndex)
                matchedTarget.add(targetIndex)
                break

    # Drop once, after the scan, so neither frame is modified while iterated.
    currentDF.drop(index=list(matchedCurrent), inplace=True)
    targetDF.drop(index=list(matchedTarget), inplace=True)
    return targetDF

In [46]:
# Root of the original directory tree to index (hard-coded absolute Windows path).
testDir = r"F:\29xx   University House"
# Disabled interactive prompt. It refers to `currentDir`, which is not assigned
# anywhere in the visible cells, so it cannot be re-enabled as written.
#while not os.path.isdir(currentDir):
#    currentDir = input("Enter directory from where search will start:  ")
    
# Root of the copied tree that is compared against testDir.
testDirCopy = r"F:\29xx   University House - Copy"

# Build one file listing per tree; files named "Thumbs.*" are skipped because
# build_file_dataframe defaults to ignoreThumbs=True.
originalDF = build_file_dataframe(testDir)
copyDF  = build_file_dataframe(testDirCopy)

In [49]:
# file_discrepancies drops matched rows from BOTH arguments in place and returns
# its first argument, so deltaDF is the same object as originalDF and revdeltaDF
# is the same object as copyDF. The second call therefore runs on frames already
# pruned by the first one, and re-running this cell does not start from the full
# listings: re-run the cell that builds originalDF / copyDF first.
deltaDF = file_discrepancies(originalDF, copyDF)
revdeltaDF = file_discrepancies(copyDF, originalDF)

In [50]:
# Rows of the copy listing for which no matching file was found in the original listing.
revdeltaDF

Unnamed: 0,Filepath,File,Name,Extension,Filesize,Created,Modified
1,F:\29xx University House - Copy\2901\2901-00...,notis Blank Stamp.pdf,notis Blank Stamp,pdf,58654,"10/22/2019, 16:52:16","07/10/2017, 10:22:16"
2,F:\29xx University House - Copy\2901\2901-00...,Extra review transmittal page.pdf,Extra review transmittal page,pdf,277384,"10/22/2019, 16:52:22","05/16/2019, 09:34:36"
