In [15]:
import os
import pandas as pd
from pprint import pprint
from pathlib import Path
from datetime import datetime

def user_csv_choice():
    '''Prompt for a csv filename until it names an existing file in the current directory.

    The name is resolved against os.getcwd(). os.path.isfile() reports a missing
    path by returning False (it does not raise), so its result is tested
    explicitly and the user is re-prompted until the check succeeds.

    Returns:
        str: the filename exactly as the user typed it (not joined with the
        working directory).
    '''
    aPrompt = "Enter csv filename including its extension." + os.linesep + "(The file must be in same directory as this script.)"
    while True:
        userStr = input(aPrompt)
        # False for missing paths and for directories, so both trigger a retry.
        if os.path.isfile(os.path.join(os.getcwd(), userStr)):
            return userStr
        print("error occured with that filename. Try again.")

def user_chooses_yes(promptText):
    '''Keep asking promptText until the reply is a recognised yes/no word.

    Matching is case-sensitive against a fixed set of spellings; anything
    else causes the prompt to be shown again.

    Returns:
        bool: True for 'yes', 'y', 'Yes' or 'Y'; False for 'No', 'no', 'n' or 'N'.
    '''
    affirmative = ('yes', 'y', 'Yes', 'Y')
    negative = ('No', 'no', 'n', 'N')
    while True:
        answer = input(promptText)
        if answer in affirmative:
            return True
        if answer in negative:
            return False

def remove_chars_from_str(someStr, chars):
    '''Return someStr with every character that occurs in chars deleted.'''
    # str.translate drops any code point whose table entry is None.
    deletions = dict.fromkeys(map(ord, chars))
    return someStr.translate(deletions)

def splitall(path):
    '''Break a path into a list of its components (mount point, directories, file).

    Components are peeled off the right-hand end with os.path.split until the
    remainder stops shrinking, then returned in left-to-right order.
    '''
    reversedParts = []
    while True:
        head, tail = os.path.split(path)
        if head == path:
            # Nothing left to strip: absolute-path root (or empty string).
            reversedParts.append(head)
            break
        if tail == path:
            # Nothing left to strip: first component of a relative path.
            reversedParts.append(tail)
            break
        reversedParts.append(tail)
        path = head
    reversedParts.reverse()
    return reversedParts


def build_file_dataframe(chosenDir, DF=None, ignoreThumbs=True):
    '''Walk chosenDir recursively and tabulate one row of metadata per file.

    Args:
        chosenDir: directory handed to os.walk.
        DF: optional existing DataFrame with the same columns. When given, the
            newly collected rows are appended after it and rows whose
            "Filepath" already occurs are dropped (the earlier row wins).
        ignoreThumbs: when True, skip files whose name up to the first '.' is
            exactly "Thumbs".

    Returns:
        pandas.DataFrame with columns Filepath, File, Name, Extension,
        Filesize, Created, Modified, Retrieved, Error. Filesize is the
        st_size value as a string zero-padded to 15 characters. Timestamps are
        "%m/%d/%Y, %H:%M:%S" strings. Error is None unless the file's metadata
        could not be read, in which case placeholder values are stored.
    '''
    timeFormat = "%m/%d/%Y, %H:%M:%S"
    columnNames = ["Filepath", "File", "Name", "Extension", "Filesize",
                   "Created", "Modified", "Retrieved", "Error"]

    def timestamp_to_date(timestamp):
        # Local-time rendering of a POSIX timestamp.
        return datetime.fromtimestamp(timestamp).strftime(timeFormat)

    def file_data_to_list(root, file):
        filePath = os.path.join(root, file)
        pieces = file.split('.')
        # Text after the last '.' is the extension; a name with no '.' ends up
        # with an empty Name and the whole (lower-cased) filename as Extension.
        extension = pieces[-1].lower()
        name = '.'.join(pieces[:-1])
        now = datetime.now().strftime(timeFormat)

        try:
            fileStats = os.stat(filePath)
            # str.zfill returns a new string; the result has to be kept.
            fileSize = str(fileStats.st_size).zfill(15)
            # NOTE(review): st_ctime is creation time on Windows only; on Unix
            # it is the last metadata change.
            fileCreationTime = timestamp_to_date(fileStats.st_ctime)
            fileModifiedTime = timestamp_to_date(fileStats.st_mtime)
            error = None
        except (OSError, OverflowError, ValueError):
            # Best effort: keep the row but flag it, using placeholder values.
            fileSize = "123456789".zfill(15)
            fileCreationTime = now
            fileModifiedTime = now
            error = "error getting file metadata"
        return [filePath, file, name, extension, fileSize, fileCreationTime, fileModifiedTime, now, error]

    fileList = []
    for root, _dirs, files in os.walk(chosenDir):
        for file in files:
            if ignoreThumbs and file.split('.')[0] == "Thumbs":
                continue
            fileList.append(file_data_to_list(root, file))

    fileDF = pd.DataFrame(fileList, columns=columnNames)
    # "DF != None" compares element-wise on a DataFrame and makes the "if"
    # raise ValueError; identity comparison is what is meant here.
    if DF is not None:
        # DataFrame.append was removed in pandas 2.0. A fresh index keeps row
        # labels unique so later label-based drops hit exactly one row.
        fileDF = pd.concat([DF, fileDF], ignore_index=True)
        fileDF = fileDF.drop_duplicates(subset="Filepath", keep='first')
    return fileDF

def file_df_discrepancies(targetDF, currentDF):
    '''Remove from targetDF every file that has a counterpart in currentDF.

    targetDF describes the backup, which should contain no files that differ
    from the most current server described by currentDF. Two rows count as
    the same file when Extension and Name are equal and either their Filesize
    values differ by less than 550 or their paths share the same last two
    directories at the same depth.

    Both frames are modified in place: each matched pair is dropped from both,
    so a row of currentDF can be consumed by at most one row of targetDF.

    Returns:
        targetDF itself, reduced to the rows that found no counterpart.
    '''
    def filepath_is_similar(filepath1, filepath2):
        # Directory components only: the mount point and the filename are cut off.
        dirs1 = splitall(filepath1)[1:-1]
        dirs2 = splitall(filepath2)[1:-1]
        # Same depth is required, and at least two directories must exist to compare.
        if len(dirs1) != len(dirs2) or len(dirs1) < 2:
            return False
        return dirs2[-1] == dirs1[-1] and dirs2[-2] == dirs1[-2]

    for targetIndex, targetRow in targetDF.iterrows():
        # Re-filtered on every pass so rows already dropped from currentDF are not reused.
        sameExtension = currentDF[currentDF["Extension"] == targetRow['Extension']]
        if targetRow["Name"] not in sameExtension["Name"].values:
            continue

        candidates = sameExtension[sameExtension["Name"] == targetRow["Name"]]
        for currentIndex, currentRow in candidates.iterrows():
            closeInSize = abs(float(currentRow["Filesize"]) - float(targetRow["Filesize"])) < 550
            if closeInSize or filepath_is_similar(targetRow["Filepath"], currentRow["Filepath"]):
                currentDF.drop(currentIndex, inplace=True)
                targetDF.drop(targetIndex, inplace=True)
                break
    return targetDF

def get_immediate_subdirectories(a_dir):
    '''Return the names (not full paths) of the directories directly inside a_dir.

    Entries are kept in the order os.listdir reports them.
    '''
    subdirNames = []
    for entry in os.listdir(a_dir):
        if os.path.isdir(os.path.join(a_dir, entry)):
            subdirNames.append(entry)
    return subdirNames


def convert_backup_to_main(backUpPath, mainPath):
    '''Translate a path on the backup share into the matching path on the main server.

    The components of backUpPath are scanned left to right for the first one
    that equals a sub-directory of mainPath once spaces, periods and commas
    are ignored. The result is the main mount point, that main sub-directory,
    and every backup component that follows the matched one.

    Args:
        backUpPath: path on the backup share.
        mainPath: path on the main server whose immediate sub-directories are
            the candidates to match against.

    Returns:
        str: the converted path.

    Raises:
        ValueError: no component of backUpPath corresponds to a sub-directory
            of mainPath.
    '''
    ignoredChars = ' .,'
    mainSubDirs = get_immediate_subdirectories(mainPath)
    backUpPathList = splitall(backUpPath)
    # NOTE(review): candidates are listed from mainPath but the result is rooted
    # at its first component; the two agree only when mainPath is the mount
    # root itself - confirm with callers.
    mainMount = splitall(mainPath)[0]
    normalizedMain = [(remove_chars_from_str(subDir, ignoredChars), subDir) for subDir in mainSubDirs]

    # enumerate() yields the position in the component list. The earlier
    # backUpPath.index(pathDir) returned a character offset into the string,
    # which only lined up when the match sat directly under a 3-character mount.
    for position, pathDir in enumerate(backUpPathList):
        normalizedDir = remove_chars_from_str(pathDir, ignoredChars)
        candidates = [subDir for normalized, subDir in normalizedMain if normalized == normalizedDir]
        if not candidates:
            continue
        # Prefer an identically named directory; otherwise take the first one
        # that differs only in the ignored characters.
        mainSubDir = pathDir if pathDir in candidates else candidates[0]
        return os.path.join(mainMount, mainSubDir, *backUpPathList[position + 1:])

    raise ValueError("error converting backup path to records server path")

In [16]:
# Scratch check of splitall() against a local Windows test folder.
# NOTE(review): these absolute F:\ paths only resolve on the author's machine.
testDir = r"F:\29xx   University House"
# extra space in "File 2901-005.F5 Plans & Gen  Notes"
testDirCopy = r"F:\29xx   University House - Copy"

k = splitall(testDir)
# The first component is the mount point of the path.
print(k[0])

F:\


In [22]:
def convert_backup_to_main(backUpPath, mainPath):
    '''Translate a path on the backup share into the matching path on the main server.

    Only the mount point (first component) of mainPath is used: its immediate
    sub-directories are the candidates. The components of backUpPath are
    scanned left to right for the first one that equals a candidate once
    spaces, periods and commas are ignored. The result is the main mount
    point, that main sub-directory, and every backup component that follows
    the matched one.

    Args:
        backUpPath: path on the backup share.
        mainPath: any path on the main server; reduced to its mount point.

    Returns:
        str: the converted path.

    Raises:
        ValueError: no component of backUpPath corresponds to a sub-directory
            of the main mount point.
    '''
    ignoredChars = ' .,'
    mainMount = splitall(mainPath)[0]
    mainPath = os.path.normpath(mainMount)
    mainSubDirs = get_immediate_subdirectories(mainPath)
    backUpPathList = splitall(backUpPath)
    normalizedMain = [(remove_chars_from_str(subDir, ignoredChars), subDir) for subDir in mainSubDirs]

    # enumerate() yields the position in the component list. The earlier
    # backUpPath.index(pathDir) returned a character offset into the string,
    # which only lined up when the match sat directly under a 3-character mount.
    for position, pathDir in enumerate(backUpPathList):
        normalizedDir = remove_chars_from_str(pathDir, ignoredChars)
        candidates = [subDir for normalized, subDir in normalizedMain if normalized == normalizedDir]
        if not candidates:
            continue
        # Prefer an identically named directory; otherwise take the first one
        # that differs only in the ignored characters.
        mainSubDir = pathDir if pathDir in candidates else candidates[0]
        return os.path.join(mainMount, mainSubDir, *backUpPathList[position + 1:])

    raise ValueError("error converting backup path to records server path")


# Example: map a folder on the backup drive (B:) onto the main drive (R:).
# NOTE(review): both drive letters are machine-specific mounts.
man = "R:\\"
bu = r"B:\23xx   Crown College (College 3)\2307\C1.3- Agreement\Richmond, T.R"
pth = convert_backup_to_main(bu, man)
# Bare last expression so the notebook displays the converted path.
pth


'R:\\23xx   Crown College (College 3)\\2307\\C1.3- Agreement\\Richmond, T.R'

In [13]:
# Scratch check: list the main drive's top-level folders and split a sample path.
# NOTE(review): the R: drive is a machine-specific mount.
mainDirectoryMountLetter = 'R'
# A raw string cannot end in a single backslash, and r":\\" is a colon followed
# by TWO backslashes. An ordinary escaped literal gives the intended "R:\".
mainDirectoryMountPoint = mainDirectoryMountLetter + ":\\"
mainSubDirs = get_immediate_subdirectories(mainDirectoryMountPoint)
jj = r"R:\84xx   Arboretum\8499\8499-001\DOCS\E6 - Reports"
jjList = splitall(jj)
# Round trip: re-joining the components rebuilds the path.
r = os.path.join(*jjList)
# Everything below the mount point and the top-level building folder.
jjList[2:]

['8499', '8499-001', 'DOCS', 'E6 - Reports']

In [23]:
 # Interactive lookup of the os.walk signature and docstring.
 # NOTE(review): leftover exploration - candidate for removal from a finished notebook.
 help(os.walk)

Help on function walk in module os:

walk(top, topdown=True, onerror=None, followlinks=False)
    Directory tree generator.
    
    For each directory in the directory tree rooted at top (including top
    itself, but excluding '.' and '..'), yields a 3-tuple
    
        dirpath, dirnames, filenames
    
    dirpath is a string, the path to the directory.  dirnames is a list of
    the names of the subdirectories in dirpath (excluding '.' and '..').
    filenames is a list of the names of the non-directory files in dirpath.
    Note that the names in the lists are just names, with no path components.
    To get a full path (which begins with top) to a file or directory in
    dirpath, do os.path.join(dirpath, name).
    
    If optional arg 'topdown' is true or not specified, the triple for a
    directory is generated before the triples for any of its subdirectories
    (directories are generated top down).  If topdown is false, the triple
    for a directory is generated after the 

In [32]:
def remove_chars_from_str(toRemove, someChars):
    '''Delete every character found in someChars from a string or from each string in a list.

    Args:
        toRemove: a str, or a list of str.
        someChars: iterable of single characters to delete.

    Returns:
        A str when toRemove is a str; a list of str when toRemove is a list;
        None, after printing "Error: Wrong Type", for any other type.
    '''
    # isinstance (rather than an exact type comparison) also accepts subclasses
    # of list and str.
    if not isinstance(toRemove, (list, str)):
        print("Error: Wrong Type")
        return None

    # Built once and shared by every element; str.translate drops any code
    # point whose table entry is None.
    deletions = {ord(char): None for char in someChars}
    if isinstance(toRemove, list):
        return [item.translate(deletions) for item in toRemove]
    return toRemove.translate(deletions)
        
# Quick check of the list form: spaces, commas and periods are stripped from every element.
remove_chars_from_str(["beep boop", ".bbb,"], ' ,.')

['beepboop', 'bbb']