In [13]:
import os
from typing import List

def getFiles(path: str) -> List[str]:
    """Return the path of every file found under ``path``.

    The tree is traversed with ``os.walk``; each file name is joined onto the
    directory it was found in. Directories themselves are not part of the
    result.
    """
    return [
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(path)
        for entry in entries
    ]

In [14]:
import hashlib

def compute_md5(file_name: str) -> str:
    """Return the hexadecimal MD5 digest of the file at ``file_name``.

    The file is opened in binary mode and fed to the hash in reads of at most
    4096 bytes, so the whole file is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(file_name, "rb") as stream:
        while True:
            block = stream.read(4096)
            if block == b"":  # an empty read marks end of file
                break
            digest.update(block)
    return digest.hexdigest()

In [16]:
from collections import defaultdict

def groupFiles(path: str) -> List[List[str]]:
    """Group the files under ``path`` by the MD5 digest of their content.

    Every path returned by ``getFiles(path)`` is hashed with ``compute_md5``;
    paths that produce the same digest end up in the same inner list.

    Returns:
        A list of groups, each group being a list of file paths. An empty
        list is returned when ``getFiles`` yields nothing.
    """
    contentDict = defaultdict(list)
    for name in getFiles(path):
        contentDict[compute_md5(name)].append(name)

    # dict.values() is a view, not a list: it cannot be indexed. Materialise
    # it so the result matches the declared List[List[str]].
    return list(contentDict.values())

In [18]:
import filecmp

# Ad-hoc check: compare two specific PDF files.
# NOTE(review): both are hard-coded absolute paths under one user's home
# directory, so this cell only runs on that machine.
file1 = "C:/Users/arthu/CloudStation/Jobs/Interviews/Amazon/SDE Prep/Amazon SDE Interview Prep 2021.pdf"
file2 = "C:/Users/arthu/CloudStation/Jobs/Interviews/Amazon/Amazon SDE Interview Prep.pdf"

# filecmp.cmp is called with its default arguments.
# NOTE(review): per the filecmp docs the default is shallow=True, which treats
# files with identical os.stat() signatures as equal without reading them —
# confirm that is what is wanted here.
check = filecmp.cmp(file1, file2)

In [20]:
# Print the result of the filecmp.cmp call stored in `check`.
print(check)

False


In [8]:
import enum

class Typ(enum.Enum):
    """File-type tags; members are numbered 1 to 4 in declaration order."""

    txt = enum.auto()
    csv = enum.auto()
    xml = enum.auto()
    json = enum.auto()

In [9]:
from dataclasses import dataclass

@dataclass
class File:
    """Record describing one file; the Filter classes in this file read its fields."""
    # Name of the file; NameFilter does a substring test against it.
    name: str
    # Size of the file; MinSizeFilter compares it with its threshold.
    # Units are not stated in this file — presumably bytes, TODO confirm.
    size: int
    # Type tag; TypeFilter compares it for equality with a Typ member.
    typ: Typ
    # NOTE(review): presumably True for regular files and False for
    # directories — confirm; no code visible here reads this attribute.
    isFile: bool

In [4]:
from abc import ABC, abstractmethod

class Filter(ABC):
    """Abstract base class for predicates over ``File`` objects.

    Concrete subclasses implement ``apply``. The class cannot be instantiated
    directly because ``apply`` is abstract.
    """

    @abstractmethod
    def apply(self, file: File) -> bool:
        """Decide whether ``file`` passes this filter; subclasses must override."""
        pass

In [15]:
class TypeFilter(Filter):
    """Filter that accepts files whose ``typ`` equals a configured ``Typ``."""

    def __init__(self, typ: Typ) -> None:
        # The type every candidate file is compared against.
        self.typ = typ

    def apply(self, file: File) -> bool:
        """Return True when ``file.typ`` equals the configured type."""
        wanted = self.typ
        matches = file.typ == wanted
        return matches

In [14]:
class MinSizeFilter(Filter):
    """Filter that accepts files strictly larger than a configured size."""

    def __init__(self, minSize: int) -> None:
        # Threshold for the comparison in apply().
        self.minSize = minSize

    def apply(self, file: File) -> bool:
        """Return True when ``file.size`` is greater than ``minSize``.

        The comparison is strict: a file whose size equals ``minSize`` is
        rejected.
        """
        threshold = self.minSize
        isLargeEnough = file.size > threshold
        return isLargeEnough

In [13]:
class NameFilter(Filter):
    """Filter that accepts files whose name contains a configured string."""

    def __init__(self, name: str) -> None:
        # Text that must occur somewhere inside a file's name.
        self.name = name

    def apply(self, file: File) -> bool:
        """Return True when the configured text occurs in ``file.name``.

        This is a plain, case-sensitive ``in`` test, not a pattern match.
        """
        needle = self.name
        haystack = file.name
        return needle in haystack

In [18]:
import os
from typing import List

class FindCommand:
    """Namespace of static helpers that search a directory tree for files."""

    @staticmethod
    def getFiles(path: str) -> List[str]:
        """Return the path of every file found under ``path`` (via ``os.walk``)."""
        return [
            os.path.join(root, file)
            for root, _dirs, files in os.walk(path)
            for file in files
        ]

    @staticmethod
    def findWithFilters(path: str, filters: List[Filter]) -> List[File]:
        """Return a ``File`` record for every file under ``path`` passing all filters.

        Each path found by ``FindCommand.getFiles`` is turned into a ``File``:

        * ``name``   - the full joined path, as returned by ``getFiles``
        * ``size``   - ``os.path.getsize`` of that path (bytes)
        * ``typ``    - the ``Typ`` member named like the lower-cased extension,
          or ``None`` when the extension matches no member
        * ``isFile`` - always ``True``; ``os.walk`` lists only non-directory
          entries in its ``files`` component

        Args:
            path: Root directory of the search.
            filters: Filters that must all accept a file. An empty list
                accepts every file.

        Returns:
            The accepted ``File`` records, in ``os.walk`` order.
        """
        output = []

        # Call the class's own walker explicitly: a bare ``getFiles`` would
        # resolve to whatever module-level function of that name exists.
        for filePath in FindCommand.getFiles(path):
            try:
                size = os.path.getsize(filePath)
            except OSError:
                # e.g. a dangling symlink or a file removed during the walk:
                # there is nothing to describe, so leave it out of the search.
                continue

            extension = os.path.splitext(filePath)[1].lstrip(".").lower()
            # The filters read .name/.size/.typ, so they need a File record,
            # not the raw path string.
            candidate = File(
                name=filePath,
                size=size,
                typ=Typ.__members__.get(extension),
                isFile=True,
            )

            # all() stops at the first filter that rejects the candidate.
            if all(rule.apply(candidate) for rule in filters):
                output.append(candidate)

        return output

In [21]:
def findWithFilters(path: str, filters: List[Filter]) -> List[File]:
    """Collect the file entries below ``path`` that pass every filter.

    The tree is explored through the entries' own interface: ``getDirectory()``
    is iterated for children, ``isFile()`` entries are collected and
    ``isFolder()`` entries are descended into recursively.

    NOTE(review): ``path`` is annotated ``str`` but the body calls
    ``path.getDirectory()``, which ``str`` does not provide. The annotation is
    kept for interface compatibility — confirm the intended type.

    Args:
        path: Root entry of the search (see the note above).
        filters: Filters that must all accept an entry. An empty list accepts
            every collected entry.

    Returns:
        The accepted entries, in traversal order.
    """
    files = []

    def collect(entry) -> None:
        # Depth-first traversal; appends to the enclosing `files` list.
        for child in entry.getDirectory():
            if child.isFile():
                files.append(child)
            elif child.isFolder():
                collect(child)

    # collect() works by side effect and returns None, so call it for effect
    # and iterate the accumulated list rather than its return value.
    collect(path)

    # all() stops at the first filter that rejects the entry.
    return [file for file in files if all(rule.apply(file) for rule in filters)]