- [How to list directory tree structure in Python?](https://www.tutorialspoint.com/how-to-list-directory-tree-structure-in-python)

# Using `os.walk()` for Recursive Directory Traversal

In [3]:
import os

def list_directory_tree_with_os_walk(starting_directory):
    """Print each directory below ``starting_directory`` followed by its files.

    The tree is traversed with ``os.walk``. Every visited directory is
    reported as ``Directory: <path>`` and each file directly inside it as an
    indented ``  File: <name>`` line.

    Args:
        starting_directory: Directory at which the walk starts.
    """
    for current_dir, _subdirs, file_names in os.walk(starting_directory):
        # Build the report for one directory, then emit it in a single print.
        report = [f"Directory: {current_dir}"]
        report.extend(f"  File: {name}" for name in file_names)
        print("\n".join(report))

# Walk the notebook's current working directory.
list_directory_tree_with_os_walk(starting_directory=".")

Directory: .
  File: ipywidgets-example.ipynb
  File: itchat.ipynb
  File: local-maven.ipynb
  File: scholarly.ipynb
  File: SlackDeveloperKitforPython.ipynb
  File: wget-download-files.ipynb
  File: win32.ipynb
Directory: .\Ethereum
  File: Ethereum-JSON.ipynb
  File: Ethereum-playground.ipynb


# Utilizing `pathlib.Path` for Directory Tree Traversal

- [pathlib — Object-oriented filesystem paths](https://docs.python.org/3/library/pathlib.html)

In [7]:
from pathlib import Path

def list_directory_tree_with_pathlib(starting_directory, glob="*"):
    """Print every file and directory found recursively below a directory.

    Entries are discovered with ``Path.rglob``. Regular files are printed as
    ``File: <path>`` and directories as ``Directory: <path>``; entries that
    are neither are not printed.

    Args:
        starting_directory: Directory in which the recursive search starts.
        glob: Pattern handed to ``Path.rglob``; the default ``"*"`` matches
            every entry.
    """
    for entry in Path(starting_directory).rglob(glob):
        if entry.is_file():
            label = "File"
        elif entry.is_dir():
            label = "Directory"
        else:
            # Neither a file nor a directory: nothing to report.
            continue
        print(f"{label}: {entry}")

# List everything below the notebook's current working directory.
list_directory_tree_with_pathlib(starting_directory=".")

Directory: Ethereum
File: ipywidgets-example.ipynb
File: itchat.ipynb
File: local-maven.ipynb
File: scholarly.ipynb
File: SlackDeveloperKitforPython.ipynb
File: wget-download-files.ipynb
File: win32.ipynb
File: Ethereum\Ethereum-JSON.ipynb
File: Ethereum\Ethereum-playground.ipynb


In [None]:
# Maven's default local repository is ~/.m2/repository; building the path from
# Path.home() avoids hard-coding one particular user's home directory.
list_directory_tree_with_pathlib(Path.home() / ".m2" / "repository", "*.pom")

In [61]:
from pathlib import Path
import pandas as pd
from datetime import datetime

# verify with shell: find . | grep -E 'jar$' | wc -l
def list_artifact(starting_directory: str, glob="*") -> list[dict]:
    """Collect metadata for artifact files stored in a Maven-style layout.

    Files are expected at ``<group dirs...>/<artifact>/<version>/<file>``
    relative to ``starting_directory``. The group is the dot-joined list of
    directories that precede the artifact directory.

    Args:
        starting_directory: Root of the repository tree. The location of each
            file is computed relative to this root, so the value may be given
            with or without a trailing slash and with either kind of path
            separator (anything ``Path`` accepts works).
        glob: Pattern handed to ``Path.rglob`` to select files, e.g. ``"*.jar"``.

    Returns:
        One dict per selected file with the keys ``group``, ``artifact``,
        ``version``, ``size`` (bytes) and ``ctime`` (a ``datetime`` built from
        ``st_ctime``).
    """
    repository_root = Path(starting_directory)
    records = []
    for file_path in repository_root.rglob(glob):
        if not file_path.is_file():
            continue
        # Skip source archives. Only the file name is inspected so that a
        # repository root whose own path contains "-sources" is not skipped
        # wholesale.
        if '-sources' in file_path.name:
            continue
        # relative_to() yields the components below the root regardless of how
        # the root was spelled, unlike stripping a string prefix.
        parts = file_path.relative_to(repository_root).parts
        if len(parts) < 3:
            # Too shallow for <artifact>/<version>/<file>; not a Maven
            # artifact, so ignore it instead of failing on the index lookup.
            continue
        *group_parts, artifact, version, _file_name = parts
        stat = file_path.stat()
        records.append({
            'group': '.'.join(group_parts),
            'artifact': artifact,
            'version': version,
            'size': stat.st_size,  # bytes
            # NOTE: st_ctime is platform dependent (creation time on Windows,
            # last metadata change on Unix).
            'ctime': datetime.fromtimestamp(stat.st_ctime)
        })
    return records

# Default location of the local Maven repository (~/.m2/repository/), built
# from the current user's home directory instead of a hard-coded user path.
# Kept as a forward-slash string with a trailing slash.
artifact_home = f"{Path.home().as_posix()}/.m2/repository/"
artifacts = list_artifact(artifact_home, "*.jar")
s = pd.json_normalize(artifacts)
# Largest artifacts first; ties fall back to group, then artifact (descending).
s.sort_values(by=['size', 'group', 'artifact'], ascending=False)
# Total size of all listed artifacts, in bytes: s['size'].sum()

Unnamed: 0,group,artifact,version,size,ctime
2278,org.rocksdb,rocksdbjni,9.4.0,70111775,2024-08-07 11:15:18.763036
2098,org.jetbrains.kotlin,kotlin-compiler,1.9.22,61634292,2024-04-26 17:09:10.732087
2277,org.rocksdb,rocksdbjni,7.0.3,54269203,2024-04-10 10:55:52.766078
1832,org.bytedeco,opencv,4.9.0-1.5.10,32402985,2024-07-26 09:41:59.711084
1829,org.bytedeco,opencv,4.9.0-1.5.10,30232840,2024-07-26 09:43:46.465367
...,...,...,...,...,...
2126,org.jetbrains.kotlin,kotlin-stdlib-jdk7,1.9.22,961,2024-04-26 17:08:46.489465
726,io.projectreactor.netty,reactor-netty,1.0.10,365,2024-04-10 10:54:38.302140
2238,org.openjfx,javafx-graphics,17,261,2024-07-25 18:29:24.046959
2236,org.openjfx,javafx-base,17,261,2024-07-25 18:30:57.794965


In [62]:
# Keep only the rows whose group is org.rocksdb.
s.loc[s['group'].eq('org.rocksdb')]

Unnamed: 0,group,artifact,version,size,ctime
2277,org.rocksdb,rocksdbjni,7.0.3,54269203,2024-04-10 10:55:52.766078
2278,org.rocksdb,rocksdbjni,9.4.0,70111775,2024-08-07 11:15:18.763036


# Displaying Indentation for Better Structure

In [5]:
import os

def list_directory_tree_with_indentation(directory, indent=0):
    """Print a directory tree, indenting entries by their nesting depth.

    Each entry of ``directory`` is printed as ``File: <name>`` or
    ``Directory: <name>``, prefixed with two spaces per indentation level.
    Directories are descended into recursively with the level increased by one.

    Args:
        directory: Directory whose contents are listed.
        indent: Indentation level used for the entries of ``directory``.
    """
    padding = '  ' * indent
    for name in os.listdir(directory):
        full_path = os.path.join(directory, name)
        if os.path.isfile(full_path):
            print(f"{padding}File: {name}")
            continue
        if os.path.isdir(full_path):
            print(f"{padding}Directory: {name}")
            # Children are shown one level deeper than their parent.
            list_directory_tree_with_indentation(full_path, indent + 1)

# Show the tree below the notebook's current working directory.
list_directory_tree_with_indentation(directory=".")

Directory: Ethereum
  File: Ethereum-JSON.ipynb
  File: Ethereum-playground.ipynb
File: ipywidgets-example.ipynb
File: itchat.ipynb
File: local-maven.ipynb
File: scholarly.ipynb
File: SlackDeveloperKitforPython.ipynb
File: wget-download-files.ipynb
File: win32.ipynb


# Excluding Certain Directories from the Listing

In [6]:
import os

def list_directory_tree_exclude_directories(directory, exclude_dirs=None):
    """Print a directory tree, leaving out directories with excluded names.

    Files are printed as ``File: <name>`` and directories as
    ``Directory: <name>``. A directory whose name is contained in
    ``exclude_dirs`` is neither printed nor descended into; the same exclusion
    applies at every level of the recursion.

    Args:
        directory: Directory whose contents are listed.
        exclude_dirs: Container of directory names (not paths) to skip.
            ``None`` (the default) excludes nothing.
    """
    # A ``[]`` default would be one list object shared by every call; use None
    # as the sentinel and substitute an immutable empty container instead.
    if exclude_dirs is None:
        exclude_dirs = ()
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)
        if os.path.isfile(item_path):
            print(f"File: {item}")
        elif os.path.isdir(item_path):
            if item not in exclude_dirs:
                print(f"Directory: {item}")
                list_directory_tree_exclude_directories(item_path, exclude_dirs)

# No names are excluded here, so the complete tree is listed.
list_directory_tree_exclude_directories(directory=".")

Directory: Ethereum
File: Ethereum-JSON.ipynb
File: Ethereum-playground.ipynb
File: ipywidgets-example.ipynb
File: itchat.ipynb
File: local-maven.ipynb
File: scholarly.ipynb
File: SlackDeveloperKitforPython.ipynb
File: wget-download-files.ipynb
File: win32.ipynb


# Displaying File Sizes and Last Modified Dates

In [1]:
import os
from datetime import datetime

def list_directory_tree_with_file_info(directory):
    """Print a directory tree with the size and modification time of each file.

    Files are printed as
    ``File: <name> - Size: <n> bytes - Last Modified: <datetime>``;
    directories are printed as ``Directory: <name>`` and then listed
    recursively.

    Args:
        directory: Directory whose contents are listed.
    """
    for name in os.listdir(directory):
        full_path = os.path.join(directory, name)
        if os.path.isdir(full_path):
            print(f"Directory: {name}")
            list_directory_tree_with_file_info(full_path)
        elif os.path.isfile(full_path):
            # A single stat() call provides both the size and the mtime.
            info = os.stat(full_path)
            modified_at = datetime.fromtimestamp(info.st_mtime)
            print(f"File: {name} - Size: {info.st_size} bytes - Last Modified: {modified_at}")

# Report sizes and modification times for the current working directory.
list_directory_tree_with_file_info(directory=".")

Directory: Ethereum
File: Ethereum-JSON.ipynb - Size: 51675 bytes - Last Modified: 2024-08-01 21:38:36.656669
File: Ethereum-playground.ipynb - Size: 40346 bytes - Last Modified: 2024-08-01 21:38:36.657774
File: ipywidgets-example.ipynb - Size: 31761 bytes - Last Modified: 2024-08-01 21:38:36.658773
File: itchat.ipynb - Size: 3691 bytes - Last Modified: 2024-08-01 21:38:36.659777
File: local-maven.ipynb - Size: 0 bytes - Last Modified: 2024-08-07 12:56:07.192273
File: scholarly.ipynb - Size: 133429 bytes - Last Modified: 2024-08-01 21:38:36.660772
File: SlackDeveloperKitforPython.ipynb - Size: 61389 bytes - Last Modified: 2024-08-01 21:38:36.657774
File: wget-download-files.ipynb - Size: 4083 bytes - Last Modified: 2024-08-01 21:38:36.660772
File: win32.ipynb - Size: 3099 bytes - Last Modified: 2024-08-01 21:38:36.660772
