# pathlib
* The pathlib module was introduced in Python 3.4
* 比string 类型path 能提供更灵活功能
  * cheat sheet: https://github.com/chris1610/pbpython/blob/master/extras/Pathlib-Cheatsheet.pdf
<img decoding="async" src="./pathlib_cheat.png" width="100%">


## pathlib  创建文件夹，如果不存在


In [25]:
from pathlib import Path
import pathlib

from sympy import print_rcode

save_dir = "./test_dir"
p = Path(save_dir)
p.mkdir(parents=True, exist_ok=True)
print(str(p.absolute().as_posix()))

# 判断是否是绝对路径
print(p.is_absolute(), p.absolute().is_absolute())

# parent

for par in p.absolute().parents:
    print(par)
    
print('parts', p.absolute().parts, type(p.absolute().parts[-1]))

parts = p.absolute().parts

print('joined:', p.joinpath(* parts[-2:]))

d:/code/learn/pylearn/pybasic/test_dir
False True
d:\code\learn\pylearn\pybasic
d:\code\learn\pylearn
d:\code\learn
d:\code
d:\
parts ('d:\\', 'code', 'learn', 'pylearn', 'pybasic', 'test_dir') <class 'str'>
joined: test_dir\pybasic\test_dir


In [3]:
### get current directory

print(Path.cwd())
print(Path.home())

print(pathlib.Path.home().joinpath('python', 'scripts', 'test.py'))

print(pathlib.Path.home().joinpath('python', 'scripts', 'test.py').absolute)

print(pathlib.Path.home().joinpath('python', 'scripts', 'test.py').suffix)

d:\code\learn\pylearn\pybasic
C:\Users\73915
C:\Users\73915\python\scripts\test.py
<bound method Path.absolute of WindowsPath('C:/Users/73915/python/scripts/test.py')>
.py


## useful functions
* .read_text(): open the path in text mode and return the contents as a string.
* .read_bytes(): open the path in binary/bytes mode and return the contents as a bytestring.
* .write_text(): open the path and write string data to it.
* .write_bytes(): open the path in binary/bytes mode and write data to it.
* .resolve() method will find the full path. 

In [15]:
# Reading and Writing Files

path = pathlib.Path.cwd() / 'test.txt'
with open(path, mode='r') as fid:
    headers = [line.strip() for line in fid if line.startswith('#')]
print('\n'.join(headers))

print('full text', path.read_text())

print(path.resolve().parent == pathlib.Path.cwd())

## title
full text this is a test file.
## title
True


* .name: the file name without any directory
* .parent: the directory containing the file, or the parent directory if path is a directory
* .stem: the file name without the suffix
* .suffix: the file extension
* .anchor: the part of the path before the directories

In [18]:
print('path', path)
print('stem', path.stem)
print('suffix', path.suffix)
print('parent', path.parent)
print('parent of parent', path.parent.parent)
print('anchor', path.anchor)

path /mnt/c/code/learn/pylearn/pybasic/test.txt
stem test
suffix .txt
parent /mnt/c/code/learn/pylearn/pybasic
parent of parent /mnt/c/code/learn/pylearn
anchor /


In [26]:
# move or replace file

path.with_suffix('.py')
path.replace(path.with_suffix('.md')) # 改后缀
path.with_suffix('.md').replace(path.with_suffix('.txt'))

In [5]:
# Display a Directory Tree

def tree(directory):
    print(f'+ {directory}')
    for path in sorted(directory.rglob('*')):
        depth = len(path.relative_to(directory).parts)
        spacer = '    ' * depth
        print(f'{spacer}+ {path.name}')

tree(pathlib.Path.cwd())

+ /mnt/d/code/learn/pylearn/pybasic
    + .ipynb_checkpoints
        + profile-checkpoint.ipynb
        + yaml config-checkpoint.ipynb
        + 基本操作实例-checkpoint.ipynb
    + profile.ipynb
    + profile_test.out
    + profile_test.py
    + test.txt
    + test_dir
    + yaml config.ipynb
    + 基本操作实例.ipynb


## Find the Last Modified File


In [None]:
from datetime import datetime

directory = pathlib.Path.cwd()

time, file_path = max((f.stat().st_mtime, f) for f in directory.iterdir())
print(datetime.fromtimestamp(time), file_path)
directory = pathlib.Path.home()

file_list = list(directory.glob('*.*'))
print(file_list)

## dir modifier time and iter sub-dirs

In [18]:
from pathlib import Path
from datetime import datetime

dir_path = Path(r'D:/wps/剪印/auto_generate/girlsteam/3a069423751f43fb6b8fcaabe3e3d32f-[AIGC GIRLS]The 027 Girls')

type(datetime.fromtimestamp(dir_path.stat().st_mtime)) # 修改时间

c_time = datetime.fromtimestamp(dir_path.stat().st_ctime)  # 创建时间

print(f"修改时间：{datetime.fromtimestamp(dir_path.stat().st_mtime)}， 创建时间：{c_time}")

修改时间：2023-11-13 08:55:17， 创建时间：2023-11-12 22:51:22.661104


In [None]:
from pathlib import Path
from datetime import datetime, timedelta

dir_path = Path(r"D:\wps\剪印\auto_generate\girlsteam/")

sub_dirs = [(f, datetime.fromtimestamp(f.stat().st_mtime)) for f in dir_path.iterdir() if f.is_dir() and 'AIGC' in f.stem]

start_date = datetime(2023, 11, 13, 0, 0, 0)
end_date = datetime(2023, 11, 14, 23, 59, 59)
# end_data = start_date + timedelta(hours=1)
end_date = datetime(2023, 11, 14, 3, 0, 0)

# sub_dirs_filtered = filter(lambda x: start_date <= x[1] <= end_date, sub_dirs)

sub_dirs_filtered = filter(lambda x: x[1] - start_date <= timedelta(days=2), sub_dirs)
print(sorted(sub_dirs_filtered, key=lambda x: x[1], reverse=False))

# print(list(sub_dirs_filtered))


## Create a Unique File Name

In [41]:


def unique_path(directory, name_pattern):
    counter = 0
    while True:
        counter += 1
        path = directory / name_pattern.format(counter)
        if not path.exists():
            return path

path = unique_path(pathlib.Path.cwd(), 'test{:03d}.txt')
print(path)

/mnt/c/code/learn/pylearn/pybasic/test001.txt


## dir exist and then glob with multiple extensions

In [15]:
input_path = Path("/mnt/d/code/image/hedian-demo/data/test/220425")
file_list = []
if input_path.exists():
    if input_path.is_dir():
        # for a in input_path.glob("*"):
        #     print(a)
        file_list = [p.resolve() for p in input_path.glob("*") if
                 p.suffix in {".png", ".jpg", ".JPG", ".PNG"}]
        print(len(file_list), file_list)
    else:
        print(input_path)
# PosixPath as str: str(p.resolve())

13 [PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/10_20220423184855509278_seq1387.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/12_20220423184937333633_seq1675.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/12_20220423184937729933_seq1689.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/15_20220423185025918203_seq2188.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/18_20220423185112230272_seq2561.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/1_20220423184640035090_seq0.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/1_20220423184640073726_seq1_rtmp.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/2_20220423184657550659_seq260_rtmp.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/2_20220423184701886412_seq412.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/6_20220423184800381243_seq831.png'), PosixPath('/mnt/d/code/image/h

## List all subdirectories

In [None]:
from pathlib import Path
 
rootdir = 'path/to/dir'
for path in Path(rootdir).iterdir():
    if path.is_dir():
        print(path)

# shutil
* sh utils

In [None]:
# move all .txt file to achive fold

import glob
import os
import shutil

for file_name in glob.glob('*.txt'): # return a list of 
    new_path = os.path.join('archive', file_name)
    shutil.move(file_name, new_path)

# collections Counter

In [29]:
# counting files

import collections

print(collections.Counter(p.suffix for p in pathlib.Path.cwd().iterdir()))

print('漂亮', collections.Counter(p.suffix for p in pathlib.Path.cwd().glob('*.t*')))

Counter({'': 2, '.txt': 1, '.ipynb': 1})
漂亮 Counter({'.txt': 1})


# inspect


In [1]:
def iam_a_function():
    pass

import inspect 
inspect.isfunction(iam_a_function)

""" useful code snippet

if inspect.isfunction(path_or_read_func):
    assert lazy is not None, "lazy can not be None in custom mode."
    kwargs['name'] = name
    kwargs['data_files'] = data_files
    kwargs['splits'] = splits
    custom_kwargs = {}
    for name in inspect.signature(path_or_read_func).parameters.keys():
        if name in kwargs.keys():
            custom_kwargs[name] = kwargs[name]

    reader_instance = SimpleBuilder(lazy=lazy, read_func=path_or_read_func)
    return reader_instance.read(**custom_kwargs)
"""


True

# glob

In [7]:
import more_itertools as mit


iterable = list(range(1, 26))
for c in mit.divide(6, iterable):
    print(c)

[1, 2, 3, 4, 5]
[6, 7, 8, 9]
[10, 11, 12, 13]
[14, 15, 16, 17]
[18, 19, 20, 21]
[22, 23, 24, 25]
