# 文件系统相关操作

## pathlib
* The pathlib module was introduced in Python 3.4
* 比 string 类型的路径提供更灵活的功能
* cheat sheet: https://github.com/chris1610/pbpython/blob/master/extras/Pathlib-Cheatsheet.pdf


## shutil
* 文件夹操作


### pathlib：如果文件夹不存在则创建


In [4]:
from pathlib import Path
import pathlib

# Make sure the target directory exists: missing parent directories are
# created, and an already-existing directory is not treated as an error.
save_dir = "./test_dir"
Path(save_dir).mkdir(exist_ok=True, parents=True)


In [11]:
# Show the current working directory and the user's home directory.
print(Path.cwd())
print(Path.home())

# Build a path below the home directory, one component at a time.
print(pathlib.Path.home() / 'python' / 'scripts' / 'test.py')

/mnt/c/code/learn/pylearn/pybasic
/home/jeffye
/home/jeffye/python/scripts/test.py


# useful functions
* .read_text(): open the path in text mode and return the contents as a string.
* .read_bytes(): open the path in binary/bytes mode and return the contents as a bytestring.
* .write_text(): open the path and write string data to it.
* .write_bytes(): open the path in binary/bytes mode and write data to it.
* .resolve(): make the path absolute, resolving any symlinks.

In [15]:
# Reading files: once through a file handle, once with Path.read_text().

path = pathlib.Path.cwd().joinpath('test.txt')

# Keep only the lines that start with '#', stripped of surrounding whitespace.
headers = []
with path.open() as fid:
    for line in fid:
        if line.startswith('#'):
            headers.append(line.strip())
print('\n'.join(headers))

# read_text() opens, reads and closes the file in a single call.
print('full text', path.read_text())

# The resolved parent of the file is compared with the working directory.
print(path.resolve().parent == pathlib.Path.cwd())

## title
full text this is a test file.
## title
True


* .name: the file name without any directory
* .parent: the directory containing the file, or the parent directory if path is a directory
* .stem: the file name without the suffix
* .suffix: the file extension
* .anchor: the part of the path before the directories

In [18]:
# Print each component of `path` next to a label describing it.
for label, value in (
    ('path', path),
    ('stem', path.stem),
    ('suffix', path.suffix),
    ('parent', path.parent),
    ('parent of parent', path.parent.parent),
    ('anchor', path.anchor),
):
    print(label, value)

path /mnt/c/code/learn/pylearn/pybasic/test.txt
stem test
suffix .txt
parent /mnt/c/code/learn/pylearn/pybasic
parent of parent /mnt/c/code/learn/pylearn
anchor /


In [26]:
# Move or replace a file by swapping its suffix.

# with_suffix() only builds a new path object; its result is discarded here.
path.with_suffix('.py')

# Rename the file to the .md name, then rename it back to .txt.
markdown_path = path.with_suffix('.md')
path.replace(markdown_path)  # change the suffix
markdown_path.replace(path.with_suffix('.txt'))

In [5]:
# Display a Directory Tree

def tree(directory):
    """Print an indented listing of everything below ``directory``.

    The first line shows ``directory`` itself. Every entry found by a
    recursive glob is then printed in sorted order, indented by four spaces
    per level of nesting relative to ``directory``.

    Args:
        directory: a ``pathlib.Path`` pointing at the root of the listing.
    """
    print(f'+ {directory}')
    entries = sorted(directory.rglob('*'))
    for entry in entries:
        # Number of path components below the root gives the nesting level.
        relative_parts = entry.relative_to(directory).parts
        indent = '    ' * len(relative_parts)
        print(f'{indent}+ {entry.name}')

# Show the tree for the current working directory.
tree(directory=pathlib.Path.cwd())

+ /mnt/d/code/learn/pylearn/pybasic
    + .ipynb_checkpoints
        + profile-checkpoint.ipynb
        + yaml config-checkpoint.ipynb
        + 基本操作实例-checkpoint.ipynb
    + profile.ipynb
    + profile_test.out
    + profile_test.py
    + test.txt
    + test_dir
    + yaml config.ipynb
    + 基本操作实例.ipynb


## Find the Last Modified File


In [38]:
from datetime import datetime

directory = pathlib.Path.cwd()

# Pair every entry with its modification time; max() then picks the pair with
# the largest timestamp (equal timestamps fall back to comparing the paths).
stamped_entries = [(f.stat().st_mtime, f) for f in directory.iterdir()]
time, file_path = max(stamped_entries)
print(datetime.fromtimestamp(time), file_path)


# List the entries in the home directory whose name contains a dot.
directory = pathlib.Path.home()

file_list = [*directory.glob('*.*')]
print(file_list)

2021-10-10 00:16:20.399517 /mnt/c/code/learn/pylearn/pybasic/基本操作实例.ipynb
[PosixPath('/home/jeffye/.astropy'), PosixPath('/home/jeffye/.bashrc'), PosixPath('/home/jeffye/.bash_aliases'), PosixPath('/home/jeffye/.bash_history'), PosixPath('/home/jeffye/.bash_logout'), PosixPath('/home/jeffye/.byobu'), PosixPath('/home/jeffye/.cache'), PosixPath('/home/jeffye/.cheat'), PosixPath('/home/jeffye/.condarc'), PosixPath('/home/jeffye/.config'), PosixPath('/home/jeffye/.inputrc'), PosixPath('/home/jeffye/.ipynb_checkpoints'), PosixPath('/home/jeffye/.ipython'), PosixPath('/home/jeffye/.jupyter'), PosixPath('/home/jeffye/.landscape'), PosixPath('/home/jeffye/.local'), PosixPath('/home/jeffye/.motd_shown'), PosixPath('/home/jeffye/.pip'), PosixPath('/home/jeffye/.profile'), PosixPath('/home/jeffye/.python_history'), PosixPath('/home/jeffye/.ssh'), PosixPath('/home/jeffye/.sudo_as_admin_successful'), PosixPath('/home/jeffye/.viminfo'), PosixPath('/home/jeffye/.wget-hsts'), PosixPath('/home/jeffye/

## Create a Unique File Name

In [41]:


def unique_path(directory, name_pattern):
    """Return the first path built from ``name_pattern`` that does not exist.

    The pattern is formatted with a counter starting at 1 and counting
    upwards, e.g. ``'test{:03d}.txt'`` yields ``test001.txt``,
    ``test002.txt`` and so on. The first candidate not present in
    ``directory`` is returned. Nothing is created on disk.

    Args:
        directory: a ``pathlib.Path`` in which the name must be unique.
        name_pattern: a ``str.format`` pattern that takes the counter as its
            single positional argument.

    Returns:
        ``directory / name_pattern.format(n)`` for the smallest ``n >= 1``
        whose path does not exist.

    Raises:
        ValueError: if the pattern produces the same existing path for two
            consecutive counters (it has no counter placeholder), in which
            case no unique name can ever be found.
    """
    counter = 0
    previous = None
    while True:
        counter += 1
        path = directory / name_pattern.format(counter)
        if not path.exists():
            return path
        # Same existing candidate twice in a row: the counter does not affect
        # the name, so looping further would never terminate.
        if path == previous:
            raise ValueError(
                f'name_pattern {name_pattern!r} does not vary with the counter '
                f'and {path} already exists'
            )
        previous = path

# Ask for the first unused "testNNN.txt" name in the working directory.
path = unique_path(directory=pathlib.Path.cwd(), name_pattern='test{:03d}.txt')
print(path)

/mnt/c/code/learn/pylearn/pybasic/test001.txt


## Check that a directory exists, then glob with multiple extensions

In [15]:
# Collect the image files directly inside a directory, accepting several
# extensions at once.
input_path = Path("/mnt/d/code/image/hedian-demo/data/test/220425")
image_suffixes = {".png", ".jpg", ".JPG", ".PNG"}
file_list = []
if input_path.exists():
    if input_path.is_dir():
        # glob("*") yields the direct children; keep those whose suffix is
        # one of the accepted image extensions, as resolved paths.
        file_list = [p.resolve() for p in input_path.glob("*")
                     if p.suffix in image_suffixes]
        print(len(file_list), file_list)
    else:
        # The path exists but is not a directory: report the path itself.
        # The comprehension variable `p` is not bound in this branch, so
        # printing it would raise NameError.
        print(input_path)
# PosixPath as str: str(p.resolve())

13 [PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/10_20220423184855509278_seq1387.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/12_20220423184937333633_seq1675.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/12_20220423184937729933_seq1689.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/15_20220423185025918203_seq2188.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/18_20220423185112230272_seq2561.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/1_20220423184640035090_seq0.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/1_20220423184640073726_seq1_rtmp.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/2_20220423184657550659_seq260_rtmp.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/2_20220423184701886412_seq412.png'), PosixPath('/mnt/d/code/image/hedian-demo/data/test/220425/6_20220423184800381243_seq831.png'), PosixPath('/mnt/d/code/image/h

# shutil

In [None]:
# move all .txt files to the archive folder

import glob
import os
import shutil

# The destination folder must exist before files are moved into it;
# exist_ok=True makes re-running this cell safe.
os.makedirs('archive', exist_ok=True)

for file_name in glob.glob('*.txt'):  # glob.glob returns a list of matching path names
    new_path = os.path.join('archive', file_name)
    shutil.move(file_name, new_path)

# collections Counter

In [29]:
# counting files

import collections

# Tally the entries of the working directory by suffix ('' means no suffix).
all_suffixes = (entry.suffix for entry in pathlib.Path.cwd().iterdir())
print(collections.Counter(all_suffixes))

# Same tally, restricted to names matching the '*.t*' glob pattern.
t_suffixes = (entry.suffix for entry in pathlib.Path.cwd().glob('*.t*'))
print('漂亮', collections.Counter(t_suffixes))

Counter({'': 2, '.txt': 1, '.ipynb': 1})
漂亮 Counter({'.txt': 1})
