### 从一个文件夹中选择百分比的文件作为子集

In [87]:
import numpy as np
import os
import glob
from shutil import copyfile

In [27]:


def get_percent_items(items: list, percentage:float, seed=42):
    """
    Return a reproducible random sample holding a fraction of ``items``.

    The items are shuffled with a ``numpy.random.RandomState`` seeded by
    ``seed``; the first ``int(percentage * len(items))`` shuffled entries are
    kept and returned in ascending sorted order.

    Args:
        items: Entries to sample from (file paths in this notebook).
        percentage: Fraction of the entries to keep, between 0 and 1 inclusive.
        seed: Seed for the random state; the same seed and the same input
            order always select the same subset.

    Returns:
        A sorted list containing the selected entries.

    Raises:
        ValueError: If ``percentage`` is outside ``[0, 1]``.
    """
    # A negative fraction would become a negative slice bound and silently
    # return the wrong number of items, so reject bad fractions up front.
    if not 0 <= percentage <= 1:
        raise ValueError(
            f"percentage must be between 0 and 1, got {percentage!r}")

    random_state = np.random.RandomState(seed)

    # Shuffled copy of the input (a numpy array); the input list is untouched.
    random_items = random_state.permutation(items)

    # int() truncates, so the sample size is rounded down.
    sub_counts = int(percentage * len(random_items))
    sub_items = random_items[:sub_counts]
    sub_items.sort()
    return sub_items.tolist()

In [36]:
def get_sub_dirs(root_dir: str):
    """
    List the immediate sub-directories of ``root_dir``.

    Each returned path is ``root_dir`` joined with an entry name, in the
    order reported by ``os.listdir``; entries that are not directories are
    left out.
    """
    sub_dirs = []
    for entry in os.listdir(root_dir):
        candidate = os.path.join(root_dir, entry)
        if os.path.isdir(candidate):
            sub_dirs.append(candidate)
    return sub_dirs
    

In [23]:
def get_all_image_files(directory: str, extension: str="png"):
    """
    Return the files directly inside ``directory`` whose names end with
    ``extension``.

    Args:
        directory: Folder to search; sub-folders are not descended into.
        extension: Trailing part of the file name to match. It is used as a
            plain suffix (the pattern is ``*<extension>``), so no dot is
            inserted automatically.

    Returns:
        A sorted list of matching paths, each prefixed with ``directory``.
    """
    # Escape the folder part so characters such as "[" or "*" in a directory
    # name are matched literally instead of being read as glob syntax.
    pattern = os.path.join(glob.escape(directory), f"*{extension}")

    # glob gives no ordering guarantee; sort so that seeded sampling of the
    # result picks the same files on every run and platform.
    return sorted(glob.glob(pattern))

In [29]:

# Try the helpers on one sample folder: list its PNG files, then keep a
# seeded 40 % subset and show both results.
sample_dir = r"F:\workspace\empty_scan_test\testSet5.0_lit\03_04L01"
files = get_all_image_files(sample_dir)
print(type(files), len(files), files)

sub_files = get_percent_items(files, 0.4)
print(type(sub_files), sub_files)

<class 'list'> 8 ['F:\\workspace\\empty_scan_test\\testSet5.0_lit\\03_04L01\\frm-0001.png', 'F:\\workspace\\empty_scan_test\\testSet5.0_lit\\03_04L01\\frm-0002.png', 'F:\\workspace\\empty_scan_test\\testSet5.0_lit\\03_04L01\\frm-0003.png', 'F:\\workspace\\empty_scan_test\\testSet5.0_lit\\03_04L01\\frm-0010.png', 'F:\\workspace\\empty_scan_test\\testSet5.0_lit\\03_04L01\\frm-0011.png', 'F:\\workspace\\empty_scan_test\\testSet5.0_lit\\03_04L01\\frm-0012.png', 'F:\\workspace\\empty_scan_test\\testSet5.0_lit\\03_04L01\\frm-0013.png', 'F:\\workspace\\empty_scan_test\\testSet5.0_lit\\03_04L01\\frm-0014.png']
<class 'list'> ['F:\\workspace\\empty_scan_test\\testSet5.0_lit\\03_04L01\\frm-0001.png', 'F:\\workspace\\empty_scan_test\\testSet5.0_lit\\03_04L01\\frm-0002.png', 'F:\\workspace\\empty_scan_test\\testSet5.0_lit\\03_04L01\\frm-0012.png']


In [None]:
def save_file_list_to_new_directory(items: list, new_dir: "str | int" = 0):
    """
    Copy the listed files, in order, into ``new_dir``.

    The source files are only read. Each one is copied to ``new_dir`` under
    its own base name, so two sources sharing a base name overwrite each
    other in the destination.

    Args:
        items: Paths of the files to copy.
        new_dir: Destination directory; created (with parents) if missing.
            The default ``0`` is kept for signature compatibility and means
            "no destination given".

    Raises:
        ValueError: If ``new_dir`` is left at its default or is empty.
    """
    if not new_dir:
        raise ValueError("new_dir must name the destination directory")

    os.makedirs(new_dir, exist_ok=True)
    for src in items:
        # Never open the source for writing: that would truncate it.
        dst = os.path.join(new_dir, os.path.basename(src))
        copyfile(src, dst)

In [106]:
import functools
import pathlib
def get_sub_set_of_image(root_dir: str, percentage:float, out_dir: str):
    """
    Copy a fixed fraction of the images of every sub-folder into ``out_dir``.

    For each immediate sub-directory of ``root_dir`` the image files are
    listed, a seeded random fraction of them is selected, and the selected
    files are copied to ``out_dir`` under the same path they had relative to
    ``root_dir``. Progress is printed along the way.

    Args:
        root_dir: Root of the source data set.
        percentage: Fraction of the files to keep from each sub-directory.
        out_dir: Root of the destination; missing folders are created.
    """
    print("out:", out_dir)
    sub_dirs = get_sub_dirs(root_dir)
    for folder in sub_dirs:
        files = get_all_image_files(folder)
        sub_files = get_percent_items(files, percentage)

        # Re-root every selected file from root_dir to out_dir. The path is
        # taken relative to the root_dir argument (not a notebook global), so
        # the function works for any root it is called with.
        new_files = [os.path.join(out_dir, os.path.relpath(src, root_dir))
                     for src in sub_files]

        for src, dst in zip(sub_files, new_files):
            dst_dir = os.path.dirname(dst)
            print("dst_dir", dst_dir)
            # exist_ok makes a separate existence check unnecessary.
            os.makedirs(dst_dir, exist_ok=True)
            copyfile(src, dst)

        print("newfile:", new_files)

In [107]:
# Source root, destination root, and one sample file path under the source.
rootstr = r"F:\workspace\empty_scan_test\testSet5.0_lit"
new_rootstr = r"F:\workspace\empty_scan_test\testSet5.0_lit2"
fullstr = r"F:\workspace\empty_scan_test\testSet5.0_lit\03_04L01\1233.png"

# Splitting the full path on the root shows the part that follows the root.
fullstr.split(rootstr)
    

['', '\\03_04L01\\1233.png']

In [108]:
# Copy a 30 % sample of every sub-folder of the source root to the new root.
get_sub_set_of_image(root_dir=rootstr, percentage=0.3, out_dir=new_rootstr)

out: F:\workspace\empty_scan_test\testSet5.0_lit2
dst_dir F:\workspace\empty_scan_test\testSet5.0_lit2\03_04L01
dst_dir F:\workspace\empty_scan_test\testSet5.0_lit2\03_04L01
newfile: ['F:\\workspace\\empty_scan_test\\testSet5.0_lit2\\03_04L01\\frm-0002.png', 'F:\\workspace\\empty_scan_test\\testSet5.0_lit2\\03_04L01\\frm-0012.png']
dst_dir F:\workspace\empty_scan_test\testSet5.0_lit2\1.3.6.1.4.1.52026.48447242.6617.7.3.1.dcm_frms
dst_dir F:\workspace\empty_scan_test\testSet5.0_lit2\1.3.6.1.4.1.52026.48447242.6617.7.3.1.dcm_frms
dst_dir F:\workspace\empty_scan_test\testSet5.0_lit2\1.3.6.1.4.1.52026.48447242.6617.7.3.1.dcm_frms
dst_dir F:\workspace\empty_scan_test\testSet5.0_lit2\1.3.6.1.4.1.52026.48447242.6617.7.3.1.dcm_frms
dst_dir F:\workspace\empty_scan_test\testSet5.0_lit2\1.3.6.1.4.1.52026.48447242.6617.7.3.1.dcm_frms
dst_dir F:\workspace\empty_scan_test\testSet5.0_lit2\1.3.6.1.4.1.52026.48447242.6617.7.3.1.dcm_frms
dst_dir F:\workspace\empty_scan_test\testSet5.0_lit2\1.3.6.1.4.1.5

In [49]:
import functools

In [51]:
# Lazily map every path in `files` from the source root to the new root.
# map() passes one argument per item, so the lambda takes only the file path
# and reads the two roots from the enclosing scope.
func = map(lambda f: os.path.join(new_rootstr, os.path.relpath(f, rootstr)), files)

In [73]:
# Check whether this sample string begins with the platform path separator.
# NOTE(review): the literal looks pasted from a repr (doubled backslashes in
# a raw string, trailing quote character) - confirm that is intended.
path0 = r"\\03_04L01\\1233.png'"
path0.startswith(os.sep)

True

In [96]:
import pathlib
# Print the built-in help text for pathlib.Path.mkdir.
help(pathlib.Path.mkdir)

Help on function mkdir in module pathlib:

mkdir(self, mode=511, parents=False, exist_ok=False)
    Create a new directory at this given path.



In [71]:
# List the attribute names available on os.path (shown as the cell result).
dir(os.path)

['__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_abspath_fallback',
 '_get_bothseps',
 '_getfinalpathname',
 '_getfullpathname',
 '_getvolumepathname',
 'abspath',
 'altsep',
 'basename',
 'commonpath',
 'commonprefix',
 'curdir',
 'defpath',
 'devnull',
 'dirname',
 'exists',
 'expanduser',
 'expandvars',
 'extsep',
 'genericpath',
 'getatime',
 'getctime',
 'getmtime',
 'getsize',
 'isabs',
 'isdir',
 'isfile',
 'islink',
 'ismount',
 'join',
 'lexists',
 'normcase',
 'normpath',
 'os',
 'pardir',
 'pathsep',
 'realpath',
 'relpath',
 'samefile',
 'sameopenfile',
 'samestat',
 'sep',
 'split',
 'splitdrive',
 'splitext',
 'splitunc',
 'stat',
 'supports_unicode_filenames',
 'sys']