-
Notifications
You must be signed in to change notification settings - Fork 3
python manage files
ghdrako edited this page Jun 10, 2023
·
2 revisions
from pathlib import Path
# Create the working folder and fill it with one small text file per
# (subject, extension) pair, e.g. data/subject_123.dat.
data_folder = Path("data")
# exist_ok=True keeps the snippet re-runnable: a bare mkdir() raises
# FileExistsError when "data" is left over from a previous run.
data_folder.mkdir(exist_ok=True)
assert data_folder.exists()

subject_ids = [123, 124, 125]
extensions = ["config", "dat", "txt"]
for subject_id in subject_ids:
    for extension in extensions:
        filename = f"subject_{subject_id}.{extension}"
        filepath = data_folder / filename  # Creates a file path
        with open(filepath, "w") as file:
            file.write(f"It's the file {filename}.")
When you create a file path using `directory_path / filename`, the operation is operating-system agnostic: pathlib inserts the correct path separator for you, so the same code runs unchanged on Windows, macOS, or Linux.
This cross-platform compatibility is another advantage of using pathlib instead of the os module, where you may have to build paths from raw strings whose separators are platform dependent.
# Find every .dat file directly inside the data folder.
data_folder = Path("data")
data_files = data_folder.glob("*.dat")  # Creates a generator object
print("Data files:", data_files)  # prints the generator itself, not the paths
for data_file in data_files:
    print(f"Processing file: {data_file}")
    # applicable data processing steps here

# The first loop exhausted the generator, so glob again before iterating
# a second time -- this time in sorted order.
data_files = data_folder.glob("*.dat")
for data_file in sorted(data_files):  # sort generator
    print(f"Processing file: {data_file}")
    # applicable data processing steps here
# Move every file whose name contains a subject ID out of the data folder
# and into that subject's own folder under subjects/.
subject_ids = [123, 124, 125]
data_folder = Path("data")
for subject_id in subject_ids:
    subject_folder = Path(f"subjects/subject_{subject_id}")
    subject_folder.mkdir(parents=True, exist_ok=True)  # Creates the subject folder
    # sorted() takes a snapshot of the matches up front, so files are not
    # renamed out of a directory that is still being iterated, and the
    # processing order is deterministic.
    for subject_file in sorted(data_folder.glob(f"*{subject_id}*")):
        filename = subject_file.name
        target_path = subject_folder / filename  # Constructs the target path
        _ = subject_file.rename(target_path)
        print(f"Moving {filename} to {target_path}")
import shutil
# Start over: delete the whole subjects tree, then rebuild it by copying
# (instead of moving) each subject's files out of the data folder.
shutil.rmtree("subjects")  # Removes a folder and its contents

subject_ids = [123, 124, 125]
data_folder = Path("data")
for subject_id in subject_ids:
    subject_folder = Path(f"subjects/subject_{subject_id}")
    subject_folder.mkdir(parents=True, exist_ok=True)
    for subject_file in data_folder.glob(f"*{subject_id}*"):
        filename = subject_file.name
        target_path = subject_folder / filename
        # copy() leaves the source file in place, unlike Path.rename().
        _ = shutil.copy(subject_file, target_path)
        print(f"Copying {filename} to {target_path}")
We use the rmtree function to remove a folder and its contents, because rmtree doesn’t care whether the directory is empty. By contrast, Path.rmdir only removes empty directories, so calling it on a directory that still has contents raises an OSError:
# Deliberate failure: Path.rmdir() refuses to remove a directory that
# still has contents.
Path("subjects").rmdir()
# ERROR: OSError: [Errno 66] Directory not empty: 'subjects'
# NOTE: the errno number is platform specific (66 is the macOS value;
# Linux reports 39 for the same condition).
# Delete every .txt file in the data folder, reporting whether each file
# existed before and after the call to unlink().
data_folder = Path("data")
for file in data_folder.glob("*.txt"):
    before = file.exists()
    file.unlink()
    after = file.exists()
    print(f"Deleting {file}, existing? {before} -> {after}")
from pathlib import Path
# For every .dat file below subjects/, locate the .config file with the
# same stem in the same directory and open the two together.
subjects_folder = Path("subjects")
# the pattern involves **/ , meaning that the files reside in subdirectories
for dat_path in subjects_folder.glob("**/*.dat"):
    subject_dir = dat_path.parent
    filename = dat_path.stem  # file name without its final extension
    config_path = subject_dir / f"{filename}.config"
    print(f"{subject_dir} & {filename} -> {config_path}")

    dat_exists = dat_path.exists()
    config_exists = config_path.exists()
    # open() raises FileNotFoundError if the matching .config is missing.
    with open(dat_path) as dat_file, open(config_path) as config_file:
        print(f"Process {filename}: dat? {dat_exists}, config? {config_exists}\n")
        # process the subject's data
def process_data_using_size_cutoff(min_size, max_size, *, data_folder="data"):
    """Report which .dat files have a size within [min_size, max_size].

    Prints one line per ``*.dat`` file found directly inside *data_folder*,
    in sorted path order, labelling the file "Good" when its size in bytes
    lies in the closed interval and "Bad" otherwise.

    Args:
        min_size: Smallest acceptable file size in bytes (inclusive).
        max_size: Largest acceptable file size in bytes (inclusive).
        data_folder: Directory to scan; defaults to ``"data"``, the folder
            this function originally had hard-coded.
    """
    # sorted() makes the report order independent of directory listing order.
    for dat_path in sorted(Path(data_folder).glob("*.dat")):
        filename = dat_path.name
        size = dat_path.stat().st_size
        # Inclusive bounds, matching the "[min, max]" notation in the output.
        if min_size <= size <= max_size:
            print(f"{filename}, Good; {size}, within [{min_size}, {max_size}]")
        else:
            print(f"{filename}, Bad; {size}, outside [{min_size}, {max_size}]")
import time
# Look up when one data file was last modified and show the timestamp both
# raw and in human-readable form.
subject_dat_path = Path("data") / "subject_123.dat"
file_stats = subject_dat_path.stat()
modified_time = file_stats.st_mtime
readable_time = time.ctime(modified_time)
print(f"Modification time: {modified_time} -> {readable_time}")
# output: Modification time: 1652123144.9999998 -> Mon May 9 14:05:44 2022
Test