# File system interaction

In [1]:
from datetime import datetime
import fileinput
import os
import pathlib
from pathlib import Path
import re
import shutil
import stat
import tempfile

## Contents of directories & properties

Iterate over the contents of the current working directory, and for each notebook file (`.ipynb`), print its name, size, and last access time.

In [3]:
# Report name, size and last-access time of every notebook in the
# current working directory.
for entry in Path.cwd().iterdir():
    # skip directories and anything that is not a notebook file
    if not (entry.is_file() and entry.name.endswith('.ipynb')):
        continue
    info = entry.lstat()
    accessed = datetime.fromtimestamp(info.st_atime).strftime('%Y-%m-%d %H:%M:%S')
    print(f'{entry.name}: {info.st_size} bytes, last accessed: {accessed}')

shell_interaction.ipynb: 13615 bytes, last accessed: 2019-12-03 07:18:08
system_information.ipynb: 8870 bytes, last accessed: 2019-12-03 06:59:53
compressed_files.ipynb: 7857 bytes, last accessed: 2019-11-22 06:48:58
julia.ipynb: 5311 bytes, last accessed: 2019-12-03 07:54:28
filesystem_interaction.ipynb: 18180 bytes, last accessed: 2019-12-03 07:57:53


## File permissions

Create a new directory without raising an error if it already exists. Note that the mode is conventionally written as an octal literal (e.g. `0o700`).

In [None]:
# Scratch directory `tmp` below the current working directory; requested
# mode is owner-only (rwx------), and an existing directory is accepted.
tmp_dir = Path.cwd().joinpath('tmp')
tmp_dir.mkdir(exist_ok=True, mode=0o700)

More specifically, testing whether the current user can access a file is also quite useful.

In [None]:
# Ask the OS whether the current user may write in tmp_dir.
# Use the `os` module directly: `pathlib.os` is only an internal import of
# pathlib, not part of its documented API.
if os.access(tmp_dir, os.W_OK):
    print(f'current user can write in {tmp_dir.name}')

Make the access mode a bit more liberal and check the permissions.

In [None]:
# Relax the permissions to rwxr-xr-x, then show the resulting mode in octal.
# st_mode also carries the file-type bits, so more than the three
# permission digits are printed.
tmp_dir.chmod(0o755)
print(f'{tmp_dir.lstat().st_mode:#o}')

Detailed tests on access permissions can be written using the bitmasks defined in the `stat` module.

In [None]:
# Read the directory's st_mode once; the bitmask tests in the following
# cells are evaluated against this value.
mode = tmp_dir.lstat().st_mode

In [None]:
# S_IRWXU is the combined owner read/write/execute mask; the owner has full
# permissions only if all three bits are set.
if mode & stat.S_IRWXU == stat.S_IRWXU:
    print(f'owner has full permissions on {tmp_dir.name}')

In [None]:
# The group-write bit is cleared when masking with S_IWGRP yields zero.
if (mode & stat.S_IWGRP) == 0:
    print(f'group can not write in {tmp_dir.name}')

## Creating files & globbing

Creating some files in the `tmp` directory can be done by creating a `Path` object for each file, and opening it directly.

In [None]:
# Write README.md as a table of contents while creating the numbered data
# files it lists (data_001.txt ... data_005.txt).
readme = tmp_dir / 'README.md'
nr_files = 5
with readme.open('w') as readme_file:
    readme_file.write('# Contents\n\n')
    for i in range(1, nr_files + 1):
        file = tmp_dir / f'data_{i:03d}.txt'
        # each data file holds a single line identifying it
        file.write_text(f'data {i}\n')
        readme_file.write(f'  * `{file.name}`: data file {i}\n')

In [None]:
!cat tmp/README.md

In [None]:
# Show the first line of every .txt file in tmp_dir, prefixed by its name.
for txt_path in tmp_dir.glob('*.txt'):
    print(f'{txt_path.name}: ', end='')
    with txt_path.open('r') as handle:
        first_line = handle.readline().rstrip()
    print(first_line)

## Dissecting paths

Splitting a path into its various functional parts is of course OS dependent; `pathlib` lets you do this in a platform-independent way.

### Files

In [None]:
# final path component, i.e. the file name 'README.md'
readme.name

In [None]:
# file extension including the leading dot: '.md'
readme.suffix

In [None]:
# directory containing the file; readme was built as tmp_dir / 'README.md'
readme.parent

In [None]:
# drive letter or share name on Windows paths; empty string on POSIX paths
readme.drive

### Directories

In [None]:
# final path component of the directory: 'tmp'
tmp_dir.name

In [None]:
# 'tmp' contains no dot, so the suffix is the empty string
tmp_dir.suffix

In [None]:
# the directory that was the current working directory when tmp_dir was built
tmp_dir.parent

## Copying, moving, deleting

Some operations can be done using `pathlib` such as moving or deleting files.

In [None]:
!ls tmp

In [None]:
# delete README.md; raises FileNotFoundError if the file is already gone
readme.unlink()

In [None]:
!ls tmp

Renaming files is straightforward.

In [None]:
# Give every .txt file in tmp_dir the extension .dat instead.
# The matches are collected first so the directory is not modified while it
# is still being scanned.
for file in list(tmp_dir.glob('*.txt')):
    # with_suffix swaps only the final extension, whereas str.replace would
    # also rewrite a '.txt' occurring elsewhere in the file name
    new_file = file.with_suffix('.dat')
    file.rename(new_file)

In [None]:
!ls tmp

`pathlib` has no facilities for copying files, however `shutil` does.

In [None]:
# Make a .bak copy next to every .dat file in tmp_dir.
for file in tmp_dir.glob('*.dat'):
    # with_suffix swaps only the final extension, whereas str.replace would
    # also rewrite a '.dat' occurring elsewhere in the file name
    new_file = file.with_suffix('.bak')
    shutil.copy(file, new_file)

In [None]:
!ls tmp

Move the `.bak` files into a subdirectory `bak`.

In [None]:
# Collect all .bak files of tmp_dir in the subdirectory tmp_dir/bak.
bak_dir = tmp_dir / 'bak'
# exist_ok=True keeps the cell re-runnable, matching how tmp_dir is created
bak_dir.mkdir(exist_ok=True)
# snapshot the matches first: the loop moves entries out of the directory
# that is being globbed
for file in list(tmp_dir.glob('*.bak')):
    file.rename(bak_dir / file.name)

Print the sizes of all the files in `tmp`, and also compute the total size.

In [None]:
# Walk tmp_dir bottom-up (subdirectories before their parent), printing the
# size of each file, a subtotal per directory and the grand total.
total_size = 0
for dir_name, _subdirs, file_names in os.walk(tmp_dir, topdown=False):
    current_dir = Path(dir_name)
    print(f'{dir_name}:')
    dir_size = 0
    for file_name in file_names:
        file_size = (current_dir / file_name).lstat().st_size
        print(f'\t{file_name}: {file_size} bytes')
        dir_size += file_size
    print(f'\tTotal: {dir_size} bytes')
    total_size += dir_size
print(f'Total: {total_size} bytes')

`Path`'s `rmdir` method can be used to remove an empty directory, however, if the directory is not empty, the `rmtree` function in `shutil` can be used.

In [None]:
# recursively delete tmp_dir together with everything it still contains
shutil.rmtree(tmp_dir)

## Temporary files and directories, `fileinput`

Especially when running applications concurrently, it is important to ensure that names for temporary files are unique.  The module `tempfile` implements this. The temporary directory is created under the temporary directory (`/tmp` on typical Linux systems) and will be deleted with all its contents as soon as the end of the context is reached.  Files are created in that directory with random names, and are not deleted when they are closed (`delete=False`); they disappear only when the directory itself is removed. The default mode for temporary files is `w+b`; in this case we choose only `w` since the files persist after closing, and we open them for reading later.

In [None]:
# Demonstrate tempfile and fileinput: create a throw-away directory, fill it
# with uniquely named data files, list them, and sum every number they contain.
nr_files = 3
nr_lines = 4
# No explicit `dir`: tempfile then uses the platform's temporary directory
# (the one tempfile.gettempdir() reports), which keeps the cell portable.
with tempfile.TemporaryDirectory(prefix='data_') as tmp_dir:
    tmp_path = Path(tmp_dir)
    print(f'directory {tmp_dir} exists: {tmp_path.exists()}')
    # write the data files, use random names; delete=False keeps each file
    # on disk after it is closed so it can be reopened by name below
    for file_nr in range(nr_files):
        with tempfile.NamedTemporaryFile(mode='w', prefix='data_', suffix='.txt',
                                         dir=tmp_dir, delete=False) as tmp_file:
            for i in range(nr_lines):
                print(f'line {i + 1} of {file_nr + 1}', file=tmp_file)
    # for each data file, print the name, and, indented, all the lines
    for data_path in tmp_path.iterdir():
        print(data_path.name)
        with data_path.open('r') as file:
            for line in file:
                print(f'\t{line.rstrip()}')
    # treat all the data files as one input, and compute the sum of all the
    # numbers in the files; the context manager closes the input stream
    total = 0
    with fileinput.FileInput(files=tmp_path.glob('*.txt')) as merged_input:
        for line in merged_input:
            total += sum(map(int, re.findall(r'\d+', line)))
    print(f'total = {total}')
# the directory and its files are gone once the context has been left
print(f'directory {tmp_dir} exists: {Path(tmp_dir).exists()}')

## Miscellaneous

Get the current working directory.

In [None]:
# current working directory as a Path object
Path.cwd()

Get the current user's home directory.

In [None]:
# home directory of the current user as a Path object
Path.home()

Get the system's temporary directory.

In [None]:
# directory tempfile uses by default; returned as a str, not a Path
tempfile.gettempdir()