In [59]:
import shutil
from pathlib import Path

from more_itertools import take

## Useful Properties

![path](./path.png)

In [50]:
# Example path used to demonstrate the properties below. They are computed
# from the path string alone, so the file does not need to exist on disk.
p = Path("/home/avilay/mldata/criteo/kaggle/data.tar.gz")

In [51]:
# Tuple of the path's components; for this absolute path the root "/" comes first.
p.parts

('/', 'home', 'avilay', 'mldata', 'criteo', 'kaggle', 'data.tar.gz')

In [52]:
# The root portion of the path as a string ("/" for this absolute POSIX path).
p.root

'/'

In [53]:
# `parents` lists every ancestor directory, from the immediate parent up to the root.
for i, parent in enumerate(p.parents):
    print(f"[{i}]: {parent}")

[0]: /home/avilay/mldata/criteo/kaggle
[1]: /home/avilay/mldata/criteo
[2]: /home/avilay/mldata
[3]: /home/avilay
[4]: /home
[5]: /


In [54]:
# The immediate parent directory, returned as a Path (same as the first entry of `parents`).
p.parent

PosixPath('/home/avilay/mldata/criteo/kaggle')

In [55]:
# The final path component as a string, with all of its suffixes.
p.name

'data.tar.gz'

In [56]:
# Only the last extension of the name, leading dot included.
p.suffix

'.gz'

In [57]:
# Every extension of the name, in order, as a list of strings.
p.suffixes

['.tar', '.gz']

In [58]:
# The name with only its last suffix removed, so "data.tar.gz" becomes "data.tar".
p.stem

'data.tar'

## Useful Methods

These are just some of the methods that I think are useful for my day-to-day. There are other methods to mess around with the permissions, etc. that are not listed here.

### Shortcut Paths
There are two useful methods here to get the home directory and the current working directory. These are just getters and don't actually change the directory or anything.
  * `cwd()`
  * `home()`

There are also useful methods for expanding string paths with shortcuts in them.
  * `expanduser()`
  * `resolve()`

In [56]:
# Class method: returns the current user's home directory as a Path.
Path.home()

PosixPath('/Users/avilay')

In [57]:
# Class method: returns the current working directory as a Path.
Path.cwd()

PosixPath('/Users/avilay/projects/bitbucket/learn/learn-python')

In [None]:
# expanduser() swaps a leading "~" for the current user's home directory.
# It is an instance method, so it is called on the Path object itself.
datafile = "~/mldata/criteo/kaggle/data.tar.gz"
Path(datafile).expanduser()

PosixPath('/Users/avilay/mldata/criteo/kaggle/data.tar.gz')

In [None]:
# resolve() turns a path into an absolute one and collapses "." and ".."
# components along the way.
for p in (Path("."), Path("docs/../setup.py")):
    print(p.resolve())

/Users/avilay/projects/bitbucket/learn/learn-python
/Users/avilay/projects/bitbucket/learn/learn-python/setup.py


### List Contents
The most straightforward method is `iterdir`, which acts just like `ls` in that it does not recurse into subdirectories. A more useful method, however, is `glob`, which searches the path object with a glob pattern if it is a directory. If the path object is a file, then it returns an empty iterator. A related method, `match`, checks whether the path object itself matches a glob pattern.

  * `iterdir()`
  * `glob()`
  * `match()`

In [63]:
# `iterdir` is like `ls` and there is no recursive listing.
# It hands back an iterator, so `take` is used to pull just the first three entries.
curr_dir = Path.cwd()
take(3, curr_dir.iterdir())

[PosixPath('/Users/avilay/projects/bitbucket/learn/learn-python/functions.py'),
 PosixPath('/Users/avilay/projects/bitbucket/learn/learn-python/Generators.pdf'),
 PosixPath('/Users/avilay/projects/bitbucket/learn/learn-python/binary.py')]

In [62]:
# `glob` yields only the directory entries that match the pattern, here the *.py files.
curr_dir = Path.cwd()
take(3, curr_dir.glob("*.py"))


[PosixPath('/Users/avilay/projects/bitbucket/learn/learn-python/functions.py'),
 PosixPath('/Users/avilay/projects/bitbucket/learn/learn-python/binary.py'),
 PosixPath('/Users/avilay/projects/bitbucket/learn/learn-python/db.py')]

In [61]:
# Calling `glob` on a regular file is not an error; it simply yields nothing.
this_file = Path.cwd() / "pathstuff.ipynb"
take(3, this_file.glob("*.py"))

[]

In [64]:
# Build the path under the home directory, show it, then test it against a glob pattern.
# A relative pattern such as "**/data*" is matched against the right-hand end of the path.
datafile = Path.home().joinpath("mldata", "criteo", "kaggle", "data.tar.gz")
print(datafile)
datafile.match("**/data*")

/Users/avilay/mldata/criteo/kaggle/data.tar.gz


True

### File or Directory Information
There are a number of methods that give info on the path object.
  * `exists()`
  * `is_file()` and `is_dir()`
  * `is_absolute()` and `is_relative_to()`
  * `stat()`

In [None]:
# exists() reports whether the path points to something that is actually on disk.
this_file = Path.cwd() / "pathstuff.ipynb"
this_file.exists()

True

In [None]:
# is_file() and is_dir() tell a regular file apart from a directory.
this_file = Path.cwd() / "pathstuff.ipynb"
curr_dir = Path.cwd()
print(this_file.is_file(), curr_dir.is_dir())

True True


In [None]:
# A path that starts at the root is absolute; one that does not is relative.
for p in map(Path, ("/a/b", "a/b")):
    print(p.is_absolute())

True
False


In [None]:
# is_relative_to() checks whether the path sits underneath the given base path.
p = Path("/etc/passwd")
print(*(p.is_relative_to(base) for base in ("/etc", "/usr")), sep="\n")

True
False


In [None]:
# stat() returns an os.stat_result carrying the file's mode, size, timestamps, etc.
this_file = Path.cwd() / "pathstuff.ipynb"
this_file.stat()

os.stat_result(st_mode=33188, st_ino=7612687, st_dev=16777231, st_nlink=1, st_uid=501, st_gid=20, st_size=5050, st_atime=1662144154, st_mtime=1662144154, st_ctime=1662144154)

### Modify Path Objects
`Path` objects are immutable, but the class provides a number of convenience methods to derive a modified path. The modified path is of course returned as a new object.
  * `with_name()`
  * `with_stem()`
  * `with_suffix()`

In [None]:
# Each with_*() call leaves `this_file` untouched and returns a new Path in which
# the whole name, only the stem, or only the suffix has been swapped out.
this_file = Path.cwd() / "pathstuff.ipynb"
print(
    this_file,
    this_file.with_name("scratch.ipynb"),
    this_file.with_stem("scratch"),
    this_file.with_suffix(".py"),
    sep="\n",
)

/Users/avilay/projects/bitbucket/learn/learn-python/pathstuff.ipynb
/Users/avilay/projects/bitbucket/learn/learn-python/scratch.ipynb
/Users/avilay/projects/bitbucket/learn/learn-python/scratch.ipynb
/Users/avilay/projects/bitbucket/learn/learn-python/pathstuff.py


### File I/O
There are a few convenience methods for file I/O that avoid calling the builtin `open()` function directly, plus `mkdir()` for creating directories.
  * `open()`
  * `read_text()` and `write_text()`
  * `read_bytes()` and `write_bytes()`
  * `mkdir()`

In [None]:
# Path.open() is used like the builtin open(), so the file can be streamed line by line.
py = Path.cwd() / "dates.py"
with py.open("rt") as f:
    for line in f:
        # rstrip() drops the trailing newline but keeps leading indentation,
        # so indented source lines are printed exactly as written.
        print(line.rstrip())

from datetime import datetime, timezone
import pytz

localtime = datetime.now(timezone.utc).astimezone(pytz.timezone('US/Pacific')).isoformat()
print(localtime)


In [None]:
# read_text() returns the whole file as one `str`, line breaks included.
# The matching write_text() method writes a string out to the file.
py = Path.cwd() / "dates.py"
txt = py.read_text()
print(type(txt), txt)

<class 'str'> from datetime import datetime, timezone
import pytz

localtime = datetime.now(timezone.utc).astimezone(pytz.timezone('US/Pacific')).isoformat()
print(localtime)



In [None]:
# read_bytes() returns the raw file contents as `bytes`, which are decoded
# as UTF-8 below before printing.
# The matching write_bytes() method writes bytes out to the file.
py = Path.cwd() / "dates.py"
bts = py.read_bytes()
print(type(bts), bts.decode("utf-8"))

<class 'bytes'> from datetime import datetime, timezone
import pytz

localtime = datetime.now(timezone.utc).astimezone(pytz.timezone('US/Pacific')).isoformat()
print(localtime)



In [72]:
# mkdir(parents=True) also creates any missing intermediate directories;
# exist_ok=False makes it raise FileExistsError if the leaf directory is already there.
tmp_dir = Path.home() / "temp" / "path" / "stuff"
print(f"Does {tmp_dir} exist? (expected False): ", tmp_dir.exists())
tmp_dir.mkdir(parents=True, exist_ok=False)
print(f"Does {tmp_dir} exist afer mkdir? (expected True): ", tmp_dir.exists())

# Cleanup: delete ~/temp/path and everything under it. shutil.rmtree is the portable
# stand-in for `rm -fr`; ignore_errors=True mirrors the -f flag.
shutil.rmtree(tmp_dir.parent, ignore_errors=True)

Does /Users/avilay/temp/path/stuff exist? (expected False):  False
Does /Users/avilay/temp/path/stuff exist afer mkdir? (expected True):  True


### Moving and Removing

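  * `replace()` and `rename()`: Both of these move a file from one location to another. They differ only when the target already exists: `replace` overwrites it unconditionally on every platform, whereas `rename` silently overwrites it on Unix but raises `FileExistsError` on Windows, which makes `replace` the more portable choice. In my experiments this did not seem to work with directories.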

  * `unlink`: This is used to delete a file. It will not work on a directory.
  
  * `rmdir`: This is used to remove empty directories.

In [80]:
# Use `replace` to move a file from one location to another and rename it in the process.

# Create the destination directory first; without it `replace` will error out.
tmp_dir = Path.home() / "temp" / "path" / "stuff"
tmp_dir.mkdir(parents=True, exist_ok=True)

# Create a sample file in the current working directory.
hello = Path.cwd() / "hello.txt"
hello.write_text("this is a sample hello world file.\n")
print(f"Does {hello} exist? (expected True): ", hello.exists())

# Now move it to the ~/temp/path/stuff directory as copy_of_hello.txt
hello_copy = tmp_dir / "copy_of_hello.txt"
print(f"Does {hello_copy} exist? (expected False) ", hello_copy.exists())  # Verify that the destination file does not exist.
# This moves the file; an existing destination file would be overwritten.
# NOTE: like os.replace, this may fail when source and destination are on different filesystems.
hello.replace(hello_copy)
print(f"Does {hello} still exist? (expected False): ", hello.exists())
print(f"Does {hello_copy} exist now? (expected True): ", hello_copy.exists())


Does /Users/avilay/projects/bitbucket/learn/learn-python/hello.txt exist? (expected True):  True
Does /Users/avilay/temp/path/stuff/copy_of_hello.txt exist? (expected False)  False
Does /Users/avilay/projects/bitbucket/learn/learn-python/hello.txt still exist? (expected False):  False
Does /Users/avilay/temp/path/stuff/copy_of_hello.txt exist now? (expected True):  True


In [81]:
# unlink() deletes the file at `hello_copy` (defined in an earlier cell).
hello_copy.unlink()
print(f"Does {hello_copy} exist after unlinking? (expected False): ", hello_copy.exists())

Does /Users/avilay/temp/path/stuff/copy_of_hello.txt exist after unlinking? (expected False):  False


In [82]:
# If I try to remove ~/temp/path it won't work because it still contains `stuff`.
# rmdir() only removes empty directories, so the OSError is caught and printed.
try:
    tmp_dir.parent.rmdir()
except OSError as err:
    print(err)

[Errno 66] Directory not empty: '/Users/avilay/temp/path'


In [83]:
# I'll have to first remove `stuff` and then I can remove `path`:
# directories are deleted innermost first, because each must be empty when rmdir() runs.
tmp_dir.rmdir()
tmp_dir.parent.rmdir()
print(f"Does {tmp_dir} exist? (expected False): ", tmp_dir.exists())
print(f"Does {tmp_dir.parent} exist? (expected False): ", tmp_dir.parent.exists())

Does /Users/avilay/temp/path/stuff exist? (expected False):  False
Does /Users/avilay/temp/path exist? (expected False):  False
