# os


In [1]:
import os

## os.path

NOTE: a lot of these functions don't care if the path actually exists - they're just doing string transformations.


In [38]:
# Expand a leading ~ into the current user's home directory.
print('[expanduser]')
print(os.path.expanduser('~/somefolder'))
print()

# Get absolute path from relative path (resolved against the working directory)
print('[abspath]')
print(os.path.abspath('.'))
print(os.path.abspath('./somefolder/somefile.txt'))
print()

# Get relative path from absolute path (second argument is the start folder)
print('[relpath]')
print(os.path.relpath('/folder1/folder2/folder3', '/folder1'))
print()

# Get the parent folder of a file or folder
print('[dirname]')
print(os.path.dirname('/folder1/folder2/folder3/myfile.txt'))
print(os.path.dirname('/folder1/folder2/folder3'))
print()

# Get the last component of a path (file or folder name).
# The extension is NOT removed - use splitext for that.
print('[basename]')
print(os.path.basename('/folder1/folder2/folder3/myfile.txt'))
print(os.path.basename('/folder1/folder2/folder3/myfile'))
print(os.path.basename('/folder1'))
print()

# [splitext] Split path into a (path without extension, extension) tuple.
# The extension keeps its leading dot and is '' when there is none.
print(os.path.splitext('/folder1/folder2/folder3/myfile.txt'))
print(os.path.splitext('/folder1/folder2/folder3/myfile'))
print()

# Construct a path in an OS-independent way
print('[join]')
print(os.path.join('folder1', 'folder2', 'myfile.txt'))
print(os.path.join('/', 'folder1'))
print(os.path.join(*['folder1', 'folder2']))  # unpack a list of parts
print()

# File and folder existence (these DO look at the real file system)
print('[existence]')
print(os.path.exists('/folder1/folder2/madeup.txt'))
print(os.path.isfile('/folder1/folder2/madeup.txt'))
print(os.path.isfile('.'))
print(os.path.isdir('.'))
print()

[expanduser]
/Users/davidpetrofsky/somefolder

[abspath]
/Users/davidpetrofsky/repos/snippets/python
/Users/davidpetrofsky/repos/snippets/python/somefolder/somefile.txt

[relpath]
folder2/folder3

[dirname]
/folder1/folder2/folder3
/folder1/folder2

[basename]
myfile.txt
myfile
folder1

('/folder1/folder2/folder3/myfile', '.txt')
('/folder1/folder2/folder3/myfile', '')

[join]
folder1/folder2/myfile.txt
/folder1
folder1/folder2

[existence]
False
False
False
True



## os.walk

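Visits each directory in a folder **recursively** in **top-down, depth-first search** order (a directory is visited before its subdirectories). The order of the entries inside a directory is whatever the operating system returns - it is **not alphabetical** (see the output below) - so sort `dirnames` in place if you need a deterministic order. Returns paths as **strings** (not some kind of special file system objects like other APIs).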

A given subfolder in the tree will appear twice:

- as a subdirectory entry in its parent folder
- as a dirpath when it is visited directly

It can take **relative paths** but **not paths with ~** (expand those first with `os.path.expanduser`).

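Only `dirpath` is relative to whatever you passed in, so if you pass in a full path, you get full paths in `dirpath`, and if you pass in a relative path, you get relative paths in `dirpath`. The entries in `dirnames` and `filenames` are always bare names with no path prefix; use `os.path.join(dirpath, name)` to build a usable path.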


In [43]:
import os

for dirpath, dirnames, filenames in os.walk('.'):
    # dirpath is prefixed with the '.' that was passed in;
    # dirnames and filenames are bare names inside dirpath.
    print('Current directory:', dirpath)
    print('Subdirectories:', dirnames)
    print('Files:', filenames)
    print()

Current directory: .
Subdirectories: ['basics', 'cli_subfolder', 'subfolder', '__pycache__', 'packagefolder', '.ipynb_checkpoints', 'testsfolder']
Files: ['CLI.ipynb', 'Basics.ipynb', 'Modules and Packages.ipynb', 'Implementing Special Objects.ipynb', 'Type Hints.ipynb', 'Collections Tips.ipynb', 'Docstring.ipynb', 'Conda and Pip.ipynb', 'mymodule.py', 'Built-In Libraries.ipynb', 'Unit Tests.ipynb', 'Caveats.ipynb']

Current directory: ./basics
Subdirectories: []
Files: ['file.txt', 'append.txt', 'binary', 'output.txt']

Current directory: ./cli_subfolder
Subdirectories: []
Files: ['magic.py', 'argv0.py', 'argparse_example.py', 'file.py', 'args.py', 'direct.py', 'module.py', 'name.py', 'commands.py', 'oneline.py']

Current directory: ./subfolder
Subdirectories: ['__pycache__']
Files: ['submodule.py']

Current directory: ./subfolder/__pycache__
Subdirectories: []
Files: ['submodule.cpython-310.pyc']

Current directory: ./__pycache__
Subdirectories: []
Files: ['mymodule.cpython-310.pyc']

In [44]:
import os

for dirpath, dirnames, filenames in os.walk(os.path.abspath('.')):
    # dirpath is now prefixed with the absolute path that was passed in;
    # dirnames and filenames are still bare names inside dirpath.
    print('Current directory:', dirpath)
    print('Subdirectories:', dirnames)
    print('Files:', filenames)
    print()

Current directory: /Users/davidpetrofsky/repos/snippets/python
Subdirectories: ['basics', 'cli_subfolder', 'subfolder', '__pycache__', 'packagefolder', '.ipynb_checkpoints', 'testsfolder']
Files: ['CLI.ipynb', 'Basics.ipynb', 'Modules and Packages.ipynb', 'Implementing Special Objects.ipynb', 'Type Hints.ipynb', 'Collections Tips.ipynb', 'Docstring.ipynb', 'Conda and Pip.ipynb', 'mymodule.py', 'Built-In Libraries.ipynb', 'Unit Tests.ipynb', 'Caveats.ipynb']

Current directory: /Users/davidpetrofsky/repos/snippets/python/basics
Subdirectories: []
Files: ['file.txt', 'append.txt', 'binary', 'output.txt']

Current directory: /Users/davidpetrofsky/repos/snippets/python/cli_subfolder
Subdirectories: []
Files: ['magic.py', 'argv0.py', 'argparse_example.py', 'file.py', 'args.py', 'direct.py', 'module.py', 'name.py', 'commands.py', 'oneline.py']

Current directory: /Users/davidpetrofsky/repos/snippets/python/subfolder
Subdirectories: ['__pycache__']
Files: ['submodule.py']

Current directory: 

## File-System Functions in os


In [73]:
import os

# Working Directory
# chdir changes it for the whole process, so remember the original
# and switch back when done.
print('[working directory]')
cwd = os.getcwd()
print(cwd)
os.chdir('..')
print(os.getcwd())
os.chdir(cwd)
print()

# Get Files and folders in directory
# defaults to working directory
# just names - no path prefixes (even when an absolute path is passed in)
print('[listdir]')
print(os.listdir())
print(os.listdir('.'))
print(os.listdir(os.path.abspath('.')))
print()

# Creating and Deleting Folders
print('[creating and deleting folders]')
os.mkdir('mkdir_folder')  # fails if already exists
print(os.path.exists('mkdir_folder'))
os.rmdir('mkdir_folder')  # fails if not empty
print(os.path.exists('mkdir_folder'))
os.makedirs('makedirs_folder/subfolder')  # multiple levels
print(os.path.exists('makedirs_folder/subfolder'))
# rmdir only removes one (empty) folder at a time, so delete innermost first
os.rmdir('makedirs_folder/subfolder')
os.rmdir('makedirs_folder')  # can't remove a folder until empty

[working directory]
/Users/davidpetrofsky/repos/snippets/python
/Users/davidpetrofsky/repos/snippets

[listdir]
['CLI.ipynb', 'Basics.ipynb', 'basics', 'Modules and Packages.ipynb', 'cli_subfolder', 'subfolder', 'Implementing Special Objects.ipynb', 'Type Hints.ipynb', 'Collections Tips.ipynb', 'Docstring.ipynb', '__pycache__', 'packagefolder', 'Conda and Pip.ipynb', 'mymodule.py', '.ipynb_checkpoints', 'Built-In Libraries.ipynb', 'Unit Tests.ipynb', 'testsfolder', 'Caveats.ipynb']
['CLI.ipynb', 'Basics.ipynb', 'basics', 'Modules and Packages.ipynb', 'cli_subfolder', 'subfolder', 'Implementing Special Objects.ipynb', 'Type Hints.ipynb', 'Collections Tips.ipynb', 'Docstring.ipynb', '__pycache__', 'packagefolder', 'Conda and Pip.ipynb', 'mymodule.py', '.ipynb_checkpoints', 'Built-In Libraries.ipynb', 'Unit Tests.ipynb', 'testsfolder', 'Caveats.ipynb']
['CLI.ipynb', 'Basics.ipynb', 'basics', 'Modules and Packages.ipynb', 'cli_subfolder', 'subfolder', 'Implementing Special Objects.ipynb', 

## Environment Variables


In [80]:
import os

# Get a variable
# Note os.environ is a dictionary-like mapping and you don't use $
# (indexing a variable that is not set raises KeyError)
print(os.environ['PATH'])
print()

# Get with a default if not present
print(os.getenv('NOTHERE', 0))

# Setting environment variable
# Prefer assigning to os.environ: os.putenv() changes the process
# environment but does NOT update os.environ, so 'NEWTHING2' will not
# be visible through os.environ / os.getenv afterwards.
os.environ['NEWTHING'] = 'hello'
os.putenv('NEWTHING2', 'hello2')

/Users/davidpetrofsky/miniforge3/envs/ai/bin:/Users/davidpetrofsky/miniforge3/condabin:/usr/local/bin:/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/System/Cryptexes/App/usr/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Library/TeX/texbin

0


# shutil

Compared to `os`, `shutil` has **higher-level** file system functionality.


Some of the key functions in the shutil module include:

- `shutil.copy(src, dst)`: Copies a file from the source path to the destination path.
- `shutil.move(src, dst)`: Moves a file or directory from the source path to the destination path.
- `shutil.rmtree(path)`: Deletes a directory and all its contents recursively.
- `shutil.make_archive(base_name, format, root_dir)`: Creates an archive file (e.g., ZIP or TAR) from a directory.
- `shutil.unpack_archive(archive_file, extract_dir)`: Extracts files from an archive file into a directory.


# sys


In [91]:
import sys

# Top-level call and commandline args
# (in Jupyter this shows the kernel launcher and its arguments)
print(sys.argv)

# Exiting with a status code (0 means success, non-zero signals an error)
# sys.exit(0)  # Not going to call this for obvious reasons

# Standard I/O
print(sys.stdin)
print(sys.stdout)
print(sys.stderr)
sys.stdout.write('hello\n')  # same as print('hello')

# Module Search Path
print(sys.path)

# OS info
print(sys.platform)

# Python version info
print(sys.version)

# Currently imported modules
#print(sys.modules) # not running because in jupyter it's too long

['/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/site-packages/ipykernel_launcher.py', '-f', '/Users/davidpetrofsky/Library/Jupyter/runtime/kernel-74c214b1-9a3b-4ca4-9a9d-69fe3c1440db.json']
<_io.TextIOWrapper name='<stdin>' mode='r' encoding='utf-8'>
<ipykernel.iostream.OutStream object at 0x103d8a170>
<ipykernel.iostream.OutStream object at 0x103d8a1a0>
hello
['/Users/davidpetrofsky/repos/snippets/python', '/Users/davidpetrofsky/repos/projects', '/Users/davidpetrofsky/miniforge3/envs/ai/lib/python310.zip', '/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10', '/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/lib-dynload', '', '/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/site-packages']
darwin
3.10.10 | packaged by conda-forge | (main, Mar 24 2023, 20:12:31) [Clang 14.0.6 ]


# glob


In [93]:
import glob

# NOTE: without recursive=True, '**' behaves just like '*', so this only
# matches .py files exactly one folder below the working directory
# (top-level and deeper files are not returned - see the output).
# Pass recursive=True to make '**' match any number of folder levels.
glob.glob('**/*.py')

['cli_subfolder/magic.py',
 'cli_subfolder/argv0.py',
 'cli_subfolder/argparse_example.py',
 'cli_subfolder/file.py',
 'cli_subfolder/args.py',
 'cli_subfolder/direct.py',
 'cli_subfolder/module.py',
 'cli_subfolder/name.py',
 'cli_subfolder/commands.py',
 'cli_subfolder/oneline.py',
 'subfolder/submodule.py',
 'packagefolder/__init__.py',
 'testsfolder/my_tests_mocking_general.py',
 'testsfolder/my_tests_mocking_return.py',
 'testsfolder/my_tests_mocking_fake.py',
 'testsfolder/my_tests_mocking.py',
 'testsfolder/my_tests.py',
 'testsfolder/my_tests_tf.py',
 'testsfolder/mytests_with_members.py',
 'testsfolder/mytests_with_output.py',
 'testsfolder/my_tests_asserts.py']

# re

Regular expressions


In [99]:
import re

# Strip a leading retweet marker: "RT" at the very start of the text
# plus all of the whitespace that follows it.
tweet = 'RT   hi this is a tweet'
leading_rt = re.compile(r'^RT\s+')
clean_tweet = leading_rt.sub('', tweet)

print(clean_tweet)

hi this is a tweet


In [100]:
# Collect every run of word characters, in order of appearance.
text = 'this is some text'
word_pattern = re.compile(r'\w+')
words = word_pattern.findall(text)

print(words)

['this', 'is', 'some', 'text']


# string


In [104]:
import string

# All ASCII letters: lowercase a-z followed by uppercase A-Z
print(string.ascii_letters)
print()
# The ASCII punctuation characters
print(string.punctuation)

abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~


# random


In [115]:
import random

# Seed the generator so the results below are reproducible
random.seed(42)
print(random.randint(0, 100))  # integer; BOTH endpoints are inclusive
print(random.random())  # float [0, 1)

l = list(range(10))
random.shuffle(l)  # destructive: shuffles the list in place
print(l)

81
0.11133106816568039
[7, 6, 2, 9, 0, 5, 1, 8, 3, 4]


# math


In [129]:
import math

print(math.sqrt(4))  # result is a float
print(math.floor(3.8))  # rounds down, result is an int
print(math.ceil(3.2))  # rounds up, result is an int
print(math.exp(2))  # e base, i.e. e ** 2
print(math.log(2))  # ln (natural log)
print(math.log(2, 2))  # 2 base (second argument is the base)
print(math.cos(2. * math.pi))  # trig functions take radians
print(math.cos(math.radians(360)))  # convert degrees to radians first
print(math.pi)
print(math.e)
# approximate float comparison
# can adjust tolerances with params if needed
print(math.isclose(0.3, 0.3))

2.0
3
4
7.38905609893065
0.6931471805599453
1.0
1.0
1.0
3.141592653589793
2.718281828459045
True


# collections


### defaultdict


In [134]:
import collections

# A missing key is created on first access with int() == 0,
# so it can be incremented without checking for it first.
d = collections.defaultdict(int)
print(d['noexist'])
for step in range(2):
    d['noexist'] += 1
    print(d['noexist'])

0
1
2


### Counter


In [143]:
import collections
import random

# Seed so the shuffled order below is reproducible
random.seed(42)

# just shuffling to show order doesn't matter
values = [1, 1, 1, 2, 2, 3, 5]
random.shuffle(values)
print(values)

# dictionary of counts for values
c = collections.Counter(values)
print(c)
print(c[1])
print(c[4])  # nonexistent just has 0 count

# filter to highest n counts
# list of (val, count) tuples in descending order of count
print(c.most_common(2))

# value with highest count
# (first tuple of the list, then the value part of that tuple)
print(c.most_common(1)[0][0])

# total count
print(sum(c.values()))

[1, 2, 2, 1, 5, 1, 3]
Counter({1: 3, 2: 2, 5: 1, 3: 1})
3
0
[(1, 3), (2, 2)]
1
7


### namedtuple


In [145]:
import collections

# Define a tuple type whose positions also have names
Point = collections.namedtuple('Point', ['x', 'y'])

# Instances can be built positionally or by keyword
point1, point2 = Point(10, 20), Point(x=30, y=40)

# They unpack like any other tuple
(x1, y1), (x2, y2) = point1, point2

# Each field is also reachable by its name
print(point1.x, point1.y, point2.x, point2.y, sep='\n')

10
20
30
40


# pickle

**Built-in types** support this already.

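Most custom classes can be pickled as-is (the instance's attributes are saved). To customize what gets saved and restored on a custom type (e.g. to leave out a member that can't be pickled), implement **\_\_getstate\_\_** and **\_\_setstate\_\_** to virtualize the object's members/state as a dictionary.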


In [147]:
import pickle

d = {'a': 1, 'b': 2, 'c': 3}
# Pickle data is bytes, so the file is opened in binary mode ('wb' / 'rb').
with open('pickletest', 'wb') as f:
    pickle.dump(d, f)  # serialize d into the file
with open('pickletest', 'rb') as f:
    # Only unpickle data you trust: loading a pickle can run arbitrary code.
    e = pickle.load(f)

# e is an equal copy of d rebuilt from the file.
# NOTE: the 'pickletest' file is left behind in the working directory.
print(e)

{'a': 1, 'b': 2, 'c': 3}


# copy

**Built-in types** support this already.

Custom classes can be copied by default as well. To customize how a custom type is copied, you need to implement **\_\_copy\_\_** and **\_\_deepcopy\_\_**.


In [151]:
import copy

# without copy: every slot of layer2 is the very same list object as
# layer1, so a later change to layer1 shows up in all of them
layer1 = [1, 2, 3]
layer2 = [layer1] * 3
layer1.append(4)
print(layer2)

# with copy: every slot holds its own independent copy,
# so the later change to layer1 does not affect layer2
layer1 = [1, 2, 3]
layer2 = [copy.deepcopy(layer1) for _ in range(3)]
layer1.append(4)
print(layer2)

[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]
[[1, 2, 3], [1, 2, 3], [1, 2, 3]]


# time

Times are in **seconds** but as **floats**.


In [154]:
import time

# time.time() gives the current time in seconds as a float
start = time.time()
time.sleep(1)  # pause for one second
stop = time.time()

# Elapsed seconds; comes out slightly above the requested sleep
print(stop - start)

1.00462007522583


# asyncio


In [5]:
import asyncio


# Note the annotated return type is still the synchronous one.
# The 'async' keyword in front (part of the Python language, not of
# asyncio) makes this a coroutine function: the caller has to await
# the call to get the int.
async def my_async_fn() -> int:
    """Return the constant 10 when awaited."""
    return 10


# An async fn can call another.
async def my_calling_fn() -> int:
    """Await my_async_fn(), print the value it produced, and return it."""
    # Use await and treat as a synchronous value.
    val = await my_async_fn()
    print(val)
    return val


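# This is how you'd call a top-level async function
# to kick off the whole chain from a regular script.
# Note that asyncio.run() takes the coroutine object, so the
# function has to be called: my_calling_fn(), not my_calling_fn.
# But you can't do it in a Jupyter Notebook, because the notebook
# already has an event loop running; in a notebook cell you can
# write `val = await my_calling_fn()` directly instead.
# You're only supposed to call asyncio.run() once
# per program.
#
# val = asyncio.run(my_calling_fn())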

## Other Useful asyncio things

- `async with` to get a context asynchronously and then continue
- `asyncio.Lock` for alternative to `threading.Lock` that is friendly to async operations
- `asyncio.gather()` to wait on multiple async things
