# os


In [1]:
import os

## os.path

NOTE: a lot of these functions don't care if the path actually exists - they're just doing string transformations.


In [38]:
# Sample paths reused by the demos below.
deep_file = '/folder1/folder2/folder3/myfile.txt'
deep_folder = '/folder1/folder2/folder3'
no_ext_file = '/folder1/folder2/folder3/myfile'
missing_file = '/folder1/folder2/madeup.txt'

# Expand a leading ~ into the current user's home directory
print('[expanduser]')
print(os.path.expanduser('~/somefolder'))
print()

# Turn relative paths into absolute ones
print('[abspath]')
for relative in ('.', './somefolder/somefile.txt'):
    print(os.path.abspath(relative))
print()

# Express a path relative to a chosen starting folder
print('[relpath]')
print(os.path.relpath(deep_folder, start='/folder1'))
print()

# Get the parent folder of a file or folder
print('[dirname]')
for target in (deep_file, deep_folder):
    print(os.path.dirname(target))
print()

# Get the last component of a path (any extension is kept)
print('[basename]')
for target in (deep_file, no_ext_file, '/folder1'):
    print(os.path.basename(target))
print()

# Split a path into (everything before the extension, extension)
for target in (deep_file, no_ext_file):
    print(os.path.splitext(target))
print()

# Construct a path in an OS-independent way.
# The last entry is a list to show that the parts can be unpacked with *.
print('[join]')
for parts in (('folder1', 'folder2', 'myfile.txt'), ('/', 'folder1'), ['folder1', 'folder2']):
    print(os.path.join(*parts))
print()

# File and folder existence
print('[existence]')
print(os.path.exists(missing_file))
print(os.path.isfile(missing_file))
print(os.path.isfile('.'))
print(os.path.isdir('.'))
print()

[expanduser]
/Users/davidpetrofsky/somefolder

[abspath]
/Users/davidpetrofsky/repos/snippets/python
/Users/davidpetrofsky/repos/snippets/python/somefolder/somefile.txt

[relpath]
folder2/folder3

[dirname]
/folder1/folder2/folder3
/folder1/folder2

[basename]
myfile.txt
myfile
folder1

('/folder1/folder2/folder3/myfile', '.txt')
('/folder1/folder2/folder3/myfile', '')

[join]
folder1/folder2/myfile.txt
/folder1
folder1/folder2

[existence]
False
False
False
True



## os.walk

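Visits each directory under a folder **recursively**, **top-down and depth-first** by default. The order of the entries within a directory is whatever the operating system returns (it is not guaranteed to be alphabetical); sort `dirnames` in place if you need a fixed order. Returns paths as **strings** (not some kind of special file system objects like other APIs).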

A given subfolder in the tree will appear twice:

- as a subdirectory entry in its parent folder
- as a dirpath when it is visited directly

It can take **relative paths** but **not paths with ~**.

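Only `dirpath` carries a path prefix, and it is built from whatever you passed in: if you pass in a full path, you get full paths, and if you pass in a relative path, you get relative paths. The entries in `dirnames` and `filenames` are bare names, so use `os.path.join(dirpath, name)` to get a usable path.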


In [43]:
import os

# Walking a relative root: each dirpath is printed relative to .
for dirpath, dirnames, filenames in os.walk('.'):
    print(f'Current directory: {dirpath}')
    print(f'Subdirectories: {dirnames}')
    print(f'Files: {filenames}')
    print()

Current directory: .
Subdirectories: ['basics', 'cli_subfolder', 'subfolder', '__pycache__', 'packagefolder', '.ipynb_checkpoints', 'testsfolder']
Files: ['CLI.ipynb', 'Basics.ipynb', 'Modules and Packages.ipynb', 'Implementing Special Objects.ipynb', 'Type Hints.ipynb', 'Collections Tips.ipynb', 'Docstring.ipynb', 'Conda and Pip.ipynb', 'mymodule.py', 'Built-In Libraries.ipynb', 'Unit Tests.ipynb', 'Caveats.ipynb']

Current directory: ./basics
Subdirectories: []
Files: ['file.txt', 'append.txt', 'binary', 'output.txt']

Current directory: ./cli_subfolder
Subdirectories: []
Files: ['magic.py', 'argv0.py', 'argparse_example.py', 'file.py', 'args.py', 'direct.py', 'module.py', 'name.py', 'commands.py', 'oneline.py']

Current directory: ./subfolder
Subdirectories: ['__pycache__']
Files: ['submodule.py']

Current directory: ./subfolder/__pycache__
Subdirectories: []
Files: ['submodule.cpython-310.pyc']

Current directory: ./__pycache__
Subdirectories: []
Files: ['mymodule.cpython-310.pyc']

In [44]:
import os

# Walking an absolute root: each dirpath is printed as an absolute path
absolute_root = os.path.abspath('.')
for dirpath, dirnames, filenames in os.walk(absolute_root):
    print(f'Current directory: {dirpath}')
    print(f'Subdirectories: {dirnames}')
    print(f'Files: {filenames}')
    print()

Current directory: /Users/davidpetrofsky/repos/snippets/python
Subdirectories: ['basics', 'cli_subfolder', 'subfolder', '__pycache__', 'packagefolder', '.ipynb_checkpoints', 'testsfolder']
Files: ['CLI.ipynb', 'Basics.ipynb', 'Modules and Packages.ipynb', 'Implementing Special Objects.ipynb', 'Type Hints.ipynb', 'Collections Tips.ipynb', 'Docstring.ipynb', 'Conda and Pip.ipynb', 'mymodule.py', 'Built-In Libraries.ipynb', 'Unit Tests.ipynb', 'Caveats.ipynb']

Current directory: /Users/davidpetrofsky/repos/snippets/python/basics
Subdirectories: []
Files: ['file.txt', 'append.txt', 'binary', 'output.txt']

Current directory: /Users/davidpetrofsky/repos/snippets/python/cli_subfolder
Subdirectories: []
Files: ['magic.py', 'argv0.py', 'argparse_example.py', 'file.py', 'args.py', 'direct.py', 'module.py', 'name.py', 'commands.py', 'oneline.py']

Current directory: /Users/davidpetrofsky/repos/snippets/python/subfolder
Subdirectories: ['__pycache__']
Files: ['submodule.py']

Current directory: 

## File-System Functions in os


In [73]:
import os

# Working directory: remember it, hop to the parent, then come back
print('[working directory]')
cwd = os.getcwd()
print(cwd)
os.chdir('..')
print(os.getcwd())
os.chdir(cwd)
print()

# List the files and folders in a directory.
# With no argument the working directory is listed.
# Entries are bare names - no path prefixes.
print('[listdir]')
print(os.listdir())
for listed in ('.', os.path.abspath('.')):
    print(os.listdir(listed))
print()

# Creating and deleting folders
print('[creating and deleting folders]')
single_folder = 'mkdir_folder'
os.mkdir(single_folder)  # fails if already exists
print(os.path.exists(single_folder))
os.rmdir(single_folder)  # fails if not empty
print(os.path.exists(single_folder))

nested_folder = 'makedirs_folder/subfolder'
os.makedirs(nested_folder)  # creates every missing level
print(os.path.exists(nested_folder))
# A folder can only be removed once it is empty, so delete the innermost one first
os.rmdir(nested_folder)
os.rmdir('makedirs_folder')

[working directory]
/Users/davidpetrofsky/repos/snippets/python
/Users/davidpetrofsky/repos/snippets

[listdir]
['CLI.ipynb', 'Basics.ipynb', 'basics', 'Modules and Packages.ipynb', 'cli_subfolder', 'subfolder', 'Implementing Special Objects.ipynb', 'Type Hints.ipynb', 'Collections Tips.ipynb', 'Docstring.ipynb', '__pycache__', 'packagefolder', 'Conda and Pip.ipynb', 'mymodule.py', '.ipynb_checkpoints', 'Built-In Libraries.ipynb', 'Unit Tests.ipynb', 'testsfolder', 'Caveats.ipynb']
['CLI.ipynb', 'Basics.ipynb', 'basics', 'Modules and Packages.ipynb', 'cli_subfolder', 'subfolder', 'Implementing Special Objects.ipynb', 'Type Hints.ipynb', 'Collections Tips.ipynb', 'Docstring.ipynb', '__pycache__', 'packagefolder', 'Conda and Pip.ipynb', 'mymodule.py', '.ipynb_checkpoints', 'Built-In Libraries.ipynb', 'Unit Tests.ipynb', 'testsfolder', 'Caveats.ipynb']
['CLI.ipynb', 'Basics.ipynb', 'basics', 'Modules and Packages.ipynb', 'cli_subfolder', 'subfolder', 'Implementing Special Objects.ipynb', 

## Environment Variables


In [80]:
import os

# Look up a variable by name (no leading $).
# os.environ is indexed like a dictionary.
path_value = os.environ['PATH']
print(path_value)
print()

# Look up with a fallback value that is returned when the variable is not set
print(os.getenv('NOTHERE', 0))

# Setting environment variables.
# Assigning through os.environ is the preferred way: the mapping and the
# process environment are updated together.
os.environ['NEWTHING'] = 'hello'
# os.putenv changes the process environment but does not update os.environ,
# so a later os.environ / os.getenv lookup will not see NEWTHING2.
os.putenv('NEWTHING2', 'hello2')

/Users/davidpetrofsky/miniforge3/envs/ai/bin:/Users/davidpetrofsky/miniforge3/condabin:/usr/local/bin:/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/System/Cryptexes/App/usr/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Library/TeX/texbin

0


# shutil

Compared to `os`, `shutil` has **higher-level** file system functionality.


Some of the key functions in the shutil module include:

- `shutil.copy(src, dst)`: Copies a file from the source path to the destination path.
- `shutil.move(src, dst)`: Moves a file or directory from the source path to the destination path.
- `shutil.rmtree(path)`: Deletes a directory and all its contents recursively.
- `shutil.make_archive(base_name, format, root_dir)`: Creates an archive file (e.g., ZIP or TAR) from a directory.
- `shutil.unpack_archive(archive_file, extract_dir)`: Extracts files from an archive file into a directory.


# sys


In [3]:
import sys

# The top-level script that was launched, followed by its command-line args
print(sys.argv)

# Exiting with an exit code
# sys.exit(0)  # Not going to call this for obvious reasons

# Standard I/O streams
for stream in (sys.stdin, sys.stdout, sys.stderr):
    print(stream)
sys.stdout.write('hello\n')  # same as print('hello')

# Module search path
print(sys.path)

# OS info
print(sys.platform)

# Python version info
print(sys.version)

# Currently imported modules
#print(sys.modules) # not running because in jupyter it's too long

# print() can be pointed at a specific stream,
# eg. sys.stdout, sys.stderr
print('hello', file=sys.stdout)

['/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/site-packages/ipykernel_launcher.py', '-f', '/Users/davidpetrofsky/Library/Jupyter/runtime/kernel-a264e3c9-dde1-44d0-8aef-f7cc5b648e3c.json']
<_io.TextIOWrapper name='<stdin>' mode='r' encoding='utf-8'>
<ipykernel.iostream.OutStream object at 0x10844afb0>
<ipykernel.iostream.OutStream object at 0x10844afe0>
hello
['/Users/davidpetrofsky/repos/snippets/python', '/Users/davidpetrofsky/repos/projects', '/Users/davidpetrofsky/repos/snippets/python', '/Users/davidpetrofsky/miniforge3/envs/ai/lib/python310.zip', '/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10', '/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/lib-dynload', '', '/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/site-packages']
darwin
3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:25:13) [Clang 14.0.6 ]
hello


# glob


In [93]:
import glob

# NOTE: without recursive=True, '**' behaves like a plain '*', so this only
# matches .py files exactly one folder below the working directory.
# Pass recursive=True to make '**' span any number of nested folders.
glob.glob('**/*.py')

['cli_subfolder/magic.py',
 'cli_subfolder/argv0.py',
 'cli_subfolder/argparse_example.py',
 'cli_subfolder/file.py',
 'cli_subfolder/args.py',
 'cli_subfolder/direct.py',
 'cli_subfolder/module.py',
 'cli_subfolder/name.py',
 'cli_subfolder/commands.py',
 'cli_subfolder/oneline.py',
 'subfolder/submodule.py',
 'packagefolder/__init__.py',
 'testsfolder/my_tests_mocking_general.py',
 'testsfolder/my_tests_mocking_return.py',
 'testsfolder/my_tests_mocking_fake.py',
 'testsfolder/my_tests_mocking.py',
 'testsfolder/my_tests.py',
 'testsfolder/my_tests_tf.py',
 'testsfolder/mytests_with_members.py',
 'testsfolder/mytests_with_output.py',
 'testsfolder/my_tests_asserts.py']

# re

Regular expressions


In [99]:
import re

tweet = 'RT   hi this is a tweet'

# Strip a leading "RT" marker together with the whitespace that follows it
retweet_prefix = re.compile(r'^RT\s+')
clean_tweet = retweet_prefix.sub('', tweet)

print(clean_tweet)

hi this is a tweet


In [100]:
text = 'this is some text'

# Collect every run of word characters, in order of appearance
words = [match.group() for match in re.finditer(r'\w+', text)]

print(words)

['this', 'is', 'some', 'text']


# string


In [104]:
import string

# The letters, an empty line, then the punctuation characters
print(string.ascii_letters, '', string.punctuation, sep='\n')

abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~


# random


In [115]:
import random

random.seed(42)  # fixed seed so the values below are reproducible
print(random.randint(0, 100))  # integer in [0, 100] - both ends inclusive
print(random.random())  # float in [0, 1)

l = [*range(10)]
random.shuffle(l)  # shuffles the list in place
print(l)

81
0.11133106816568039
[7, 6, 2, 9, 0, 5, 1, 8, 3, 4]


# math


In [129]:
import math

print(math.sqrt(4))  # result is a float
print(math.floor(3.8))  # round down to an int
print(math.ceil(3.2))  # round up to an int
print(math.exp(2))  # e base
print(math.log(2))  # ln
print(math.log(2, 2))  # 2 base
print(math.cos(2. * math.pi))  # trig functions take radians
print(math.cos(math.radians(360)))  # convert degrees to radians first
print(math.pi)
print(math.e)
# Tolerant float comparison: 0.1 + 0.2 is not exactly equal to 0.3, so ==
# reports False, while isclose compares within a tolerance.
# can adjust tolerances with params (rel_tol, abs_tol) if needed
print(math.isclose(0.1 + 0.2, 0.3))

2.0
3
4
7.38905609893065
0.6931471805599453
1.0
1.0
1.0
3.141592653589793
2.718281828459045
True


# collections


### defaultdict


In [134]:
import collections

# int() supplies the starting value 0 for any key that is not present yet
d = collections.defaultdict(int)
print(d['noexist'])
for step in range(2):
    d['noexist'] += 1
    print(d['noexist'])

0
1
2


### Counter


In [143]:
import collections
import random

random.seed(42)

# Shuffle the input only to show that element order does not affect the counts
values = [1, 1, 1, 2, 2, 3, 5]
random.shuffle(values)
print(values)

# Counter maps each distinct value to the number of times it occurs
c = collections.Counter(values)
print(c)
print(c[1])
print(c[4])  # a value that never occurred just reports a count of 0

# The n highest counts as (value, count) tuples, largest count first
top_two = c.most_common(2)
print(top_two)

# The single most frequent value
top_value, top_count = c.most_common(1)[0]
print(top_value)

# Total number of items counted
print(sum(c.values()))

[1, 2, 2, 1, 5, 1, 3]
Counter({1: 3, 2: 2, 5: 1, 3: 1})
3
0
[(1, 3), (2, 2)]
1
7


### namedtuple


In [2]:
import collections

# Define the type; the field names are given here as one space-separated string
Point = collections.namedtuple('Point', 'x y')

# Instances can be built positionally or by keyword
point1 = Point(10, 20)
point2 = Point(x=30, y=40)

# They unpack like ordinary tuples
x1, y1 = point1
x2, y2 = point2

# Fields are also reachable by name
for point in (point1, point2):
    print(point.x)
    print(point.y)
print()

# Operators behave like those of plain tuples
point3 = Point(10, 20)
print(point1 == point3)
print(point1 < point3)  # lexicographic
print(point1)
print(str(point1))

# Equal points hash equally
for point in (point1, point3, point2):
    print(hash(point))

10
20
30
40

True
False
Point(x=10, y=20)
Point(x=10, y=20)
-4873088377451060145
-4873088377451060145
3342721321007327999


# pickle

**Built-in types** support this already.

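Most custom classes can be pickled as-is (the instance's attributes are saved and restored). Implement **\_\_getstate\_\_** and **\_\_setstate\_\_** only when you need to customize that, e.g. to leave out members that can't be pickled, by virtualizing the object's members/state as a dictionary.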


In [147]:
import pickle

d = {'a': 1, 'b': 2, 'c': 3}
pickle_path = 'pickletest'

# Serialize to a file - pickle data is bytes, so open in binary mode
with open(pickle_path, 'wb') as f:
    pickle.dump(d, f)

# Read it back (only unpickle files that come from a source you trust)
with open(pickle_path, 'rb') as f:
    e = pickle.load(f)

print(e)

{'a': 1, 'b': 2, 'c': 3}


# copy

**Built-in types** support this already.

Custom classes can also be copied as-is by default. Implement **\_\_copy\_\_** and **\_\_deepcopy\_\_** only when you need to customize how a custom type is copied.


In [151]:
import copy

# Without copy: every slot of layer2 refers to the very same list object,
# so a later change to layer1 shows up in all three slots.
layer1 = [1, 2, 3]
layer2 = [layer1] * 3
layer1.append(4)
print(layer2)

# With copy: every slot is an independent snapshot taken before the change
layer1 = [1, 2, 3]
layer2 = [copy.deepcopy(layer1) for _ in range(3)]
layer1.append(4)
print(layer2)

[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]
[[1, 2, 3], [1, 2, 3], [1, 2, 3]]


# time

Times are in **seconds** but as **floats**.


In [154]:
import time

# Measure an interval with perf_counter(): it is a monotonic,
# high-resolution clock, whereas time.time() is wall-clock time and can
# jump if the system clock is adjusted while we are measuring.
start = time.perf_counter()
time.sleep(1)
stop = time.perf_counter()

# Elapsed time in (fractional) seconds
print(stop - start)

1.00462007522583


# asyncio

NOTE: you cannot mark a lambda as `async`, so you have to use an inline function instead.

In [12]:
# import asyncio # only needed for the asyncio.run() call at the bottom

# 'async def' is language syntax: calling the function gives back a
# coroutine instead of running the body right away.
# Note the return annotation still names the plain (awaited) type.
async def my_async_fn() -> int:
    return 10


# One async function can await another.
async def my_calling_fn():
    # 'await' suspends until the value is ready, then evaluates to that value
    result = await my_async_fn()
    print(result)
    return result


# From synchronous code, this is how you'd start a top-level async function
# to kick off the whole chain: hand asyncio.run() a coroutine object
# (note the call parentheses).
# It can't be used in a Jupyter Notebook because an event loop is already
# running there; in a notebook cell you can `await my_calling_fn()` directly.
# You're only supposed to call asyncio.run() once per program, as the
# main entry point.
#
# val = asyncio.run(my_calling_fn())

## Other Useful asyncio things

- `async with` to get a context asynchronously and then continue
- `asyncio.Lock` for alternative to `threading.Lock` that is friendly to async operations
- `asyncio.gather()` to wait on multiple async things


# threading

## Global Interpreter Lock (GIL)

In Python, only one thread can execute python code at a time.  Thus, multithreading only helps with __IO-bound__ tasks and not CPU-bound.

However, even though one thread can execute at a time, they can still end up switching at unexpected times.  In the example below, you will often see __print statements that run together__ because the print() statement gets preempted while it's still printing.

## Thread

`Thread` target is a function to run.

`time.sleep` is a way to sleep for a given number of seconds.

`join()` can also take a timeout (float seconds).

Use `threading.current_thread()` to get the current thread.

In [9]:
import threading
import time

def f():
    """Print 'sleep1' three times, sleeping one second before each print."""
    remaining = 3
    while remaining:
        time.sleep(1)
        print('sleep1')
        remaining -= 1


def g():
    """Print 'sleep2' three times, sleeping one second before each print."""
    remaining = 3
    while remaining:
        time.sleep(1)
        print('sleep2')
        remaining -= 1


# The function to run has to be passed via the `target` keyword argument
thread1 = threading.Thread(target=f)
thread2 = threading.Thread(target=g)
for worker in (thread1, thread2):
    worker.start()
print('threads started')

# Block until both threads have finished
for worker in (thread1, thread2):
    worker.join()
print('after join')

print()
print(thread1.is_alive())

threads started
sleep2sleep1

sleep2
sleep1
sleep2
sleep1
after join

False


## Thread Args

In [15]:
import threading
import time

def f(arg):
    """Print `arg` three times, sleeping one second before each print."""
    printed = 0
    while printed < 3:
        time.sleep(1)
        print(arg)
        printed += 1


# Positional arguments for the target are supplied as a tuple via `args`
# (note the trailing comma needed for a one-element tuple)
thread = threading.Thread(target=f, args=('hi',))
thread.start()
thread.join()

hi
hi
hi


## OOP Threads

In [10]:
import threading
import time

class MyThread(threading.Thread):
    """Thread subclass whose work is defined by overriding run()."""

    def run(self):
        """Print 'sleep' three times, sleeping one second before each print."""
        for tick in range(3):
            time.sleep(1)
            print('sleep')


# start() runs the overridden run() on the new thread
thread = MyThread()
thread.start()
thread.join()

sleep
sleep
sleep


## Thread Death

In [14]:
import threading
import time

def thread():
    """Print 'sleep' three times (one second apart), then raise ValueError."""
    remaining = 3
    while remaining:
        print('sleep')
        time.sleep(1)
        remaining -= 1
    raise ValueError()


t = threading.Thread(target=thread)
t.start()
# join() still returns normally: the exception ends the worker thread (its
# traceback is printed) but it is not re-raised in the thread that joins.
t.join()
print('after join')
print(t.is_alive())

sleep
sleep
sleep


Exception in thread Thread-20 (thread):
Traceback (most recent call last):
  File "/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/var/folders/wn/pmx5pn155tg83bskqqc2wzz00000gn/T/ipykernel_18349/1427272078.py", line 8, in thread
ValueError


after join
False


## Thread Interruption

There is __no concept__ of interrupting a thread like in Java.

However, you can implement it yourself, such as having a __stop event__ on your thread class and checking it in the loop.

## Events

Events are like conditions in Java except not tied to a lock object.  You can use an event as a way to signal a thread interruption.

The event is a shared variable that can be passed in as an arg, or part of the thread class, etc.

The interface of `Event` is simple:
  - `Event()` to make an event
  - `set()` to signal the event
  - `wait(timeout)` to wait for the event (can omit timeout to wait forever)
  
NOTE: this will wake up __all threads__ waiting, not just one like Java conditions.

In [17]:
import threading
import time

def wait_for_stop(stop_event):
    """Wait up to 5 seconds for the event and report whether it was signalled."""
    # wait() returns True when the event was set and False when the
    # 5s timeout ran out first (omit the timeout to wait forever)
    print('stop signal' if stop_event.wait(5) else 'no stop signal')


def signal_stop(stop_event):
    """Signal the event after a one-second delay."""
    time.sleep(1)
    stop_event.set()


# Both threads share the same event object
stop_event = threading.Event()
waiting = threading.Thread(target=wait_for_stop, args=(stop_event,))
sending = threading.Thread(target=signal_stop, args=(stop_event,))

for worker in (waiting, sending):
    worker.start()

for worker in (waiting, sending):
    worker.join()

stop signal


## Locking

Creating a mutex is simple:
 - create a lock variable with `threading.Lock()`
 - pass that lock around
 - use `with` to have a critical section on that lock
 
NOTE: unlike in Java, locks are not reentrant by default, but you can use `threading.RLock()` to make it so.

NOTE: there is no `synchronized` keyword like in Java.

NOTE: a semaphore works the same way but you create it with `threading.Semaphore(n)`.

In [19]:
import threading
import time

# One lock, shared by both threads, guards every access to the global x
lock = threading.Lock()
x = 1


def f(lock):
    """Multiply the global x by 10 three times, sleeping 1s before each update."""
    global x
    updates_left = 3
    while updates_left:
        time.sleep(1)
        with lock:  # critical section
            x *= 10
        updates_left -= 1


def g(lock):
    """Print the global x three times, sleeping 1s after each print."""
    global x
    prints_left = 3
    while prints_left:
        with lock:  # critical section
            print(x)
        time.sleep(1)
        prints_left -= 1


thread1 = threading.Thread(target=f, args=(lock,))
thread2 = threading.Thread(target=g, args=(lock,))

for worker in (thread1, thread2):
    worker.start()

for worker in (thread1, thread2):
    worker.join()

1
10
100


## Conditions

A condition is created with `threading.Condition()` and acts like a lock that also has signalling like an event does.  This makes it more similar to Java's conditions.

You use `with` to use the condition as a lock.  Then inside the critical section, you can call its `wait()`, `notify()`, and `notify_all()` methods just like Java.

## Volatility and Atomicity

There is no concept like `volatile` in some other languages like Java.

There are also no atomic types built-in, so you'd need to use a `Lock`.

## Effect of Main Thread Ending

Ending the main thread, including with `sys.exit()`, does not automatically kill executing threads (unless they were created as daemon threads with `daemon=True`).

Python does not provide a way to force kill all threads (or single threads).

In [22]:
import threading
import time
import sys

def f():
    """Print 'still going' once per second, forever."""
    while True:
        time.sleep(1)
        print('still going')


# The thread is started and never joined, so it keeps printing after the
# last line of this cell has run.
thread = threading.Thread(target=f)
thread.start()
print('main thread done')

main thread done
still going
still going
still going
still going
still going
still going
still going


## Timed execution

In [11]:
import threading

# Run a callable once, on its own thread, after a 2-second delay
timer = threading.Timer(2, print, args=('time!',))
timer.start()

time!


## Thread-local storage

Like in Java, `random` is thread-safe by default, but you still might want a thread-local instance.

In [16]:
import random
import threading

# Attributes set on this object are visible only to the thread that set them
thread_local_data = threading.local()


def get_thread_local_random():
    """Return the calling thread's own Random instance, creating it on first use."""
    try:
        return thread_local_data.random
    except AttributeError:
        # First call on this thread: nothing stored yet, so create the generator
        thread_local_data.random = random.Random()
        return thread_local_data.random


def task():
    """Return a random float drawn from the calling thread's own generator."""
    return get_thread_local_random().random()

# Then use task() in your threads

# multiprocessing

`multiprocessing` is a module for executing tasks in __parallel processes__ instead of threads.  This gets around the GIL issue, but it also means there is __no shared memory__ between tasks.

This is more appropriate for __CPU-bound__ parallel tasks, but is also __more expensive__ due to the heavy nature of process switching and because you have to __copy data__ with IPC.  This is the best you can do due to the GIL.

## Process

This __does not run in Jupyter__ because of a limitation with how the `multiprocessing` module handles functions across processes.  It uses `pickle` internally, and something about the way Jupyter defines things interactively makes pickle not work for functions like this.

But besides that, the interesting thing to note here is that it looks __exactly like the threading__ equivalent code.

NOTE: global variables from the original process will be copied (and then become independent) in the spawned processes.

In [1]:
import multiprocessing
import time

def f(arg):
    """Print `arg` three times, sleeping one second before each print."""
    for _ in range(3):
        time.sleep(1)
        print(arg)


# Guard process creation: with the "spawn" start method (the default on
# macOS and Windows) the child process imports this module again, and
# without the guard that import would try to start another process.
if __name__ == '__main__':
    process = multiprocessing.Process(target=f, args=('hi',))
    process.start()
    process.join()

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'f' on <module '__main__' (built-in)>


## IPC

Since there is __no shared memory__ between processes, you have to use IPC for processes to communicate.

`Pipe`
  - `parent_conn, child_conn = Pipe()`
    - pass `child_conn` into child process when create it
  - `child_conn.send()` and `child_conn.close()`
  - `parent_conn.recv()`
  - there is also a __bidirectional__ version
  
`Queue`
  - create and use same one in both parent and child
  - `put()` and `get()` to pass data
  
`Value`
  - create and use same one in parent and child
  - read and write the `value` attribute to share the data

`Array`
  - create and use same one in parent and child
  - read and write via indexing and slicing to share the data
  
`Manager`
  - for more complex objects and flexibility

# concurrent.futures

## Executor, Thread Pool, and Future

These concepts are similar to Java, but the 3 of them are tied together instead of being separate concepts.

NOTE: you can use `future.cancel()` to cancel a task, but __only if it hasn't started__.

NOTE: workers are only created as needed, so you can set a high `max_workers` to simulate Java's cached thread pool.

In [4]:
from concurrent.futures import ThreadPoolExecutor

def task():
    """Return a fixed string so the future has a result to deliver."""
    return "result"


# Using the executor as a context manager shuts it down on exit
# (waiting for pending work), so its worker thread is always released.
with ThreadPoolExecutor(max_workers=1) as executor:  # create an executor/pool
    future = executor.submit(task)  # submit a task to the pool and get a future for it
    print(future.result())  # Waits for the task to complete and prints the result
    print(future.done())

result
True


## Awaiting Termination

Python is more limited than most languages in terms of forceful shutdown.

In [8]:
from concurrent.futures import ThreadPoolExecutor

def some_function(x):
    """Print x and hand it back unchanged."""
    print(x)
    return x


executor = ThreadPoolExecutor(max_workers=4)

# Queue one task per argument, keeping the futures in submission order
futures = []
for arg in [1, 2, 3]:
    futures.append(executor.submit(some_function, arg))

# Stop accepting new work and block until every queued task has finished
executor.shutdown(wait=True)

print("All tasks completed.")

12

3
All tasks completed.


## Address Space

`ThreadPoolExecutor` tasks share the same address space as the main thread, just like in `threading` module threads.  Thus, you can use `Lock`, etc. just as with normal threads.

## Combining With asyncio (eg. for chaining)

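NOTE: this doesn't work in Jupyter, because the notebook kernel is already running an event loop and `asyncio.run()` refuses to start a second one. In a notebook, use `await main()` at the top level of a cell instead.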

You can't chain exactly like with JS promises, but you can do `async/await` chaining by using `asyncio`.

In [14]:
import asyncio
import concurrent.futures
import threading


async def main():
    """Await a concurrent.futures.Future from asyncio code.

    Prints the future's result and also returns it.
    """
    future = concurrent.futures.Future()

    # Set the result of the future from another thread after a short delay;
    # this stands in for real background work. Without it the await below
    # would never finish.
    threading.Timer(0.1, future.set_result, args=('result from thread',)).start()

    # wrap_future adapts the thread-based future so that asyncio can await it
    asyncio_future = asyncio.wrap_future(future)
    result = await asyncio_future
    print(result)
    return result


asyncio.run(main())

RuntimeError: asyncio.run() cannot be called from a running event loop

## ProcessPoolExecutor

`ProcessPoolExecutor` works much like `ThreadPoolExecutor` but using multiple processes instead of threads (which means no shared address space after process creation).

NOTE: this won't work in Jupyter

In [17]:
import concurrent.futures

def compute_square(n):
    """Return n multiplied by itself."""
    return n * n


numbers = [1, 2, 3, 4, 5]

# Guard pool creation: with the "spawn" start method (the default on macOS
# and Windows) each worker process imports this module again, and without
# the guard that import would try to create another pool.
if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # Submit tasks to the process pool
        futures = [executor.submit(compute_square, num) for num in numbers]

        # Retrieve and print the results in the order the tasks finish
        for future in concurrent.futures.as_completed(futures):
            print(future.result())

Process SpawnProcess-7:
Process SpawnProcess-8:
Traceback (most recent call last):
  File "/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/concurrent/futures/process.py", line 240, in _process_worker
    call_item = call_queue.get(block=True)
  File "/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'compute_square' on <module '__main__' (built-in)>
Traceback (most recent call last):
  File "/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/davidpetrofsky/miniforge3/envs/ai/lib/python3

BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.

# time

NOTE: times are floating point seconds

In addition to what's shown here, there are various ways to format or parse a __string__, interact with GMT, etc.

In [25]:
import time

# Wall-clock time: seconds since the epoch
now = time.time()
print(now)

# High-resolution clock meant for measuring intervals; its starting point is
# arbitrary, so only the difference between two readings is meaningful
ticks = time.perf_counter()
print(ticks)

time.sleep(1)

# Current local time broken out into named fields
print(time.localtime())

1700893073.721708
93118.063308291
time.struct_time(tm_year=2023, tm_mon=11, tm_mday=24, tm_hour=22, tm_min=17, tm_sec=54, tm_wday=4, tm_yday=328, tm_isdst=0)


# datetime

In [27]:
import datetime

# Current local date and time
current_moment = datetime.datetime.now()
print(current_moment)

# Just the date part
current_date = datetime.date.today()
print(current_date)

# A time of day with no date attached
half_past_noon = datetime.time(12, 30)
print(half_past_noon)

# A timedelta is a duration; adding it to a datetime shifts that datetime
one_day = datetime.timedelta(days=1)
print(one_day.total_seconds())
tomorrow = datetime.datetime.now() + one_day
print(tomorrow)

2023-11-24 22:22:47.820658
2023-11-24
12:30:00
86400.0
2023-11-25 22:22:47.820920


# json

In [10]:
import json
import sys

data = {
    "name": "Jane Smith",
    "age": 25,
    "is_employee": False,
}

# Files: dump() writes to any file-like object (here standard output)
json.dump(data, sys.stdout, indent=2)
print('\n')  # finish the JSON line, then leave one blank line
# data = json.load(somefile)

# Strings: dumps() returns the JSON text and loads() parses it back
d = json.dumps(data, indent=2)
print(d, end='\n\n')
print(json.loads(d))

{
  "name": "Jane Smith",
  "age": 25,
  "is_employee": false
}

{
  "name": "Jane Smith",
  "age": 25,
  "is_employee": false
}

{'name': 'Jane Smith', 'age': 25, 'is_employee': False}
