# 20 Useful Python libraries
## __DEBUGGING IN PROGRESS__
- [original Medium article](https://towardsdatascience.com/20-python-gems-buried-in-the-installation-waiting-to-be-found-96034cad4d15)
- contextlib (external source manager)
- functools (functions manager)
- itertools (iterations manager)
- glob (matching)
- pathlib
- sqlite3
- hashlib
- secrets
- argparse (CLI)
- random
- pickle (file I/O)
- shutil (shell utilities)
- statistics
- gc (garbage collection)
- pprint
- pydoc (not implemented in this notebook)
- calendar (not implemented in this notebook)
- webbrowser (directly from Jupyter lab!)
- logging
- concurrent ops

## https://docs.python.org/3/library/contextlib.html

In [3]:
# contextlib
# context manager - serves as a timer
# wrapping a function under a context manager decorator
# converts it to a manager - you can use with "with"

from contextlib import contextmanager

@contextmanager
def timer():
    start=time.time()
    yield
    end=time.time()
    print(f"{round(end-start,3)}")
    
import time
with timer():
    for _ in range(10):
        time.sleep(0.5)

5.006


## https://docs.python.org/3/library/functools.html

In [6]:
# functools
# builds functions from existing ones
# one is 'partial' - clones functions with custom values

from functools import partial
import pandas as pd

partial_read_csv = partial(
    pd.read_csv,delimiter="|",index_col="date",true_values="true"
)

partial_read_csv("data/partial_read.csv")

TypeError: Expected list, got str

In [8]:
# functools - caching
# remembers each output that maps to inputs
# so results are instantly available when same args are passed
# example: streamlit

from functools import cache

@cache
def sleep(n):
    print(time.sleep(n))
    
with timer():
        sleep(10)
        
with timer():
        sleep(10)


None
10.008
0.0


## https://docs.python.org/3/library/itertools.html

In [13]:
# itertools

from itertools import product, zip_longest

a="ABC"; b="DEF"
for i in product(a,b):
    print(i)
    
a=[1,2,3,4]; b=[5,6]
for i in zip(a,b):
    print(i)
    
for i in zip_longest(a,b,fillvalue='blank'):
    print(i)

('A', 'D')
('A', 'E')
('A', 'F')
('B', 'D')
('B', 'E')
('B', 'F')
('C', 'D')
('C', 'E')
('C', 'F')
(1, 5)
(2, 6)
(1, 5)
(2, 6)
(3, 'blank')
(4, 'blank')


## https://docs.python.org/3/library/glob.html

In [18]:
# glob - Unix-style pattern matching
# enables working with multi files in parallel

import glob

def choose_these_files(root="."):
    return glob.glob(".*")

## https://docs.python.org/3/library/pathlib.html

In [27]:
# pathlib - started with Py 3.4

import pathlib
p=pathlib.Path(".")
print(p.parent)

c=pathlib.Path.cwd() # current working directory
print(c)

.
/home/bjpcjp/1TB/tools-tutorials/python


## https://docs.python.org/3/library/sqlite3.html

In [29]:
import sqlite3

con=sqlite3.connect("data/sample.db")
cursor=con.cursor()

cursor.execute(
    """
    CREATE TABLE films
    (released text, title text, budget real)
    """
)
con.commit(); con.close()

## https://docs.python.org/3/library/hashlib.html

In [30]:
import hashlib

enc=hashlib.sha256(b"Howdy.")

print(enc.hexdigest())

c9fd9b4ddd48b4ba9aa6228db6cdb3f0ab71d913dac256c9667dd9064bcda168


## https://docs.python.org/3/library/secrets.html

In [31]:
import secrets, string

alphabet=string.ascii_letters+string.digits
password="".join(secrets.choice(alphabet) for _ in range(10))

print(password)
print(secrets.token_urlsafe())
print(secrets.token_hex())

c8oP5wRUH9
BjsgglbWAAhxM4T0pjPWWlIJdjkfd6XqcOWvNZ4JRMo
1730a6e82ad7cda8a370cd642614bf396d793773ff08ce8c8cc703e279406272


## https://docs.python.org/3/library/argparse.html

In [33]:
import argparse

parser=argparse.ArgumentParser(
    prog="simple adder",
    epilog="good luck.",
)

parser.add_argument('a',help='1st arg')
parser.add_argument('b',help='2nd arg')

args=parser.parse_args()
print(f"equation is {args.a} * {args.b}")

usage: simple adder [-h] a b
simple adder: error: the following arguments are required: b


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


## https://docs.python.org/3/library/random.html

In [36]:
import random

normal_dist=random.gauss(0,1)
print(normal_dist)

print(random.choice(list(range(1000))))

unordered=list(range(10)); random.shuffle(unordered)
print(unordered)

0.8062474458362765
283
[8, 2, 7, 3, 4, 5, 0, 1, 9, 6]


## https://docs.python.org/3/library/pickle.html

In [37]:
# pickle = 80X faster than CSV I/O, smaller memory

import pickle; import seaborn as sns

df=sns.load_dataset("diamonds")

with open("data/diamonds.pkl","wb") as file:
    pickle.dump(df,file)
    
with open("data/diamonds.pkl","rb") as file:
    df1=pickle.load(file)
    
print(df1.head())

   carat      cut color clarity  depth  table  price     x     y     z
0   0.23    Ideal     E     SI2   61.5   55.0    326  3.95  3.98  2.43
1   0.21  Premium     E     SI1   59.8   61.0    326  3.89  3.84  2.31
2   0.23     Good     E     VS1   56.9   65.0    327  4.05  4.07  2.31
3   0.29  Premium     I     VS2   62.4   58.0    334  4.20  4.23  2.63
4   0.31     Good     J     SI2   63.3   58.0    335  4.34  4.35  2.75


## https://docs.python.org/3/library/shutil.html

In [38]:
# shutil (shell utilities)- advanced file ops

import shutil
print(dir(shutil))

['COPY_BUFSIZE', 'Error', 'ExecError', 'ReadError', 'RegistryError', 'SameFileError', 'SpecialFileError', '_ARCHIVE_FORMATS', '_BZ2_SUPPORTED', '_GiveupOnFastCopy', '_HAS_FCOPYFILE', '_LZMA_SUPPORTED', '_UNPACK_FORMATS', '_USE_CP_SENDFILE', '_WINDOWS', '_WIN_DEFAULT_PATHEXT', '_ZLIB_SUPPORTED', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', '_access_check', '_basename', '_check_unpack_options', '_copyfileobj_readinto', '_copytree', '_copyxattr', '_destinsrc', '_ensure_directory', '_fastcopy_fcopyfile', '_fastcopy_sendfile', '_find_unpack_format', '_get_gid', '_get_uid', '_is_immutable', '_islink', '_make_tarball', '_make_zipfile', '_ntuple_diskusage', '_rmtree_isdir', '_rmtree_islink', '_rmtree_safe_fd', '_rmtree_unsafe', '_samefile', '_stat', '_unpack_tarfile', '_unpack_zipfile', '_use_fd_functions', 'chown', 'collections', 'copy', 'copy2', 'copyfile', 'copyfileobj', 'copymode', 'copystat', 'copytree', 'disk_usage',

In [40]:
print(shutil.which("python3"))

/usr/bin/python3


In [41]:
shutil.copy("data/diamonds.pkl","data/diamonds-copy.pkl")

'data/diamonds-copy.pkl'

## https://docs.python.org/3/library/statistics.html

In [42]:
import statistics

array = [random.randint(1, 1000) for _ in range(1000)]
array2 = [random.randint(1, 1000) for _ in range(1000)]

print(statistics.mean(array))
print(statistics.stdev(array2))
print(statistics.quantiles(array2, n=10))

518.134
284.9063535028067
[108.0, 213.4, 316.0, 426.4, 501.5, 604.2, 701.0, 802.0, 902.7]


## https://docs.python.org/3/library/gc.html

In [43]:
# collect returns number of unreachable/cleaned objects in namespace
import gc

gc.enable(); gc.collect()

13

## https://docs.python.org/3/library/pprint.html

In [44]:
nested = [list("abcs"), 
          list("sdff"), 
          [1, 45, 4, 6, 7, 8], 
          list(range(12))]

print(nested)
from pprint import pprint
pprint(nested)

[['a', 'b', 'c', 's'], ['s', 'd', 'f', 'f'], [1, 45, 4, 6, 7, 8], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]]
[['a', 'b', 'c', 's'],
 ['s', 'd', 'f', 'f'],
 [1, 45, 4, 6, 7, 8],
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]]


## https://docs.python.org/3/library/pydoc.html

## https://docs.python.org/3/library/webbrowser.html

In [45]:
import webbrowser
webbrowser.open("bjpcjp.github.io")

True

## https://docs.python.org/3/library/logging.html

In [46]:
import logging

logging.basicConfig(
    format ="%(asctime)s - %(message)s", 
    datefmt="%d-%b-%y %H:%M:%S", 
    level  =logging.INFO
)

print(logging.info("Finished execution"))
print(logging.critical("Critical error!"))

15-Sep-22 19:53:47 - Finished execution
15-Sep-22 19:53:47 - Critical error!


None
None


## https://docs.python.org/3/library/concurrent.futures.html

In [47]:
# concurrency (multithreading)
# send 100 GETs to a URL, wait for response
# inefficient way:

import requests

URL = "https://www.google.com/"

def get(url: str):
    response = requests.get(url)
    return response.content

with timer():
    for index, _ in enumerate(range(101), 1):
        get(URL)

34.262


In [48]:
# smarter approach - use all available CPU cores

import concurrent.futures as cf

with timer():
    with cf.ThreadPoolExecutor() as tpe:
        for index,_ in enumerate(range(100),1):
            tpe.submit(get,URL)

14.062
