## Strings - accessing substrings

In [26]:
# Sample sentence used to demonstrate substring access by slicing.
s1 = "With regard to the view that all things are for the sake of an end and nothing is in vain, the assignation of ends is in general not easy, as it is usually stated to be ... we must set certain limits to purposiveness and to the effort after the best, and not assert it to exist in all cases without qualification."

# Slices are half-open: s1[start:stop] includes start and excludes stop.
print(s1[0:4])    # the first four characters
print(s1[19:23])  # the characters at indices 19 through 22
# Negative indices count from the end: [-14:] keeps the last 14 characters,
# while [-14:-1] stops before the final character and so drops the trailing '.'.
print(s1[-14:], s1[-14:-1])

With
view
qualification. qualification


## Numbers

In [5]:
a_num = 1234

# hex(), bin() and oct() each return the number as a prefixed string
# (0x / 0b / 0o). Printing them with a newline separator yields one
# representation per line: hexadecimal, then binary, then octal.
print(hex(a_num), bin(a_num), oct(a_num), sep='\n')

0x4d2
0b10011010010
0o2322


## Arrays, Lists, etc

In [1]:
# Limit the number of results taken from a generator expression

from itertools import islice

# Despite its name, evens_list is a lazy generator (note the parentheses),
# not a list: nothing is filtered until items are requested.
evens_list = (x for x in [11,22,33,44,55,66,77,88,99,1010] if x % 2 == 0)
# islice stops after the first 3 items, so the rest of the input is never examined.
list(islice(evens_list, 3))

[22, 44, 66]

In [4]:
# Find an element in a list of dictionaries

items = [
    {'field1': 'foo',  'field2': 'bar',  'field3': 123},
    {'field1': 'bar',  'field2': 'foo',  'field3': 456},
    {'field1': 'baz',  'field2': 'quux', 'field3': 789},
    {'field1': 'quux', 'field2': 'baz',  'field3': 101},
]

# Build the list of every matching dict and take the first one.
# If nothing matches, the list is empty and [0] raises IndexError.
[item for item in items if item['field2'] == 'foo'][0]

{'field1': 'bar', 'field2': 'foo', 'field3': 456}

In [19]:
# Account for no match using an iterator and a default value:
# next() returns the first item the generator yields, or its second argument
# ("not found") when the generator finishes without producing a match.
next((item for item in items if item['field2'] == 'shazam'), "not found")

'not found'

## Dictionaries

In [1]:
# Nested dictionary: every top-level key maps to an inner dict with 'a' and 'b'
# entries. Keys keep their insertion order ('one' through 'seven').
h = {
    'one':   {'a': 10,  'b': 20},
    'two':   {'a': 100, 'b': 10},
    'three': {'a': 123, 'b': 1},
    'four':  {'a': 50,  'b': 75},
    'five':  {'a': 21,  'b': 43},
    'six':   {'a': 22,  'b': 11},
    'seven': {'a': 66,  'b': 44},
}

# Chained subscripts: look up the outer key first, then the inner key.
h['six']['b']

11

In [3]:
# Items: a view of (key, value) pairs (h.values() would give the values alone)
h.items()

dict_items([('one', {'a': 10, 'b': 20}), ('two', {'a': 100, 'b': 10}), ('three', {'a': 123, 'b': 1}), ('four', {'a': 50, 'b': 75}), ('five', {'a': 21, 'b': 43}), ('six', {'a': 22, 'b': 11}), ('seven', {'a': 66, 'b': 44})])

In [4]:
# Sort a dictionary by a value
# sorted() orders the (key, inner dict) pairs by the inner 'b' entry; building
# a dict from those pairs keeps that order.
dict(sorted(h.items(), key=lambda item: item[1]['b']))

{'three': {'a': 123, 'b': 1},
 'two': {'a': 100, 'b': 10},
 'six': {'a': 22, 'b': 11},
 'one': {'a': 10, 'b': 20},
 'five': {'a': 21, 'b': 43},
 'seven': {'a': 66, 'b': 44},
 'four': {'a': 50, 'b': 75}}

In [6]:
# Reverse sort: order by the inner 'b' value, largest first.
# The result is a list of (key, value) tuples rather than a dict.
sorted(h.items(), key=lambda item: item[1]['b'], reverse=True)

[('four', {'a': 50, 'b': 75}),
 ('seven', {'a': 66, 'b': 44}),
 ('five', {'a': 21, 'b': 43}),
 ('one', {'a': 10, 'b': 20}),
 ('six', {'a': 22, 'b': 11}),
 ('two', {'a': 100, 'b': 10}),
 ('three', {'a': 123, 'b': 1})]

In [3]:
# More sorting

# Every inner dict has an 'a1' entry; the second key differs ('a2' or 'b1').
h2 = {
    'one':   {'a1': 456,   'a2': 4321},
    'two':   {'a1': 321,   'a2': 1234},
    'three': {'a1': 12,    'a2': 505},
    'four':  {'a1': 10101, 'b1': 8},
    'five':  {'a1': 44,    'b1': 555},
}

# Sort by sub-key a1
# Iterating a dict yields its keys, so sorted(h2, ...) orders the key names
# by the 'a1' value stored under each of them.
sorted_keys = sorted(h2, key=lambda name: h2[name]['a1'])
[h2[name]['a1'] for name in sorted_keys]

[12, 44, 321, 456, 10101]

In [4]:
# Sort by the reverse of each key
# Each key name is reversed with [::-1] before comparison (e.g. 'one' -> 'eno').
sorted_keys = sorted(h2, key=lambda name: name[::-1])
sorted_keys

['three', 'one', 'five', 'two', 'four']

## Regular Expressions (Regex)

In [3]:
import re

In [6]:
# regex replace
s = '123 Anywhere Lane, Springfield, USA. 456 Snowy Road, Pueblo, CO.'
# The parentheses capture whichever street suffix matched; \1 in the
# replacement re-inserts that captured text between angle brackets.
re.sub(r'(Lane|Road)', r'<\1>', s)

'123 Anywhere <Lane>, Springfield, USA. 456 Snowy <Road>, Pueblo, CO.'

In [7]:
# Using lambda for operating on match groups - case transform example
s = 'This Sentence Has 2 many Title-case words!'
# The replacement may be a function: it receives each match object and returns
# the replacement text (here, the captured word lower-cased).
# NOTE: \w+ needs at least one word character after the capital letter, so a
# one-letter word such as 'I' or 'A' would be left unchanged.
re.sub(r'\b([A-Z]\w+)\b', lambda m: m.group(1).lower(), s)

'this sentence has 2 many title-case words!'

## Paths

In [2]:
from pathlib import Path

# Build a Path object from a string; no file needs to exist at this location.
p = Path("/path/to/some/filename.txt")

# Path basename: the final path component, including its extension
p.name

'filename.txt'

In [3]:
# Home: the current user's home directory as a Path
# (the result depends on the machine and user running the notebook)
Path.home()

PosixPath('/Users/andy')

In [4]:
# Join paths portably with the / operator
# The right-hand operand can be a plain string. The parentheses build the file
# name first, so the complete name is joined as a single path component.
new_path = Path('./output') / ('some_file' + '_out.txt')
new_path

PosixPath('output/some_file_out.txt')

## Parsing XML

Using [xml.etree.ElementTree](https://docs.python.org/3/library/xml.etree.elementtree.html)

In [12]:

import xml.etree.ElementTree as ET

# Sample document: a <data> root with one <country> element per country, each
# carrying a name attribute plus rank/year/gdppc/neighbor children.
xml_str = """<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>
"""

# From a string: fromstring() parses the text and returns the root element (<data>)
docroot = ET.fromstring(xml_str)

# From a file:
#tree = ET.parse('some_file.xml')
#docroot = tree.getroot()

# Iterating over an element yields its direct children (here, each <country>).
for el in docroot:
    # tag names, attributes, child elements
    # find("year") returns the first matching child; .text is its text content.
    print(el.tag, el.attrib, el.find("year").text)

country {'name': 'Liechtenstein'} 2008
country {'name': 'Singapore'} 2011
country {'name': 'Panama'} 2011


## Re-import a module in the REPL

In [4]:
# Use re as an example
# importlib.reload() takes a module that has already been imported, re-executes
# it, and returns the module object.
import importlib
importlib.reload(re)

<module 're' from '/opt/anaconda3/lib/python3.8/re.py'>

## Reference a global and scope capture

In [2]:
# local capture

def some_outer_fn():
    """Show that a nested function can rebind a variable of its enclosing function.

    Prints the value before the inner call, inside it (after doubling), and
    after it; the last two lines match because the inner function rebinds the
    outer variable.
    """
    value = 123

    def double_and_show():
        # nonlocal makes the assignment target the enclosing function's
        # variable instead of creating a new local one.
        nonlocal value
        value = value * 2
        print(value)

    print(value)
    double_and_show()
    print(value)

some_outer_fn()

123
246
246


In [3]:
# globals

some_var = 'foo'

def a_func():
    """Append ' bar' to the module-level some_var."""
    # The global declaration makes the assignment below rebind the
    # module-level name rather than create a function-local one.
    global some_var
    some_var = some_var + ' bar'

# Show the value before and after the function rebinds it.
print(some_var)
a_func()
print(some_var)

foo
foo bar


## The `__dict__` and dot notation

In [1]:
def some_func(x):
    """Return the sum of the fixed local values 2 and 5 and the argument x."""
    first, second = 2, 5
    return first + second + x

some_func(12)

19

In [2]:
# A function object has a __dict__ for its attributes; it is empty until one is set.
some_func.__dict__

{}

In [3]:
# __dict__ is an ordinary dict, so the usual views work; it has no keys yet.
some_func.__dict__.keys()

dict_keys([])

In [4]:
# Storing a key in __dict__ creates an attribute named 'bar' on the function object.
some_func.__dict__['bar'] = 'baz'

In [5]:
# Inspect the function's attribute dict again to see what it now holds.
some_func.__dict__

{'bar': 'baz'}

In [6]:
# Dot notation reads the attribute from the function's __dict__.
some_func.bar

'baz'

## Scope rules - LEGB

### https://realpython.com/python-scope-legb-rule/

LEGB - Local (Function), Enclosing, Global, Built-in

In [2]:
# Local
# This is really function or lambda scope.
def func_with_local():
    """Assign a name that exists only inside this function's local scope."""
    a_var = 123

# Not defined: a_var is local to func_with_local, so referencing it here
# raises NameError. The error is caught and printed so the demonstration shows
# the message without stopping a "Restart & Run All".
try:
    a_var
except NameError as err:
    print(f'NameError: {err}')

NameError: name 'a_var' is not defined

In [3]:
# Enclosing
# Only for nested functions: the inner function can read the outer function's locals
def func_with_local_func():
    a_var_1 = 101  # local to the outer function
    def local_func():
        a_var_2 = a_var_1 + 10  # a_var_1 is found in the enclosing function's scope
        return a_var_2
    return local_func()

func_with_local_func()

111

In [7]:
# Vars: names of the function's local variables.
# Only local_func appears; a_var_1 is captured by the nested function, so it is
# tracked as a cell variable (see __code__.co_cellvars) rather than listed here.
func_with_local_func.__code__.co_varnames

('local_func',)

In [9]:
# Constants referenced by the function's compiled code. In the output below
# these are None, the literal 101, the code object compiled for the nested
# local_func, and that function's qualified name.
func_with_local_func.__code__.co_consts

(None,
 101,
 <code object local_func at 0x7fe638a60450, file "<ipython-input-3-3fce2557458a>", line 5>,
 'func_with_local_func.<locals>.local_func')

In [5]:
# Global
# A name assigned at the top level of the notebook (outside any function).
a_global_var = "foo"
a_global_var

'foo'

In [10]:
# Names in the current (global) scope
# Called with no arguments, dir() lists the names defined in the current scope,
# not the built-ins themselves; pass __builtins__ to list the built-in names.
dir()

['In',
 'Out',
 '_',
 '_3',
 '_5',
 '_6',
 '_7',
 '_9',
 '__',
 '___',
 '__builtin__',
 '__builtins__',
 '__doc__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_dh',
 '_i',
 '_i1',
 '_i10',
 '_i2',
 '_i3',
 '_i4',
 '_i5',
 '_i6',
 '_i7',
 '_i8',
 '_i9',
 '_ih',
 '_ii',
 '_iii',
 '_oh',
 'a_global_var',
 'exit',
 'func_with_local',
 'func_with_local_func',
 'get_ipython',
 'quit']

In [11]:
# Built-in: the names Python provides in every scope
# (exception classes, constants such as None/True/False, ...).
dir(__builtins__)

['ArithmeticError',
 'AssertionError',
 'AttributeError',
 'BaseException',
 'BlockingIOError',
 'BrokenPipeError',
 'BufferError',
 'ChildProcessError',
 'ConnectionAbortedError',
 'ConnectionError',
 'ConnectionRefusedError',
 'ConnectionResetError',
 'EOFError',
 'Ellipsis',
 'EnvironmentError',
 'Exception',
 'False',
 'FileExistsError',
 'FileNotFoundError',
 'FloatingPointError',
 'GeneratorExit',
 'IOError',
 'ImportError',
 'IndentationError',
 'IndexError',
 'InterruptedError',
 'IsADirectoryError',
 'KeyError',
 'KeyboardInterrupt',
 'LookupError',
 'MemoryError',
 'ModuleNotFoundError',
 'NameError',
 'None',
 'NotADirectoryError',
 'NotImplemented',
 'NotImplementedError',
 'OSError',
 'OverflowError',
 'PermissionError',
 'ProcessLookupError',
 'RecursionError',
 'ReferenceError',
 'RuntimeError',
 'StopAsyncIteration',
 'StopIteration',
 'SyntaxError',
 'SystemError',
 'SystemExit',
 'TabError',
 'TimeoutError',
 'True',
 'TypeError',
 'UnboundLocalError',
 'UnicodeDecode

## Formatting/printing

### f-strings

### Basic

In [5]:
# Sample mapping with both int and float values, reused by the formatting examples.
h1 = {
    'one': 1,
    'this is a key': 123.4567,
    'twenty': 20,
    'pi': 3.141592653,
}

# An expression inside {} is evaluated and converted with its default formatting.
for k, v in h1.items():
    print(f'{k} = {v}')

one = 1
this is a key = 123.4567
twenty = 20
pi = 3.141592653


### More formatting

In [33]:
# Pad quoted key strings and round value floats
# !r applies repr() to the key (adding the quotes), :<20 left-aligns it in a
# 20-character field, and :.2f formats the value with two decimal places.
for k, v in h1.items():
    print(f'{k!r:<20} = {v:.2f}')

'one'                = 1.00
'this is a key'      = 123.46
'twenty'             = 20.00
'pi'                 = 3.14


In [34]:
# Right align quoted key strings
# :>20 right-aligns the repr of the key in a 20-character field; :.1f keeps
# one decimal place.
for k, v in h1.items():
    print(f'{k!r:>20} = {v:.1f}')

               'one' = 1.0
     'this is a key' = 123.5
            'twenty' = 20.0
                'pi' = 3.1


In [36]:
# Use a variable for decimal places
# A replacement field can be nested inside the format spec: {decs} is
# substituted first, so the spec becomes .3f here.
decs = 3
for k, v in h1.items():
    print(f'{k:<15} = {v:.{decs}f}')

one             = 1.000
this is a key   = 123.457
twenty          = 20.000
pi              = 3.142


## chr/ord/etc

In [3]:
# Alphanumeric characters between '0' and 'z', selected by code point.
# NOTE(review): range() excludes its stop value, so 'z' itself is not in the
# result (the output ends at 'y'); ord('z') + 1 would include it.
print([chr(x) for x in range(ord('0'), ord('z')) if chr(x).isalnum()])

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y']


In [4]:
# Alphanumeric ASCII characters from '0' through 'z' (digits, upper case, lower case).
# range() excludes its stop value, so the upper bound is ord('z') + 1 to keep
# 'z' itself in the list.
chars = [chr(x) for x in range(ord('0'), ord('z') + 1) if chr(x).isalnum()]

In [8]:
import random
# random.choice picks one element uniformly at random; it replaces the manual
# int(random.random() * len(chars)) index computation.
random.choice(chars)

'y'

In [16]:
# Build a 10-character random string: random.choices draws k elements
# uniformly, with replacement.
# NOTE: the random module is not intended for secrets such as passwords or
# tokens; the standard-library secrets module is meant for those.
''.join(random.choices(chars, k=10))

'ZIYA9dMmfb'

## Dates, times, arrow, etc

In [1]:
# https://arrow.readthedocs.io/en/latest/
# %pip (rather than !pip) installs into the environment of the running kernel.
# -U upgrades to the latest release; pin a version (arrow==X.Y.Z) for reproducible runs.
%pip install -U arrow

Collecting arrow
  Downloading arrow-1.2.2-py3-none-any.whl (64 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.0/64.0 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
Installing collected packages: arrow
  Attempting uninstall: arrow
    Found existing installation: arrow 1.1.1
    Uninstalling arrow-1.1.1:
      Successfully uninstalled arrow-1.1.1
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cookiecutter 1.7.2 requires MarkupSafe<2.0.0, but you have markupsafe 2.0.1 which is incompatible.
Successfully installed arrow-1.2.2


In [2]:
import arrow

In [3]:
# Current time as a timezone-aware Arrow object in UTC (+00:00 offset)
now_utc = arrow.utcnow()
now_utc

<Arrow [2022-08-17T18:07:28.550213+00:00]>

In [10]:
# Express the same instant in another timezone
now_tz = now_utc.to('US/Pacific')
print(now_tz.timestamp())  # seconds since the Unix epoch, as a float
print(now_tz.format())     # default string format, including the UTC offset
print(now_tz.humanize())   # human-readable description relative to the current time

1660759648.550213
2022-08-17 11:07:28-07:00
a minute ago


In [14]:
# Current time with a local UTC offset (-07:00 in the output below)
print(arrow.now())

2022-08-17T11:10:23.146027-07:00


In [15]:
# From an int timestamp (seconds since the Unix epoch); the result is in UTC
arrow.get(1660759648)

<Arrow [2022-08-17T18:07:28+00:00]>

In [16]:
# Parse a string using an explicit format; the input carries no timezone and
# the result is given a +00:00 (UTC) offset
arrow.get('2013-05-05 12:30:45', 'YYYY-MM-DD HH:mm:ss')

<Arrow [2013-05-05T12:30:45+00:00]>

## ChainMap

### For combining dictionaries

In [1]:
from collections import ChainMap

In [3]:
# Two dicts that share the keys 'c' and 'd' but map them to different values.
d1 = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
d2 = {'c': 40, 'd': 50, 'e': 60}
# ChainMap presents both dicts as one mapping; lookups try d1 first, then d2.
cmdict = ChainMap(d1, d2)
cmdict

ChainMap({'a': 10, 'b': 20, 'c': 30, 'd': 40}, {'c': 40, 'd': 50, 'e': 60})

In [4]:
# 'a' exists only in d1
cmdict['a']

10

In [5]:
# 'e' exists only in d2, so the lookup falls through to the second dict
cmdict['e']

60

In [6]:
# 'c' exists in both dicts: the first one in the chain (d1) wins
cmdict['c']

30

In [7]:
# The value for the duplicated key comes from d1 ...
cmdict['c'] == d1['c']

True

In [8]:
# ... while d2's value for the same key is shadowed
cmdict['c'] == d2['c']

False

## Counter

### For counting frequencies of occurrence in lists, etc

In [11]:
from collections import Counter

In [21]:
# Counter tallies how many times each value occurs in the list.
a = [3,45,6,4,3,2,34,5,6,7,8,7,6,5,4,3,2,24,5,6,7,8,89,9,1]
c = Counter(a)
c

Counter({3: 3,
         45: 1,
         6: 4,
         4: 2,
         2: 2,
         34: 1,
         5: 3,
         7: 3,
         8: 2,
         24: 1,
         89: 1,
         9: 1,
         1: 1})

In [27]:
# Access the frequency associated with a value
# A value that never occurred (1234) reports 0 instead of raising KeyError.
print(f"7 occurs {c[7]} time(s)")
print(f"24 occurs {c[24]} time(s)")
print(f"1234 occurs {c[1234]} time(s)")

7 occurs 3 time(s)
24 occurs 1 time(s)
1234 occurs 0 time(s)


In [14]:
# Keys are numbers encountered. Values are frequencies of occurrence.
# Both views follow the order in which each number was first seen in the list.
print(c.keys())
print(c.values())

dict_keys([3, 45, 6, 4, 2, 34, 5, 7, 8, 24, 89, 9, 1])
dict_values([3, 1, 4, 2, 2, 1, 3, 3, 2, 1, 1, 1, 1])


In [15]:
# Max value (not frequency of occurrence)
# max() iterates over the Counter's keys, i.e. the distinct numbers from the list.
max(c)

89

In [17]:
# The largest key of the Counter equals the largest element of the source list.
max(c) == max(a)

True

In [19]:
# Most common element(s) with count(s), returned as a list of (element, count) tuples
c.most_common(1)

[(6, 4)]

In [20]:
# The three most common elements, highest count first; elements with equal
# counts appear in the order they were first encountered.
c.most_common(3)

[(6, 4), (3, 3), (5, 3)]

In [29]:
# Total of all counts (the length of the source list)
# In Python 3.10+:
# c.total()
# In Python 3.9 and earlier:
sum(c.values())

25

In [34]:
# elements() returns an iterator that repeats each element as many times as it
# occurs in the source list. Elements are grouped and listed in order of first
# occurrence, so the result is a regrouping of the list, not its original order.
list(c.elements())

[3, 3, 3, 45, 6, 6, 6, 6, 4, 4, 2, 2, 34, 5, 5, 5, 7, 7, 7, 8, 8, 24, 89, 9, 1]