# Data Structures

## Tuples

In [None]:
# Tuples are immutable sequences; unlike typical lists they often hold
# heterogeneous items, where each position carries its own meaning.
(1, 2, 3)

# Immutable -> hashable (as long as every element is hashable) -> valid dict keys
{('john', 'doe'): 32, ("jane", "smith"): 24}

### Named Tuples

...<br>
Named tuples are a great way to implement generic records, where a datum's position in the record carries semantic meaning (e.g. in a table of data, each column is an attribute).

The `collections.namedtuple` function is a factory that produces subclasses of tuple
enhanced with field names and a class name (helpful with debugging)

Instances of a class that you build with namedtuple take exactly the
same amount of memory as tuples because the field names are
stored in the class.

In [None]:
from collections import namedtuple

# namedtuple(typename, field_names): field_names may be an iterable of strings
# or a single space-delimited string, e.g. 'name country population coordinates'
City = namedtuple('City', ('name', 'country', 'population', 'coordinates'))

# The generated constructor accepts the fields positionally or by keyword.
# NOTE(review): population is a string and the coordinates do not look like
# Boise's -- confirm the sample data.
boise = City(
    name='Boise',
    country='USA',
    population='1.21',
    coordinates=(35.689722, 139.691667),
)

# Fields can be read by attribute or by integer index; string keys such as
# boise["population"] are not supported
boise[2] == boise.population

# The field names live on the class, not on each instance
City._fields

# Convert the instance into a dict keyed by field name
boise._asdict()

## Strings

In [None]:
# Split on runs of whitespace (the default when no separator is given)
"Hello John".split()  # ['Hello', 'John']
# Split on an explicit separator
"Hello John, my name is joe".split(", ")  # ['Hello John', 'my name is joe']

# Concatenate a sequence of strings, placing the separator between the items
",".join(["Today", " unlike most days", " is a great day!"])  # 'Today, unlike most days, is a great day!'


### String object methods

...<br>
|Method|Description|
|-|-|
|count|Return the number of non-overlapping occurrences of substring in the string.|
|endswith|Returns True if string ends with suffix.|
|startswith|Returns True if string starts with prefix.|
|join|Use string as delimiter for concatenating a sequence of other strings.|
|index|Return position of first character in substring if found in the string; raises ValueError if not found.|
|find|Return position of first character of first occurrence of substring in the string; like index, but returns –1 if not found.|
|rfind|Return position of first character of last occurrence of substring in the string; returns –1 if not found.|
|replace|Replace occurrences of string with another string.|
|strip, rstrip, lstrip|Trim whitespace, including newlines, from both ends (strip), the right end only (rstrip), or the left end only (lstrip).|
|split|Break string into list of substrings using passed delimiter.|
|lower|Convert alphabet characters to lowercase.|
|upper|Convert alphabet characters to uppercase.|
|casefold|Convert characters to lowercase, and convert any region-specific variable character combinations to a common comparable form.|

### Regex

In [None]:
import re

txt = 'bob    is not\t that weird'

# One-off use: pass the pattern straight to the module-level function.
# Raw strings (r'...') keep backslashes such as \s from being read as string escapes.
re.split(r'\s+', txt)  # ['bob', 'is', 'not', 'that', 'weird']

# Compile once when the pattern is reused, then call methods on the compiled pattern
white_space = re.compile(r'\s+')
white_space.split(txt)


text = """Dave dave@google.com
Steve steve@gmail.com
Rob rob@gmail.com
Ryan ryan@yahoo.com """


email_rgxs = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}'

# re.IGNORECASE makes the regex case-insensitive
email_rgx = re.compile(email_rgxs, flags=re.IGNORECASE)

# findall: list of every non-overlapping match of the pattern
email_rgx.findall(text)

# search: Match object for the first occurrence anywhere in the string
# (None if there is none); .span() gives its position range
email_rgx.search(text)

# match: succeeds only when the pattern matches at the START of the string, so it
# returns None here (text starts with "Dave "). Use fullmatch() to require that
# the entire string matches the pattern.
email_rgx.match(text)

# sub: replace every occurrence of the pattern with a string
email_rgx.sub('NA', text)

### Formatting

#### f-Strings
An f-String is special syntax, f"", for formatting strings that reference variables.

In [None]:
def cap_first(x):
    """Return ``x`` with its first character upper-cased and the rest unchanged.

    Slicing (``x[:1]``) rather than indexing (``x[0]``) keeps this safe for the
    empty string. Unlike ``str.capitalize`` the remaining characters are not
    lower-cased.
    """
    return x[:1].upper() + x[1:]


name = "eric"
age = 74
net_worth = 125.2132314223

# Replacement fields may hold any expression (calls, arithmetic, literals) and an
# optional format spec after ':' -- '.0%' renders a percentage with no decimals,
# ',.2f' adds thousands separators and keeps two decimals.
s = f"Hello, {cap_first(name)}. You are {age} years old, well {(age*365)+12} days old, to be precise. That is {0.96374:.0%} of your expected lifetime.  "
s += f"You are worth ${net_worth:,.2f}; you might consider a new career!"

print(s)

#### Floats

In [None]:
# Format spec: {value:{total_width}.{precision}f}
#   total_width = minimum number of characters (padding included), precision = number of decimals
v = 4321.123
print(f"{v:3.2f}")  # 4321.12 -- width is only a minimum, so 3 has no visible effect here
print(f"{v:.2f}")  # 4321.12 -- precision only, no total width
print(f"{v:,.4f}")  # 4,321.1230 -- commas separate thousands
print(f"For fixed width strings, {v:20,}")  # no precision given; padded to width 20, commas

## Sequence Operations and Manipulations

### Slicing

In [None]:
# seq[start:stop:step] -- start is inclusive, stop is exclusive;
# negative indices count from the end of the sequence
x = [1,2,3,4,5]

x[-1] # 5 (last element)
x[:2] # [1,2]  (start until index 2 (exclusive))
x[2:] # [3,4,5] (start at index 2 (inclusive) until end)
x[1:4] # [2,3,4]  (start at 1 (incl), end at 4 (excl))
x[::-1]  # [5,4,3,2,1] (step of -1 walks the list in reverse order)

### Iterable Unpacking

In [None]:
# Positional (*) and keyword (**) argument expansion
def samp_fnc(a, b, c=3, d='hello'):
    """Print each of the four arguments next to its parameter name."""
    print(
        'a: ', a,
        ' b: ', b,
        ' c: ', c,
        ' d: ', d,
    )


pos_args = ['first', 'second']
kw_args = {'c': 'balanced', 'd': 100}

# *list fills the positional parameters; **dict fills parameters by name
samp_fnc(*pos_args, **kw_args)
# a list of four values covers a, b, c and d positionally
samp_fnc(*['a', 'b', 'c', 'd'])
# a dict mapping every parameter name to itself, expanded as keyword arguments
samp_fnc(**{key: key for key in ['a', 'b', 'c', 'd']})

In [None]:
# Grabbing excess items: the starred name collects whatever is left over, as a list
a,b, *rest = range(10)  # a=0, b=1, rest=[2, ..., 9]

# the * prefix can be applied to only one variable, but in any position
a, *mid, b = range(10)  # a=0, mid=[1, ..., 8], b=9

# nested unpacking: the parenthesised target mirrors the inner tuple of each item
for nm, (height,weight) in [("bob", (65,150)), ("mary", (65,130))]:
    print(nm,height, weight)

### Sorting and sorted searches

In [19]:
from bisect import bisect, insort

# Odd numbers from 19 down to 1
data = list(range(19, 0, -2))

# sorted() returns a new list in ascending order; it also accepts key= and reverse=
sdata = sorted(data)

# bisect gives the insertion point for an item in a sequence that is already
# sorted in ascending order (similar to np.searchsorted)
bisect(sdata, 4)  # 2

# insort inserts the item in place at that point, so the list stays sorted
insort(sdata, 4)
sdata


[1, 3, 4, 5, 7, 9, 11, 13, 15, 17, 19]

## Comprehensions

In [None]:
nums = [0, 1, 2, 3, 4]

# list: squares of the even numbers
# (test x % 2 == 0 explicitly -- a bare `x % 2` is truthy for the ODD values)
squares = [x ** 2 for x in nums if x % 2 == 0]     # [0, 4, 16]

# note that the order of the FOR clauses mirrors the order of nested FOR statements
# for x in range(2):
#     for y in range(x+5, x+7):
perms = [(x,y) for x in range(2) for y in range(x+5,x+7)] # [(0, 5), (0, 6), (1, 6), (1, 7)]

# set
{s for s in nums if s % 2 == 0}  # {0, 2, 4}

# dictionary
even_num_to_square = {x: x ** 2 for x in nums if x % 2 == 0}  # {0: 0, 2: 4, 4: 16}

# generator expression: items are produced lazily; next() pulls the first truthy value
next(x for x in [0, 0, 0, 1.5, 0, 0, 1] if x)  # 1.5

## Dicts

In [25]:
a = {"a": 1, "b": 2}  # literal syntax
b = dict(zip(("a", "b"), (1,2)))  # parallel sequences of keys and values
c = dict([("a",1),("b",2)])  # a sequence of (key, value) pairs
assert a == b == c

# dict comprehension over nums (the list defined in the Comprehensions cell);
# x % 2 == 0 keeps the even numbers -- a bare `x % 2` would keep the odd ones
{x: x ** 2 for x in nums if x % 2 == 0}  # {0: 0, 2: 4, 4: 16}

# Dates

In [None]:
# https://docs.python.org/3/library/datetime.html

from datetime import date
from datetime import datetime
from datetime import timedelta

# Create dates; datetimes are very similar but use datetime instead of date
date.fromisoformat("2019-01-01")
date.today()
date(2019,1,1)


# adding time intervals to dates
# class datetime.timedelta(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0)
datetime.now() + timedelta(days=1,minutes=5,seconds=20)
date.today() + timedelta(days=3,weeks=2)  # the arguments add up: 3 days + 2 weeks = 17 days

## Common methods

...<br>
| Method | Description |
|--|--|
| year, month, ... | get the [year,month, ...] of the date or datetime |
| weekday | Return the day of the week as an integer, where Monday is 0 |
| isoformat | Return a string representing the date in ISO 8601 format |
| fromisoformat(d) | Parse the isoformatted date string as a date (or datetime) |

# OOP

In [None]:
class Chicken:
    """Minimal class illustrating class attributes, instance attributes and methods."""

    species = 'Bird'  # class attribute: shared by every instance

    # __init__ is the initializer, run when Chicken(...) is called.
    # Names wrapped in double underscores (__init__, __repr__, ...) are special
    # methods that Python calls implicitly; they are not private.
    def __init__(self, weight):
        self.num_feet = 2  # public attribute
        # A single leading underscore marks an attribute as private by
        # convention only; Python does not enforce it.
        self._weight = weight

    def digest(self):
        """Placeholder: this method was declared without a body, so it does nothing."""
        pass

    def feed(self, food_weight):
        """Increase the chicken's weight by one tenth of ``food_weight``."""
        self._weight += food_weight / 10.0
        

# Decorators

Decorators are simply function wrappers; they take a function as an argument and then create a new function that adds some additional functionality around it.  The new function is returned.  Below is a simple example (from realpython.com, https://realpython.com/primer-on-python-decorators)

## Simple example

In [None]:
import functools


# Define your additional behavior. In this case, we print something before and after the function call
def my_decorator(func):
    """Wrap a zero-argument ``func`` so a message is printed before and after each call.

    The wrapper returns whatever ``func`` returns.
    """
    @functools.wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper():
        print("Something is happening before the function is called.")
        result = func()
        print("Something is happening after the function is called.")
        return result  # pass the wrapped function's return value through
    return wrapper


### Classic way to wrap a function

In [None]:
def say_whee():
    print("Whee!")

# Classic way to get decorator behavior from a function: define the function,
# then rebind its name to the wrapped version manually
say_whee = my_decorator(say_whee)

# Calls the wrapper returned by my_decorator, which in turn calls the original function
say_whee()

### Syntactic sugar

In [None]:
# @my_decorator is shorthand for: say_whee = my_decorator(say_whee)
@my_decorator
def say_whee():
    print("Whee!")
    
say_whee()

## Decorating functions that take arguments

In [None]:
import functools


# Accept *args and **kwargs in the wrapper so the decorator can be reused on
# functions with any number of parameters
def do_twice(func):
    """Return a wrapper that calls ``func`` twice with the same arguments.

    The wrapper returns the result of the second call.
    """
    @functools.wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper_do_twice(*args, **kwargs):
        func(*args, **kwargs)
        return func(*args, **kwargs)
    return wrapper_do_twice

# The same decorator works on a function without parameters...
@do_twice
def say_whee():
    print('Whee!')
    
# ...and on one that takes arguments, because the wrapper forwards them
@do_twice
def say_whee_named(name):
    print(name, 'said Whee!')
    

# Each call runs the decorated function twice
say_whee()
say_whee_named('Bob')

# Miscellaneous

## Special Methods

...<br>
| Category | Methods |
| --- | --- |
| String/bytes representation | `__repr__, __str__, __format__, __bytes__` |
| Conversion to number | `__abs__, __bool__, __complex__, __int__, __float__, __hash__, __index__` |
| Emulating collections | `__len__, __getitem__, __setitem__, __delitem__, __contains__` |
| Iteration | `__iter__, __reversed__, __next__` |
| Emulating callables | `__call__` |
| Context management | `__enter__, __exit__` |
| Instance creation and destruction | `__new__, __init__, __del__` |
| Attribute management | `__getattr__, __getattribute__, __setattr__, __delattr__, __dir__` |
| Attribute descriptors | `__get__, __set__, __delete__` |
| Class services | `__prepare__, __instancecheck__, __subclasscheck__` |

## Syntactic Sugar

### Merge dictionaries

In [None]:
defaults = {'winner' : 'thomas', 'loser' : 'other', 'contest' : 'Singing'}
user_args = {'loser' : 'tim t', 'contest' : 'Dancing'}

# Unpack both dicts into a new one; for duplicate keys the later dict wins, so
# user_args overrides defaults (Python 3.9+ can also write defaults | user_args)
{**defaults, **user_args}  # {'winner': 'thomas', 'loser': 'tim t', 'contest': 'Dancing'}

## Serialization

### Pickle

#### Without a file

In [None]:
import pickle
import io

obj = "Hello world"
s = io.BytesIO()  # in-memory binary stream standing in for a file
s_idx = s.tell()  # remember the starting position of the stream
# dump() returns None, so its result is not bound to a name (binding it to `x`
# would overwrite the list `x` from the Slicing section); writing advances the
# stream position
pickle.dump(obj, s, pickle.HIGHEST_PROTOCOL)
s.seek(s_idx)  # jump back to the start of the stream before reading
restored = pickle.load(s)
restored

#### With a file

In [None]:
import pickle

file_name = 'temp_file.pkl'
obj = {'Score' : 12}

# 'with' closes (and therefore flushes) the file even if pickling fails
with open(file_name, 'wb') as f:  # write binary mode
    pickle.dump(obj, f)

# NOTE: only unpickle data you trust -- pickle.load can execute arbitrary code
with open(file_name, 'rb') as f:  # read binary mode
    loaded = pickle.load(f)
loaded

## Command Line arguments
https://levelup.gitconnected.com/the-easy-guide-to-python-command-line-arguments-96b4607baea1

In [None]:
import argparse

parser = argparse.ArgumentParser(description='An example program of argparse!')
parser.add_argument("--a", default=1, type=int, help="This is the 'a' variable")
parser.add_argument("--education",
                    choices=["highschool", "college", "university", "other"],
                    required=True, type=str, help="Your education level")

# In a script, call parser.parse_args() with no arguments to read sys.argv[1:].
# Inside a notebook sys.argv holds the kernel's own arguments, which makes a bare
# parse_args() exit with an error, so an explicit argument list is passed here.
args = parser.parse_args(["--education", "college"])

ed = args.education

## Profiling

### Decompose to times in function call

In [None]:
from pyinstrument import Profiler

profiler = Profiler()
# The code executed inside the with-block is what gets profiled
with profiler:
    sum(range(100000))

# Write the profiler's HTML output to profile.html
with open("profile.html", 'w') as f:
    f.write(profiler.output_html())

### Time the execution

In [None]:
from time import perf_counter
 
# Read the clock before and after the work; the difference is the elapsed time in seconds
t0 = perf_counter()
sum(range(100000))
t1 = perf_counter()
 
print("Elapsed time:", t1 - t0)

### IPython magic

In [None]:
%%timeit
# Cell magic (must be the first line of the cell): times the whole cell body,
# i.e. both statements below together
sum(range(100000))
sum(range(100000))

# Or time a single line with the line magic
# %timeit sum(range(100000))

# References

1. Luciano Ramalho, (2015). `Fluent Python`. O'Reilly Media, Inc. ISBN: 978-1-491-94600-8