Any sequence or iterable can be unpacked into variables using a simple assignment operation.

In [None]:
# Pack two values into a tuple, then unpack them by position.
p = 4, 5
(x, y) = p

In [None]:
data = ['ACME', 50, 91.1, (2012, 12, 21)]

# Flat unpacking: the trailing tuple is bound to `date` as a whole.
[name, shares, price, date] = data
# Nested unpacking: the target mirrors the shape of the data, so the
# trailing tuple is split into its three components as well.
[name, shares, price, [year, mon, day]] = data


In Python you might want to discard some of the unpacked data. You can use a throwaway variable name such as "_" for every value you don't care about.

In [None]:
# Only the two middle fields are of interest; "_" absorbs the first and
# the last one.
[_, shares, price, _] = data

If you don't know how many elements you want to unpack, you can use the * operator to be flexible.

In [None]:
def drop_first_last(grades):
    """Return the average of *grades* with the first and last entry dropped.

    Args:
        grades: An iterable of numbers holding at least three values.

    Returns:
        The arithmetic mean of every grade except the first and the last.

    Raises:
        ValueError: If *grades* has fewer than three values, i.e. nothing
            is left to average once the first and last are removed.
    """
    # The star target collects everything between the two ends; the ends
    # themselves are deliberately thrown away.
    _, *middle, _ = grades
    if not middle:
        raise ValueError("need at least three grades to drop first and last")
    return sum(middle) / len(middle)

In [None]:
record = (
    'Dave',
    'dave@example.com',
    '773-555-1212',
    '847-555-1212',
)
# The starred target gathers every field after the e-mail into a list.
(name, email, *phone_numbers) = record
print(phone_numbers)

It is worth noting that the star syntax can be especially useful when iterating over a
sequence of tuples of varying length. For example, perhaps a sequence of tagged tuples:

In [None]:
# Tagged tuples: the first element names the record type, the remaining
# elements are that type's payload (two values for 'foo', one for 'bar').
records = [
    ('foo', 1, 2),
    ('bar', 'hello'),
    ('foo', 3, 4),
]


def do_foo(x, y):
    """Handle a 'foo' record by printing its two payload values."""
    print('foo', x, y)


def do_bar(s):
    """Handle a 'bar' record by printing its single payload value."""
    print('bar', s)


# Dispatch every record to its handler. The loop has to live at the top
# level: nested inside do_bar() it would never run on its own, and any call
# to do_bar() would recurse endlessly through the 'bar' record.
for tag, *args in records:
    if tag == 'foo':
        do_foo(*args)
    elif tag == 'bar':
        do_bar(*args)

Star unpacking can also be useful when combined with certain kinds of string processing
operations, such as splitting. For example:

In [None]:
line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
# The first field is the user name and the last two are the home directory
# and the shell; the starred target collects whatever lies in between.
[uname, *fields, homedir, sh] = line.split(sep=':')

Throwing away a variable number of elements:

In [None]:
record = 'ACME', 50, 123.45, (12, 18, 2012)
# Keep only the first field and the last element of the nested tuple;
# every other value is swallowed by a starred "_".
[name, *_, [*_, year]] = record

Example of an inline if/else (a conditional expression).

The syntax is as follows: <i> expression_if_true if condition else expression_if_false </i>

In [None]:
def sumTest(items):
    """Add up *items* by splitting the first element off from the rest.

    Returns the first element plus the sum of the remaining ones, or just
    the first element when nothing else follows it. Unpacking an empty
    iterable raises ValueError.
    """
    first, *rest = items
    if rest:
        return first + sum(rest)
    return first

A deque is a useful data structure for keeping track of the last N items seen, for example in a searching function.

In [None]:
from collections import deque

def search(lines, pattern, history=5):
    """Yield each line containing *pattern* along with the lines before it.

    Args:
        lines: Iterable of strings to scan, e.g. an open text file.
        pattern: Substring to look for in every line.
        history: Maximum number of preceding lines to remember.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is a
        deque of up to *history* lines that came before the matching line.
        The same deque object is reused for every match and keeps changing
        as the scan continues, so copy it if it must outlive the iteration
        step.
    """
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            yield line, previous_lines
        # Record every line, matching or not, only after it has been
        # examined, so the history never contains the current line. This
        # must happen inside the loop or the history would stay empty.
        previous_lines.append(line)

# Example use on a file
if __name__ == '__main__':
    with open('somefile.txt') as source:
        for match, context in search(source, 'python', 5):
            # end='' because the printed lines are expected to carry their
            # own trailing newline already.
            for earlier in context:
                print(earlier, end='')
            print(match, end='')
            # Separator between consecutive matches.
            print('-' * 20)

In [None]:
# end='' suppresses the newline print() normally appends, so both words
# and the dashes below end up on one output line.
for word in ('hello', 'world'):
    print(word, end='')
print('-' * 20)

The syntax ('expression' for 'var' in 'iterable' [if 'condition']) specifies the general form for a generator comprehension. This produces a generator, whose instructions for generating its members are provided within the parenthetical statement.

In [None]:
#(<expression> for <var> in <iterable> if <condition>)
#
# is equivalent to:
#
#for <var> in <iterable>:
#    if bool(<condition>):
#        yield <expression>

When debugging in the console, it's helpful to use the built-in function <code>next(gen_name)</code> to get the next value produced by the generator.

In [None]:
# Floor division also works on floats: the quotient is rounded down to a
# whole number but stays a float.
quotient = 11.1 // 4.2
print(quotient)

# Testing generators

In [2]:
file_name = "techcrunch.csv"
# Open the file in a with-block so the handle is closed once the pipeline
# has been consumed; a bare open() inside a generator expression leaves
# the file open for an indeterminate time.
with open(file_name) as csv_file:
    # Each stage is a lazy generator; nothing is read until sum() pulls
    # values through the whole chain.
    lines = (line for line in csv_file)
    list_line = (s.rstrip().split(",") for s in lines)
    # The first row is the header; consuming it here leaves only data
    # rows for the stages below.
    cols = next(list_line)
    company_dicts = (dict(zip(cols, data)) for data in list_line)
    funding = (
        int(company_dict["raisedAmt"])
        for company_dict in company_dicts
        if company_dict["round"] == "a"
    )
    # Must run inside the with-block: the generators read from the file.
    total_series_a = sum(funding)
print(f"Total series A fundraising: ${total_series_a}")

Total series A fundraising: $18500000


In [12]:
file_name = "techcrunch.csv"
# Open the file in a with-block so the handle is closed after the loop; a
# bare open() inside a generator expression leaves the file open for an
# indeterminate time.
with open(file_name) as csv_file:
    lines = (line for line in csv_file)
    list_line = (s.rstrip().split(",") for s in lines)
    # The first row is the header; consuming it here leaves only data
    # rows for the dict-building stage.
    cols = next(list_line)
    company_dicts = (dict(zip(cols, data)) for data in list_line)

    # Must iterate inside the with-block: the generators read lazily from
    # the open file.
    for cd in company_dicts:
        print(cd.items())


dict_items([('permalink', 'digg'), ('company', 'Digg'), ('numEmps', '60'), ('category', 'web'), ('city', 'San Francisco'), ('state', 'CA'), ('fundedDate', '1-Dec-06'), ('raisedAmt', '8500000'), ('raisedCurrency', 'USD'), ('round', 'b')])
dict_items([('permalink', 'digg'), ('company', 'Digg'), ('numEmps', '60'), ('category', 'web'), ('city', 'San Francisco'), ('state', 'CA'), ('fundedDate', '1-Oct-05'), ('raisedAmt', '2800000'), ('raisedCurrency', 'USD'), ('round', 'a')])
dict_items([('permalink', 'facebook'), ('company', 'Facebook'), ('numEmps', '450'), ('category', 'web'), ('city', 'Palo Alto'), ('state', 'CA'), ('fundedDate', '1-Sep-04'), ('raisedAmt', '500000'), ('raisedCurrency', 'USD'), ('round', 'angel')])
dict_items([('permalink', 'facebook'), ('company', 'Facebook'), ('numEmps', '450'), ('category', 'web'), ('city', 'Palo Alto'), ('state', 'CA'), ('fundedDate', '1-May-05'), ('raisedAmt', '12700000'), ('raisedCurrency', 'USD'), ('round', 'a')])
dict_items([('permalink', 'photobu

In [10]:
nums = [1, 23, 3]
# Passing a list to list() produces another list with the same elements.
test = list(nums)
print(type(nums), type(test), sep='\n')
print(nums, test, sep='\n')

<class 'list'>
<class 'list'>
[1, 23, 3]
[1, 23, 3]


<strong> Priority Queue Implementation </strong>

In [13]:
import heapq

class PriorityQueue:
    """Queue that always pops the item with the highest priority.

    Items pushed with equal priority come back out in the order in which
    they were pushed.
    """

    def __init__(self):
        self._index = 0
        self._queue = []

    def push(self, item, priority):
        """Add *item* with the given *priority*; larger values pop sooner."""
        # heapq keeps the smallest entry first, so the priority is negated.
        # The running index breaks ties between equal priorities, which
        # also means the items themselves are never compared.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority."""
        *_, item = heapq.heappop(self._queue)
        return item

Using defaultdict can help clean up the process of creating a multivalued dictionary that maps keys to lists. It automatically initializes the value for a key the first time the key is accessed, so you don't have to initialize keys yourself:

In [None]:
#not so clean way
d = {}
for key, value in pairs:
    if key not in d:
        d[key] = []
d[key].append(value)

# much cleaner way with default dict
d = defaultdict(list)
for key, value in pairs:
    d[key].append(value)

Ordered dictionaries preserve the order in which items are inserted. This is particularly useful when you want to serialize data into JSON and preserve the order of the fields for processing on the receiving end.

NOTE: the memory size of an ordered dict is almost twice that of a normal dict, because it keeps track of the ordering in a doubly linked list.

<strong> Remove duplicates and maintain order in a sequence </strong>

In [14]:
def dedupe(items, key=None):
    """Lazily yield the elements of *items* with duplicates removed.

    The first occurrence of each element is kept and the original order is
    preserved. When *key* is given, two elements count as duplicates if
    ``key(element)`` is equal for both; this also allows unhashable
    elements as long as the key result is hashable.
    """
    fingerprint = key if key is not None else (lambda obj: obj)
    seen = set()
    for item in items:
        marker = fingerprint(item)
        if marker in seen:
            continue
        yield item
        seen.add(marker)