# Notebook for the documentation of different Python 3 recipes

## 1.0 Data structures and algorithms
#### The first one is a love of mine, variable unpacking
### 1.1 Variable unpacking and 1.2 unpacking iterables of arbitrary size

In [85]:
p = (4, 5)
a, b = p
print("p = ",p,"\na = ",a,", b = ",b)

p =  (4, 5) 
a =  4 , b =  5


In [86]:
list_of_items = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
head, *tail = list_of_items
print("list =>",list_of_items)
print("\thead => ", head,"\n\ttail => ",tail)

list => [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
	head =>  1 
	tail =>  [2, 3, 4, 5, 6, 7, 8, 9, 10]


In [87]:
list_of_items = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
begin, *middle, end = list_of_items
print("list =>", list_of_items)
print("\tbegin => ", begin,"\n\tmiddle =>", middle, "\n\tend =>",end)

list => [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
	begin =>  1 
	middle => [2, 3, 4, 5, 6, 7, 8, 9] 
	end => 10


### 1.3 Keep last N items

In [88]:
from collections import deque
# A list-like sequence optimized for data accesses near its endpoints.

def search(lines, pattern, history=5):
    """Yield each line containing ``pattern`` together with its recent context.

    Args:
        lines: Iterable of lines to scan.
        pattern: Value tested with ``in`` against every line.
        history: Maximum number of preceding lines kept as context.

    Yields:
        ``(line, context)`` pairs. ``context`` is the same deque object on
        every yield and keeps being updated as iteration continues, so copy
        it (e.g. ``list(context)``) if it must outlive the current step.
    """
    context = deque(maxlen=history)
    for current in lines:
        if pattern in current:
            yield current, context
        # Appended after the check so a match never appears in its own context.
        context.append(current)
# Print every line containing 'python', preceded by up to five lines of context.
# Lines read from a file keep their trailing newline, so each print uses
# end='' to avoid emitting an extra blank line after the text.
with open("text.txt") as f:
    for line, prev_lines in search(f, 'python', 5):
        for pline in prev_lines:
            print(pline, end='')
        print(line, end='')
        print('-'*20)

lalalala

this is a text file

this is the third line

this is the line of index 3

hahahahha

can you find the word python in it?
--------------------


### 1.4 Find N largest numbers and N smallest numbers faster in a list using heap

In [89]:
import heapq
import random
# Heap queue algorithm (a.k.a. priority queue).

list_of_items = [random.randint(-99,99) for i in range(10)]
print("list => ",list_of_items)
print("n => 2")
print("n largest => ", heapq.nlargest(2, list_of_items))
print("n smallest => ", heapq.nsmallest(2, list_of_items))

list =>  [33, 23, -6, -57, -17, 13, -57, -31, 86, 59]
n => 2
n largest =>  [86, 59]
n smallest =>  [-57, -57]


### 1.5 Implementing a priority queue

In [90]:
import heapq

class PriorityQueue:
    """Queue that always pops the item pushed with the highest priority.

    Items pushed with equal priority are popped in the order they were pushed.
    """

    def __init__(self):
        self._queue = []   # heap of (-priority, insertion index, item) tuples
        self._index = 0    # running insertion counter, used as a tie-breaker

    def push(self, item, priority):
        """Add ``item`` to the queue with the given ``priority``."""
        # The priority is negated because heapq is a min-heap; the index keeps
        # tuple comparison from ever reaching ``item`` itself.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the highest-priority item."""
        _, _, item = heapq.heappop(self._queue)
        return item

class Item:
    """Minimal named object used to populate the priority queue example."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # The name is rendered with repr(), so strings appear quoted.
        return f'Item({self.name!r})'
    
# Push four items; 'foo' and 'fooSamePriority' share priority 1.
q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('fooSamePriority'), 1)
q.push(Item('test'), 2)
q.push(Item('lala'), 3)

# Items come out from highest to lowest priority; the two priority-1 items
# come out in the order they were pushed.
print(q.pop())
print(q.pop())
print(q.pop())
print(q.pop())


Item('lala')
Item('test')
Item('foo')
Item('fooSamePriority')


### 1.6 Mapping multiple values to the same key in a dict

In [91]:
from collections import defaultdict
# The default factory is called without arguments to produce
# a new value when a key is not present, in __getitem__ only.
# A defaultdict compares equal to a dict with the same items.
# All remaining arguments are treated the same as if they were
# passed to the dict constructor, including keyword arguments.
dictionaryOfLists = defaultdict(list)
dictionaryOfSets = defaultdict(set)

dictionaryOfLists['l1'].append(1)
dictionaryOfLists['l1'].append(1)
dictionaryOfLists['l1'].append(2)

dictionaryOfSets['l1'].add(1)
dictionaryOfSets['l1'].add(1)
dictionaryOfSets['l1'].add(2)

print("Dictionary of lists:\n", dictionaryOfLists)

print("\nDictionary of Sets:\n", dictionaryOfSets)

# This way is more readable than:
# d = {}
# for key, value in pairs:
#     if key not in d:
#         d[key] = []
#     d[key].append(value)

Dictionary of lists:
 defaultdict(<class 'list'>, {'l1': [1, 1, 2]})

Dictionary of Sets:
 defaultdict(<class 'set'>, {'l1': {1, 2}})


### 1.7 Keeping the order in dictionaries 

In [92]:
from collections import OrderedDict
# Dictionary that remembers insertion order

ord_d = OrderedDict()
ord_d['key1'] = 1
ord_d['key2'] = 2
ord_d['key3'] = 3
ord_d['key2'] = 43
ord_d['key2'] = 2


print("Ordered dict: ", ord_d)

d = dict()
d['key1'] = 1
d['key2'] = 2
d['key3'] = 3
d['key2'] = 43
d['key2'] = 2
print("Normal dict: ", d)

Ordered dict:  OrderedDict([('key1', 1), ('key2', 2), ('key3', 3)])
Normal dict:  {'key1': 1, 'key2': 2, 'key3': 3}


### 1.8 Making calculations with dictionaries

In [93]:
prices = {'PETR': 300, 'ABS': 200, 'LOL': 100}
# zip() puts the value first in each (value, key) pair. Tuples compare element
# by element, so min()/max() order the pairs by price and the key comes along
# in the result.
min_price = min(zip(prices.values(), prices.keys()))
max_price = max(zip(prices.values(), prices.keys()))
print("Min price:", min_price)
print("Max price:", max_price)

Min price: (100, 'LOL')
Max price: (300, 'PETR')


### 1.9 Finding what two dicts have in common

In [94]:
d1 = {'x':1, 'y': 2, 'z': 3}
d2 = {'x':1, 'y': 42, 'w': 3}

print(d1.keys() & d2.keys())
print(d1.keys() - d2.keys())
print(d1.items() & d2.items())

{'y', 'x'}
{'z'}
{('x', 1)}


### 1.10 Removing duplicate items from a sequence, preserving the order of the data

In [95]:
 def dedupe(items):
        seen = set()
        for item in items:
            if item not in seen:
                yield item
                seen.add(item)
                
list_of_items = [random.randint(0,5) for i in range(10)]
print(list_of_items)
print(list(dedupe(list_of_items)))

[2, 2, 4, 4, 4, 0, 4, 4, 2, 3]
[2, 4, 0, 3]


### 1.11 Giving names to slices

In [96]:
string = "Im reading this Python Book, and boy... It is quite good"
first_part = slice(0,27)
print(string[first_part])
steped_string = slice(0,-1,2)
print(string[steped_string])

Im reading this Python Book
I edn hsPto ok n o..I sqiego


### 1.12 Determine the items that appear most frequently in a sequence

In [97]:
list_of_items = [random.randint(0,5) for i in range(20)]
print(list_of_items)

from collections import Counter
# Dict subclass for counting hashable items.  Sometimes called a bag
# or multiset.  Elements are stored as dictionary keys and their counts
# are stored as dictionary values.

number_counts = Counter(list_of_items)
print(number_counts)
top_two = number_counts.most_common(2)
print("Top two:", top_two)

[5, 4, 3, 5, 4, 0, 4, 4, 3, 4, 1, 4, 5, 3, 1, 2, 0, 2, 5, 4]
Counter({4: 7, 5: 4, 3: 3, 0: 2, 1: 2, 2: 2})
Top two: [(4, 7), (5, 4)]


### 1.13 Ordering a list of dictionaries by a common key

In [98]:
rows = [
    {"first_name":"Gabriel", "last_name":"Fuhr", "age":20},
    {"first_name":"Roberto", "last_name":"Carlos", "age":80},
    {"first_name":"Greg", "last_name":"Doucette", "age":40}
]

from operator import itemgetter

rows_by_first_name = sorted(rows, key=itemgetter("first_name"))
rows_by_age = sorted(rows, key=itemgetter("age"))
print("Rows by the first name:", rows_by_first_name)
print("\nRows by the age:", rows_by_age)

Rows by the first name: [{'first_name': 'Gabriel', 'last_name': 'Fuhr', 'age': 20}, {'first_name': 'Greg', 'last_name': 'Doucette', 'age': 40}, {'first_name': 'Roberto', 'last_name': 'Carlos', 'age': 80}]

Rows by the age: [{'first_name': 'Gabriel', 'last_name': 'Fuhr', 'age': 20}, {'first_name': 'Greg', 'last_name': 'Doucette', 'age': 40}, {'first_name': 'Roberto', 'last_name': 'Carlos', 'age': 80}]


### 1.14 Ordering objects by attribute without native comparison support

In [99]:
class User:
    """Example object that defines no comparison methods of its own."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        return f'User({self.user_id})'
    
users = [User(29), User(2), User(13)]

print("Original users: ", users)

from operator import attrgetter
print("Sorted users", sorted(users, key=attrgetter('user_id')))

Original users:  [User(29), User(2), User(13)]
Sorted users [User(2), User(13), User(29)]


### 1.15 Grouping records according to a specific field

In [100]:
rows = [
    {"first_name":"Gabriel", "last_name":"Fuhr", "birthDate":"08/08/1000",},
    {"first_name":"Roberto", "last_name":"Carlos", "birthDate":"08/08/1400"},
    {"first_name":"Greg", "last_name":"Doucette", "birthDate":"08/08/1000"}
]

from operator import itemgetter
from itertools import groupby

# First we need to order by the desired field
rows.sort(key=itemgetter('birthDate'))

# Iterate by groups
for date, items in groupby(rows, key=itemgetter('birthDate')):
    print(date)
    for i in items:
        print("\t", i)

08/08/1000
	 {'first_name': 'Gabriel', 'last_name': 'Fuhr', 'birthDate': '08/08/1000'}
	 {'first_name': 'Greg', 'last_name': 'Doucette', 'birthDate': '08/08/1000'}
08/08/1400
	 {'first_name': 'Roberto', 'last_name': 'Carlos', 'birthDate': '08/08/1400'}


### 1.16 Filtering elements of a sequence 

In [101]:
list_of_items = [random.randint(-99,99) for i in range(10)]
print([x for x in list_of_items if x > 0])
print([x for x in list_of_items if x < 0])
print([x for x in list_of_items if x > 0 and x%2 == 0])

# A generator expression is lazy: it produces values only while it is iterated.
generator = (x for x in list_of_items if x > 0)
print(generator)
for x in generator:
    print(x)
# The loop above consumed the generator, so this prints an empty list.
print(list(generator))
# Build a fresh generator to go over the same values again.
generator = (x for x in list_of_items if x > 0)
print(list(generator))

### Using filter
values = ['1', '2', '3', '-', 'asdas', 'N/A']
def is_int(val):
    """Return True when ``int(val)`` succeeds, False otherwise.

    Written as a predicate for ``filter``: it never raises for values that
    ``int()`` rejects.
    """
    try:
        int(val)
    except (ValueError, TypeError):
        # ValueError: text that is not an integer literal ('-', 'N/A').
        # TypeError: objects int() cannot convert at all (e.g. None).
        return False
    return True
ivals = list(filter(is_int, values))
print(values)
print(ivals)

[68, 52, 87, 50]
[-56, -76, -36, -27, -46, -80]
[68, 52, 50]
<generator object <genexpr> at 0x7efbe17f8950>
68
52
87
50
[]
[68, 52, 87, 50]
['1', '2', '3', '-', 'asdas', 'N/A']
['1', '2', '3']


### 1.17 Extracting a sub dictionary from a dict

In [102]:
prices = {'PETR': 300, 'ABS': 200, 'LOL': 100}
p1 = {key: value for key, value in prices.items() if value > 199}
print(prices)
print(p1)

{'PETR': 300, 'ABS': 200, 'LOL': 100}
{'PETR': 300, 'ABS': 200}


### 1.18 Mapping names to items of a sequence

In [1]:
from collections import namedtuple
# Returns a new subclass of tuple with named fields.

Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber('gfuhr@aol.net', '2001-10-19')
print(sub)

Subscriber(addr='gfuhr@aol.net', joined='2001-10-19')


### 1.19 Transforming and reducing data at the same time 

In [8]:
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9]
numbers_sum = sum(x/(x*x) for x in numbers)


# Use generator expressions instead of list comprehensions if the data will be used only once and then discarded
shares = [
    {'name':'PETR','price':300}, 
    {'name':'ABS','price': 200}, 
    {'name':'LOL','price': 100}]
min_price = min(share['price'] for share in shares)
print(min_price)
# Alternative, show the min price and the min name:
min_price = min(shares, key=lambda s: s['price'])
print(min_price)

100
{'name': 'LOL', 'price': 100}


### 1.20 Combining multiple mappings into a single mapping

In [13]:
a = {'x': 1, 'y': 2}
b = {'y': 3, 'z': 4}
from collections import ChainMap
# A ChainMap groups multiple dicts (or other mappings) together
# to create a single, updateable view.

# The underlying mappings are stored in a list.  That list is public and can
# be accessed or updated using the *maps* attribute.  There is no other
# state.

# Lookups search the underlying mappings successively until a key is found.
# In contrast, writes, updates, and deletions only operate on the first
# mapping.
c = ChainMap(a, b)
print(c['x'])
print(c['y']) # Duplicate keys will have the value of the first appearance of a key in the mapping
print(c['z'])

1
2
4


## 2.0 Strings and texts

### 2.1 Separating strings according to various delimiters 

In [14]:
line = "asdasdasda; adsasdasaddas, asdddsasa, dsadsaas, sda"
import re
re.split(r'[;,\s]\s*',line)

['asdasdasda', 'adsasdasaddas', 'asdddsasa', 'dsadsaas', 'sda']

### 2.2 Matching text at the start or end of a string

In [16]:
string = "thisIsAPythonProgram.py"
print(string.startswith("this"), string.endswith(".py"))

True True


### 2.3 Using shell wildcards in Python 

In [19]:
from fnmatch import fnmatch, fnmatchcase
# Test whether FILENAME matches PATTERN.

# Patterns are Unix shell style:

# *       matches everything
# ?       matches any single character
# [seq]   matches any character in seq
# [!seq]  matches any char not in seq

# An initial period in FILENAME is not special.
files = ["test.txt", "program.py", "joke.docx", "text.txt"]
print(list(fnmatch(file, "*.txt") for file in files))

[True, False, False, True]


### 2.4 Matching and searching for patterns in text 

In [24]:
import re
text1 = '11/27/2012'
text2 = 'Nov 27, 2012'

regular_expression = r'\d+/\d+/\d+'

def isDate(text):
    """Return True when ``text`` starts with ``digits/digits/digits``."""
    # re.match only anchors at the beginning of the string, so any text that
    # follows the date does not prevent a match.
    return re.match(regular_expression, text) is not None
print(isDate(text1))
print(isDate(text2))

regular_expression = re.compile(r'\d+/\d+/\d+')
text = "Today is a good day 02/10/2020 my lectures will begin at 03/13/2020"
dates = regular_expression.findall(text)

True
False


#### Using groups to separate fields of the regular expression 

In [32]:
regular_expression = re.compile(r'(\d+)/(\d+)/(\d+)')
text = "Today is a good day 02/10/2020 my lectures will begin at 03/13/2020"
dates = regular_expression.findall(text)
print(dates)
for date in dates:
    for group in date:
        print(group)

[('02', '10', '2020'), ('03', '13', '2020')]
02
10
2020
03
13
2020


### 2.5 Searching and substituting text

#### Simple match and substitute 

In [2]:
text = "this is test of a text, i need to find this in this text"
print(text)
text = text.replace("this", "fish")
print(text)

this is test of a text, i need to find this in this text
fish is test of a text, i need to find fish in fish text


#### More complex patterns and substitution

In [4]:
text = "Today is a good day 02/10/2020 my lectures will begin at 03/13/2020"
import re
print(text)
print(re.sub(r'(\d+)/(\d+)/(\d+)', r'\3-\1-\2', text))

Today is a good day 02/10/2020 my lectures will begin at 03/13/2020
Today is a good day 2020-02-10 my lectures will begin at 2020-03-13


#### Even more complex patterns and substitution

In [8]:
import re
from calendar import month_name
text = "Today is a good day 02/10/2020 my lectures will begin at 03/13/2020"
regular_expression = re.compile(r'(\d+)/(\d+)/(\d+)')

def change_date(match):
    """Rewrite a ``month/day/year`` regex match as ``day MonthName year``.

    Meant to be passed to ``re.sub`` as the replacement callback.

    Args:
        match: Match object whose groups 1, 2 and 3 hold the month, day and
            year as digit strings.

    Returns:
        The reformatted date, or the matched text unchanged when group 1 is
        not a month number between 1 and 12.
    """
    month = int(match.group(1))
    # month_name[0] is an empty string and indexes above 12 raise IndexError,
    # so anything outside 1..12 is returned exactly as it appeared in the text.
    if not 1 <= month <= 12:
        return match.group(0)
    return '{} {} {}'.format(match.group(2), month_name[month], match.group(3))

print(regular_expression.sub(change_date, text))

Today is a good day 10 February 2020 my lectures will begin at 13 March 2020


### 2.6 Matching strings while ignoring the Case 

In [11]:
text = "UPPER PYTHON, lower python, Mixed Python"
re.findall("python", text, flags=re.IGNORECASE)

['PYTHON', 'python', 'Python']

### 2.7 How to find the shortest match using regular expressions

In [14]:
str_pat = re.compile(r'\"(.*)\"')
text1 = 'Computer says "no."'
print(str_pat.findall(text1))
text2 = 'Computer says "no." Phone says "yes"'
print(str_pat.findall(text2))
str_pat = re.compile(r'\"(.*?)\"')
print(str_pat.findall(text2))

['no.']
['no." Phone says "yes']
['no.', 'yes']


### 2.8 Regular expressions for multiline patterns 

In [22]:
one_line = "/*This is a comment*/"
multi_line = """ /* This is a multiple
lines comment */
"""
comment_pat = re.compile(r'/\*(.*?)\*/')
print(comment_pat.findall(one_line))
print(comment_pat.findall(multi_line))

comment_pat = re.compile(r'/\*(.*?)\*/',re.DOTALL)
print(comment_pat.findall(multi_line))

['This is a comment']
[]
[' This is a multiple\nlines comment ']


### 2.9 Normalizing Unicode text 

In [23]:
s1 = "Spicy Japape\u00f1o"
s2 = "Spicy Japapen\u0303o"
print(s1, s2)

Spicy Japapeño Spicy Japapeño


In [29]:
import unicodedata
# This module provides access to the Unicode Character Database which
# defines character properties for all Unicode characters. The data in
# this database is based on the UnicodeData.txt file version
# 11.0.0 which is publicly available from ftp://ftp.unicode.org/.

# The module uses the same names and symbols as defined by the
# UnicodeData File Format 11.0.0.
print(s1 == s2)
t1 = unicodedata.normalize('NFC', s1)
t2 = unicodedata.normalize('NFC', s2)
print(t1 == t2)
print(ascii(t2))
t1 = unicodedata.normalize('NFD', s1)
t2 = unicodedata.normalize('NFD', s2)
print(t1 == t2)
print(ascii(t2))
normalized = ''.join(char for char in t1 if not unicodedata.combining(char))
print(normalized)

False
True
'Spicy Japape\xf1o'
True
'Spicy Japapen\u0303o'
Spicy Japapeno


### 2.10 Working with Unicode Characters in Regular Expressions 

In [32]:
import re
# Raw string: in a normal string literal '\d' is an invalid escape sequence,
# which Python only tolerates with a warning.
num = re.compile(r'\d+')
# Example input made of ASCII digits
num.match('123')
# The pattern below must stay a normal (non-raw) literal so that \u00df is
# turned into the character 'ß'.
pat = re.compile('stra\u00dfe', re.IGNORECASE)
s = 'straße'

print(pat.match(s))
# Upper-casing turns 'ß' into 'SS', and IGNORECASE does not treat those as
# equivalent, so this prints None.
print(pat.match(s.upper()))

<re.Match object; span=(0, 6), match='straße'>
None


### 2.11 Removing unwanted characters from Strings 

In [41]:
string = "              this is a text \n"
print(string.strip())
string = "              this is a text \n"
print(string.lstrip())
string = "              this is a text \n"
print(string.rstrip())

text = "-----------text++++++++++"
print(text.lstrip('-'))
print(text.rstrip('+'))

text = "hi---------mark"
print(text.strip('-'))
print(text.replace('-',''))

this is a text
this is a text 

              this is a text
text++++++++++
-----------text
hi---------mark
himark


### 2.13 Sanitizing and Cleaning Up Text

In [44]:
string = 'pýtĥöñ\fis\tawesome\r\n'
print(string)
remap = {
        ord('\t') : ' ',
        ord('\f'): ' ',
        ord('\r') : None # This actually deletes the character
        } 
a = string.translate(remap)
print(a)

pýtĥöñis	awesome

pýtĥöñ is awesome



In [46]:
import unicodedata
import sys
# Translation table that maps every combining character to None, which makes
# str.translate delete it. range() excludes its stop value, hence the + 1 so
# that sys.maxunicode itself is also inspected.
cmb_chrs = dict.fromkeys(c for c in range(sys.maxunicode + 1)
                        if unicodedata.combining(chr(c)))
# `a` is the cleaned string produced by the previous cell. NFD splits accented
# letters into base letter + combining mark; translate() then drops the marks.
b = unicodedata.normalize('NFD', a)
c = b.translate(cmb_chrs)
print(b,c)

# This was for text, but for numbers you can use the Unicode digitmap

# Maps every character in Unicode category 'Nd' to the ASCII digit with the
# same numeric value.
digitmap = {c: ord('0') + unicodedata.digit(chr(c))
           for c in range(sys.maxunicode + 1)
           if unicodedata.category(chr(c)) == 'Nd'}
x = '\u0661\u0662\u0663'
print(x)
x.translate(digitmap)

pýtĥöñ is awesome
 python is awesome

١٢٣


'123'

### 2.14 Aligning text Strings

In [49]:
text = "Greg Doucette"
print(text.ljust(20))
print(text.rjust(20))
print(text.center(20))
print(text.ljust(20,'*'))
print(text.rjust(20,'*'))
print(text.center(20,'*'))

Greg Doucette       
       Greg Doucette
   Greg Doucette    
Greg Doucette*******
*******Greg Doucette
***Greg Doucette****


#### You can also use the format function to easily adjust things 

In [51]:
print(format(text, '>20'))
print(format(text, '<20'))
print(format(text, '^20'))
print(format(text, '*>20'))
print(format(text, '*<20'))
print(format(text, '*^20'))

       Greg Doucette
Greg Doucette       
   Greg Doucette    
*******Greg Doucette
Greg Doucette*******
***Greg Doucette****


### 2.15 Combining and Concatenating Strings