# Basic Python Notes
This notebook covers basics of Python that I need refreshed or didn't know about.\
This covers: Data Structures like dicts and sets, Functions and unique pythonic function features/quirks, OOP notes on classes and objects, and modules/packages/namespace notes.

# Data Structures

## Dicts

When using Dicts, and looking up a value by its key you may get an exception:

In [15]:
example = { 'a':1, 'b':2, 'c':3 }
print(example)

try:
    print(example['d'])
except KeyError as err:
    print('KeyError at ', err, '!')

{'a': 1, 'b': 2, 'c': 3}
KeyError at  'd' !


To avoid this exception you can do one of two things, first check if the key exists:

In [5]:
if 'd' in example:
    print(example['d'])
else:
    print('d not found!')

d not found!


Second, use the get method:

In [6]:
example.get('d') # This displays nothing

example.get('d', 'Does not exist') # This prints 'Does not exist'

'Does not exist'

In [19]:
print(example.keys()) # This will get all the existing keys

print( list( example.values() ) ) # This will get all existing values

example2 = {'c':3, 'd':4, 'e':5}
combo = {**example, **example2} # This will create a merged dict
print(combo)

newdict = {'foo':42, 'bar':66}
combo.update(newdict) # The update function will copy from one dict to another and append them
print(combo)

newfoo = {'foo':0}
combo.update(newfoo) # Update will merge & replace values with the same keys 
print(combo)

del combo['bar'] # Deletes a key:value from a dict
print(combo)

foo = combo.pop('foo') # Combines get() and del
print(f'foo is: {foo}')
print(combo)

print(combo.clear()) # Clears the dict

dict_keys(['a', 'b', 'c'])
[1, 2, 3]
{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'foo': 42, 'bar': 66}
{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'foo': 0, 'bar': 66}
{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'foo': 0}
foo is: 0
{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
None


In [23]:
users = {'Rob':2323, 'Frank':3454, 'George':8583}
users_assigned = users # When you use = to assign, you are still referencing the original dict
users_copied = users.copy() # To avoid this, copy it with the copy() function, if you have embedded lists in the values use deepcopy()
print(users_assigned)
users['Frank'] = 69
print(f'Assigned: {users_assigned}')
print(f'Copied: {users_copied}')

{'Rob': 2323, 'Frank': 3454, 'George': 8583}
Assigned: {'Rob': 2323, 'Frank': 69, 'George': 8583}
Copied: {'Rob': 2323, 'Frank': 3454, 'George': 8583}


In [27]:
for user in users: # Just keys
    print(user)
print(' ')
for value in users.values(): # Just values
    print(value)
print(' ')
for item in users.items(): #Tuples of key:value
    print(item)
print(' ')
for name,number in users.items(): #Explicitly assign tuples of key:value
    print(f'Name: {name}, Number: {number}')

Rob
Frank
George
 
2323
69
8583
 
('Rob', 2323)
('Frank', 69)
('George', 8583)
 
Name: Rob, Number: 2323
Name: Frank, Number: 69
Name: George, Number: 8583


*Dict Comprehension*

In [31]:
word = 'doolittle'
letter_counts = {letter: word.count(letter) for letter in set(word)}
print(letter_counts)

vowels = 'aeiou'
letter_counts = {letter: word.count(letter) for letter in set(word) if letter in vowels}
print(letter_counts)

{'l': 2, 'e': 1, 't': 2, 'i': 1, 'd': 1, 'o': 2}
{'e': 1, 'i': 1, 'o': 2}


## Sets

In [36]:
foo = set('letters') # only has one of each letter
print(foo)
print(len(foo))
foo.add('q')
print(foo)
foo.remove('q')
print(foo)

{'l', 'e', 't', 'r', 's'}
5
{'l', 'e', 't', 'r', 's', 'q'}
{'l', 'e', 't', 'r', 's'}


In [48]:
# Common use-case for sets is in Dicts:
drinks = {
 'martini': {'vodka', 'vermouth'},
 'black russian': {'vodka', 'kahlua'},
 'white russian': {'cream', 'kahlua', 'vodka'},
 'manhattan': {'rye', 'vermouth', 'bitters'},
 'screwdriver': {'orange juice', 'vodka'}
}
print(drinks)

# Better filtering/selecting with set items
print("\nContain vermouth:")
for name, ingredients in drinks.items():
    if 'vermouth' in ingredients:
        print(name)
        
# Set intersection 
print("\n Contain vermouth or kahlua:")
for name, ingredients in drinks.items():
    if ingredients & {'vermouth', 'kahlua'}:
        print(name)
        
# Get ingredients of a drink
ingA = drinks['screwdriver']
ingB = drinks['martini']
print(f"\nScrewdriver ingredients: {ingA}")
print(f"Martini ingredients: {ingB}")
print(f"Intersection: {ingA & ingB}") # set intersection (can also use the setA.intersection(setB) function)
print(f"Union: {ingA | ingB}") # set union, could also use setA.union(setB)
print(f"Difference: {ingA - ingB}") # set difference
print(f"XOR: {ingA ^ ingB}") # XOR or symmetric_difference()
print(f"Is Subset: {ingA <= ingB}") # Check if A is a subset of B, also issubset()

{'martini': {'vodka', 'vermouth'}, 'black russian': {'vodka', 'kahlua'}, 'white russian': {'vodka', 'cream', 'kahlua'}, 'manhattan': {'bitters', 'rye', 'vermouth'}, 'screwdriver': {'orange juice', 'vodka'}}

Contain vermouth:
martini
manhattan

 Contain vermouth or kahlua:
martini
black russian
white russian
manhattan

Screwdriver ingredients: {'orange juice', 'vodka'}
Martini ingredients: {'vodka', 'vermouth'}
Intersection: {'vodka'}
Union: {'orange juice', 'vodka', 'vermouth'}
Difference: {'orange juice'}
XOR: {'orange juice', 'vermouth'}
Is Subset: False


In [51]:
a_set = {num for num in range(1,50) if num % 2 == 0} # Set Comprehension
print(a_set)

{2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48}


In [56]:
locked = frozenset([9,3,6]) # frozenset creates an immutable set
print(locked)
try:
    locked.add(4)
except AttributeError as err:
    print(f"Exception: {err}")

frozenset({9, 3, 6})
Exception: 'frozenset' object has no attribute 'add'


## Exercises

In [65]:
# Exercise 1
life = {'animals':{'cats':['Risky', 'Riot'],'octopi':[],'emus':[]}, 'plants':{}, 'other':{}} # Multilevel dict
for top, inner in life.items():
    print(f"{top} - {inner}")
    
# Exercise 2
squares = { num:num**2 for num in range(1,10) } # Dict comprehension
print(f"\n{squares}")

# Exercise 3

animals - {'cats': ['Risky', 'Riot'], 'octopi': [], 'emus': []}
plants - {}
other - {}

{1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81}


# Functions

### Explode/Gather Positional Arguments (*)
When Used in a function parameter the asterisk groups a variable number of positional arguments into a single tuple of parameter values.\
When used outside a function parameter *args explodes the tuple into comma-separated positional args.\
The * must be used within a function call or definition

In [9]:
def print_args(*params):
    print('positional tuple: ',params)
print_args(1,2,3,'string1','string2')

# This function has two required args (arg1, arg2) and then variable positional args (*args)
def more_than_one(arg1, arg2, *params):
    print(f'Params: {arg1}, {arg2}, {params}')
more_than_one('required', 'also required', 1,2,3,'optional args')

positional tuple:  (1, 2, 3, 'string1', 'string2')
Params: required, also required, (1, 2, 3, 'optional args')


### Explode/Gather Keyword Args (**)
Keyword arguments are gathered into a dict\
This is also only used in a function call or def\
Argument order: required pos args, optional pos args(*args), optional keyword args(**args)

In [15]:
def print_kwargs(**kwargs):
    print('Keyword Args: ', kwargs)
print_kwargs(key1='val1', key2='val2', key3='val3')

Keyword Args:  {'key1': 'val1', 'key2': 'val2', 'key3': 'val3'}


### Keyword only Arguments
These are 'name=value' args and a preceeding * means they must be provided as named args if you don't want the default values

In [25]:
def start_time(timeData, *, start=0, end=190):
    for value in (timeData[start:end]):
        print(value)
scenes = ['scene1','scene2','scene3','scene4','scene5']
start_time(scenes)
print('\n')
start_time(scenes, start=3)
print('\n')
start_time(scenes, start=2, end=4)

scene1
scene2
scene3
scene4
scene5


scene4
scene5


scene3
scene4


### Watching for Mutability in Function Parameters
When passing a value to a function, treat it as a pass-by-reference and assign it instead of altering it otherwise your changes can be carried outside of the function

In [29]:
print('Outside a function: ')
mutable_guy = ['foo','bar','now']
print(mutable_guy)

def alter(arg):
    print('Altering inside a function...')
    arg[2] = 'NEVER'
alter(mutable_guy)

print('Outside a function: ')
print(mutable_guy)

Outside a function: 
['foo', 'bar', 'now']
Altering inside a function...
Outside a function: 
['foo', 'bar', 'NEVER']


### Docstrings
A docstring can be included by adding a string at the beginning of a function, this is a good way to document functions.\
To print a docstring you can use the help() function

In [37]:
def printstring():
    'printstring prints: String.'
    print('String.\n')
printstring()
help(printstring) # With formatting

print(printstring.__doc__) # Without extra formatting

String.

Help on function printstring in module __main__:

printstring()
    printstring prints: String.

printstring prints: String.


## Functions as Objects
Functions are first-class citizens in Python: Functions can be assigned to variables, used as arguments, and returned from functions as they are objects

In [39]:
def foo():
    print('bar')

def run_func(func): # Takes function as argument and runs it
    func()
    
run_func(foo) # Runs a function by passing it to another

def sum_args(*args): 
    return sum(args)
def run_args(func, *args): # You can combine this functionality with *args or *kwargs
    return func(*args)
run_args(sum_args, 2,4,6,7)

bar


19

## Closures
A closure is a function that is dynamically generated by another function and can change/remember the values of vars that were created outside the function

In [44]:
def knights(quote):
    def inner():
        return "We are the knights who say: %s" % quote
    return inner

a = knights('Ni!') # a is getting a specialized version of the inner() function returned
b = knights('Cheese!') # b is getting a different version of it 

print(a())
print(b())

We are the knights who say: Ni!
We are the knights who say: Cheese!


## Anonymous/Lambda Functions
These are a single statement anonymous function that usually replace short functions\
They are less clear than functions and are most useful when you need many small functions and would struggle to remember what all of them were called,\
they're good for callback functions

In [6]:
def edit_story(words, func):
    for word in words:
        print(func(word))
stairs = ['thud', 'meow', 'thud', 'hiss']

# Instead of:
def enliven(word):
    return word.capitalize() + '!'
edit_story(stairs, enliven)

print('')

# You could do:
edit_story(stairs, lambda word: word.capitalize() + '!') # Zero or more comma-separated args : function definition

Thud!
Meow!
Thud!
Hiss!

Thud!
Meow!
Thud!
Hiss!


## Generators
Python sequence creation objects, used to iterate through large sequences without creating and storing the whole sequence in memory\
Generators keep track of where they were called last and return to the next value\
They only run once, and cannot go back/restart\
An example would be the `range()` function\
To write your own make sure you use `yield` instead of `return`

In [16]:
# Writing the range generator function:
def new_range(first=0, last=10, step=1):
    num = first
    while num < last:
        yield num
        num += step

defaultRange = new_range() # defaults
for x in defaultRange:
    print(x)
print('')
range = new_range(4,105,10) # with args
for y in range:
    print(y)
    
for z in range: # Can't do it again
    print(z)

print('')################
    
# Generator comprehensions are like the others but with parenteses:
genobj = (pair for pair in zip(['foo','bar'],['6','9']))
print(genobj)
for i in genobj:
    print(i)

0
1
2
3
4
5
6
7
8
9

4
14
24
34
44
54
64
74
84
94
104

<generator object <genexpr> at 0x000001E3D53751C0>
('foo', '6')
('bar', '9')


## Decorators
Decorators modify existing functions without altering them

In [23]:
# Create the decorator:
def document_it(func):
    def new_function(*args, **kwargs):
        print('Running function: ', func.__name__)
        print('Pos args: ', args)
        print('Keyword args: ', kwargs)
        result = func(*args, **kwargs)
        print('Result: ',result)
        return result
    return new_function

# Use the decorator:
# Long way:
def subem(a,b):
    return a - b
print(subem(10,5),'\n')


better_subem = document_it(subem) 
better_subem(10,5)

print('')

# Short way: 
@document_it 
def addem(a,b):
    return a + b
addem(5,8)

5 

Running function:  subem
Pos args:  (10, 5)
Keyword args:  {}
Result:  5

Running function:  addem
Pos args:  (5, 8)
Keyword args:  {}
Result:  13


13

In [27]:
# Second Decorator
def square_it(func):
    def new_function(*args, **kwargs):
        result = func(*args, **kwargs)
        return result * result
    return new_function

# When using mult decorators the closest goes first
@document_it # second
@square_it # first
def multem(a,b):
    return a * b
multem(3,5)

print('')

@square_it # second
@document_it # first
def multem2(a,b):
    return a * b
multem2(3,5)

Running function:  new_function
Pos args:  (3, 5)
Keyword args:  {}
Result:  225

Running function:  multem2
Pos args:  (3, 5)
Keyword args:  {}
Result:  15


225

## Namespaces and Scope
Each function has its own namespace, the main part of a program has a global namespace you can access from inside functions

In [36]:
name = 'Chris'# global
def show_name():
    print(name) # Can access it
def show_name_2():
    try:
        print(name) # Now is a reference before assignment because...
        name = 'Kaila' # Creates name in local namespace
        print(name)
    except UnboundLocalError as err:
        print('Exception: ',err)
def show_name_3():
    global name # Defines name in local namespace as global
    print(name)
    name = 'Kaila'
    print(name)
    other_name = 'Francis'
    print('local vars: ',locals()) # prints a dict of local vars
    # print('global vars: ',globals()) # This would print global vars but in jupyter there's a ton
show_name()
show_name_2()
show_name_3()

Chris
Exception:  local variable 'name' referenced before assignment
Chris
Kaila
local vars:  {'other_name': 'Francis'}


## Underscores
Double underscores are for reserved use like `function.__name__` or `function.__doc__` or `__main__`\


## Recursion
Recursion can be useful for generator functions

In [48]:
# Potential interview question 'flatten/collect a set...':
def flatten(lol):
    for item in lol:
        if isinstance(item, list):
            yield from flatten(item) # yield from creates another generator that does a 'for subitem in flatten(item): yield subitem'
        else:
            yield item
lol = [1,2,[3,4,5], [6,[7,8,9],[]]]
list(flatten(lol))

[1, 2, 3, 4, 5, 6, 7, 8, 9]

## Exceptions
Always try to handle exceptions where they may occur with `try` and `except`

In [53]:
listing = [1,2,3]
try:
    listing[3]
except: # Generic catchall
    print('oopsie whoopsie made a fucky wucky outside the wist uWu')
    
try:
    listing[3]
except IndexError as idx_err: # Specifies specific exception type and handles it... accordingly?
    print('u made an index out of wange ewwow: ',idx_err)
except Exception as gen_err:
    print('YOU HAVE MADE SOME OTHER KIND OF ERROR NOW DIE')

oopsie whoopsie made a fucky wucky outside the wist uWu
u made an index out of wange ewwow:  list index out of range


### Make your own exceptions at home!

In [52]:
class UppercaseException(Exception):
    pass

demands = ['yes','yes','yes','NO']
for demand in demands:
    if demand.isupper():
        raise UppercaseException(demand)

UppercaseException: NO

## Exercises

In [9]:
# Decorator: print when function is starting and ending
def test(func):
    def printtest(*args, **kwargs):
        print("starting...")
        result = func(*args, **kwargs)
        print("ending...")
        return result
    return printtest
@test
def examplefunction():
    print('foobar')
examplefunction()

# Generator: Get odd nums from a range
def getodds(start=0,end=10):
    for idx in range(start,end):
        if (idx % 2) != 0:
            yield idx
list(getodds(0,20))

class OopsException(Exception):
    print('Oopsie whoopsie!')
    
def evenbad(nums):
    for num in nums:
        print(num)
        if num % 2 == 0:
            raise OopsException
evenbad([1,3,5,7,8,9])

starting...
foobar
ending...
Oopsie whoopsie!
1
3
5
7
8


OopsException: 

# Objects and Classes
Technically, being Object Oriented, everything in Python is an object, including data structures and functions

## Object Classes in Python

In [10]:
# Class definition
class Pet():
    # Initialization function
    def __init__(self, name):
        self.name = name
    def identify(self):
        print(f"I'm {self.name}. I'm a pet!")
sully = Pet('Sullivan') # looks up class, instantiates obj, runs init, stores name, returns object, attaches variable
risky = Pet('Risky')
riot = Pet('Riot')
pets = [sully, risky, riot]
for pet in pets:
    print(pet.name)

# Inheritance, try not to overuse or it will confuse
class Cat(Pet):
    def __init__(self,name,age):
        super().__init__(name) # use super() to get the parent implementation
        self.age = age
    def identify(self): # Overloaded method
        print(f"I'm, {self.name}, a {self.age} year old cat, like a pet but more catty!")
print("A cat is a pet: ",issubclass(Cat, Pet))
risky = Cat('Risky', 12) # Cat has overloaded and must include the age
riot = Cat('Riot', 2)
sully.identify()
risky.identify()
riot.identify()

Sullivan
Risky
Riot
A cat is a pet:  True
I'm Sullivan. I'm a pet!
I'm, Risky, a 12 year old cat, like a pet but more catty!
I'm, Riot, a 2 year old cat, like a pet but more catty!


## Multiple Inheritance
Classes can inherit from multiple parents and when referencing a method/attribute a class doesn't have, its parents are checked\
When getting methods from parents Python uses *method resolution order*, each class has a special method called `mro()`\
and `__mro__` gives a tuple of the parent classes in order of reference importance\
Generally, mro goes: Object itself, object's class, its first parent class, second parent class, ..etc, grandparent class


## Mixins
A mixin is a parent class that doesn't share any methods with other parent classes, avoiding mro issues\
These are good for side tasks like logging etc

In [12]:
class PrettyMixin():
    def dump(self):
        import pprint
        pprint.pprint(vars(self))
class Thing(PrettyMixin):
    pass
t=Thing()
t.name = "Kaila"
t.love = True
t.eyecolor = "Brown"
t.age = 26
t.dump()

{'age': 26, 'eyecolor': 'Brown', 'love': True, 'name': 'Kaila'}


## Self
So Python uses self in class method definitions to get the right attributes/methods, so:

In [15]:
# When we call
risky.identify()
''' 
    It really is looking up Cat, of the object risky
    Then it passes the risky object to the identify() method as the self argument, 
    functionally the same as:
 '''
Cat.identify(risky)

I'm, Risky, a 12 year old cat, like a pet but more catty!
I'm, Risky, a 12 year old cat, like a pet but more catty!


## Attributes
You can either directly access attributes or use getters/setters\
Although Python doesn't have private attributes, you can at least obfuscate w/ getting/setting\
To get some more real privacy use properties (covered later)

In [5]:
class Dog():
    def __init__(self,input_name):
        self.hidden_name = input_name
    def get_name(self):
        return self.hidden_name
    def set_name(self, input_name):
        self.hidden_name = input_name
pup = Dog('Sullivan')
print(pup.get_name())
pup.set_name('Maya')
print(pup.get_name())
pup.name

Sullivan
Maya


AttributeError: 'Dog' object has no attribute 'name'

## Properties
More pythonic way to do privacy

In [21]:
# Same thing but add the last line
class Dog():
    def __init__(self,input_name):
        self.hidden_name = input_name
    def get_name(self):
        return self.hidden_name
    def set_name(self, input_name):
        self.hidden_name = input_name
    name = property(get_name, set_name) # Properties are defined with a getter and setter
# Getter and setter stil work
pup = Dog('Sullivan')
print(pup.get_name())
pup.set_name('Maya')
print(pup.get_name())
# But accessing as an attribute also works
pup.name = 'Willy'
print(pup.name)

# Another format with decorators for getter and setter
class Food():
    def __init__(self,input_name):
        self.__name = input_name # vs. self.hidden_name = input_name : the double underscore is a better way to obfuscate attributes in Python
    @property
    def name(self):
        return self.__name
    @name.setter
    def name(self, input_name):
        self.__name = input_name
dinner = Food('Pizza')
print(dinner.name)
dinner.name = 'Tacos'
print(dinner.name)
print('Outside getter: '+dinner._Food__name) # Not entirely private still
print(dinner.__name) # but at least can't directly access hidden attr __name directly or accidentally


Sullivan
Maya
Willy
Pizza
Tacos
Outside getter: Tacos


AttributeError: 'Food' object has no attribute '__name'

### For Computed Values

In [13]:
class Circle():
    def __init__(self, radius):
        self.radius = radius
    @property
    def diameter(self):
        return 2*self.radius
circle = Circle(5)
print(f'radius: {circle.radius}, diameter: {circle.diameter}')

radius: 5, diameter: 10


## Methods
If there's no preceding decorator it's an *instance method*, the first arg is self and refers to the individual object\
If there is a `@classmethod` decorator it's a *class method*, first arg is cls (anything that isn't the reserved word class) that refers to the class\
If there's a `@staticmethod` it's a *static method* and the first arg isn't an object or class

### Instance methods
The methods covered above with 'self' as the arguments

### Class Methods
A class method affects the class as a whole, any changes made affect all objects that share that class

In [22]:
class A():
    count = 0
    def __init__(self):
        A.count += 1
    def exclaim(self):
        print("A!")
    @classmethod
    def kids(cls):
        print("A has", cls.count, "little objects.")
one_a = A()
two_a = A()
three_a = A()
A.kids()

A has 3 little objects.


# Modules, Packages

A **module** is really any python file with code.
Some basic reminders on module imports:
- Be organized and put all imports together at the top of a file to have all dependencies in one place.
- Use aliases e.g. `import bs4 as bs` when needed/helpful
- import only what you need from a module e.g. `from random import choice`

A **package** is an organized hierarchy of .py files in a directory and subdirectories.\ 
Given a folder with the name 'sources', with files 'fast.py' and 'advice.py' in it, in the same directory as a .py file:
`from sources import fast,advice` will import from sources as a package.

*Interview question?* In pre-3.3 Python you also needed an \_\_init__.py file as well for a directory to be recognized as a package.

To see all the places that your Python interpreter searches for packages, import the standard sys
module and use its path list: \
`for place in sys.path:`\
`    print(place)`\
This is a list of directory names and ZIP archive files that
Python searches in order to find modules to import.\
Keep in mind, if you override a name of a standard module it will only take the first instance it finds and will block you from the standard module (e.g. if you used random as a module name).

## Absolute vs Relative
All the examples above are **absolute**, Python will look for a file or package with the associated name along its search path until it finds one (or doesn't).\
Otherwise, you can import **relative** to your current position in the filesystem `. import foobar` for current, `.. import foobar` for one directory above, `..barfoo import foobar` if it's under a sibling directory called barfoo.

<a href="http://python-notes.curiousefficiency.org/en/latest/python_concepts/import_traps.html">Here</a> are some importing pitfalls for the unwary.

## Namespaces
You can create a **namespace** package to split the python files. So, if you had one directory with two files in it, rather than splitting the files into their own subdirectories which can break the use of the module in other applications, you can add parent directories to separate files.

## Modules vs Objects
The difference between an object and a module is that a module's code is available to the outside, while an object's code can be obscured or access-controlled with properties and naming to restrict access to data attributes.

# Batteries-Included Python
There are some nice features of the Python Standard Library:


## Handle missing keys with setdefault() and defaultdict()
This can help avoid an exception for non-existing keys. `setdefault()` is like the dictionary `get()` but also assigns an item to the dict if the key is missing.

In [3]:
elements = {'Hydrogen':1, 'Helium':2}
carbon = elements.setdefault('Carbon', 12) # Get carbon, with set value
print(elements)
helium = elements.setdefault('Helium', -1) # Get Helium, only set value if it doesn't exist already
print(elements)
print(helium) # Helium maintains its inital value since it was already set

{'Hydrogen': 1, 'Helium': 2, 'Carbon': 12}
{'Hydrogen': 1, 'Helium': 2, 'Carbon': 12}
2


defaultdict() is similar but has a default value for any new key when the dict is created. The argument is a function.

In [8]:
from collections import defaultdict
elements = defaultdict(int) # int() means the default value will be 0
elements['Hydrogen'] = 1
elements['Lead'] # Referencing creates a dict entry with the default value
print(elements)

defaultdict(<class 'int'>, {'Hydrogen': 1, 'Lead': 0})


In [11]:
# Defining a function that returns the default val for the dict entries
def confused():
    return 'It hurt itself in its confusion!'
pokemon = defaultdict(confused)
pokemon['Fire'] = 'Charmander'
pokemon['Water'] = 'Squirtle'
pokemon['Grass']
print(pokemon['Grass'])

It hurt itself in its confusion!


## Count items with Counter()

In [19]:
from collections import Counter
griffin_abcs = ['gumpy', 'hey', 'gumpy', 'gumpy', 'hey']

# Counter object of all elements in the element
abc_count = Counter(griffin_abcs)
print(abc_count)

# Counters have helpful methods
print('Most To Least Common: ', abc_count.most_common()) # Descending order
print('Top Most Common: ', abc_count.most_common(1)) # Top 1 common element
ghost_horses = ['ghost','ghost','horse','ghost','ghost','horse','hey', 'gumpy']
gh_count = Counter(ghost_horses)
print(abc_count + gh_count) # Can combine counters with '+'
print(gh_count - abc_count) # What's in the first list but not the other: '-'
print(gh_count & abc_count) # Intersection '&'
print(abc_count | gh_count) # Union '|'

Counter({'gumpy': 3, 'hey': 2})
Most To Least Common:  [('gumpy', 3), ('hey', 2)]
Top Most Common:  [('gumpy', 3)]
Counter({'gumpy': 4, 'ghost': 4, 'hey': 3, 'horse': 2})
Counter({'ghost': 4, 'horse': 2})
Counter({'hey': 1, 'gumpy': 1})
Counter({'ghost': 4, 'gumpy': 3, 'hey': 2, 'horse': 2})


## Order by Key with OrderedDict()
Useful for earlier versions of python (pre 3.7) since the more recent version retains the order in which you add keys.


In [20]:
from collections import OrderedDict
data = OrderedDict([
    ('a',1),
    ('b',2),
    ('c',3)
])
for item in data: 
    print(item)

a
b
c


## Stack and Queue AKA deque
A deque is a double ended queue, giving it stack and queue features. Useful when you want to add/delete from both ends.

In [23]:
# Note: this is not the best way to do this
def is_palindrome(word):
    from collections import deque
    dq = deque(word)
    while len(dq) > 1:
        # popleft() gets the first char and pop() gets the last
        if dq.popleft() != dq.pop():
            return False
    return True
print(is_palindrome('otto'))
print(is_palindrome('nope'))
print(is_palindrome('racecar'))

# Here's a quick/concise palindrome function using a slice
def quick_palindrome(word):
    return word == word[::-1]
print(quick_palindrome('otto'))
print(quick_palindrome('nope'))
print(quick_palindrome('racecar'))

True
False
True
True
False
True


## Iterate over structures with itertools
itertools has iterator functions that return one item at a time when called in for...in loops and remembers state in between calls.


In [35]:
# chain() runs through args as though they're an iterable
import itertools
for item in itertools.chain([1,2],['a','b','c']):
    print(item)
print()
# cycle() is an infinite cycler
for idx,item in enumerate(itertools.cycle([0,1])):
    print(item)
    if(idx > 9):
        break
print()
# accumulate() calculates accumulated values, by default it does sum, second arg can redefine
for item in itertools.accumulate([1,1,2,3,5,8]):
    print(item)

1
2
a
b
c

0
1
0
1
0
1
0
1
0
1
0

1
2
4
7
12
20


## pprint(??)
It should print things out nicely, but maybe print rules have changed.

In [36]:
# pprint() prints nicely (??)
from pprint import pprint
print(data)  # OrderedDict from earlier example
pprint(data)

OrderedDict([('a', 1), ('b', 2), ('c', 3)])
OrderedDict([('a', 1), ('b', 2), ('c', 3)])


## Rawrr xD so random

In [51]:
# choice() gives a random value from a sequence
from random import choice
data = [1,33,532,'NI',0x123a]
print(choice(data))
print()

# sample() gets more than one value at a time
from random import sample
print(sample(data,3))
print()

# randint() gets a random integer from a given range
from random import randint
print(randint(1,6969))
print(randint(1,6969))
print()

# randrange() has args for start(inclusive) and end(exclusive) integers and an optional step arg
from random import randrange
print(randrange(22,85,10))

532

[4666, 'NI', 532]

6397
68

82


# Exercises

In [3]:
# Created zoo.py in root folder and imported function to print hours
from modules.zoo import hours
hours()

# Make a basic dictionary
plain = {
    'a':1, 
    'b':2, 
    'c':3
}
print(plain)

# Make an OrderedDict (should behave the same)
from collections import OrderedDict
fancy = OrderedDict([
    ('a',1),
    ('b',2),
    ('c',3)
])
print(fancy)



Open 9am - 5pm M-F
{'a': 1, 'b': 2, 'c': 3}
OrderedDict([('a', 1), ('b', 2), ('c', 3)])


# Python in Practice
Applications of the basics of Python.

## Data Wranglin and Manglin
A couple basic data formats to work with are text and binary. 

### Text Strings: Unicode
Python 3 strings are Unicode character sequences instead of byte arrays (big change from Python 2).Unicode characters are handled by Python, these characters are divided into 8-bit sets called planes. The first 256 planes are the *basic multilingual planes*. You can look these up for more info.

If you know a unicode ID or name for a char you can use it in a Python string:
 * If you use a \u followed by four hex numbers you get a char in the multilingual planes. The first two are plane number, and the next two are the index of the char in the plane. Plane 00 is ASCII.
 * For chars in the higher planes you need more bits. It would be \U followed by 8 hex characters.
 * For all characters \N{name} lets you specify it by its standard name provided in the Unicode Character Name Index.

The **unicodedata** module has functions that translate in both directions. 
 * lookup() - takes case-insensitive name and returns unicode character
 * name() - takes a unicode character and returns uppercase name


In [72]:
import unicodedata
def unicode_test(value):
    name = unicodedata.name(value)
    value2 = unicodedata.lookup(name)
    print('value="%s", name="%s", value2="%s"'% (value,name,value2))
unicode_test('A')
unicode_test('$')
unicode_test('☃')

''' 
When you look up the names you might get something like this: "E WITH ACUTE, LATIN SMALL LETTER"
Delete the comma and move the last part up front like so: "LATIN SMALL LETTER E WITH ACUTE" to get the correct python-friendly name
'''
print(unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE'))
print(unicodedata.name('\u00e9'))
print('caf\u00e9')

# string len() calculates unicode characters, not bytes
print(len('$'))
print(len('\U0001f47b'))

value="A", name="LATIN CAPITAL LETTER A", value2="A"
value="$", name="DOLLAR SIGN", value2="$"
value="☃", name="SNOWMAN", value2="☃"
é
LATIN SMALL LETTER E WITH ACUTE
café
1
1


### UTF-8
When transferring data in and out of python you need a way to encode char strings to bytes and decode bytes to char strings. UTF-8 uses 1-4 bytes per unicode character:
 * 1 for ASCII
 * 2 for most latin-derived (not cyrillic) languages
 * 3 for the rest of the basic multilingual plane
 * 4 for the rest, including some Asian languages/symbols

UTF-8 is standard in Python, Linux, and HTML. It's always good to ensure your incoming data is encoded in the UTF-8 format.

#### Encoding


In [22]:
snowman = '\u2603'
print(snowman)
print(len(snowman)) # 1 - single unicode character
ds = snowman.encode('utf-8')
print(len(ds)) # 3 - variable-length encoding that uses 3 bytes

try:
    ds = snowman.encode('ascii') # Can't encode with another encoding unless it happens to be in both
except UnicodeEncodeError as err:
    print('Exception: ', err)

ds = snowman.encode('ascii', 'ignore') # the second arg allows you to avoid exceptions and ignore unknown chars
print(ds)
ds = snowman.encode('ascii', 'replace') # or replace with '?'
print(ds)
ds = snowman.encode('ascii', 'backslashreplace') # or replace with escaped chars
print(ds)
ds = snowman.encode('ascii', 'xmlcharrefreplace') # or to make HTML safe strings
print(ds)

☃
1
3
Exception:  'ascii' codec can't encode character '\u2603' in position 0: ordinal not in range(128)
b''
b'?'
b'\\u2603'
b'&#9731;'


#### Decoding

In [27]:
place = 'caf\u00e9'
print(place)
print(type(place),'\n')

place_bytes = place.encode('utf-8')
print(place_bytes)
print(type(place_bytes),'\n')

place2 = place_bytes.decode('utf-8')
print(place2)
print(type(place2))

try:
    place3 = place_bytes.decode('ascii')
except UnicodeDecodeError as err:
    print(err)

café
<class 'str'> 

b'caf\xc3\xa9'
<class 'bytes'> 

café
<class 'str'>
'ascii' codec can't decode byte 0xc3 in position 3: ordinal not in range(128)


Whenever possible in Python use the UTF-8 encoding. It's the best supported and easiest to encode/decode.

### HTML Entities


In [30]:
import html
print(html.unescape("&egrave;")) # Name
print(html.unescape("&#233;")) # Decimal
print(html.unescape("&#xe9;")) # Hex

è
é
é


In [31]:
# You could also import the dictionary of entities and do it by name:
from html.entities import html5
print(html5['egrave'])

è


In [34]:
# To go from Python Unicode to HTML entity:
char = '\u00e9'
dec_value = ord(char)
print(html.entities.codepoint2name[dec_value],'\n')

# Or for strings do 2-step:
place = 'caf\u00e9'
byte_value = place.encode('ascii', 'xmlcharrefreplace')
print(byte_value)
print(byte_value.decode())

eacute 

b'caf&#233;'
caf&#233;


### Normalization
Some Unicode chars can be represented by more than one encoding. 

In [36]:
# Create e with accent by using different unicode methods
eacute1 = 'é' # UTF-8, pasted
eacute2 = '\u00e9' # Unicode code point
eacute3 = '\N{LATIN SMALL LETTER E WITH ACUTE}' # Unicode name
eacute4 = chr(233) # decimal byte value
eacute5 = chr(0xe9) # hex byte val
print(eacute1, eacute2, eacute3, eacute4, eacute5)
print(eacute1 == eacute2 == eacute3 == eacute4 == eacute5)

é é é é é
True


In [43]:
# Build a e with accent by combining two characters so it looks the same
import unicodedata
print(unicodedata.name(eacute1))
print(ord(eacute1))
print(0xe9)
eacute_combined1 = "e\u0301"
eacute_combined2 = "e\N{COMBINING ACUTE ACCENT}"
eacute_combined3 = "e" + "\u0301"
print(eacute_combined1, eacute_combined2, eacute_combined3)
print(eacute_combined1 == eacute_combined2 == eacute_combined3)

LATIN SMALL LETTER E WITH ACUTE
233
233
é é é
True


In [45]:
# though they look the same they are NOT equal
print(eacute1 == eacute_combined1)

# You can fix this with normalize()
eacute_normalized = unicodedata.normalize('NFC', eacute_combined1) # NFC is 'normal form, composed'
print(eacute_normalized == eacute1)


False
True


### Regular Expressions

In [55]:
import re
# match() matches a pattern at the beginning of a string
print(re.match('Space', 'Space Balls'))

# for more complex patterns you can compile them
youpattern = re.compile('Space')
print(youpattern.match('Space Balls'))

# search() finds the first match if any
print(re.search('Ball', 'Space Balls'))

# findall() gives a list of all non-overlapping matches
print(re.findall('a', 'Space Balls'))

# split() splits source at matches with pattern and gives a list of the leftover pieces
print(re.split(' ', 'Space Balls'))

# sub() takes another replacement arg and changes all parts that are matched
print(re.sub('a', 'oo', 'Space Balls'))

<re.Match object; span=(0, 5), match='Space'>
<re.Match object; span=(0, 5), match='Space'>
<re.Match object; span=(6, 10), match='Ball'>
['a', 'a']
['Space', 'Balls']
Spooce Boolls


#### RE Overview
 * \d A single digit
 * \D A single nondigit
 * \w An alphanumeric character
 * \W A non-alphanumeric character
 * \s A whitespace character
 * \S A nonwhitespace character
 * \b A word boundary (between a \w and a \W, in either order)
 * \B A nonword boundary
 *  abc Literal abc
 * ( expr ) expr
 * expr1 | expr2 expr1 or expr2
 * . Any character except \n
 * ^ Start of source string
 * $ End of source string
 * prev ? Zero or one prev
 * prev * Zero or more prev, as many as possible
 * prev *? Zero or more prev, as few as possible
 * prev + One or more prev, as many as possible
 *  prev { m } m consecutive prev
 * prev { m, n } m to n consecutive prev, as many as possible
 * prev { m, n }? m to n consecutive prev, as few as possible
 * [ abc ] a or b or c (same as a|b|c)
 * [^ abc ] not (a or b or c)
 * prev (?= next ) prev if followed by next
 * prev (?! next ) prev if not followed by next
 * (?<= prev ) next next if preceded by prev
 * (?<! prev ) next next if not preceded by prev

In [60]:
import string
# string has a 'printable' testing string with 100 ascii chars
printable = string.printable
print(printable)
print('digits: ', re.findall('\d', printable))
print('non-aplhanum: ', re.findall('\W', printable))
print('spaces: ', re.findall('\s', printable))

0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

digits:  ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
non-aplhanum:  ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '`', '{', '|', '}', '~', ' ', '\t', '\n', '\r', '\x0b', '\x0c']
spaces:  [' ', '\t', '\n', '\r', '\x0b', '\x0c']
