## 2.1 The Python Interpreter
- Executes single statements sequentially
- Run programs with `python /path/to/file.py`


In [1]:
# Assign variables
a = 25
# Get information about Python ver
print(a)
print(copyright)
display(credits)
print(license) # Yes! But how to get the license back? 
# %quickref # Shows Python quick reference
# Get help about a particular object (including functions, etc.) using `?`
a?
a??

25
Copyright (c) 2001-2019 Python Software Foundation.
All Rights Reserved.

Copyright (c) 2000 BeOpen.com.
All Rights Reserved.

Copyright (c) 1995-2001 Corporation for National Research Initiatives.
All Rights Reserved.

Copyright (c) 1991-1995 Stichting Mathematisch Centrum, Amsterdam.
All Rights Reserved.


    Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands
    for supporting Python development.  See www.python.org for more information.

See https://www.python.org/psf/license/


## 2.2 IPython Basics
- Type `jupyter notebook` in Anaconda Prompt to open Jupyter notebook
    - Same process to access IDLE
    - Will open in default brower (unless you specify `-- no-browser`)
- Can deploy Jupyter remotely
- Notebooks contain all the code other users need to run them
    - Can download [this notebook](https://github.com/wesm/pydata-book/blob/2nd-edition/ch02.ipynb) (from Chapter 2 of the book) and try
- Can use the Tab key to find functions, function options, methods, attributes, objects, etc. in the namespace
    - Tab completion also works on file paths (in strings)
- Can run programs within notebooks using `%run`
    - Use `%run -i` to allow access to objects already in namespace
- Similarly, use `%load` to import code into cell
- If there is code in the clipboard, can use `%paste` to run
    - `%cpase` prompts instead
- Lots of useful keyboard shortcuts:

| Shortcut     | Action                                        |
| -------------| --------------------------------------------- |
| Ctrl-K       | Delete from cursor to line end                |
| Ctrl-U       | Delete entire current line                    |
| Ctrl-L       | Clear screen                                  |

- "Magic commands" help with certain system functions
    - All start with `%`
    - `%timeit`: How long did it take to execute? 
    - View options with `?` at end
    - `%magic`: List of magic commands

In [2]:
list1 = range(1, 5)
print(list1[0])
print(list1[-1])
import numpy as np
import pandas as pd
import os
import json
import matplotlib.pyplot as plt
df = {i : np.random.randn() for i in range(7)}
display(df)
# Can use tab completion for all kinds of useful things
df?
df??
# Define a function then use it
def add_numbers(i, j): 
    return(i + j)
add_numbers?
add_numbers??
add_numbers(2, 3)
%pwd
path = 'C:/Users/evan.kramer/Documents/CMU/Courses/2020-03/95888 - Data Focused Python/Datasets/usanames.txt'
open(path).readline()
records = [json.loads(line) for line in open('C:/Users/evan.kramer/Documents/CMU/Courses/2020-03/95888 - Data Focused Python/Datasets/usanames.txt')]
#display(records)
print(records[0])
print(records[0]['tz'])
b = [1, 2, 3]
b.append(4)
b.index(2)
b.reverse
b
import datetime
datetime.date?
np.*load*?
np.*mean*?
np.*array*?

a = np.random.randn(10, 10)
%timeit np.dot(a, a)
%timeit?
%magic

%matplotlib inline
plt.plot(np.random.randn(50).cumsum())

1
4


{0: 0.2453177721267194,
 1: 0.9180962032285386,
 2: 1.3290770315012215,
 3: 1.8474988959595962,
 4: -0.2139019958001523,
 5: -0.5325255090472418,
 6: 0.8007981629912254}

IndexError: list index out of range

## 2.3 Python Language Basics

### Semantics
- As with R, use `;` to execute multiple statements on single line
- Arguments to functions either named or positional

In [None]:
a = 5; b = 6; c = 7
d = range(a, c, 1)
print(d)

# Control flow requires indentation and whitespace, not brackets
for n in d:
    print(n)
print(n)

### Assignment
- When two variables refer to the same object, changing one changes the underlying object (and therefore the other variable)
- Objects created within functions are globally accessible

In [None]:
# Objects assigned to other objects are sort of 'relational' (in that they both point to the same object)
a = [1, 2, 3]
b = a
a.append(4)
b
# It also works in reverse
b.remove(3)
print('b is:', b)
print('a is:', a)
print('\n')

# Objects created within functions are globally accessible
def append_element(some_list, element):
    some_list.append(element)
data = [1, 2, 3]
append_element(data, 4)
data
data2 = [range(1, 5, 1)]
print(data2)
append_element(data2, 10)
print(data2)
for i in data2:
    print(i)
print(i)

### Types and Attributes
- Can use `isinstance` to check if  object is of certain type
- Access attributes of objects with `.<attribute_name>`
    - Use `getattr` to access methods/attributes
- Use `is` to check that objects are identical

In [None]:
# Types (and changing types)
a = 5
print(type(a))
a = 'foo'
print(type(a))
print(type(append_element))
append_element = [10, 100, 1000]
print(type(append_element))
#'5' + 5 # but types still matter
print('5', 5, sep = '')
print('\n')
a = 4.5
b = 2
print('a is {0}, b is {1}'.format(type(a), type(b)))
print(a / b)
c = 'character'
d = 1e10
print('c is {0}, d is {1}'.format(type(c), type(d)))
a = 5; b = 4.5 
print(isinstance(a, (int, float))) # check if object a is either an int or float
print(isinstance(b, (int, float))) # check if b is either an int or float
print(isinstance(a, (str, bool))) # but it's not a bool or str
# What is the type of a `type` output? 
print(type(type(a))) # seems `type` is its own class
print('\n')

# Attributes of objects
a = 'foo'
print(a.capitalize())
print(a.find('o'))
print(a.replace('f', 'z'))
print(a.upper())
print(getattr(a, 'split'))
print(a, '\n')

# Duck typing
def isiterable(obj):
    try:
        iter(obj)
        return True
    except TypeError: # if not iterable
        return False
print(isiterable('a string'))
print(isiterable([1, 2, 3]))
print(isiterable(5))
print(isiterable([5])) # so you can make an object iterable by repackaging it as a list
x = 5
print('x is {0}'.format(type(x)))
if not isinstance(x, list) and isiterable(x):
    x = list(x)
print('x is {0}'.format(type(x))) # should this not now be a list? 

### Importing Modules/Code
- Can import objects from other modules

In [None]:
# Importing other modules/code
import some_module
pi = some_module.PI
print(some_module.f(x)) # uses the `f` function defined in some_module and the `x` object in the current workspace.
print(pi)
from some_module import f, g, PI
result = g(5, PI)
print(result)
import some_module as sm
from some_module import PI as pi, g as gf
r1 = sm.f(pi)
r2 = gf(6, pi)
print('r1 is:', r1, '; r2 is:', r2)

### Operators and Comparisons
- `is` checks whether objects are identical
    - `list` always creates new list, so checking existing list object against generalized list will return different results
    - Common to use `is None`, since only one `None` value
    - Can change elements of lists
        - But can't of `tuple`s and `str`s

In [None]:
# Binary operators and comparisons
print(5-7)
print(12 + 21.5)
print(5 <= 2)
# How does it compare strings? Alphabetize?
print('evan' > 'kramer') 
print('evan' > 'nave')
print('evan' == 'nave')
print('spell' < 'spool')
print('foo' < 'food')
print('mahi' * 2)
a = [1, 2, 3]
b = a
c = list(a) # because `list` always creates a new list
print(a is b)
print(a is not c)
print(a == c)
b.append(4)
a.remove(1)
print(a is b)
print(c is not b)
print(c is [1, 2, 3]) # not the same underlying object
print(c == [1, 2, 3]) # but does have the same value(s)
a = None
print(a is None)
print(a == None)
print('\n')

# When can types (not) change?
a_list = ['foo', 2, [4, 5]]
a_list[2] = [3, 4]
print(a_list)
# Could we change the second element not to be a list? Yep!
a_list[2] = 'bar'
print(a_list)
a_tuple = (3, 5, (4, 5))
#a_tuple[1] = 'four' # can't modify strings and tuples
a_string = 'evan kramer'
#a_string[3] = 't'# testing with str

### Numeric Types
- Includes `None`, `str`, `bytes`, `float`, `bool`, `int`
    - Most numbers are `int` or `float`

In [None]:
ival = 17239871
print(ival ** 6)
fval = 7.243
fval2 = 6.78e-5
print(3 / 2) # always yields `float`
print(type(1 / 1))
print(type(100 ** 100))
print(type(100 ** 0.5))
print(100 ** 0.5) # sqrt
print(100 ** 1/2) # order of operations
print(3 // 2) # floor division
print(type(3 // 2))
print(type(123456 // 10)) # always type `int`

### Strings
- Can use `'` or `"`
- Use `'''` or `"""` for multiple-line `str` objects
- Use `c.count` to count characters in a string (including `'\n'`)
- Again, cannot modify `str` (but can reassign or use the `.replace` method)
- Convert objects with `str`

In [None]:
a = 'one way of writing a string'
b = "another way"
c = """
this is a longer string that 
spans multiple lines
"""
print(c.count('\n'))
d = '''
this
is
a
test
of 
strings
in 
Python'''
print(d.translate('Spanish'))
print(d.splitlines())
d.swapcase()

# Can't change strings
a = 'this is a string'
#a[10] = 'f'
# Need to use .replace instead
b = a.replace('string', 'longer string')
print(b)
print(a) # remains the same
# Does it replace all instances? 
print(b.replace('long', 'short'))
print(b.replace('is', "IS")) # yep; and is unsuprisingly case-sensitive)
print('\n')

# Convert using `str()`
a = 5.6
s = str(a)
print(s)
print('a is {0}; s is {1}'.format(type(a), type(s)))

### Unicode and Slicing Strings
- Can subset (but not change) strings as one would lists
    - Can also use `[]`
- `\` is an escape, as in R
    - use `r` (raw) in front of strings with lots of `\` to interpret as is (and therefore avoiding having to retype escape characters over and over again)



In [None]:
# Unicode and special characters
s = 'python'
print(list(s))
print(s[:3])
print(type(s))
print(type(list(s)))
# Would this also work with bracket notation? 
t = ['python']
print(type(t)) # appears so
print('\n')

s = '12\\34' # to use a literal backslash
print(s)
s = r'this\has\no\special\characters'
print(s)


### Concatenating and Formatting
- Use `+` to concatenate
- Various ways to format with `.format` method
    - In example...
        - `{0:.2f}`: float with two decimals
        - `{1:s}`: literal string
        - `{2:d}`: exact integer
    - More details/examples [here](https://docs.python.org/3/tutorial/inputoutput.html) and [here](https://www.digitalocean.com/community/tutorials/how-to-use-string-formatters-in-python-3)

In [None]:
a = 'this is the first half '
b = 'and this is the second half'
print(a + b)
snum = 1615
street = 'Kenyon'
sttype = 'Street'
quad = 'NW'
city = 'Washington'
state = 'DC'
zipc = 20010
print(str(snum) + ' ' + street + ' ' + sttype + ' ' + quad + '\n' + city + ',', state + ' ' + str(zipc))

# Formatting
template = '{0:.02f} {1:s} are worth US${2:d}'
print(template.format(4.5560, 'Argentine Pesos', 1))
print("I've told you {:,} times! No!".format(1000))
print('I paid ${0:.2f} for this shirt'.format(10.01023))
print('I paid ${0:,.2f} for this boat.'.format(123413.413312))
# Phone number example? 
phone = 4152696180
#phone = 1531525
if len(str(phone)) != 10: 
    print('Check that your phone number includes an area code')
else:
    phone = str(phone)
    print('Phone: (', phone[0:3], ') ', phone[3:6], '-', phone[6:], sep = '')

### Bytes, Unicode, and Booleans
- May have characters not in UTF-8
    - Use `decode` and `encode` methods for bytes
    - Use `b` in front of strings for byte literals
- 

In [None]:
val = "español"
print(val)
val_utf8 = val.encode('utf-8')
print(val_utf8)
print(type(val_utf8))
print(val_utf8.decode('utf-8'))
print(val.encode('latin1'))
print(val.encode('utf-16'))
print(val.encode('utf-16le'))
bytes_val = b'this is bytes'
print(bytes_val)
decoded = bytes_val.decode('utf-8') # into Unicode
print(decoded)
print('\n')
print(True and True)
print(False or True)
print(not False)
print(not False and not True)

### Type Casting
- Each type also a function to change object to that type


In [None]:
# Type casting
s = '3.14159'
fval = float(s)
print(type(fval))
print(int(fval))
print(bool(fval))
print(bool(0))
for i in range(-10, 10):
    print(bool(i)) # all except 0
print(bool(0.5)) # also true
print('\n')

# None type
a = None
print(a is None)
b = 5
print(b is not None)
print(list is not None)
print(type is not None)
print('\n')
def add_and_maybe_multiply(a, b, c= None):
    result = a + b
    if c is not None:
        result = result * c
    return result
print(add_and_maybe_multiply(1, 2))
print(add_and_maybe_multiply(1, 2, 3))
print(type(None))

### Dates/Times
- `datetime` module
- `strftime` string formatting
    - More information [here](https://strftime.org/)
- `strptime` for parsing from string
- Intervals with `datetime.timedelta`
- Use `.replace` method to re-set (e.g., time to 0)
- 

In [None]:
from datetime import datetime, date, time
dt = datetime(2011, 10, 29, 20, 30, 21)
print(dt.day)
print(dt.minute)
print(dt.date())
print(dt.time())
print(dt.strftime('%m/%d/%Y %H:%M'))
print(dt.month)
print(dt.strftime('%h %d, %Y %H:%M'))
print(dt.strftime('%A, %B %d, %Y'))
print(dt.strftime('%h %d, %Y %H:%M'))
datetime.now
print(datetime.strptime('20091031', '%Y%m%d'))
print(datetime.strptime('jan1299', '%b%d%y'))
print(dt.replace(minute = 0, second = 0))
print(dt.replace(year = dt.year + 1))
# print(dt.replace(day = dt.day + 7)) # can't do that, as the day is too high for the month
dt2 = datetime(2011, 11, 15, 22, 30)
delta = dt2 - dt
print(delta)
print(type(delta))
print(datetime.strptime('19880513', '%Y%m%d') - datetime.strptime('19870606', '%Y%m%d'))
print(dt)
print(dt + delta)

### Control Flow
- `if`, `elif`, `else`
- Multiple comparisons at once (e.g., `0 < x < 10`)
- Use `continue` to progress iteration; exit with `break`
- Use `pass` as placeholder (need for whitespace)
- Create iterators/sequences with `range`
- Can write one-line `if-else` with `<value-if-true> if <condition> else <value-if-false>` 

In [None]:
x = -20
if x < 0: 
    print("It's negative")
elif x == 0:
    print("Equal to zero")
elif 0 < x < 5:
    print("Positive but smaller than 5")
else:
    print("Positive and larger than or equal to 5")
    
x = [-20, 0, 4, 5, 10]
for i in x:
    if i < 0: 
        print("It's negative")
    elif i == 0:
        print("Equal to zero")
    elif 0 < i < 5:
        print("Positive but smaller than 5")
    else:
        print("Positive and larger than or equal to 5")
        
a = 5; b = 7
c = 8; d = 4
if a < b or c > d:
    print('Made it')
print(4 > 3 > 2 > 1)

sequence = [1, 2, None, 4, None, 5]
total = 0
for value in sequence:
    if value is None:
        continue
    total += value
print(total)
sequence = [1, 2, 0, 4, 6, 5, 2, 1]
total_until_5 = 0
for value in sequence:
    if value == 5:
        break
    total_until_5 += value
print(total_until_5)
for i in range(4):
    for j in range(4):
        if j > i:
            break
        print((i, j))
        
x = 256
total = 0
while x > 0:
    if total > 500:
        break
    total += x
    x = x // 2
    print(x, total)
    
if x < 0: 
    print('negative!')
elif x == 0:
    # TODO: put something smart here
    pass
else: 
    print('positive!')
    
print(range(10))
print(list(range(10)))
print(list(range(0, 20, 2)))
print(list(range(5, 0, -1)))
seq = [1, 2, 3, 4]
for i in range(len(seq)):
    val = seq[i]
    print(val)
print(val)
sum = 0
for i in range(100000):
    # % is the modulo operator
    if i % 3 == 0 or i % 5 == 0:
        sum += i
print(sum)
x = 5
print('Non-negative' if x >= 0 else 'Negative')
# Can there be multiple conditions (a la case_when)?

## Chapter 3
Tuples, lists, dicts, sets

### Lists
- Can modify items (unlike tuples)
    - Add with `append` method
    - `insert` inserts new element(s)
        - High-cost computation (b/c re-indexing)
            - Book suggests `collections.deque` instead
    - `pop` removes element at specified index
    - `remove` removes __first__ matching value
    - Use `in` to check if `val` in `list`

In [None]:
# Contrast with tuples
a_list = [2, 3, 7, None]
tup = ('foo', 'bar', 'baz')
b_list = list(tup)
b_list
b_list[1] = 'peekaboo'
b_list

# Iteration
gen = range(10)
gen
list(gen)
list(range(0, 42, 1))

# Adding/removing/popping out list elements
b_list.append('dwarf')
b_list
b_list.insert(1, 'red')
b_list
b_list.pop(2)
b_list
b_list.append('foo')
b_list
b_list.remove('foo')
b_list
'dwarf' in b_list
'dwarf' not in b_list
for i in range(10):
    b_list.append('test{0}'.format(i))

### Combining and Arranging Lists
- Add lists with `+`
- Use `extend` to add multiple elements to previously-defined list
    - Less costly than concatenating
- Can specify a `key` with `sort` that tells how to sort results
- Find new element location with `bisect.bisect` and insert with `bisect.insort`
    - Need to check that the list is sorted first

In [None]:
x = [4, None, 'foo']
x.extend([7, 8, (2, 3)])
x
a = [7, 2, 5, 1, 3]
a.sort()
a
b = ['saw', 'small', 'He', 'foxes', 'six']
b.sort(key = len)
b

import bisect
c = [1, 2, 2, 2, 3, 4, 7]
bisect.bisect(c, 2)
bisect.bisect(c, 5)
bisect.bisect(c, 0)
bisect.insort(c, 6)
c

### Slicing
- Can subset or assign
- Omitted index treated as start/end
    - `[<from>:<to>:<by>]`


In [None]:
seq = [7, 2, 3, 7, 5, 6, 0, 1]
seq[1:5]
seq
seq[3:4] = [6, 3]
seq
seq[:5]
seq[3:]
seq[-4:]
seq[-6:-2] # negative indexed from end
seq[::2] # takes every other
%timeit -n10000000 seq[::-1] # reverses order
%timeit -n10000000 seq.reverse() # which is faster? This one!


### Sequence Functions
- `enumerate` to iterate and note sequence
    - Useful for indexing
- `sorted` 
- `zip` pairs elements and generates list of tuples
    - Only pairs up to index of shortest sequence
    - Can use for multiple iteration (apply?)
    - Can also use to "unzip"
        - `obj1, obj2, ... = zip(*obj_to_unzip)`
            - Ensure it has the right dimensions
    - Presume it will be useful when creating dicts
- `reversed` (opposite of `sorted`)

    

In [None]:
i = 0
for v in range(len(seq)):
    x = i**i
    i += 1
for i, v in enumerate(range(len(seq))):
    #print(i**i)
    pass
#print((x, i))
some_list = ['foo', 'bar', 'baz']
mapping = {}
for i, v in enumerate(some_list): 
    mapping[v] = i
mapping

sorted([7, 1, 2, 6, 0, 3, 2])
sorted('horse race')

seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zipped = zip(seq1, seq2)
zipped # why this structure? Also just learned that Ctrl-/ comments out the whole line (equivalent of Ctrl-Shift-C in R)
list(zipped)
seq3 = [False, True]
list(zip(seq1, seq2, seq3))

for i, (a, b) in enumerate(zip(seq1, seq2)):
    print('{0}: {1}, {2}'.format(i, a, b))
seq1 = ['Kramer'] * 5
seq2 = ['Michael', 'Marna', 'Evan', 'Allison', 'Jenna']
for i, (last_name, first_name) in enumerate(zip(seq1, seq2)):
    print('{0}: {1}, {2}'.format(i, last_name, first_name))
    
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'), ('Curt', 'Schilling')]
first_names, last_names = zip(*pitchers)
first_names
last_names

list(reversed(range(10)))

## 3.2 Functions
- Define with `def`
- Positional and/or named arguments
- Use `return` to return from function (otherwise `None` returned)
- Requirement: Positional arguments before keyword

In [None]:
def my_function(x, y, z = 1.5):
    if z > 1:
        return z * (x + y)
    else:
        return z / (x + y)
my_function(5, 6, z=0.7)
my_function(3.14, 7, 3.5)
my_function(10, 20)
my_function(x = 1, y = 2, z= 2.5)

def parse_phone_number(phone):
    phone = str(phone).replace('-', '').replace('(', '').replace(')', '').replace('.', '')
    if(len(phone)) == 7:
        print("Did you forget the area code?")
    elif(len(phone) == 10):
        print('(', phone[0:3], ') ', phone[3:6], '-', phone[-4:], sep = '')
    else:
        print("We only accept 7 or 10 digit phone numbers at this time.\nPlease make sure you entered your phone number correctly.")
parse_phone_number('4()-.152696180')


### Namespaces, Scope, Local Functions
- Local vs. global namespaces
    - Must define with `global` if should exist outside function
- Can `return` multiple values from single function


In [None]:
def func():
    a = []
    for i in range(5):
        a.append(i)
func()
a = []
def func():
    for i in range(5):
        a.append(i)
func()
a
func()
a

a = None
def bind_a_variable():
    global a
    a = []
bind_a_variable()
print(a)

def f(): 
    a = 5
    b = 6
    c = 7
    return a, b, c
a, b, c = f()
f()
return_value = f()
def f():
    a = 5
    b = 6
    c = 7
    return {'a': a, 'b': b, 'c': c}
f()

### Functions as Objects
- Define functions with `def`
- Pass functions as arguments of other functions
- Use `lambda` for anonmyous inline functions
    - Useful for `sort(key = <function>)`, e.g.
    - Can also build off/adapt existing functions

In [None]:
states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',
          'south   carolina##', 'West virginia?']
states
import re
def clean_strings(strings):
    result = []
    for value in strings:
        value = value.strip()
        value = re.sub('[!#?]', '', value)
        value = value.title()
        result.append(value)
    return result
clean_strings(states)
# Could we condense?
def clean_strings2(strings):
    result = []
    for value in strings:
        result.append(re.sub('[!#?]', '', value).strip().title())
    return result
clean_strings2(states) # Same result with fewer lines of code

def remove_punctuation(value):
    return re.sub('[!#?]', '', value)

clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings(strings, ops):
    result = []
    for value in strings:
        for function in ops:
            value = function(value) # can loop through functions
        result.append(value)
    return result
clean_strings(states, clean_ops)

for x in map(remove_punctuation, states):
    print(x)
    
def short_function(x):
    return x * 2
equiv_anon = lambda x: x * 2
equiv_anon(100) == short_function(100)
def apply_to_list(some_list, f):
    return [f(x) for x in some_list]
ints = [4, 0, 1, 5, 6]
apply_to_list(ints, lambda x: x * 2)

strings = ['foo', 'card', 'bar', 'aaaa', 'abab']
strings.sort(key = lambda x: len(set(list(x))))
strings

def add_numbers(x, y):
    return x + y
add_five = lambda y: add_numbers(5, y)
from functools import partial
add_five = partial(add_numbers, 5)
add_five(25)

### Generators
- Create generator with `yield` in place of `return`

In [None]:
some_dict = {'a': 1, 'b': 2, 'c': 3}
for key in some_dict:
    print(key)
dict_iterator = iter(some_dict)
dict_iterator
list(dict_iterator)
def squares(n = 10):
    print('Generating squares from 1 to {0}'.format(n ** 2))
    for i in range(1, n + 1):
        yield i ** 2
gen = squares()
gen
for x in gen: 
    print(x, end = ' ')
# If it were return instead, it would return the results
def squares2(n = 10):
    print('Generating squares from 1 to {0}'.format(n ** 2))
    for i in range(1, n + 1):
        return i ** 2
squares2()

# Even more concise
gen = (x ** 2 for i in range(100))
gen
# Same as...
def _make_gen():
    for x in range(100):
        yield x ** 2
gen = _make_gen()

# Examples
# sum(x ** 2 for x in range(100))
dict((i, i **2) for i in range(5))

# Itertools
import itertools
first_letter = lambda x: x[0]
names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']
for letter, names in itertools.groupby(names, first_letter):
    print(letter, list(names)) # names is a generator; which means we can't sort by it?
names = str(list(['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']).sort)
for letter, names in itertools.groupby(names, first_letter):
#     print(letter, list(names))
    pass
itertools.combinations?
itertools.combinations(range(5), 3)
list(itertools.combinations(range(5), 3))
itertools.permutations?
list(itertools.permutations(range(5, 0, -1), 2))
list(itertools.product(range(5), range(5)))

### Errors and Exceptions
- Use `try`/`except` blocks for exceptions
    - Specify which kind of error to catch (e.g., `ValueError`, `TypeError`, both, etc.)

In [None]:
float(1.2345)
# float('something')
def attempt_float(x):
    try:
        return float(x)
    except:
        return x
attempt_float(1.2345)
attempt_float('something')

# Distinguishing `valueError` and `typeError`
# float((1, 2))
def attempt_float(x):
    try:
        return float(x)
    except ValueError:
        return x
# attempt_float((1, 2))

def attempt_float(x):
    try:
        return float(x)
    except (ValueError, TypeError):
        return x
attempt_float((1, 2))

# Won't find the path here
f = open(path, 'w')


## 3.3 Files and OS
- Can `open` and `close` files
    - Use `with` to close files automatically when done
    - Use options `w`, `x`, and `rb` for writeable, writeable (prevents overwriting), and read in binary, respectively
        - More in Table 3.3
    - Use `tell` to find current position in file
    - Use `seek` to change position in file

In [None]:
path = 'examples/segismundo.txt'
f = open(path)
for line in f:
    pass
lines = [x.rstrip() for x in open(path)]
# lines = [l.rstrip() for l in f] # open(path) should be the same as f here?
lines
f.close()

with open(path) as f:
    lines = [x.rstrip() for x in f]
lines

f = open(path)
f.read(10)
f2 = open(path, 'rb')
f2.read(10)
f.tell()
f2.tell()

import sys
sys.getdefaultencoding()
f.seek(3)
f.read(1)
f.tell()
f.seek(10)

f.close()
f2.close()
for x in (f, f2):
    x.close
    
with open('tmp.txt', 'w') as handle:
    handle.writelines(x for x in open(path) if len(x) > 1)
with open('tmp.txt') as f: 
    lines = f.readlines()
lines

with open('examples/tmp2.txt', 'w') as handle:
    handle.writelines(x for x in open(path) if len(x) > 25)
with open('examples/tmp2.txt') as f:
    lines = f.readlines()
lines

### Bytes and Unicode with Files
- Use `decode` for text with non-UTF characters

In [9]:
with open(path) as f:
    chars = f.read(10)
chars
with open(path, 'rb') as f: 
    data = f.read(10)
data
data[:4].decode('utf8')
data.decode('utf8')

sink_path = 'sink.txt'
with open(path) as source:
#     with(open(sink_path, 'xt', encoding='iso-8859-1')) as sink:
    with(open(sink_path, 'w', encoding = 'iso-8859-1')) as sink:
        sink.write(source.read())
with open(sink_path, encoding='iso-8859-1') as f:
    print(f.read(10))

f = open(path)
f.read(5)
f.seek(4)
f.read(1)
f.close()


