# CSCI E7 Introduction to Programming with Python
## Lecture 06 Jupyter Notebook
Fall 2021 (c) Jeff Parker

# Timing a program

## Use a Jupyter command to time operations

In [2]:
%%time

! python ../assignment5/reversals.py ../words.txt 

Wall time: 132 ms


python: can't open file '../assignment5/reversals.py': [Errno 2] No such file or directory


```python
CPU times: user 1.72 s, sys: 475 ms, total: 2.19 s
Wall time: 1min 41s
```

101 seconds

## Shortlist: Remove items we have tested

In [3]:
%%time

! python ../assignment5/reversalsShortlist.py ../words.txt 

Wall time: 101 ms


python: can't open file '../assignment5/reversalsShortlist.py': [Errno 2] No such file or directory


```python
CPU times: user 803 ms, sys: 241 ms, total: 1.04 s
Wall time: 1min 2s
```

62 seconds

### This is faster, but still unacceptable for most users.  

## Hold the words in a Dictionary rather than a list

In [4]:
 %%time

! python ../assignment5/reversalDict.py ../words.txt 

Wall time: 99.4 ms


python: can't open file '../assignment5/reversalDict.py': [Errno 2] No such file or directory


```python
CPU times: user 31.3 ms, sys: 14.3 ms, total: 45.6 ms
Wall time: 2.19 s
```

### 30 times faster than improved version 

# Dictionaries
## Mapping between *keys* and *values*

In [5]:
eng2sp = dict()
print(eng2sp)

{}


In [6]:
eng2sp['one'] = 'uno'
print(eng2sp)

{'one': 'uno'}


## Can test if a key is in the dictionary and return value
The speed of this test gives dictionaries their value

In [7]:
if ('one' in eng2sp):
    print("Found it")
    print(f"one: {eng2sp['one']}")

Found it
one: uno


## Can Add an entry

In [8]:
eng2sp['two'] = 'dos'

## Can iterate over all the keys

In [9]:
for word in eng2sp:
    print(word, eng2sp[word])

one uno
two dos


## Can initialize with multiple entries

In [10]:
eng2sp = {'one': 'uno', 'two' : 'dos', 'three': 'tres'}
print(eng2sp)

{'one': 'uno', 'two': 'dos', 'three': 'tres'}


### What will happen?

In [11]:
print(eng2sp['two'])

dos


In [12]:
print(eng2sp['four'])

KeyError: 'four'

## Catch the error that was generated

In [13]:
key = 'four'

try:
    print(eng2sp[key])
except KeyError:
    print(f"'{key}' was not in the dictionary")

'four' was not in the dictionary


## Dictionaries support len()

In [14]:
len(eng2sp)

3

## Can test if key is in dictionary

We have seen this above...

In [15]:
'one' in eng2sp

True

## ... but `in` looks only at the keys, not at the values

In [16]:
'uno' in eng2sp

False

## Can build collection of the dictionary values and test against that
### Search for values isn't as fast as searching for keys

In [17]:
'uno' in eng2sp.values()
print(eng2sp.values())

dict_values(['uno', 'dos', 'tres'])


## Pretty Print

Python will print a dictionary

In [18]:
print(eng2sp)

{'one': 'uno', 'two': 'dos', 'three': 'tres'}


## You can do better than the default

In [19]:
def printDict(d: dict):
    "Print each key of d with its value, one tab-separated pair per line"
    for key, value in d.items():
        print(f"{key}: \t{value}")

printDict(eng2sp)

one: 	uno
two: 	dos
three: 	tres


## How are Dictionaries implemented?
### Hashing - look at Lecture Notes

## A Dictionary can hold a list as a Value

In [20]:
d = {}
d['one'] = [1, 2, 3]
print(d)

{'one': [1, 2, 3]}


## But we cannot use a list as key

Keys must be hashable objects. A list can be modified, so Python does not allow it to be hashed or used as a key

In [21]:
lst = [4, 5, 6]
d[lst] = 'two'

TypeError: unhashable type: 'list'

## *A hash computed from a mutable object's contents would change when the contents change*
## *This is unsuitable for the key of a dictionary, so lists are unhashable*

## Application: Counting frequency

We are using the "Look before you Leap" (LBYL) idiom.  

In [22]:
def countChars(text: str) -> dict:
    "Return a dictionary mapping each character of text to the number of times it occurs"
    tally = {}

    for letter in text:
        if (letter not in tally):      # LBYL: first time we see this character
            tally[letter] = 0
        tally[letter] += 1
    return tally

print(countChars('All aboard'))

{'A': 1, 'l': 2, ' ': 1, 'a': 2, 'b': 1, 'o': 1, 'r': 1, 'd': 1}


# Return to the problem of finding reversals

We made very small changes

Replaced list with a dictionary

Program ran much faster - well under a second

The right data structure can make a huge difference

### I have edited the list below to highlight the essential differences

```python
% diff reversal.py reversalDict.py 
1c1
< # reversal.py
---
There are a number of other differences in the comments.  

14,16c16
< # Takes a list and returns a list of lower case words
< # Given  lst = ['art', 'Rat', 'Radar', 'tar', 'vista']
< # returns ['rat', 'radar']
---
> # Takes a list and a Dictionary and returns a list of lower case words
18,19c18,19
< def find_reversals(words: List[str]) -> List[str]:
<     "Look for reversals in a list"
---
> def find_reversals(order: List[str], words: Dict[str, None]) -> List[str]:
>     "Look for reversals in a Dict"
51a52,60
> def build_dict(lst: List[str]) -> Dict[str, None]:
>     "Take list and populate a Dictionary"
>     res = {}
>     
>     for word in lst:
>         res[word] = None
> 
>     return res
>
56,58c65,67
<     lst  = read_file(sys.argv[1])
<     cProfile.run('revs = find_reversals(lst)')
---
>     lst = read_file(sys.argv[1])
>     dict = build_dict(lst)
>     cProfile.run('revs = find_reversals(lst, dict)')
```

In [23]:
# reversalDict.py 
#
# Look for reversals in a file of words using a dictionary to store words
# Usage:
#      % python reversalDict.py words.txt
#
# Jeff Parker, July, 2018
#
# This version profiles the work

from typing import List, Dict
import sys
import cProfile


# Takes a list and a Dictionary and returns the words whose reversal is also a word
#
def find_reversals(order: List[str], words: Dict[str, None]) -> List[str]:
    """Look for reversals in a Dict

    order gives the words in the sequence they are examined and reported.
    words holds the vocabulary as keys, so each membership test is a hash
    lookup rather than a scan of a list.

    A word is reported when its reversal is a key of words and the word sorts
    strictly before its reversal.  That test lists each pair once and skips
    palindromes.  A word is reported at most once, even if order repeats it.
    """

    # Initialize results
    results = []
    seen = set()      # Words already reported: set lookup instead of a list scan

    # Look for reversals
    for word in order:
        rev = word[::-1]
        if (word < rev) and (rev in words) and (word not in seen):
            seen.add(word)
            results.append(word)

    # Return with results
    return results


def read_file(filename: str) -> List[str]:
    """Read words from a text file and build a list

    Each line of the file is converted to lower case and stripped of
    surrounding whitespace.  Returns an empty list, after printing a
    message, if the file cannot be found, opened, or decoded.
    """
    try:
        with open(filename, 'r') as words:
            return [word.lower().strip() for word in words]

    except FileNotFoundError:
        print(f"Could not find file {filename}")
    except (OSError, UnicodeError):
        # Name the errors we expect (permissions, a directory, bad encoding)
        # rather than using a bare except, which would also hide Ctrl-C
        print(f"Could not open file {filename}")

    return []

def build_dict(lst: List[str]) -> Dict[str, None]:
    "Return a Dictionary with every word of lst as a key; the values are all None"
    return {word: None for word in lst}


# Script entry: expect exactly one argument, the name of the word file
if (len(sys.argv) != 2):
    print("Usage: python reversals <filename>")
else:
    lst = read_file(sys.argv[1])      # The reader defined above is read_file()
    word_dict = build_dict(lst)       # Not named 'dict': that would hide the builtin

    # cProfile.run() executes the statement string in the __main__ namespace,
    # so lst and word_dict must be module-level names; revs is created there too
    cProfile.run('revs = find_reversals(lst, word_dict)')

    print(len(revs))

    for word in revs:
        print(word, word[::-1])

Usage: python reversals <filename>


# Homework: Matching Parentheses

## You will be asked to match parens: 

In [24]:
#  is_valid_parens('{()[{}]}') should return True
#  is_valid_parens('{()[{}}')  should return False
#
def is_valid_parens(s: str) -> bool:
    "Homework stub: no implementation yet, so this currently returns None"
    pass

## Use a Stack

Please use a stack.  It makes the problem much simpler, and gives you a solution that will let you identify the spot where the problem appears.  

In [25]:
# Walk through example '{()[{}]}'
s = '{()[{}]}'

stack = []     # Holds pending open parens

# Process each character in turn

ch = '{'
stack.append(ch)    # This is an open paren, so Push
print(stack)

ch = '('
stack.append(ch)    # This is an open paren, so Push
print(stack)

ch = ')'
item = stack.pop()   # This is a close paren, so remove the last thing we pushed
print(item, ch)
print(stack)

# Are item and ch a pair? 
# If so, continue
# If not, we don't have a match - return False

# Continue processing the string s 

['{']
['{', '(']
( )
['{']


## What can go wrong?
- Wrong thing on the stack
    - I have a closing paren, and the open paren on stack doesn't match
- Too little on stack
    - I try to pop(), and there is nothing left on stack 
- Too much on stack
    - When the input string is done, there are still open parens on the stack 

## How to guard against popping empty stack?  
- LBYL
- EAFP

# Dictionary updates
How to guard against access errors
- LBYL
- EAFP
- Defaultdict

## What is the problem?

In [26]:
d = {}
print(d)

# Danger, Will Robinson!
d['a'] = d['a'] + 1
print(d)

{}


KeyError: 'a'

##  Look Before You Leap (LBYL) update

In [27]:
d = {}

# Increment count using Look Before You Leap (LBYL)
def record(ch, counts):
    "Add one to the count for ch in counts, creating the entry if it is missing"
    if (ch not in counts):      # Look first: is this a new key?
        counts[ch] = 1
        return
    counts[ch] += 1
        
print(d)
record('a', d)
print(d)
record('a', d)
print(d)

{}
{'a': 1}
{'a': 2}


## Easier to Ask Forgiveness than Permission (EAFP)

In [28]:
d = {}

# EAFP Easier to Ask Forgiveness than Permission
def record(ch, counts):
    "Add one to the count for ch in counts, creating the entry if it is missing"
    try:
        counts[ch] += 1      # Just try the update...
    except KeyError:
        counts[ch] = 1       # ...and recover if ch was not there yet

   
print(d)
record('a', d)
print(d)
record('a', d)
print(d)

{}
{'a': 1}
{'a': 2}


## Defaultdict: no muss, no fuss

Dictionary will populate on access

We can specify default values, but KISS for first example

In [29]:
from collections import defaultdict

d = defaultdict(int)     # Values are integers, starting at default value 0
                         # We can create other types of defaultdict

# Default Dict takes care of initializing to default value
def record(ch, counts):
    "Add one to the count for ch; counts itself must supply the starting value for a new key"
    counts[ch] += 1
 
print(d)
record('a', d)
print(d)
record('a', d)
print(d)

defaultdict(<class 'int'>, {})
defaultdict(<class 'int'>, {'a': 1})
defaultdict(<class 'int'>, {'a': 2})


## *Default Dict provided simplest code*

# Program Practice
## Guessing Game
## Introduces Random Numbers and user input

In [None]:
import random

# Initialize random number generator
random.seed()

# Pick a number strictly between 0 and 100
secret = random.randint(1, 99)

print("Guess my number between 0 and 100!")

count    = 0
response = 0     # 0 is never the secret, so the loop runs at least once

while (response != secret):

    # Get a number from the user; ask again if the input isn't an integer
    try:
        response = int(input('Guess a number!'))
    except ValueError:
        print("Please enter a whole number!")
        continue

    count = count + 1

    # Respond to the user
    if (response < secret):
        print("Too low! Guess again!")
    elif (response > secret):
        print("Too high! Guess again!")
    else:
        print("Lucky guess!")

print("It took", count, "guesses")

Guess my number between 0 and 100!


# References
## Start with Scope
## What happens when we change var in a function?

*Notice the use of '=' in the f-string to print the variable name.*

```python
print(f'a = {a}')
print(f'{a = }')   # Better, because it is simpler
```

In [None]:
i = 1

def test(i, s, lst):
    "Print the three parameters, change each one, and print them again"
    print(f'\t {i = }, {s = }, {lst = }')
    i = i + 1       # Rebinds the local name i to a new value
    s = s[1:]       # Rebinds the local name s to the slice
    lst.pop()       # Calls pop() on the very object that was passed in
    print(f'\t {i = }, {s = }, {lst = }')
    
 
# Initialize
i = 1
s = 'hello'
lst = ['a', 'b', 'c']
print(f'{i = }, {s = }, {lst = }')

# Call the function
test(i, s, lst)

# See what we have now
print(f'{i = }, {s = }, {lst = }')

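## In Python, we pass parameters by object reference
The function receives a reference to the caller's object, not a copy.  Assigning a new value to a parameter only rebinds the local name; changing the object in place is visible to the caller.

If your object is immutable, it remains unchanged.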

Strings and integers are immutable.

Lists are mutable.

## *Moral: be careful with a list*

# Deep Copy
What happens when we make a copy?  What do we have in common?

In [None]:
a = [1, 2, 3]
print(f'{a = }')

b = a
print(f'{b = }')

print(f"a == b: {a == b}")
print(f"a is b: {a is b}")

print('\nPop b')
b.pop()
print(f'{a = }')
print(f'{b = }')
print()

print("a == b", a == b)

![Sharing](img/figure1.jpg)

In [None]:
a == b

In [None]:
a is b

## Make a copy of list a

In [None]:
a = [1, 2, 3]
print(f'{a = }')

b = a[:]         # Make a copy of a
print(f'{b = }')
print()

print(f"a == b: {a == b}")
print(f"a is b: {a is b}")

print('\nPop b')
b.pop()
print(f'{a = }')
print(f'{b = }')
print()

print("a == b", a == b)

![Copy](img/figure2.jpg)

In [None]:
a == b

In [None]:
a is b

## What about nested lists?

In [None]:
a = [1, [2, 3]]
print(f'{a = }')

b = a[:]         # Make a copy of a
print(f'{b = }')
print()

print("a == b", a == b)
print("a is b", a is b)

print('\nPop b')
b.pop()
print(f'{a = }')
print(f'{b = }')

![Copy](img/figure3.jpg)

In [None]:
a == b

In [None]:
a is b

## Now modify the nested sublist

In [None]:
a = [1, [2, 3]]
print(f'{a = }')

b = a[:]         # Make a copy of a
print(f'{b = }')
print()

print("a == b:", a == b)
print("a is b:", a is b)

print('\nPop b')
b[1].pop()
print(f'{a = }')
print(f'{b = }')

# What?!@!

In [None]:
a = [1, [2, 3]]
print(f'{a = }')

b = a[:]         # Make a copy of a
print(f'{b = }')
print()

print('*****************')
print("a[1] == b[1]:", a[1] == b[1])
print("a[1] is b[1]:", a[1] is b[1])
print('*****************')

print('\nPop b')
b[1].pop()
print(f'{a = }')
print(f'{b = }')

### *The lists are different, but they share a common sublist - see figure*

## Try it again

In [None]:
a = [1, 'a', 'b', 'c', [2, 3]]
print(f'{a = }')

b = a[:]         # Make a copy of a
print(f'{b = }')
print()

print("a[-1] == b[-1]:", a[-1] == b[-1])
print("a[-1] is b[-1]:", a[-1] is b[-1])

print('\nPop b')
b[-1].pop()
print(f'{a = }')
print(f'{b = }')

# Deep copy
We need more than a superficial copy

In [None]:
import copy

a = [1, 'a', 'b', 'c', [2, 3]]
print(f'{a = }')

b = copy.deepcopy(a)
print(f'{b = }')
print()

print("a[-1] == b[-1]", a[-1] == b[-1])
print("a[-1] is b[-1]", a[-1] is b[-1])

print('\nPop b')
b[-1].pop()
print(f'{a = }')
print(f'{b = }')

![Deep Copy](img/figure4.jpg)

## Not enough to copy: sometimes we need deep copy

# Sets

###  Unordered, mutable collection of unique & immutable objects.
- Unique = no duplicate elements
- Immutable = no mutable elements (e.g., no lists)

In [None]:
s = set()
print(s)

In [None]:
s = {'Alpha', 'Beta', 'Alpha'}
print(type(s))
print(s)

In [None]:
s = set('She sells sea shells')
print(s)

In [None]:
t = (1, 2, 3, 1)
print(t)
print(type(t))

In [None]:
t = (1, 2, 3, 1)
s = set(t)
print(s)

In [None]:
s = set([1, 2, 3], [4, 5, 6])
print(s)

In [None]:
s = set([1, 2, 3])
print(s)

In [None]:
s = set()
s.add('a')
print(s)
s.add('b')
print(s)

## Can we use sets as a key?

In [None]:
d = {}

d[s] = 1
print(d)

## Let's freeze it!

https://www.youtube.com/watch?v=wrfOpXApxYQ 

In [None]:
s = frozenset(s)
print(s)

In [None]:
d = {}

d[s] = 1
print(d)

In [None]:
s.add('c')

In [None]:
hash('adam')

In [None]:
hash(set('adam'))

In [None]:
hash(frozenset('adam'))

# Excel Files in Comma Separated Value form
## Banklist.csv - CSV file with Failed Banks

In [None]:
! head banklist.csv  # Unix command to show first lines of the CSV file banklist

```python
Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date
Fayette County Bank,Saint Elmo,IL,1802,"United Fidelity Bank, fsb",26-May-17,26-Jul-17
"Guaranty Bank, (d/b/a BestBank in Georgia & Michigan) ",Milwaukee,WI,30003,First-Citizens Bank & Trust Company,5-May-17,26-Jul-17
First NBC Bank,New Orleans,LA,58302,Whitney Bank,28-Apr-17,26-Jul-17
Proficio Bank,Cottonwood Heights,UT,35495,Cache Valley Bank,3-Mar-17,18-May-17
Seaway Bank and Trust Company,Chicago,IL,19328,State Bank of Texas,27-Jan-17,18-May-17
Harvest Community Bank,Pennsville,NJ,34951,First-Citizens Bank & Trust Company,13-Jan-17,18-May-17
Allied Bank,Mulberry,AR,91,Today's Bank,23-Sep-16,17-Nov-16
The Woodbury Banking Company,Woodbury,GA,11297,United Bank,19-Aug-16,1-Jun-17
First CornerStone Bank,King of Prussia,PA,35312,First-Citizens Bank & Trust Company,6-May-16,6-Sep-16
```

In [None]:
## What are the columns?
! head -1 banklist.csv  # Unix command to show first line of the CSV file banklist

In [None]:
from collections import defaultdict

# Count the rows for each state, splitting every line at the commas
with open('banklist.csv', 'rt') as f:      # 'with' closes the file for us
    try:
        d = defaultdict(int)
        for row in f:       # Get a line
            row = row.split(',')
            state = row[2]  # Get state
            d[state] = 1 + d[state]
    except (IndexError, UnicodeError):
        # A line with fewer than three fields, or text we could not decode
        print("Musta goofed up somewhere!")
        d = {}

print(len(d))

## What?!@#$
### *How can we have 81 states?*

In [None]:
print(d)

## Quick check - Let's look at some of those odd states

```python
    # Check for states that aren't 2 characters long
    if (len(state) != 2):         
        print(state, row)
```

In [None]:
from collections import defaultdict

# Count the rows for each state, and show any state that looks odd
with open('banklist.csv', 'rt') as f:      # 'with' closes the file for us
    try:
        d = defaultdict(int)
        for row in f:  # Get a line
            row = row.split(',')
            state = row[2]  # Get state

            # Check for states that aren't 2 characters long
            if (len(state) != 2):
                print(f"State: {state} \n\t Row: {row}\n")

            d[state] = 1 + d[state]
    except (IndexError, UnicodeError):
        # A line with fewer than three fields, or text we could not decode
        print("Musta goofed up somewhere!")
        d = {}

print(len(d))

## What are we dealing with?

In [None]:
# What is going on with the Guaranty Bank of Milwaukee?
! grep 'Guaranty Bank' banklist.csv

```python
"Guaranty Bank, (d/b/a BestBank in Georgia & Michigan) ",Milwaukee,WI,30003,First-Citizens Bank & Trust Company,5-May-17,26-Jul-17
...
```
## Why is the state Milwaukee, rather than WI?

## And where did the quotes come from?

## Look at simple file with three contacts

There are no quotation marks in the Excel xls file.   

![csvfile](img/Addresses.jpg)

In [None]:
## Export as a csv and take a peek

! more Addresses.csv

```python
<U+FEFF>Number,Name,Address
1,"Karpathy, Zoltan",123 Main Street
2,"Gates, Bill","456 MS Blvrd, Redmond, Washington"
3,"Joker, The",
Addresses.csv (END)
```
### *Note <U+FEFF> at head: this is a Unicode byte-order mark (BOM).  We will revisit when we Pickle*

## If a cell has a comma, Excel wraps cell as a string
When Excel exports to CSV, wraps cells with commas in quotes.

When Excel imports a CSV file, treats blocks in quotes as one cell.

## Import without precaution

In [None]:
# Show each line of the file, and what split(',') makes of it
with open('Addresses.csv', 'rt') as f:      # 'with' closes the file for us
    try:
        for row in f:  # Get a line
            row = row.strip()
            print(row)                # What does the row look like?
            lst = row.split(',')
            print(lst, "\n")          # How did it wind up after split?
    except UnicodeError:
        # The file held text we could not decode
        print("Musta goofed up somewhere!")

```python
2,"Gates, Bill","456 MS Blvrd, Redmond, Washington"
['2', '"Gates', ' Bill"', '"456 MS Blvrd', ' Redmond', ' Washington"'] 
```
### *There are 6 elements of the list, even though it only took 3 cells*

## We are splitting at the ',' as requested
```python
2, "Gates, Bill","456 MS Blvrd, Redmond, Washington"  

2 | "Gates | Bill" | "456 MS Blvrd | Redmond | Washington" 
```

## The CSV reader knows how to deal with this

Read the file with csv reader - see the `csv.reader` line in the cell below

```python
    reader = csv.reader(f)
    for row in reader:
```

In [3]:
# Introduce CSV reader
import csv   # Comma Separated Values

import sys   # Read command line
from collections import defaultdict

# Count the rows for each state, letting the csv reader split the fields.
# newline='' is what the csv module asks for when it is handed a file;
# 'with' closes the file for us
with open('banklist.csv', 'rt', newline='') as f:
    d = defaultdict(int)
    try:
        reader = csv.reader(f)
        for row in reader:
            state = row[2]
            if (len(state) != 2):
                print(row)
            d[state] = 1 + d[state]
    except (csv.Error, IndexError, UnicodeError):
        # Malformed CSV, a row with fewer than three cells, or undecodable text
        print("Musta goofed up somewhere!")

print(len(d))

45


### Only 45 states

In [4]:
print(max(d))

WY


In [5]:
print(d[WY])

NameError: name 'WY' is not defined

In [6]:
print(d['WY'])

1


### *Wyoming doesn't have many failures: it is just last alphabetically*

In [None]:
mx = 0
st = ''
for state in d:
    if d[state] > mx:
        mx = d[state]
        st  = state
        
print(mx, st)

## We can sort the list.  

In [None]:
lst = []
for state in d:
    lst.append([state, d[state]])
    
print(sorted(lst))

## Hmmm.  Put frequency first to get useful order
### List the states in order by the number of failures

In [None]:
lst = []
for state in d:
    lst.append([d[state], state])
    
print(sorted(lst))

In [None]:
print(sorted(lst, reverse=True))

## Note that one of the states is 'ST'

```python
[[1, 'HI'], [1, 'MA'], [1, 'NH'], [1, 'SD'], [1, 'ST'], ....
```

### This is a bug: We are treating the first line as valid input

```python
Bank Name,City, ST, CERT,Acquiring Institution,Closing Date,Updated Date

Fayette County Bank,Saint Elmo,IL,1802,"United Fidelity Bank, fsb",26-May-17,26-Jul-17
```

## Don't take first row

### *Edit after recording*

We haven't checked for errors above, and we are treating ST as a valid State

In [None]:
def read_dict(filename):
    """Read in a CSV file, count instances of each State code

    The first row is taken to be the column headings and is skipped.  In
    every other row the third cell is used as the State code.

    Returns a dictionary mapping State code to the number of rows with that
    code.  Returns an empty dictionary, after printing a message, if the
    file cannot be found or cannot be read as a CSV file whose rows all have
    at least three cells.
    """
    try:                             # Try to open file: EAFP
        # newline='' is what the csv module asks for when it is handed a file
        f = open(filename, 'rt', newline='')
    except FileNotFoundError:
        print(f"Could not find '{filename}'")
        return {}

    # Dictionary as Counter Design Pattern
    # Create a Default Dictionary to hold count for each State code
    d = defaultdict(int)

    with f:                          # Close the file however we leave
        try:
            reader = csv.reader(f)   # Wrap file in CSV reader
            next(reader, None)       # Skip the heading row, if there is one

            for row in reader:
                # State is third element: count this State code
                d[row[2]] += 1

        except (csv.Error, IndexError, UnicodeError):
            # Malformed CSV, a row with fewer than three cells, or undecodable text
            print(f"Attempted to read '{filename}' as CSV file")
            return {}

    return d

## Compare the best and the worst

In [None]:
# Iterate over the dictionary and build a list with [count, state]
res = [[d[state], state] for state in d]

# Display the best states: those with lowest count
res.sort()
for pair in res[:10]:
    print(pair)

print()

# Display the worst states
for pair in res[-10:]:
    print(pair)

## We will read CSV files in a future homework
There are many datasets published as CSV files

Good example of the utility of Python Libraries

# WTF
# How does Python implement dictionaries?

https://www.laurentluce.com/posts/python-dictionary-implementation/

Also see Raymond Hettinger video in Person, Place, or Thing