# First day: List comprehensions and generators

### List comprehensions

In [1]:
from collections import Counter
import calendar
import itertools
import random
import re
import string

import requests

In [2]:
names = 'pybites mike bob julian tim sara guido'.split()
names

['pybites', 'mike', 'bob', 'julian', 'tim', 'sara', 'guido']

In [3]:
for name in names:
    print(name.title())

Pybites
Mike
Bob
Julian
Tim
Sara
Guido


In [5]:
first_half_alphabet = list(string.ascii_lowercase)[:13]
first_half_alphabet

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm']

In [6]:
# Loop-and-append version: collect the title-cased form of every name whose
# first letter is in the first half of the alphabet (a-m).
new_names = []
for name in names:
    # name[0] is the first character of the (lowercase) name
    if name[0] in first_half_alphabet:
        new_names.append(name.title())
        
new_names

['Mike', 'Bob', 'Julian', 'Guido']

In [10]:
new_names2 = [n.title() for n in names if n[0] in first_half_alphabet]

In [11]:
assert new_names == new_names2

In [26]:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36', "Upgrade-Insecure-Requests": "1","DNT": "1","Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8","Accept-Language": "en-US,en;q=0.5","Accept-Encoding": "gzip, deflate"}
resp = requests.get('https://projects.bobbelderbos.com/pcc/harry.txt', headers=headers)

In [28]:
words = resp.text.lower().split()
words[:5]

['the', 'boy', 'who', 'lived', 'mr.']

In [30]:
counter = Counter(words)
counter.most_common(5)

[('the', 202), ('he', 136), ('a', 108), ('and', 100), ('to', 93)]

In [31]:
'-' in words

True

In [33]:
words = [re.sub(r'\W+', r'', word) for word in words]
words[:5]

['the', 'boy', 'who', 'lived', 'mr']

In [34]:
'-' in words

False

In [35]:
'the' in words

True

In [37]:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36', "Upgrade-Insecure-Requests": "1","DNT": "1","Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8","Accept-Language": "en-US,en;q=0.5","Accept-Encoding": "gzip, deflate"}
resp = requests.get('https://projects.bobbelderbos.com/pcc/stopwords.txt', headers=headers)
stopwords = resp.text.lower().split()
stopwords[:5]
len(stopwords)

319

In [39]:
# Drop empty strings and stop words.  The stop words are put in a set once so
# each membership test is O(1) instead of a scan of the whole stopwords list.
stopword_set = set(stopwords)
words = [word for word in words if word.strip() and word not in stopword_set]
'the' in words

False

In [40]:
counter = Counter(words)
counter.most_common(5)

[('dursley', 45),
 ('dumbledore', 35),
 ('said', 32),
 ('mr', 30),
 ('professor', 30)]

### Generators

In [41]:
def num_gen():
    """Yield the integers 0 through 4, one at a time."""
    current = 0
    while current < 5:
        yield current
        current += 1

In [42]:
gen = num_gen()

In [43]:
next(gen)

0

In [44]:
for i in gen:
    print(i)

1
2
3
4


In [45]:
next(gen)

StopIteration: 

In [46]:
options = 'red yellow blue white black green purple'.split()
options

['red', 'yellow', 'blue', 'white', 'black', 'green', 'purple']

In [47]:
def create_select_options(options=options):
    """Return a list of HTML ``<option>`` tags, one per entry in *options*.

    Each entry is inserted verbatim as the ``value`` attribute (no quoting or
    escaping is applied) and title-cased for the visible label.  When called
    without an argument, the module-level ``options`` list bound at
    definition time is used.
    """
    return [
        f"<option value={option}>{option.title()}</option>"
        for option in options
    ]

In [48]:
from pprint import pprint as pp
pp(create_select_options())

['<option value=red>Red</option>',
 '<option value=yellow>Yellow</option>',
 '<option value=blue>Blue</option>',
 '<option value=white>White</option>',
 '<option value=black>Black</option>',
 '<option value=green>Green</option>',
 '<option value=purple>Purple</option>']


In [49]:
def create_select_options_gen(options=options):
    """Lazily yield one HTML ``<option>`` tag per entry in *options*.

    Generator counterpart of the list-building version: each entry is inserted
    verbatim as the ``value`` attribute and title-cased for the label, but a
    tag is only produced when the caller asks for the next item.
    """
    yield from (
        f"<option value={option}>{option.title()}</option>"
        for option in options
    )

In [50]:
create_select_options_gen()

<generator object create_select_options_gen at 0x7f27b4ccf200>

In [51]:
list(create_select_options_gen())

['<option value=red>Red</option>',
 '<option value=yellow>Yellow</option>',
 '<option value=blue>Blue</option>',
 '<option value=white>White</option>',
 '<option value=black>Black</option>',
 '<option value=green>Green</option>',
 '<option value=purple>Purple</option>']

In [52]:
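# Performance of list vs generator
# Note: calling a generator function only creates the generator object; no
# years are checked until it is iterated, so timing the bare call below
# measures object creation rather than the work of finding leap years.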

# list
def leap_years_lst(n=1000000):
    """Return a list of every leap year from 1 through *n* inclusive.

    The whole list is built in memory before it is returned.
    """
    return [year for year in range(1, n + 1) if calendar.isleap(year)]

# generator
def leap_years_gen(n=1000000):
    """Yield each leap year from 1 through *n* inclusive, in ascending order.

    Years are tested one at a time as the caller iterates, so nothing is
    computed up front.
    """
    for candidate in range(1, n + 1):
        if not calendar.isleap(candidate):
            continue
        yield candidate

In [57]:
%timeit -n1 leap_years_lst()

125 ms ± 5.16 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [58]:
%timeit -n1 leap_years_gen()

The slowest run took 4.98 times longer than the fastest. This could mean that an intermediate result is being cached.
2.03 µs ± 1.66 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
