In [0]:
from collections import Counter
import calendar
import itertools
import random
import re
import string

import requests

**List comprehension**

In [1]:
# Sample lowercase names used by the list-comprehension examples.
names = ['pybites', 'mike', 'bob', 'julian', 'tim', 'sara', 'guido']
names

['pybites', 'mike', 'bob', 'julian', 'tim', 'sara', 'guido']

In [2]:
for name in names:
    print(name.title())

Pybites
Mike
Bob
Julian
Tim
Sara
Guido


Next, I want to keep only the names that start with A-M; the `string` module makes this easier:

In [6]:
# The first 13 lowercase ASCII letters ('a' through 'm'), one letter per list item.
first_half_alphabet = list(string.ascii_lowercase[:13])
first_half_alphabet

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm']

In [7]:
# Explicit-loop version: collect the title-cased names whose first letter is in a-m.
new_names = []
for name in names:
    if name[0] not in first_half_alphabet:
        continue  # first letter is outside a-m, so leave this name out
    new_names.append(name.title())
new_names

['Mike', 'Bob', 'Julian', 'Guido']

In [8]:
# Same filter-and-transform as the explicit loop, written as a single list comprehension:
# the expression comes first, then the iteration, then the condition.
new_names2 = [
    name.title()
    for name in names
    if name[0] in first_half_alphabet
]
new_names2

['Mike', 'Bob', 'Julian', 'Guido']

In [0]:
assert new_names == new_names2

In [10]:
# Download the sample text (requires network access).
# A timeout stops the cell from hanging forever on an unresponsive server, and
# raise_for_status() turns a 4xx/5xx reply into an error instead of letting an
# HTTP error page be tokenised as if it were the text.
resp = requests.get('http://projects.bobbelderbos.com/pcc/harry.txt', timeout=10)
resp.raise_for_status()
# Lowercase everything and split on whitespace to get a flat list of tokens.
words = resp.text.lower().split()
words[:5]

['the', 'boy', 'who', 'lived', 'mr.']

In [11]:
# Tally how often each token occurs; most_common(5) lists the five most
# frequent tokens as (token, count) pairs, highest count first.
cnt = Counter(words)
cnt.most_common(5)

[('the', 202), ('he', 136), ('a', 108), ('and', 100), ('to', 93)]


We should not count stopwords. Also, stray punctuation such as '-' is being counted as a word:

In [12]:
'-' in words

True

Let's first strip out any non-word characters (anything other than letters, digits and underscores):

In [13]:
# \W+ matches a run of non-word characters; deleting every such run leaves
# punctuation-only tokens (like '-') as empty strings.
non_word_run = re.compile(r'\W+')
words = [non_word_run.sub(r'', word) for word in words]

'-' in words

False

In [14]:
'the' in words

True

Now we will filter those stopwords out, plus the empty strings caused by the previous list comprehension:

In [15]:
# Download the stopword list (requires network access).
# A timeout stops the cell from hanging forever on an unresponsive server, and
# raise_for_status() turns a 4xx/5xx reply into an error instead of letting an
# HTTP error page be treated as stopwords.
resp = requests.get('http://projects.bobbelderbos.com/pcc/stopwords.txt', timeout=10)
resp.raise_for_status()
# Lowercase and split on whitespace so each stopword is its own list item.
stopwords = resp.text.lower().split()
stopwords[:5]

['a', 'about', 'above', 'across', 'after']

In [16]:
# Look stopwords up in a set: membership is O(1), whereas `word not in stopwords`
# would rescan the whole stopword list for every single word.
stopword_set = set(stopwords)
# `word.strip()` is falsy for empty/whitespace-only strings, which drops the
# empty tokens left behind once non-word characters were removed.
words = [word for word in words if word.strip() and word not in stopword_set]
words[:5]

['boy', 'lived', 'mr', 'mrs', 'dursley']

In [17]:
'the' in words

False

In [18]:
# Re-count now that the token list has been cleaned and stopwords filtered out.
cnt = Counter(words)
cnt.most_common(5)

[('dursley', 45),
 ('dumbledore', 35),
 ('said', 32),
 ('mr', 30),
 ('professor', 30)]

**Generators**

A generator is a function that returns an iterator. It produces values with the `yield` keyword, one each time `next()` is called on it (a `for` loop does this implicitly), and it raises a `StopIteration` exception when there are no more values to generate. Let's see what this means with a very simple example:

In [0]:
def num_gen():
    """Yield the integers 0, 1, 2, 3, 4 in order, one per request.

    Execution pauses at each ``yield`` and resumes from that point on the
    next ``next()`` call; after 4 has been produced the generator is
    exhausted and raises ``StopIteration``.
    """
    current = 0
    while current < 5:
        yield current
        current += 1
        
gen = num_gen()

In [20]:
next(gen)

0

In [21]:
# Iteration picks up where the previous next() call left off
for i in gen:
    print(i)

1
2
3
4


In [22]:
# There are no more values to generate
next(gen)

StopIteration: ignored

In [0]:

# The generator is exhausted; a for loop handles StopIteration for us, so nothing is printed
for i in gen:
    print(i)

In [25]:
# Colour names used to build the <option> tags in the generator example.
options = ['red', 'yellow', 'blue', 'white', 'black', 'green', 'purple']
options

['red', 'yellow', 'blue', 'white', 'black', 'green', 'purple']

In [0]:
def create_select_options_gen(options=options):
    """Lazily yield one HTML ``<option>`` tag per entry in ``options``.

    Args:
        options: Iterable of strings. Defaults to the module-level ``options``
            list; the default is bound once, when this ``def`` is executed.

    Yields:
        ``<option value=X>Label</option>`` where ``X`` is the entry as given
        and ``Label`` is its ``str.title()`` form.

    Note:
        Each entry is interpolated as-is: the attribute value is unquoted and
        nothing is HTML-escaped.
    """
    for choice in options:
        label = choice.title()
        yield f'<option value={choice}>{label}</option>'

In [28]:
print(create_select_options_gen())

<generator object create_select_options_gen at 0x7f3d641f81a8>


In [29]:
list(create_select_options_gen())

['<option value=red>Red</option>',
 '<option value=yellow>Yellow</option>',
 '<option value=blue>Blue</option>',
 '<option value=white>White</option>',
 '<option value=black>Black</option>',
 '<option value=green>Green</option>',
 '<option value=purple>Purple</option>']