In [1]:
# "Unicode provides a unique number for every character, no matter what the platform, no matter what the program,
# no matter what the language." -The Unicode Consortium

# Python 3 strings are Unicode strings.

In [2]:
# Create a function that:
# -Takes a Python Unicode character
# -Looks up its name
# -Looks up the character again from the name (confirmation step)

def unicode_test(value):
    """Print a character, its Unicode name, and the character looked up from that name.

    value -- a one-character string.

    The name is fetched with unicodedata.name() and then fed back through
    unicodedata.lookup() as a round-trip check; all three pieces are printed
    on a single line. Nothing is returned.
    """
    from unicodedata import lookup, name as name_of

    char_name = name_of(value)
    round_trip = lookup(char_name)
    fields = (value, char_name, round_trip)
    print('value="%s", name="%s", value2="%s"' % fields)

In [3]:
unicode_test('A')

value="A", name="LATIN CAPITAL LETTER A", value2="A"


In [4]:
unicode_test('$')

value="$", name="DOLLAR SIGN", value2="$"


In [5]:
unicode_test('\u00a2') # Specify by Unicode ID

value="¢", name="CENT SIGN", value2="¢"


In [6]:
unicode_test('\u20ac')

value="€", name="EURO SIGN", value2="€"


In [7]:
unicode_test('\u2603')

value="☃", name="SNOWMAN", value2="☃"


In [9]:
import unicodedata # Need to import because outside of function.

unicodedata.name('\u00e9')

'LATIN SMALL LETTER E WITH ACUTE'

In [10]:
unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')

'é'

In [11]:
# Use Latin small letter e with acute in a string by calling Unicode ID.

place = 'caf\u00e9'
place

'café'

In [13]:
# Use Latin small letter e with acute in a string by calling name.

other_place = 'caf\N{LATIN SMALL LETTER E WITH ACUTE}'
other_place

'café'

In [15]:
# Build a string with a Unicode character by appending.

u_umlaut = '\N{LATIN SMALL LETTER U WITH DIAERESIS}'
u_umlaut

'ü'

In [17]:
drink = 'Gew' + u_umlaut + 'rztraminer'

print('Now I can finally have my', drink, 'in a ', place)

Now I can finally have my Gewürztraminer in a  café


In [18]:
# "The string len function counts Unicode characters, not bytes."

len('$')

1

In [19]:
len('\U0001f47b')

1

In [20]:
unicode_test('\U0001f47b')

value="👻", name="GHOST", value2="👻"


In [21]:
# "UTF-8 is the standard text encoding in Python, Linux, and HTML...If you use UTF-8 encoding throughout
# your code, life will be much easier than trying to hop in and out of various encodings." Python provides:
# - A way to encode character strings to bytes
# - A way to decode bytes to character strings

In [None]:
# encode()
# - First argument is encoding name
# - Second argument helps to avoid exceptions; default is 'strict'
#   but can use 'ignore' (throw away anything that won't encode)
#   'replace' (replace anything that won't encode with a '?')
#   'backslashreplace' (replace anything that won't encode with a Python backslash escape, e.g. \u2603)
#   'xmlcharrefreplace' (produce character entity strings for use in web pages)

In [22]:
snowman = '\u2603'

In [23]:
len(snowman)

1

In [24]:
ds = snowman.encode('utf-8') # Encode Unicode snowman character to byte sequence

In [25]:
len(ds)

3

In [26]:
ds

b'\xe2\x98\x83'

In [27]:
snowman.encode('ascii', 'ignore')

b''

In [28]:
snowman.encode('ascii', 'replace')

b'?'

In [29]:
snowman.encode('ascii', 'backslashreplace')

b'\\u2603'

In [30]:
snowman.encode('ascii', 'xmlcharrefreplace')

b'&#9731;'

In [31]:
# decode()
# Text from external sources is encoded as byte strings,
# so we need to know which encoding was used in the first place, to eventually get Unicode strings.

In [33]:
place = 'caf\u00e9'
place

'café'

In [34]:
type(place)

str

In [35]:
place_bytes = place.encode('utf-8') # Encode in a bytes variable
place_bytes

b'caf\xc3\xa9'

In [36]:
type(place_bytes)

bytes

In [37]:
place2 = place_bytes.decode('utf-8') # Decode byte string back to a Unicode string
place2

'café'

In [38]:
# Formatting strings: Old style with %s
# - "The %s inside the string means to interpolate a string."
# - "The number of % appearances in the string needs to match the number of data items after the %."
# - Single data item goes right after the %; group multiple items into a tuple with format (item1, item2)
# - "You can add other values between the % and the type specifier 
#   to designate minimum and maximum widths, alignment, and character filling."

In [39]:
'%s' % 42 # Format as string

'42'

In [40]:
'%d' % 42 # Format as decimal integer

'42'

In [41]:
'%x' % 42 # Format as hex integer

'2a'

In [42]:
'%o' % 42 # Format as octal integer

'52'

In [43]:
'%s' % 7.03 # Format float as string

'7.03'

In [44]:
'%f' % 7.03 # Format float as decimal float

'7.030000'

In [45]:
'%e' % 7.03 # Format float as exponential float

'7.030000e+00'

In [46]:
'%g' % 7.03 # Format float as decimal OR exponential float

'7.03'

In [47]:
'%d%%' % 100 # An integer and a literal '%'

'100%'

In [48]:
actor = 'Richard Gere'
cat = 'Chester'
weight = 28

In [49]:
"My wife's favorite actor is %s." % actor

"My wife's favorite actor is Richard Gere."

In [50]:
"My cat, %s, weighs %s pounds." % (cat, weight)

'My cat, Chester, weighs 28 pounds.'

In [51]:
n = 42
f = 7.03
s = 'string cheese'

In [52]:
'%d %f %s' % (n,f,s) # Format as decimal integer, float, and string with default widths

'42 7.030000 string cheese'

In [55]:
# Min field width 10 chars per variable, right align and fill unused chars with spaces
'%10d %10f %10s' % (n,f,s)

'        42   7.030000 string cheese'

In [56]:
# Same field width, left align
'%-10d %-10f %-10s' % (n,f,s)

'42         7.030000   string cheese'

In [54]:
# Field width 10 chars, max char width 4 (truncate string, limit chars after decimal in float), right align
'%10.4d %10.4f %10.4s' % (n,f,s) 

'      0042     7.0300       stri'

In [57]:
# Default field width, max char width 4, right align
'%.4d %.4f %.4s' % (n,f,s)

'0042 7.0300 stri'

In [59]:
# Get field widths and precisions from arguments (each * consumes one) instead of hard-coding.
'%*.*d %*.*f %*.*s' % (10,4,n,10,4,f,10,4,s)

'      0042     7.0300       stri'

In [61]:
# New-style formatting with {} and format, as recommended in Python 3.

# Simplest usage includes spacing as entered, default formatting
'{} {} {}'.format(n,f,s)

'42 7.03 string cheese'

In [62]:
# Specify the order in which arguments appear in formatted string (default formatting)
# Here, {2} pulls in the third argument (n) first, then {0} the first (f), then {1} the second (s)

'{2} {0} {1}'.format(f,s,n)

'42 7.03 string cheese'

In [64]:
# Named arguments with specifiers included, default formatting
'{n} {f} {s}'.format(n=42, f=7.03, s='string cheese')

'42 7.03 string cheese'

In [66]:
# Arguments combined into a dictionary
# {0} is entire dictionary, {1} is the string 'other'
# Default formatting
d = {'n':42, 'f':7.03, 's':'string cheese'}
'{0[n]} {0[f]} {0[s]} {1}'.format(d, 'other')

'42 7.03 string cheese other'

In [67]:
# New-style formatting specifications and positional arguments
'{0:d} {1:f} {2:s}'.format(n,f,s)

'42 7.030000 string cheese'

In [68]:
# Same values as named arguments, new-style formatting
'{n:d} {f:f} {s:s}'.format(n=42, f=7.03, s='string cheese')

'42 7.030000 string cheese'

In [69]:
# Min field width 10; numbers right-align by default (strings left-align by default, but this one is wider than the field)
'{0:10d} {1:10f} {2:10s}'.format(n,f,s)

'        42   7.030000 string cheese'

In [70]:
# Min field width 10, explicit right-align with >
# Each replacement field indexes its own positional argument: {0} -> n, {1} -> f, {2} -> s
'{0:>10d} {1:>10f} {2:>10s}'.format(n,f,s)

'        42   7.030000 string cheese'

In [71]:
# Min field width 10, left-align
'{0:<10d} {1:<10f} {2:<10s}'.format(n,f,s)

'42         7.030000   string cheese'

In [72]:
# Min field width 10, centered
'{0:^10d} {1:^10f} {2:^10s}'.format(n,f,s)

'    42      7.030000  string cheese'

In [73]:
# Cannot use precision value (after decimal point) for integers with new-style formatting,
# but can still use it to specify number of digits after the decimal for floats, 
# and max characters for strings.

# Will return error
'{0:>10.4d} {1:>10.4f} {2:>10.4s}'.format(n,f,s)

ValueError: Precision not allowed in integer format specifier

In [74]:
'{0:>10d} {1:>10.4f} {2:>10.4s}'.format(n,f,s)

'        42     7.0300       stri'

In [76]:
# Fill character lets you add any character to pad output fields
# Enter it after the colon, before any alignment or width specifiers

'{0:*^20s}'.format('BIG SALE')

'******BIG SALE******'

In [77]:
# Matching with regex
# Methods used with re
# - match(): Match the pattern only at the beginning of the source
# - search(): Return the first match, if any
# - findall(): Return a list of all non-overlapping matches, if any
# - split(): Split source at matches with pattern and return list of string pieces
# - sub(): Takes a replacement arg, changes all parts of source that match pattern to replacement

In [2]:
import re

source = 'Young Frankenstein'
m = re.match('You', source)

In [79]:
if m:
    print(m.group()) # If match returns an object, see what matched

You


In [80]:
m = re.match('^You', source) # Start anchor same as default match

if m:
    print(m.group())

You


In [81]:
# Default behavior won't find pattern that doesn't occur at beginning of source

m = re.match('Frank', source)

if m:
    print(m.group())

In [82]:
m = re.search('Frank', source)

if m:
    print(m.group())

Frank


In [84]:
m = re.match('.*Frank', source) # Add .* to match anything before pattern

if m:
    print(m.group())

Young Frank


In [5]:
m = re.findall('n', source)
# m
print('Found', len(m), 'matches.')

Found 4 matches.


In [6]:
m = re.findall('n.', source) # 'n', followed by any character; will not match final 'n'
m

['ng', 'nk', 'ns']

In [7]:
m = re.findall('n.?', source) # 'n', optionally followed by any character; will match final 'n'
m

['ng', 'nk', 'ns', 'n']

In [8]:
m = re.split('n', source)
m

['You', 'g Fra', 'ke', 'stei', '']

In [9]:
m = re.sub('n', '?', source)
m

'You?g Fra?ke?stei?'

In [10]:
# Exploring special characters with printable

import string
printable = string.printable

In [11]:
len(printable)

100

In [12]:
printable[0:50]

'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN'

In [13]:
printable[50:]

'OPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'

In [14]:
# Find digits in printable
# (raw string, so the backslash reaches the regex engine instead of being an invalid string escape)

re.findall(r'\d', printable)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [15]:
# Find digits, letters, or an underscore in printable
# (raw string, so the backslash reaches the regex engine instead of being an invalid string escape)

re.findall(r'\w', printable) # Special character for alphanumeric characters

['0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 '_']

In [16]:
# Find space characters in printable
# (raw string, so the backslash reaches the regex engine instead of being an invalid string escape)

re.findall(r'\s', printable)

[' ', '\t', '\n', '\r', '\x0b', '\x0c']

In [17]:
# Test source string made up of
# - Three ASCII letters
# - Three punctuation symbols that should not match a \w.
# - A Unicode LATIN SMALL LETTER E WITH CIRCUMFLEX
# - A Unicode LATIN SMALL LETTER E WITH BREVE

x = 'abc' + '-/*' + '\u00ea' + '\u0115'

In [18]:
re.findall(r'\w', x) # Raw string keeps the backslash intact; \w also matches the two accented letters in x

['a', 'b', 'c', 'ê', 'ĕ']

In [19]:
# Using main pattern specifiers for regex

# ^ anchors the search to the beginning of the source
# $ anchors the search to the end of the source
# .$ matches any character at the end of the line

source = '''I wish I may, I wish I might have a dish of fish tonight.'''

In [20]:
re.findall('wish', source)

['wish', 'wish']

In [21]:
re.findall('wish|fish', source)

['wish', 'wish', 'fish']

In [22]:
re.findall('^wish', source)

[]

In [23]:
re.findall('^I wish', source)

['I wish']

In [24]:
re.findall('fish$', source)

[]

In [25]:
re.findall('fish tonight.$', source)

['fish tonight.']

In [26]:
# Escape the dot at the end of the source to match it literally.
# A raw string passes the backslash through to the regex engine unchanged.

re.findall(r'fish tonight\.$', source)

['fish tonight.']

In [27]:
re.findall('[wf]ish', source)

['wish', 'wish', 'fish']

In [28]:
# Find runs of one or more 'w', 's', or 'h'. The + quantifier belongs after the
# character class; inside the brackets it would only add a literal '+' to the set.
re.findall('[wsh]+', source)

['w', 'sh', 'w', 'sh', 'h', 'h', 'sh', 'sh', 'h']

In [29]:
re.findall(r'ght\W', source) # Find 'ght' followed by non-alphanumeric character (raw string keeps \W intact)

['ght ', 'ght.']

In [30]:
re.findall('I (?=wish)', source) # Find 'I' followed by 'wish'

['I ', 'I ']

In [31]:
re.findall('(?<=I) wish', source) # Find 'wish' preceded by 'I'

[' wish', ' wish']

In [32]:
# In an ordinary (non-raw) Python string, '\b' is the backspace character rather than
# the regex word-boundary escape, so the pattern never matches this source.
re.findall('\bfish', source) # Will not return results because regex rules conflict with Python string rules

[]

In [33]:
re.findall(r'\bfish', source) # Use Python raw string to define regex string

['fish']

In [34]:
m = re.search(r'(. dish\b).*(\bfish)', source)

In [35]:
m.group()

'a dish of fish'

In [36]:
m.groups()

('a dish', 'fish')

In [38]:
m = re.search(r'(?P<DISH>. dish\b).*(?P<FISH>\bfish)', source) # Match expression and save in group with NAME

In [39]:
m.group()

'a dish of fish'

In [40]:
m.groups()

('a dish', 'fish')

In [41]:
m.group('DISH')

'a dish'

In [42]:
m.group('FISH')

'fish'

In [43]:
# Binary data basics

In [44]:
# Two types of sequence of eight-bit integers, with possible values 0 to 255:
# - bytes: Immutable, like a tuple of bytes
# - bytearray: Mutable, like a list of bytes


In [45]:
# Create a bytes variable called the_bytes and a bytearray variable called the_byte_array

blist = [1,2,3,255]

In [47]:
the_bytes = bytes(blist)
the_bytes

b'\x01\x02\x03\xff'

In [48]:
the_byte_array = bytearray(blist)
the_byte_array

bytearray(b'\x01\x02\x03\xff')

In [49]:
the_bytes[1] = 127 # Prove that bytes variable is immutable

TypeError: 'bytes' object does not support item assignment

In [51]:
the_byte_array[1] = 127 # Prove that bytes array variable is mutable
the_byte_array

bytearray(b'\x01\x7f\x03\xff')

In [54]:
# Two ways to create a 256-element result, with values from 0 to 255

# "When printing bytes or bytearray data, Python uses \xXX for non-printable bytes
# and their ASCII equivalents for printable ones (plus some common escape characters, such as \n instead of \x0a)."

the_bytes = bytes(range(0,256))
the_bytes

b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'

In [53]:
the_byte_array = bytearray(range(0,256))
the_byte_array

bytearray(b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff')

In [55]:
# Convert binary data to and from Python data structures with struct

import struct

valid_png_header = b'\x89PNG\r\n\x1a\n'
data = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR' + \
    b'\x00\x00\x00\x9a\x00\x00\x00\x8d\x08\x02\x00\x00\x00\xc0'

In [56]:
# Check the 8-byte PNG signature, then unpack the image dimensions from the bytes that follow.
if data[:8] == valid_png_header:
    width, height = struct.unpack('>LL', data[16:24]) # Width is unpacked from data[16:20], height from data[20:24]
    # >LL is format string that tells unpack how to interpret byte sequences and assemble into Python data types.
    # > means integers are stored in big-endian format
    # Each L specifies a four-byte, unsigned long integer
    print('Valid PNG, width', width, 'height', height)
else:
    print('Not a valid PNG.')

Valid PNG, width 154 height 141


In [57]:
data[16:20]

b'\x00\x00\x00\x9a'

In [61]:
data[20:24]

b'\x00\x00\x00\x8d'

In [62]:
0x9a

154

In [63]:
0x8d

141

In [64]:
# Convert Python data to bytes

import struct

struct.pack('>L', 154)

b'\x00\x00\x00\x9a'

In [65]:
struct.pack('>L', 141)

b'\x00\x00\x00\x8d'

In [66]:
# Use a count prefix instead of repeating the format character ('2L' means the same as 'LL')

struct.unpack('>2L', data[16:24])

(154, 141)

In [67]:
# Indirectly grab the 'interesting bytes'
# - Use big-endian integer format
# - Skip 16 bytes (16x)
# - Read 8 bytes, two unsigned long integers (2L)
# - Skip the final 6 bytes (6x)

struct.unpack('>16x2L6x', data)

(154, 141)

In [73]:
! pip install construct

Collecting construct
  Downloading construct-2.8.16.tar.gz (63kB)
[K    100% |████████████████████████████████| 71kB 350kB/s 
[?25hBuilding wheels for collected packages: construct
  Running setup.py bdist_wheel for construct ... [?25l- \ | / - \ | / - \ | / - \ done
[?25h  Stored in directory: /Users/johannaanderson/Library/Caches/pip/wheels/ce/d7/e3/20505fd7784811146af9dab9ca989121c9b7a8a92cfa5d5e91
Successfully built construct
Installing collected packages: construct
Successfully installed construct-2.8.16
[33mYou are using pip version 8.1.2, however version 9.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [79]:
import construct
# Magic and UBInt32 are not exported by the construct release installed above (2.8.16);
# that release's API provides Const and Int32ub for the same jobs.
from construct import Struct, Const, Int32ub

In [80]:
# Describe the first 24 bytes of a PNG file declaratively with construct.
# Written against the construct 2.8 API ('name' / subcon fields, Const, Int32ub),
# because the older Magic / UBInt32 names cannot be imported from the installed release.

from construct import Struct, Const, Int32ub

fmt = Struct(
    'signature' / Const(b'\x89PNG\r\n\x1a\n'),  # fixed 8-byte PNG signature
    'length' / Int32ub,                          # big-endian unsigned 32-bit integer
    'type' / Const(b'IHDR'),                     # the four bytes that must follow the length
    'width' / Int32ub,
    'height' / Int32ub,
)

In [81]:
# Convert between bytes and various string representations (e.g., hex)

import binascii

In [82]:
valid_png_header = b'\x89PNG\r\n\x1a\n'
print(binascii.hexlify(valid_png_header))

b'89504e470d0a1a0a'


In [83]:
print(binascii.unhexlify(b'89504e470d0a1a0a'))

b'\x89PNG\r\n\x1a\n'


In [84]:
# Things to do

In [90]:
# 7.1 "Create a Unicode string called mystery and assign it the value '\U0001f4a9'. Print mystery.
# Look up the Unicode name for mystery."

import unicodedata

mystery = '\U0001f4a9'
# Only a cell's last expression is displayed, so show the character and its name together.
mystery, unicodedata.name(mystery)

('💩', 'PILE OF POO')

In [91]:
# 7.2 "Encode mystery, this time using UTF-8, into the bytes variable pop_bytes. Print pop_bytes."

pop_bytes = mystery.encode('utf-8')
pop_bytes

b'\xf0\x9f\x92\xa9'

In [93]:
# 7.3 "Using UTF-8, decode pop_bytes into the string variable pop_string. Print pop_string.
# Is pop_string equal to mystery?"

pop_string = pop_bytes.decode('utf-8')
# Only a cell's last expression is displayed, so show the decoded string and the comparison together.
pop_string, pop_string == mystery

('💩', True)

In [96]:
# 7.4 "Write the following poem by using old-school formatting.
# Substitute the strings 'roast beef', 'ham', 'head', and 'clam' into [specified] string."

beef = 'roast beef'
ham = 'ham'
head = 'head'
clam = 'clam'

"My kitty cat likes %s, \
My kitty cat likes %s, \
My kitty cat fell on his %s, \
And now thinks he's a %s." % (beef, ham, head, clam)

"My kitty cat likes roast beef, My kitty cat likes ham, My kitty cat fell on his head, And now thinks he's a clam."

In [98]:
# Book answer for 7.4

poem = '''
    My kitty cat likes %s,
    My kitty cat likes %s,
    My kitty cat fell on his %s,
    And now thinks he's a %s.
    '''

args = ('roast beef', 'ham', 'head', 'clam')
print(poem % args)


    My kitty cat likes roast beef,
    My kitty cat likes ham,
    My kitty cat fell on his head,
    And now thinks he's a clam.
    


In [107]:
# 7.5 "Write a form letter by using new-style formatting."

letter = '''
    Dear {salutation} {name},
    
    Thank you for your letter. We are sorry that our {product} {verbed} in your {room}.
    Please note that it should never be used in a {room}, especially near any {animals}.
    
    Send us your receipt and {amount} for shipping and handling.
    We will send you another {product} that, in our tests, is {percent}% less likely to have {verbed}.

    Thank you for your support.
    
    Sincerely, 
    {spokesman}
    {job_title}
    '''

In [108]:
# 7.6 "Make a dictionary called response with values for the string keys 
# 'salutation', 'name', 'product', 'verbed' (past tense verb), 'room', 'animals', 
# 'amount', 'percent', 'spokesman', and 'job_title'. Print letter with values from response."

response = {'salutation':'Hello', 'name':'Valued Customer', 'product':'chemistry set', 'verbed':'exploded', 'room':'office', 'animals':'cats', 'amount':'$127.00', 'percent':'40', 'spokesman':'Rusty Flak', 'job_title':'Public Relations Manager'}
letter.format(**response)

'\n    Dear Hello Valued Customer,\n    \n    Thank you for your letter. We are sorry that our chemistry set exploded in your office.\n    Please note that it should never be used in a office, especially near any cats.\n    \n    Send us your receipt and $127.00 for shipping and handling.\n    We will send you another chemistry set that, in our tests, is 40% less likely to have exploded.\n\n    Thank you for your support.\n    \n    Sincerely, \n    Rusty Flak\n    Public Relations Manager\n    '

In [109]:
# 7.7 Create mammoth text string to practice regex from "Ode on the Mammoth Cheese" at http://bit.ly/mcintyre-poetry.

mammoth = '''
    We have seen thee, queen of cheese,
    Lying quietly at your ease,
    Gently fanned by evening breeze,
    Thy fair form no flies dare seize.

    All gaily dressed soon you'll go
    To the great Provincial show,
    To be admired by many a beau
    In the city of Toronto.

    Cows numerous as a swarm of bees,
    Or as the leaves upon the trees,
    It did require to make thee please,
    And stand unrivalled, queen of cheese.

    May you not receive a scar as
    We have heard that Mr. Harris
    Intends to send you off as far as
    The great world's show at Paris.

    Of the youth beware of these,
    For some of them might rudely squeeze
    And bite your cheek, then songs or glees
    We could not sing, oh! queen of cheese.

    We'rt thou suspended from balloon,
    You'd cast a shade even at noon,
    Folks would think it was the moon
    About to fall and crush them soon.
'''

In [114]:
# 7.8 "Import the re module to use Python's regular expression functions.
# Use re.findall() to print all the words that begin with c."

import re

c_words = re.findall(r'\bc\w*', mammoth)
c_words

['cheese', 'city', 'cheese', 'cheek', 'could', 'cheese', 'cast', 'crush']

In [118]:
# 7.9 "Find all four-letter words that begin with c."

c_four_words = re.findall(r'\bc\w{3}\b', mammoth)
c_four_words

['city', 'cast']

In [123]:
# 7.10 "Find all the words that end with r."

r_end_words = re.findall(r"\b[\w']*r\b", mammoth) # Book explanation for words with characters like apostrophes
r_end_words

['your', 'fair', 'Or', 'scar', 'Mr', 'far', 'For', 'your', 'or']

In [128]:
# 7.11 "Find all the words that contain exactly three vowels in a row."

vowels = re.findall(r'\b\w*[aeiou]{3}[^aeiou\s]*\w*\b', mammoth) # Book answer
vowels

['queen', 'quietly', 'beau', 'queen', 'squeeze', 'queen']

In [131]:
# 7.12 "Use unhexlify to convert [specified] hex string...to a bytes variable called gif."

import binascii

gif = binascii.unhexlify('47494638396101000100800000000000ffffff21f90401000000002c000000000100010000020144003b')
# Only a cell's last expression is displayed, so show the bytes and their length together.
gif, len(gif)

(b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\xff\xff\xff!\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00\x00\x02\x01D\x00;', 42)

In [135]:
# 7.13 "The bytes in gif define a one-pixel transparent GIF file, one of the most common graphics file formats.
# A legal GIF starts with the string GIF89a. Does gif match this?"

legal_gif = b'GIF89a'

if gif[:6] == legal_gif:
    print('Legal GIF.')
else:
    print('Not a legal GIF.')

Legal GIF.


In [138]:
# 7.14 "The pixel width of a GIF is a 16-bit big-endian integer beginning at byte offset 6,
# and the height is the same size, starting at offset 8. Extract and print these values for gif.
# Are they both 1?"

import struct

# '<' is indeed the little-endian specifier, and it is the right one here: gif[6:10] is
# 01 00 01 00, which reads as (1, 1) little-endian but would read as (256, 256) big-endian.
# The GIF format stores these 16-bit fields least-significant byte first, so the exercise
# text's "big-endian" appears to be an erratum rather than the code being wrong.
width, height = struct.unpack('<HH', gif[6:10]) # Book answer--why does it use what looks like little endian specifier?
width, height

(1, 1)