# Basic python language syntax

- Comments are marked by ( # )
- End-of-line terminates a statement
- ( ; ) can terminate a statement within a line

In [1]:
# This is the format of comments

midpoint = (1 + 1 + 1
            + 1 + 1)  # (...) for expression continued in the next line

lower = []; upper = [] # multiple statements on a single line separated by (;)

- Indentation before a statement denotes a code block
- Indented code blocks are preceded by ( : ) 
- White space within statement does not matter

In [2]:
# code blocks are indicated by indentation preceded by (:)
# so white space is important
for i in range(10):
    if (i < midpoint):
        lower.append(1*i)
    else:
        upper.append(    1    *   i        ) # white space within statement does not matter

# Variables and objects

- Variables are pointers assigned by ( = )
- A variable in Python is **not** a container of data, but a reference to a value
- Variables are *dynamically typed* (variables can point to objects of any type)

In [3]:
# assign integer 4 to x
x = 4

x = 'hello' # x becomes a string
x = [1, 2, 3] # x becomes a list

- Changing a mutable object changes all its references
- Assigning another value to a variable simply changes its pointing object

In [4]:
# y is bound to the same list object that x refers to (no copy is made)
y = x 
print('x:', x, '\ny:', y)

# mutating that shared list in place is visible through both names
x.append(4)
print('x:', x, '\ny:', y)

# rebinding x to a new object leaves y pointing at the original list
x = 'hello there'
print('x:', x, '\ny:', y)



x: [1, 2, 3] 
y: [1, 2, 3]
x: [1, 2, 3, 4] 
y: [1, 2, 3, 4]
x: hello there 
y: [1, 2, 3, 4]


- Python is an object-oriented programming language
- Types in python are linked to the objects themselves

In [5]:
x = 4
type(x)

int

In [6]:
y = 'hello there'
type(y)


str

In [7]:
z = 3.14159
type(z)

float

- Every entity has **metadata** (attributes) and **functionality** (methods)
- Attributes and methods are accessed by dot syntax (.)

In [8]:
# methods of a list object
L = [1, 2, 3]
L.append(100)
print(L)

[1, 2, 3, 100]


In [9]:
# attributes of a numerical type
x = 4.5
print(x.real, "+", x.imag, 'i')

# methods of a numerical type
y = 4.0

print(x.is_integer())
print(y.is_integer())

4.5 + 0.0 i
False
True


# Operators

## Arithmetic operations

In [10]:
# true division
print(11 / 2)

# floor division
print(11 // 2)

# modulus
print(11 % 2)

# exponentiation
print(11 ** 2)

5.5
5
1
121


## Self-update operations

In [11]:
a = 11; b = 2

# a = a + b = 11 + 2 = 13
a += b
print(a)

# a = a - b = 13 - 2 = 11
a -= b
print(a)

# a = a / b = 11 / 2 = 5.5
a /= b
print(a)

13
11
5.5


## Comparison and boolean operations

In [12]:
# check equivalent
print(25 % 2 == 1)
print(11 / 2 == 3)

# check non-equivalent
print(1 + 1 != 2)

# check range
print(15 < 25 < 30)

True
False
False
True


In [13]:
# combined with boolean operations
x = 4
print((x <= 6) and not (x > 10))
print((x == 3) or (x % 2 != 3))

True
True


## Identity and membership

In [14]:
# identity (is), (is not) operator
a = [1, 2, 3]
b = [1, 2, 3]
c = a

print('a == b?', a == b)
print('a is b?', a is b)
print('a is not b?', a is not b)
print('a is c?', a is c)

a == b? True
a is b? False
a is not b? True
a is c? True


In [15]:
# membership (in), (not in) operator
print('1 in [1, 2, 3]?', 1 in [1, 2, 3])
print('4 in [1, 2, 3]?', 4 in [1, 2, 3])
print('4 not in [1, 2, 3]?', 4 not in [1, 2, 3])

1 in [1, 2, 3]? True
4 in [1, 2, 3]? False
4 not in [1, 2, 3]? True


# Simple values

## Integers

- Integers in python are **variable-precision** so overflow would not happen
- True division ( / ) of two integers always returns a **float**, even when the result is a whole number; floor division ( // ) returns an integer

In [16]:
# division of two integers
print(5 / 2)

# division of two integers that returns another integer (floor division)
print(5 // 2)

2.5
2


## Floating-point numbers

- Can be defined either in standard decimal notation, or in exponential notation
- Floating-point arithmetic is approximate, and exact equality test is **not reliable**

In [17]:
x = 14000.00 # decimal notation
y = 1.4e4 # exponential notation
print(x == y)

True


In [18]:
# equality test fails on floating-point arithmetic
print(0.1 + 0.2 == 0.3)

False


## Complex numbers
- Use the j-suffix to indicate the imaginary part

In [19]:
c = 3 + 4j
print('real:', c.real)
print('imaginary:', c.imag)
print('conjugate:', c.conjugate())
print('magnitude:', abs(c))

real: 3.0
imaginary: 4.0
conjugate: (3-4j)
magnitude: 5.0


## String
- Can be expressed by ('...') or ("...")
- Many useful string-operation methods

In [20]:
a = "hello there"
b = "hi"

# length of the string
print('length:', len(a))

# make uppercase
print('make uppercase:', b.upper())

# capitalize
print('capitalize:', a.capitalize())

# combine
print('combine:', a + " " + b)

# multiplication
print('5x multiplication:', 5 * b)

# accessing individual character
print('4th letter:', a[4])

length: 11
make uppercase: HI
capitalize: Hello there
combine: hello there hi
5x multiplication: hihihihihi
4th letter: o


## None type
- Any function with no return value is returning (None)

In [21]:
# there is no return value for print()
return_value = print('hello there')
print(return_value)

hello there
None


## Boolean type
- With two possible values, (True) and (False) (must be capitalized)
- Can be constructed by bool()

In [22]:
# any numeric type is False if equal to 0, otherwise True
print('1111:', bool(1111))
print('0:', bool(0))

# bool is False if the string is empty and True otherwise
print('empty string:', bool(''))
print('a:', bool('a'))

# bool is False for empty sequences and True for non-empty ones
print('[]:', bool([]))
print('[1, 2, 3]:', bool([1, 2, 3]))

1111: True
0: False
empty string: False
a: True
[]: False
[1, 2, 3]: True


# Built-in data structures

## Lists
- *Ordered* and *mutable* collection
- Can contain objects of any types in a list

In [23]:
L = [2, 5, 7, 3]

# length of a list
print('length:', len(L))

# append a value to the end
L.append(11)
print('append 11:', L)

# combine lists
print('combine with [3, 5]:', L + [3, 5])

# sort with numerical order
L.sort()
print('sorted:', L)

length: 4
append 11: [2, 5, 7, 3, 11]
combine with [3, 5]: [2, 5, 7, 3, 11, 3, 5]
sorted: [2, 3, 5, 7, 11]


In [24]:
# append a string to the end
L.append('hello there')
print('append with string:', L)

append with string: [2, 3, 5, 7, 11, 'hello there']


### List indexing
- The 1st element from the beginning has index 0, and the one from the end has index -1

In [25]:
# access the 1st and 3rd element from the beginning in L
print('1st element:', L[0])
print('3rd element:', L[3])

# access the 2nd element from the end
print('2nd last element:', L[-2])

1st element: 2
3rd element: 7
2nd last element: 11


### List slicing

In [26]:
# slice from the start to a certain index
print('to 3rd element:', L[0:3])
print('neglecting 0 alternative:', L[:3])

# slice from the end
print('slice inversely to the last 3rd element:', L[-3:])

# select every 2nd element (skip 1)
print('skip 1:', L[::2])

# or every element in reverse
print('all in reverse:', L[::-1])

to 3rd element: [2, 3, 5]
neglecting 0 alternative: [2, 3, 5]
slice inversely to the last 3rd element: [7, 11, 'hello there']
skip 1: [2, 5, 11]
all in reverse: ['hello there', 11, 7, 5, 3, 2]


### Replace element

In [27]:
# replace one element at a certain index
L[0] = 300
print('1st element replaced:', L)

# replace a slice of element
L[1:3] = [8, 2]
print('1:3 replaced by [8, 2]:', L)

1st element replaced: [300, 3, 5, 7, 11, 'hello there']
1:3 replaced by [8, 2]: [300, 8, 2, 7, 11, 'hello there']


## Tuples
- Similar to lists but *immutable*
- Can be accessed by indexing and slicing
- Often used for functions with **multiple return values**

In [28]:
# can be defined with or without ()
a = (1, 2, 3)
b = 1, 2, 3
print('a == b?', a == b)

a == b? True


In [29]:
# function with multiple return values
x = 0.125
print(x.as_integer_ratio())

# define each return value individually
numerator, denominator = x.as_integer_ratio()
print('numerator:', numerator, '\ndenominator:', denominator)

(1, 8)
numerator: 1 
denominator: 8


## Dictionaries
- Flexible mapping of keys and values
- Created by writing a comma-separated list of `key:value` pairs inside curly braces `{}`
- Access a value by a key

In [30]:
# build a dictionary from key:value pairs
numbers = {'one':1, 'two':2, 'three':3}
print(numbers)

# access the value under the key 'two'
print(numbers['two'])

# add a new key:value pair
numbers['ninety'] = 90
print(numbers)

{'one': 1, 'two': 2, 'three': 3}
2
{'one': 1, 'two': 2, 'three': 3, 'ninety': 90}


## Sets
- *Unordered* collection of *unique* items
- Support mathematical set operations (union, intersection, difference, symmetric difference)

In [31]:
primes = {2, 3, 5, 7}
odds = {1, 3, 5, 7, 9}

# union of two sets
print(primes.union(odds))
# or
print(primes | odds)

# intersection of two sets
print(primes.intersection(odds))
# or 
print(primes & odds)

# difference
print(primes.difference(odds))
print(primes - odds)

# symmetric difference (items appearing in only 1 set)
print(primes.symmetric_difference(odds))
print(primes ^ odds)

{1, 2, 3, 5, 7, 9}
{1, 2, 3, 5, 7, 9}
{3, 5, 7}
{3, 5, 7}
{2}
{2}
{1, 2, 9}
{1, 2, 9}


# Control flow

## Conditional statement

In [32]:
x = -5

if x == 0:
    print(x, '= 0')
elif x > 0:
    print(x, '> 0')
elif x < 0:
    print(x, '< 0')
else:
    print(x, 'is something else')

-5 < 0


## For loop

In [33]:
# for loop of items in a list
for N in [2, 3, 5, 7]:
    print(N, end=' ') # print all on the same line

2 3 5 7 

In [34]:
# for loop in range 0-9
for i in range(10):
    print(i, end=' ')

0 1 2 3 4 5 6 7 8 9 

In [35]:
# in range 5-10
for i in range(5, 10):
    print(i, end=' ')

5 6 7 8 9 

In [36]:
# in range from 0 to 10 by 2
for i in range(0, 10, 2):
    print(i, end=' ')

0 2 4 6 8 

## While loop

In [37]:
i = 0
while i < 10:
    print(i, end=' ')
    i += 1

0 1 2 3 4 5 6 7 8 9 

## `break` and `continue`
- `break` statement breaks out of the loop entirely
- `continue` statement skips the remainder of the current loop and goes to the next iteration
- Can be used in for and while loops

In [38]:
# example of continue
# print a string of odd numbers
for n in range(20):
    # skip the rest of this iteration when n is even
    if n % 2 == 0:
        continue
    print(n, end=' ')

1 3 5 7 9 11 13 15 17 19 

In [39]:
# example of break
# find fibonacci numbers that do not exceed a_max
a, b = 0, 1
a_max = 100
L = []

while True:
    # advance one step along the sequence
    (a, b) = (b, a + b)
    if a > a_max:
        # stop as soon as the next number exceeds the limit
        break
    else:
        L.append(a)
print(L)

[1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89]


# Functions
- Functions can be called using parentheses
- Functions have a *name* and some *arguments*
- **Keyword arguments** are defined by name

In [40]:
# example of keyword argument
print(1, 2, 3, sep='--') # sep is one available keyword argument

1--2--3


## Defining functions
- Functions are defined by `def`
- Multiple return values are put in a tuple indicated by `,`

In [41]:
# defining a function
def fibonacci(N):
    """Return a list with the first N Fibonacci numbers, starting 1, 1, 2, ..."""
    previous, current = 0, 1
    sequence = []
    while N > len(sequence):
        # `current` is always the next number to emit
        sequence.append(current)
        previous, current = current, previous + current
    return sequence

In [42]:
print(fibonacci(10))

[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]


In [43]:
# returning multiple values
def real_imag_conj(val):
    """Return the real part, imaginary part and complex conjugate of val as a tuple."""
    conjugate = val.conjugate()
    # the commas build a single tuple, which the caller can unpack
    return val.real, val.imag, conjugate

r, i, c = real_imag_conj(3 + 4j)

print(r, i, c)

3.0 4.0 (3-4j)


## Default argument values
- Specifying values that are used most of the time in functions
- Giving the flexibility to change those values

In [44]:
# example of default argument values
def fibonacci_2(N, a=0, b=1):
    """Return N terms of a Fibonacci-style sequence seeded with a and b.

    With the defaults (a=0, b=1) this is the ordinary Fibonacci sequence.
    """
    terms = []
    while N > len(terms):
        # b is always the next term to emit; then slide the (a, b) window forward
        terms.append(b)
        a, b = b, a + b
    return terms

In [45]:
fibonacci_2(10) # using the default arguments

[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]

In [46]:
fibonacci_2(10, 0, 2) # changing the default b to 2

[2, 2, 4, 6, 10, 16, 26, 42, 68, 110]

In [47]:
fibonacci_2(10, b=2, a=0) # specifying by names, order does not matter

[2, 2, 4, 6, 10, 16, 26, 42, 68, 110]

## Flexible functions
- Used for functions whose number of arguments is not known in advance
- `*args` and `**kwargs` catch all arguments that are passed
- `*` before a variable means expanding it as a **sequence**
- `**` before a variable means expanding it as a **dictionary**
- Can also be used in a **function call**

In [48]:
# example of flexible function
def catch_all(*args, **kwargs):
    """Print the positional arguments (a tuple) and the keyword arguments (a dict)."""
    for label, captured in (("args =", args), ("kargs =", kwargs)):
        print(label, captured)

In [49]:
# use of catch_all
catch_all(1, 2, 3, a=4, b=5)

args = (1, 2, 3)
kargs = {'a': 4, 'b': 5}


In [50]:
# another usage
catch_all('a', keyword=2)

args = ('a',)
kargs = {'keyword': 2}


In [51]:
# usage in a function call
inputs = (1, 2, 3)
keywords = {'pi': 3.14}
catch_all(*inputs, **keywords)

args = (1, 2, 3)
kargs = {'pi': 3.14}


## Anonymous (lambda) functions
- Defining the function short and convenient with the `lambda` statement
- **Function is an object** in python, so it can be passed as arguments

In [52]:
# example of lambda function
add = lambda x, y: x + y
add(1, 2)

3

In [53]:
# equivalent to:
def add(x, y):
    return x + y
add(1, 2)

3

In [54]:
# using lambda function as an argument
# sorting a list of dictionaries by a certain key
data = [{'first':'Guido', 'last':'Van Rossum', 'YOB':1956},
        {'first':'Grace', 'last':'Hopper',     'YOB':1906},
        {'first':'Alan',  'last':'Turing',     'YOB':1912}]
# sorted alphabetically by first name
sorted(data, key=lambda item: item['first'])

[{'first': 'Alan', 'last': 'Turing', 'YOB': 1912},
 {'first': 'Grace', 'last': 'Hopper', 'YOB': 1906},
 {'first': 'Guido', 'last': 'Van Rossum', 'YOB': 1956}]

In [55]:
# or by the year of birth
sorted(data, key=lambda item: item['YOB'])

[{'first': 'Grace', 'last': 'Hopper', 'YOB': 1906},
 {'first': 'Alan', 'last': 'Turing', 'YOB': 1912},
 {'first': 'Guido', 'last': 'Van Rossum', 'YOB': 1956}]

# Errors and exceptions
- There are 3 basic types of errors:
    - **Syntax errors** for invalid code in python
    - **Runtime errors** for the syntactically valid code failing to execute
    - **Semantic errors** for not getting the expected result
    
## Runtime errors
- The main focus is to cleanly deal with **runtime errors**
- We can use `try` and `except` clause to handle runtime exceptions
- If an error is raised in the `try` statement, the error will be caught and the `except` statement will be executed
- Ambiguity comes when another exception comes up, so it's better to catch exceptions **explicitly**

In [56]:
# using try except with no error
# the try... block will execute and the except... block will not
try:
    print('a')
except:
    print('b')

a


In [57]:
# using try except with some errors
# 1 / 0 raises ZeroDivisionError inside the try... block, so nothing is printed there
# and the except... block runs instead
try:
    print(1 / 0)
except:
    print('b')

b


In [58]:
# check user's input within a function
def safe_divide(a, b):
    """Return a / b, or 0 if the division raises any exception."""
    try:
        return a / b
    except:  # bare except: catches every exception, not only ZeroDivisionError
        return 0

# check normal division
print(safe_divide(1, 2))
# check zero-division
print(safe_divide(1, 0))

0.5
0


In [59]:
# ambiguity comes when another exception comes up
# here the division raises TypeError rather than ZeroDivisionError,
# but the bare except catches it as well and silently returns 0
print(safe_divide(1, '2'))
print(safe_divide(1, 2))

0
0.5


In [60]:
# so we can catch exceptions explicitly
def safe_divide(a, b):
    """Return a / b, or 0 when b is zero; any other exception propagates."""
    try:
        quotient = a / b
    except ZeroDivisionError:
        # only zero-division is handled here
        return 0
    else:
        return quotient

In [None]:
# try except else finally
# (each suite needs at least one statement, so `pass` stands in for real code)
try:
    # do something
    pass
except:
    # happens if it fails
    pass
else:
    # happens if it succeeds
    pass
finally:
    # happens no matter what
    pass

## Raising exceptions
- Allows users to figure out what caused the errors
- Using `raise` statement to raise a customized exception

In [61]:
# raising an exception
raise RuntimeError("my error message")

RuntimeError: my error message

# Iterators

In [1]:
# the most basic iterator in python
for i in range(10):
    print(i, end=' ')

0 1 2 3 4 5 6 7 8 9 

- In Python 3, `range` is not a list but a lazy **iterable** object that produces its values one at a time when iterated over.
- We can also do iterations over a list.

## Iterating over lists

In [3]:
# iterating over lists
for value in [2,4,6,8,10]:
    # do something
    print(value + 1, end=' ')

3 5 7 9 11 

- The underlying mechanism for the human-readable iterators is that Python automatically checks whether it has an `iterator` interface. 
- We can check this property using `iter()` function.
- The `iter` object is also a container that provides the access to the next valid object, called by `next()`.

In [6]:
# check iterator
I = iter([2,4,6,8,10])
I

<list_iterator at 0x7f7efc888880>

In [7]:
# accessing next object
print(next(I))
print(next(I))

2
4


## Range

- `range()` function with size specified will return a `range` object.
- A full list is never explicitly created by using `range`, which saves lots of system memory.
- Python's `itertools` library contains a `count` function that acts as an **infinite range**.

In [10]:
# a range object
I_r = range(10)
I_r

range(0, 10)

In [12]:
# has a iterator
iter(I_r)

<range_iterator at 0x7f7efc888f90>

In [15]:
# iterating in range 10 ** 12
for i in range(10 ** 12):
    if i >= 10: break
    print(i, end=', ')
        

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 

In [16]:
# count from itertools
from itertools import count
for i in count():
    if i >= 10: break
    print(i, end=', ')
    
# if we didn't stop the loop with such a condition,
# it would keep going until we interrupt it, e.g., with ctrl-c

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 

## Useful iterators
### enumerate
- When we need to iterate values in an array and keep track of the index.
- Python also provides another cleaner version of enumeration.

In [17]:
# example of enumeration
L = [2,4,6,8,10]
for i in range(len(L)):
    print(i, L[i])

0 2
1 4
2 6
3 8
4 10


In [18]:
# cleaner version
for i, val in enumerate(L):
    print(i, val)

0 2
1 4
2 6
3 8
4 10


### zip
- Applies when we have multiple lists to iterate over simultaneously. `zip` does what its name suggests - zipping together iterables.
- Any number of iterables can be zipped together, and the shortest of them will determine the length of `zip`.

In [19]:
# example of zip
L = [2,4,6,8,10]
R = [3,6,9,12,15]
for lval, rval in zip(L, R):
    print(lval, rval)

2 3
4 6
6 9
8 12
10 15


### map and filter
- `map` iterator takes a function and applies it to the values in the iterator.
- `filter` only returns values for which the filter function evaluates to be true (the function being passed must return a boolean).

In [20]:
# example of map
# find the first 10 square number
square = lambda x: x**2
for val in map(square, range(10)):
    print(val, end=' ')

0 1 4 9 16 25 36 49 64 81 

In [21]:
# example of filter
# find values up to 10 for which x % 2 is 0
is_even = lambda x: x % 2 == 0
for val in filter(is_even, range(10)):
    print(val, end=' ')

0 2 4 6 8 

### Iterators as function argument
- The `*args` syntax can also work with any iterator.

In [22]:
# simple example of *args syntax
print(*range(10))

0 1 2 3 4 5 6 7 8 9


In [23]:
# compress the map example
print(*map(lambda x: x**2, range(10)))

0 1 4 9 16 25 36 49 64 81


In [26]:
# do the opposite of zip
L1 = [1,2,3,4]
L2 = ['a','b','c','d']
z = zip(L1, L2)
print(*z)

z = zip(L1, L2)
new_L1, new_L2 = zip(*z)
print(new_L1, new_L2)

(1, 'a') (2, 'b') (3, 'c') (4, 'd')
(1, 2, 3, 4) ('a', 'b', 'c', 'd')


## Specialized iterators: itertools
- The `itertools` module contains a whole host of useful iterators.

In [27]:
# itertools.permutations 
# iterates over all permutations in a sequence.
from itertools import permutations
p = permutations(range(3))
print(*p)

(0, 1, 2) (0, 2, 1) (1, 0, 2) (1, 2, 0) (2, 0, 1) (2, 1, 0)


In [28]:
# itertools.product
# iterates over all ordered pairs drawn from the given iterables (Cartesian product)
from itertools import product
p = product('ab', range(3))
print(*p)

('a', 0) ('a', 1) ('a', 2) ('b', 0) ('b', 1) ('b', 2)


# List comprehensions
- List comprehensions are a way to compress a list-building `for` loop into a short readable line.
- The basic syntax is `[expr for var in iterable]`, where `expr` is any valid expression, `var` is a variable name, and `iterable` is any iterable object. 

In [6]:
# constructing a list of the first 12 perfect squares
# normal looping
L = []
for n in range(12):
    L.append(n ** 2)
L

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121]

In [7]:
# the equivalent comprehension
[n ** 2 for n in range(12)]

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121]

## Multiple iteration
- Multiple iteration applies when you want to build a list from two values. 
- To achieve this, add another `for` along with a new variable.

In [8]:
# iteration with two variables
[(i,j) for i in range(2) for j in range(3)]

[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]

- Note that the second `for` acts as the inner loop, so its index (`j`) varies the fastest.
- The same applies to iterations with more than 2 variables.

## Conditionals on the iterator
- We can further control the iteration by adding a conditional to the end of expression. 

In [9]:
# producing a list of numbers excluding multiples of 3
[i for i in range(20) if i % 3 > 0]

[1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16, 17, 19]

In [11]:
# equivalent loop syntax
L = []
for val in range(20):
    if val % 3:
        L.append(val)
L

[1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16, 17, 19]

## Conditionals on the value

In [12]:
# a simple example
val = -10
val if val >= 0 else -val

10

- Very similar to the `int absval = (val < 0) ? -val : val` operation in Java.
- This operation defines the value of a variable on some conditions. This works well with list comprehensions.

In [14]:
# constructing a list of numbers below 20, excluding multiples of 3 and negating even numbers
[val if val % 2 else -val for val in range(20) if val % 3]

[1, -2, -4, 5, 7, -8, -10, 11, 13, -14, -16, 17, 19]

- We can also create a **set comprehension** in similar way.
- Since sets contain no duplication, this applies to comprehensions as well.

In [15]:
# example of set comprehension
{n**2 for n in range(12)}

{0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121}

In [16]:
# no duplication
{a % 3 for a in range(1000)}

{0, 1, 2}

- Adding a `:` would allow us to create a **dict comprehension**.

In [17]:
# example of dict comprehension
{n:n**2 for n in range(6)}

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

# Generators
- A generator expression is essentially a **list comprehension** in which elements are generated **as needed** rather than all at once. They are similar, but not totally the same.

In [18]:
# list comprehension
[n ** 2 for n in range(12)]

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121]

In [19]:
# generator expression
(n ** 2 for n in range(12))

<generator object <genexpr> at 0x7ffd128582e0>

- Printing the generator does not print its contents. 
- To print its content, we can pass it to the `list` constructor.

In [20]:
G = (n ** 2 for n in range(12))
list(G)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121]

- A list is a collection of values, while a generator is a **recipe of producing values**.
- A generator does not actually compute the values until they are needed.
- The benefits of generator relates to memory efficiency and computational efficiency. The size of a generator expression can be unlimited.
- In iteration, both lists and generator expressions act the same.

In [22]:
# list in iteration
L = [n ** 2 for n in range(12)]
for val in L:
    print(val, end=' ')
    
# generator in iteration
G = (n ** 2 for n in range(12))
for val in G:
    print(val, end=' ')

0 1 4 9 16 25 36 49 64 81 100 121 0 1 4 9 16 25 36 49 64 81 100 121 

In [24]:
# example of an infinite iterator: itertools.count()
# it would go forever if we do not stop it
from itertools import count
for i in count():
    print(i, end=' ')
    if i >= 10: break

0 1 2 3 4 5 6 7 8 9 10 

In [25]:
# another example using count()
# a generator of numbers not divisible by any of the listed factors
# (these are prime candidates rather than guaranteed primes: 1 is included)
factors = [2, 3, 5, 7]
G = (i for i in count() if all(i % n > 0 for n in factors))
for val in G:
    print(val, end=' ')
    if val > 40: break

1 11 13 17 19 23 29 31 37 41 

- A list can be iterated multiple times, whereas a generator expression is **single use**.
- This property is useful since we can pause the iteration and restart it somewhere.

In [26]:
# use G for one iteration
G = (n ** 2 for n in range(12))
list(G)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121]

In [28]:
# cannot be used for the second iteration
list(G)

[]

In [31]:
# pausing the iteration and restart it later
G = (n ** 2 for n in range(12))
for n in G:
    print(n, end=' ')
    if n > 30: break
print("\ndoing something in between")
for n in G:
    print(n, end=' ')

0 1 4 9 16 25 36 
doing something in between
49 64 81 100 121 

## Generator functions: using `yield` statement
- When constructing more complicated generators, we use **generator functions** with `yield` statement.
- Similar to lists, there are two ways to construct equivalent generators.

In [32]:
# one way
G1 = (n ** 2 for n in range(12))

# another way
def gen():
    """Yield the squares of 0 through 11, one value at a time."""
    n = 0
    while n < 12:
        yield n ** 2
        n += 1
G2 = gen()

print(*G1)
print(*G2)

0 1 4 9 16 25 36 49 64 81 100 121
0 1 4 9 16 25 36 49 64 81 100 121


- Instead of returning a single value with `return`, a generator function uses `yield` to yield a (possibly infinite) sequence of values.
- Such generator function preserves all properties of a generator expression as above.

## Example: prime number generator
- Use a generator function to construct a series of prime numbers (bounded here by the argument `N`).
- The algorithm is inspired by the Sieve of Eratosthenes: each candidate starting from 2 is kept only if it is not a multiple of any prime found so far, so eventually only primes are left.

In [33]:
# generates primes below N
def gen_primes(N):
    """Yield each prime smaller than N in increasing order.

    A candidate is accepted when no previously accepted prime divides it.
    """
    found = set()
    for candidate in range(2, N):
        if any(candidate % p == 0 for p in found):
            continue
        found.add(candidate)
        yield candidate
print(*gen_primes(70))

2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67


# Modules and packages
- Modules and packages are one feature in Python providing more specified functionality.

## Loading modules: `import` statement
- `import` statement is used to load built-in and third-party modules.
- There are a few ways to use the statement, ranked from the most recommended to the least.

### Explicit module import
- Explicit import preserves the module's content in a namespace.
- The namespace is used to refer to its content with a `.`.

In [34]:
# example of explicit module import
# and the use of namespace
import math
math.cos(math.pi)

-1.0

### Explicit module import by alias
- For longer module names, it is more convenient to create a shorter alias for the namespace.
- This feature is commonly used by `import ... as ...`.

In [35]:
# example of import by alias
import numpy as np
np.cos(np.pi)

-1.0

### Explicit import of module contents
- We use `from ... import ...` when we only want to import a few functions from a module.
- In this way, we don't need to refer to the module's namespace (the names are placed in the local namespace).

In [36]:
# example
from math import cos, pi
cos(pi)

-1.0

### Implicit import of module contents
- We use `from ... import *` when we want to import the entire module content into the local namespace.
- The problem is that such import sometimes overwrites function names you don't intend to.

In [37]:
# example
from math import *
sin(pi) ** 2 + cos(pi) ** 2

1.0

# String manipulation and regular expressions
- Multi-line strings can be defined by surrounding the text with triple quotes (`"""`).

In [38]:
# multiple line of string
multipleline = """
    one
    two
    three
    """

## Simple string manipulation
- Python provides convenient built-in string operating methods.

### Adjusting case

In [44]:
text = "hahaHAhAh hAh HaaA"
# upper()
text.upper()

'HAHAHAHAH HAH HAAA'

In [45]:
# lower()
text.lower()

'hahahahah hah haaa'

In [47]:
# title()
text.title()

'Hahahahah Hah Haaa'

In [48]:
# capitalize()
text.capitalize()

'Hahahahah hah haaa'

In [49]:
# swapcase()
text.swapcase()

'HAHAhaHaH HaH hAAa'

### Adding & removing spaces
- The most basic method is `strip()`, which removes the white spaces (or other characters) from the beginning and end of a string.
- We can control removing which side by `lstrip()` and `rstrip()`.

In [51]:
line = '      content with space     '
# use of strip()
line.strip()

'content with space'

In [52]:
# lstrip()
line.lstrip()

'content with space     '

In [53]:
# rstrip()
line.rstrip()

'      content with space'

In [55]:
# stripping characters off
num = "00010392000"
num.strip('0')

'10392'

- The opposite operation of `strip()` is `center()`, `ljust()` and `rjust()`, which adds spaces or other characters in some specified locations.

In [56]:
# center() adds spaces around the given string
line = "this is the content"
line.center(30)

'     this is the content      '

In [57]:
# ljust() adds spaces or characters from right
line.ljust(30, '0')

'this is the content00000000000'

In [60]:
# rjust() does the same from left
line.rjust(30, '0')

'00000000000this is the content'

In [61]:
# The same operation of adding 0s can be achieved by zfill()
line.zfill(30)

'00000000000this is the content'

### Finding and replacing substrings
- `find()` and `index()` function very similarly by returning the index of the **first occurrence from the start** of a certain pattern within the string. The only difference is `find()` returns -1 if the pattern isn't found, whereas `index()` raises an error.
- `rfind()` and `rindex()` act the same except they search for the **first occurrence from the end** (i.e. the last occurrence).

In [67]:
line = 'This is a line of meaningless words line'
# use of find()
line.find('line')

10

In [68]:
# find() with patterns do not exist
line.find('benwilliams')

-1

In [69]:
# use of index()
line.index('line')

10

In [70]:
# index() with non-existing pattern
line.index('prezbenwilliams')

ValueError: substring not found

In [71]:
# use of rfind() (rindex() does the same)
line.rfind('line')

36

- In addition, `replace()` can replace a given sub-string with a new one. 
- A more flexible way of replacing strings can refer to regular expressions.

In [73]:
# use of replace to substitute 'line' by 'prezbenwilliams'
line.replace('line', 'prezbenwilliams')

'This is a prezbenwilliams of meaningless words prezbenwilliams'

### Splitting and partitioning string
- `partition()` and `split()` help us find a sequence of sub-strings given some string.
- `partition()` returns a tuple `(string-before-split, split-point, string-after-split)`.
- `rpartition()` does the same but searches from the end of the string.

In [76]:
# use of partition()
line.partition('line')

('This is a ', 'line', ' of meaningless words line')

In [77]:
# rpartition() does things from another direction
line.rpartition('line')

('This is a line of meaningless words ', 'line', '')

- `split()` finds all instances of the split point and returns the substrings in between in a `list`.
- By default, `split()` separates a string at whitespace.

In [78]:
# use of split()
line.split()

['This', 'is', 'a', 'line', 'of', 'meaningless', 'words', 'line']

- `splitlines()` acts similarly except it only splits between lines.

In [80]:
# use of splitlines()
mulline = """hhhhaaaa
jjjjjaaaaa
bbbbabiala
"""
mulline.splitlines()

['hhhhaaaa', 'jjjjjaaaaa', 'bbbbabiala']

- `join()` undoes a split and joins a list with a given separator.

In [81]:
# use of join()
'--'.join(['1', '2', '3'])

'1--2--3'

In [87]:
# use \n to join with multiple lines
print(
    '\n'.join(['this is the first line', 'second line', '来自黑暗寒冬的随从们、仆从们、士兵们，听从克尔苏加德的召唤', 'Déjà vu']))

this is the first line
second line
来自黑暗寒冬的随从们、仆从们、士兵们，听从克尔苏加德的召唤
Déjà vu


## Format strings
- Sometimes we need to transform other value types to strings, using `str()`.

In [88]:
# transform a double to string
pi = 3.14
str(pi)

'3.14'

In [92]:
# string combination
str(pi) + " hhhhha " + "yes"

'3.14 hhhhha yes'

- A more flexible way to achieve this is to use **format strings**, which place the specified values into special markers `{}`.
- For multiple markers, we can use index or key as the markers.
- In addition, for numerical values, we can specify the format.

In [93]:
# example of format strings
"The value of pi is {}".format(pi)

'The value of pi is 3.14'

In [94]:
# use index as markers
"First letter: {0}, last letter: {1}".format('A', 'Z')

'First letter: A, last letter: Z'

In [95]:
# use key as markers
"First letter: {first}, last letter: {last}".format(last='Z', first='A')

'First letter: A, last letter: Z'

In [97]:
# specify the format of numerical values
# the .3f encodes the desired digits beyond decimal
"pi = {0:.3f}".format(pi)

'pi = 3.140'

## Flexible pattern matching with regular expressions
- The built-in regular expression allows us to do more flexible string manipulations.
- Regular expressions are fundamentally a means of **flexible pattern matching** in strings.
- The regular expression is contained in the built-in `re` module.

In [99]:
# import the regular expression module
import re

In [100]:
# compile a regular expression
# here \s stands for any whitespace character
# and + stands for one or more of the entity preceding it
# (a raw string passes the backslash to the regex engine without an invalid-escape warning)
regex = re.compile(r'\s+')

# split() returns the list of substrings found between matches of the pattern
regex.split(line)

['This', 'is', 'a', 'line', 'of', 'meaningless', 'words', 'line']

- `match()` can tell whether the beginning of a string matches the regular expression.

In [101]:
for s in ["   ", "abc  ", "    abc"]:
    if regex.match(s):
        print(repr(s), "matches")
    else:
        print(repr(s), "not matches")

'   ' matches
'abc  ' not matches
'    abc' matches


- `search()` can find the index of the first pattern matching the regex.

In [102]:
# example of search()
regex = re.compile('line')
match = regex.search(line)
match.start()

10

- `sub()` operates like replace in string. 

In [104]:
# example of sub()
regex.sub('LINE', line)

'This is a LINE of meaningless words LINE'

### More advanced usage

In [110]:
# a more complex example: matching email address
# (raw string so that \w and \. reach the regex engine without an invalid-escape warning)
email = re.compile(r'\w+@\w+\.[a-z]{3}')
text = "ihisfsefiseeoisefhi try kjhsddfkjh@gmail.com or khfkbsdfsef@ubc.com"
email.findall(text)

['kjhsddfkjh@gmail.com', 'khfkbsdfsef@ubc.com']

In [112]:
# do some further operations, like substitution
email.sub('--@--.--', text)

'ihisfsefiseeoisefhi try --@--.-- or --@--.--'

### Basics of regular expression syntax
- If the regex is built with simple characters or strings, there would be simply matching for that exact string.

In [113]:
# simple string matching
regex = re.compile('ion')
regex.findall('great lion expectations')

['ion', 'ion']

- Some characters have special meanings.
- If we don't want them to be converted, use `\` before them.
- Adding `r` as a preface indicates a **raw string**, meaning that the special characters will not be converted.

In [116]:
# usage of \ to escape
regex = re.compile(r'\$')
regex.findall("the cost is $20")

['$']

In [119]:
# here a tab is indicated by \t
print('a\tb\tc')
# raw string
print(r'a\tb\tc')

a	b	c
a\tb\tc


- More about regex can be found [here](https://github.com/ziishaned/learn-regex/blob/master/translations/README-cn.md).