# Cool features that make Python more friendly than other languages

Copyright 2017-2023 Forrest Sheng Bao http://fsbao.net

## Line breaks and semicolons


In [51]:
a = 1; b = 2   
if a > 0: 
    print ("a is positive") ; print("1234")
print(a, b)

a is positive
1234
1 2


## Chain assignments

In [42]:
d=10
a, b= 1, 2
x=y=z=10 # x = 10, y = 10 , z = 10, non-inertial 

## Docstrings, doctest, and automatic documentation

In [20]:
def foo(x):
    """Compute the square of integer x.

    Parameters
    ----------
    x : integer
        The number to be squared.

    Returns
    -------
    y : integer
        The square of x.

    Examples
    --------
    >>> foo(2)
    4
    >>> foo(-3)
    9
    """
    # Bind the result before returning it, so the name matches the
    # "Returns" section of the docstring.
    y = x * x
    return y

# Doctest and good practices

* Linux has succeeded not because the original goal was to make it widely portable and widely available, but because it was based on good design principles and a good development model. -- Linus Torvalds, "The Linux edge", Open Sources: Voices from the Open Source Revolution, O'Reilly, 1999 
* Modularize 
* Test each module before 

Why do we define functions? 
* For reusing a snippet of code -- save me from re-typing
* For others to pay to use your function -- so I can be rich and be a happy professor
* For making debugging easier -- aim small, miss small 

In [33]:
def f(a, b, c):
    '''Divide the sum of a and b by c.

    Examples
    ------------

    >>> f(1,2,3)
    1
    >>> f(3,4,5)
    4567
    '''
    # NOTE: the second example's expected value (4567) is not what this
    # function returns; the doctest run recorded in this notebook reports
    # that example as a failure.
    numerator = a + b
    return numerator / c

In [34]:
# Doctest is one way to test from examples in docstrings
import doctest 
doctest.run_docstring_examples(f, globals())



#if __name__ == '__main__':
#    import doctest
#    doctest.testmod()


**********************************************************************
File "__main__", line 20, in NoName
Failed example:
    f(3,4,5)
Expected:
    4567
Got:
    1


## Printing tricks: f-strings, `end`, and formatted output

In [55]:
# `x` needs a value before it can be formatted; 4 reproduces the output
# recorded for this cell (2.0, 2 and 0).
x = 4
print (f"The half of x is {x/2}")
print (f"The half of x is {x//2}") # quotient
print (f"The half of x is {x%2}")  # remainder

The half of x is 2.0
The half of x is 2
The half of x is 0


In [59]:
print (f"{100/3: .4f}") # control precision

 33.3333


In [62]:
print ("{0}, {1}".format("hello", "world"))

# use dictionary 
print ("{key1}, {key2}".format(key1="hello", key2="world"))

hello, world
hello, world


In [None]:
for i in range(5):
    print (i, end=' --> ')
    # print (a[i])

0 --> 1 --> 2 --> 3 --> 4 --> 

## Lists and for-loops

Element-wise iteration 

In [45]:
x = 0 
a =[1,2,3]
for i in a:
    x += i
print (x)
print (i)

6
3


### List and dictionary comprehension


In [68]:
a = [i**2 for i in range(10)]

print (a)

a = [i for i in range(10) if i%2 == 0]

print (a)

a = [i if i%2 == 0 else 0 for i in range(10) ] # note the order of conditions and for changes

print (a)

# add a branch to the else statement

a = [i if i%2 == 0 else 0 for i in range(10) if i%3 == 0] # note the order of conditions and for changes

print (a) 

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
[0, 2, 4, 6, 8]
[0, 0, 2, 0, 4, 0, 6, 0, 8, 0]
[0, 0, 6, 0]


In [70]:
a = {i: i**2 for i in range(10)}

print (a)

print (a[3])

print (a.get(100, "not found"))

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81}
9
not found


### `setdefault` for dictionaries

In [75]:
x= {}

# setdefault returns x[1], creating it as [] first when the key is missing,
# so the append always has a list to work on.
x.setdefault(1, []).append(1)

print (x)

# equivalent to

if 1 not in x:
    x[1] = []
# The append sits outside the `if`: it must run whether or not the key
# already existed, exactly like the setdefault one-liner.
x[1].append(1)

{1: [1]}


### Iterators 

Iterators are powerful tools for iterating over a sequence of elements. They are useful when you want to iterate over a large sequence of elements without storing all of them in memory.

A use case: https://github.com/forrestbao/DL4SC/blob/refactor/pass-config/preprocessing.py

In [76]:
def foo():
    """Generate the integers 0 through 9, one value per step."""
    current = 0
    while current < 10:
        yield current
        current += 1

x = next(foo())
print (x)

0
0


In [6]:
for x in foo(): 
    print (x)

0
1
2
3
4
5
6
7
8
9


### `tqdm`

TQDM is a 3rd party package that allows you to easily add a progress bar to your code. 

In [77]:

from tqdm import tqdm
from time import sleep
for i in tqdm(range(50)):
    sleep(0.1)
    pass

100%|██████████| 50/50 [00:05<00:00,  9.88it/s]


### `enumerate`

In [59]:
a=[11,12,13]
for index, element in enumerate(a): 
    print (index, element)


0 11
1 12
2 13


## Copy and deep copy 
In Python, assigning some types of objects (e.g., lists) only creates a reference, not an actual copy.

In [79]:
x = [10, 20]
y = x 

In [80]:
x[1] = 100

In [81]:
y  # y is changed when x is changed

[10, 100]

In [83]:
y = x[:] # copy the list
x [0] = 1000
print (x, y)

[1000, 100] [10, 100]


In [19]:
import copy # a python native module 
a =[1,2,3,4,5,6]
for i in  copy.deepcopy(a):
    print (i)
    a.remove(i)

1
2
3
4
5
6


## Tuples

In [28]:
a=[1,2,3] # mutable
b=(4,5,6) # immutable

TypeError: can only concatenate tuple (not "int") to tuple

### `zip`

In [42]:
c = tuple(zip([1,2,3], [4,5,6], [7,8,9, 10])) 
print (c)
d= list(zip(*c))
print (d)

((1, 4, 7), (2, 5, 8), (3, 6, 9))
[(1, 2, 3), (4, 5, 6), (7, 8, 9)]


## Dictionary methods `keys()`, `values()`, `items()`

In [84]:
gpa={"yuanzhi":3.7, "gupta":3.5, "john":3.6}
print (gpa["john"])
# items() yields (key, value) pairs, so both can be unpacked in the loop header
for name, grade in gpa.items():
    print (name, "'s GPA is: ", grade)
print (gpa.keys())
print (gpa.values())
print (gpa.items())

3.6
yuanzhi 's GPA is:  3.7
gupta 's GPA is:  3.5
john 's GPA is:  3.6
dict_keys(['yuanzhi', 'gupta', 'john'])
dict_values([3.7, 3.5, 3.6])
dict_items([('yuanzhi', 3.7), ('gupta', 3.5), ('john', 3.6)])


## Functions

In [85]:
def foo(a, b):
  """Return the floor quotient and the remainder of a divided by b, as a pair."""
  quotient = a // b
  remainder = a % b
  return quotient, remainder
quotient, remainder = foo(70, 12)
print (quotient)
print (remainder)

5
10


In [64]:
def foo(a,b):
  """Return a + b."""
  total = a + b
  return total
foo(1,2)

3

### Multiple returns
A function with multiple return variables actually returns a tuple. 

In [5]:
def foo(x):
    """Return x + 1 and x + 2 together, packed into a single tuple."""
    first = x + 1
    second = x + 2
    return (first, second)

print (foo(2))
print (foo(2)[0])
print (foo(2)[1])

(3, 4)
3
4


### positional (mandatory) and optional (keyword) arguments 

Positional arguments have no default values; hence, they are mandatory. 
Keyword arguments have default values; hence, you can omit them when calling the function, or override their defaults. 

In [87]:
def foo(x, y=1, z=3):
    """Return x + y + z; y defaults to 1 and z defaults to 3."""
    total = x + y
    total = total + z
    return total

print (foo(2))
print (foo(2, 5))
print (foo(2, z=9, y=100))

6
10
111


In [88]:
kwargs = {"y": 1, "z": 3}
print (foo(2, **kwargs))

6


In [None]:
def bar(a, b, third=1, fourth=0):
    """Return a + b + third when third is truthy, otherwise a + b.

    fourth is accepted but never read.
    """
    # Truthiness test: None, 0 and other falsy values all skip the extra
    # term, which is not the same as checking `third != None`.
    if not third:
        return a+b
    return a+b+third
print (bar(1,2, third=None))
print (bar(1,2,  fourth=1000, third=-1))

3
2


### * and **

In [62]:
# def bar2(a,b,c,d,e,f, v=1, w=2, q=3, l=5): 
def bar2(*must, **optional):
    """Add up every positional value, print that subtotal, then add every
    keyword value and return the grand total."""
    running = 0
    for positional_value in must:
        running += positional_value
    print (running)
    # Only the values matter here; the keyword names are not used.
    for keyword_value in optional.values():
        running += keyword_value
    return running

# print (bar2(1,2,3,4, v=1 , w=2, ))

def bar3(*args, **kwargs):
    """Print the packed positional arguments, then the packed keyword arguments."""
    for packed in (args, kwargs):
        print (packed)

print (bar3(1,2,3,4, v=1 , w=2, ))

(1, 2, 3, 4)
{'v': 1, 'w': 2}
None


Behind the scenes: positional arguments are packed into a tuple and keyword arguments are packed into a dictionary.  

In [12]:
bar2(*(1,2), **{"x":3, "y":4})

3


10

### nested functions 

In [13]:
def foo(a,b):
    """Return (a // b, a % b), computed by two helpers nested inside foo."""
    # bar and hoo are local names of foo: they are created on each call
    # and cannot be reached from outside the function.
    def hoo(a,b):
        return a%b
    def bar(a,b):
        return a//b
    quotient = bar(a,b)
    remainder = hoo(a,b)
    return quotient, remainder
foo(123,7)
hoo(123, 7)

NameError: name 'hoo' is not defined

### Type hints
Note that the Python interpreter does not enforce type hints. You need to use other tools to check them. 


In [65]:
def foo(a:int, b:int) -> int:
    """Return a + b. The annotations are hints only; nothing in this function checks them."""
    result = a + b
    return result
foo(1,2)
foo("1233", "345")

'1233345'

## Functional programming 
### `map` and dictionary/list/tuple comprehension

In [68]:
def sqrt(a):
    """Return a raised to the power 2.

    NOTE(review): despite its name, this computes the square of a, not its
    square root.
    """
    squared = a**2
    return squared

print ( [  sqrt(x)    for x in range(5)   ] )

print (list( map(sqrt, [1,2,3])))

[0, 1, 4, 9, 16]
[1, 4, 9]


### lambda (anonymous) function


In [93]:
print (
    (lambda x, y : x+y) (10, 11), 
    (lambda x, y : x+y) ("hello, ", "world")
)
# (lambda x, y : x+y) ("hello", 1)

21 hello, world


### `reduce`

In [95]:
def plus(a,b):
    """Return the sum a + b."""
    total = a + b
    return total

import functools 
print (functools.reduce (plus,  [1,2,3,4]))
print (functools.reduce (lambda x, y: (x,y) ,  [1,2,3,4]))

10
(((1, 2), 3), 4)


### dot product using map and reduce 

In [96]:
a=[1,2,3]; b = [1,2,2]

import operator # python built-in 
print (list(map(operator.mul, a, b )))
print (functools.reduce(operator.add, map(operator.mul, a, b ) ))

[1, 4, 6]
11


### starmap

In [14]:
def foo(a, b):
    """Return a + b."""
    combined = a + b
    return combined

# standard map 
A, B = [1,2,3,4], [0.1, 0.2, 0.3, 0.4]
print (list(map(foo, A, B)))

# star map 
import itertools
print (list(zip(A, B))) # pair elements of A and B together 
list(itertools.starmap(foo, zip(A, B)))

[1.1, 2.2, 3.3, 4.4]
[(1, 0.1), (2, 0.2), (3, 0.3), (4, 0.4)]


[1.1, 2.2, 3.3, 4.4]

In [None]:
print (list(zip([1,2,], [3,4,5])))
# map() passes one element from each iterable per call, so three iterables
# need a three-argument function; like zip(), it stops at the shortest one.
sums = list(map(lambda a, b, c: a+b+c, [1,3], range(10, 15), range(100, 105)))
print (sums)

[(1, 3), (2, 4)]
[111, 115]


### What if I wanna set the same value for keyword arguments? `partial`


In [97]:
def  foo(a, b):
    """Print a and b on one line, separated by a space."""
    pair = (a, b)
    print (*pair)
foo2 = functools.partial(foo, b=10)
list(map(foo2, [1,2,3]))

1 10
2 10
3 10


[None, None, None]

In [None]:
print ( list(map(lambda x:x**0.5, [1,2,34,89]))) 
print (list(map(lambda x, y: x+y , [1,2,3,4], [5,6,7,8])))
print (list(map(lambda x, y: (x, y) , [1,2,3,4], [5,6,7,8])))
print (list(map(lambda x, y, z: x+y+z , [1,2,3,4], [5,6,7,8], [-1, -2, -3, -4])))

[1.0, 1.4142135623730951, 5.830951894845301, 9.433981132056603]
[6, 8, 10, 12]
[(1, 5), (2, 6), (3, 7), (4, 8)]
[5, 6, 7, 8]


## Exception handling 

In [100]:
def foo(a,b):
    """Print a / b, or a short message when the division cannot be done.

    Nothing is returned. A TypeError, ZeroDivisionError or ValueError raised
    by the division is handled here and does not reach the caller.
    """
    try:
        # Keep only the operation that can fail inside the try body.
        quotient = a/b
    except TypeError:
        print  ("wrong type")
    except ZeroDivisionError:
        print ("you don't know math")
    except ValueError:
        # Deliberately ignored: a ValueError produces no output.
        pass
    else:
        # Runs only when the division raised nothing.
        print (quotient)

foo(1, 2)
foo("a", "b")
foo(1, 0)

0.5
wrong type
you don't know math


## `eval` and `exec` 

In [104]:
x = 1 

print (eval("1+x")) # an expression only, no statements, return the value of the expression
y = eval("5+2")
print (y)

z = exec("a=1;b=2; a+b")  # no return 
print (a)
print (b)

2
7
1
2


## Classes and OOP

Finetuning models means you inherit the base class and override some methods. https://github.com/huggingface/transformers/blob/main/src/transformers/models/bert/modeling_bert.py

In [107]:
class experiments:
    """A toy experiment object that stores a url and an epoch count."""

    def __init__(self, url, epochs): # constructor
        self.epochs = epochs
        self.url = url

    def __str__(self): # lets you print a readable summary of an object
        return f"Run experiment {self.epochs} times from {self.url}"

    # def __repr__(self):
    #     return f"Run experiment {self.epochs} times from {self.url}"

    def one_more_epoch(self):
        """Increase the epoch count by one."""
        self.epochs += 1

    def run(self):
        """Add one epoch, then print the updated epoch count."""
        self.one_more_epoch()
        print (self.epochs)

exp1 = experiments("kaggle.com", 2)
exp1.run()
print (exp1)

3
Run experiment 3 times from kaggle.com


In [None]:
class advancedExperiments(experiments): # inheritance
    """Subclass of experiments that adds the runrun() method."""

    def __init__(self, url, epochs):
        # Hand both arguments to the parent class constructor.
        super().__init__(url, epochs)  ## note the super

    def runrun(self):
        """Print the url and a fixed message once per epoch."""
        for _ in range(self.epochs):
            print (self.url)
            print ("thjis is advanced")

exp2 = advancedExperiments("loser.rocks", 4)
exp2.runrun()

loser.rocks
thjis is advanced
loser.rocks
thjis is advanced
loser.rocks
thjis is advanced
loser.rocks
thjis is advanced


## Parallelization

Another lib for embarrassingly parallel workloads: `joblib` https://joblib.readthedocs.io/en/latest/parallel.html

In [80]:
def foo(x):
    """Add 0.1 to x and return the square of the result."""
    x += 0.1
    squared = x * x
    return squared

import multiprocessing
with multiprocessing.Pool() as p:
    p.map(foo, range(10000))

<multiprocessing.pool.Pool state=TERMINATE pool_size=8>


## Selected third-party libraries
* Beautifulsoup [example](https://github.com/forrestbao/pebble/blob/master/webcrawler/ACL2019_download.ipynb)
* NumPy/Scipy
* Matplotlib
* ipywidget
* Matplotlib, `matplotlib.animation`

# Numpy

In [48]:
import numpy 

In [83]:
L = [1, [2,3 ]]
L = [[1, 2], [3,4]]

print (numpy.array(L))


a= numpy.array([[1,2], 
                [3,4]])
a
type(a)

[[1 2]
 [3 4]]


numpy.ndarray

In [None]:
a.transpose()

array([[1, 3],
       [2, 4]])

In [None]:
numpy.linalg.inv(a)

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [None]:
a*a # hadamard product or element-wise product

array([[ 1,  4],
       [ 9, 16]])

In [None]:
a@a
# or 
numpy.matmul(a, a)

array([[ 7, 10],
       [15, 22]])

## axes system in Numpy

In [84]:
print (a)
print (a.max(axis=0))
print (a.max(axis=1))

[[1 2]
 [3 4]]
[3 4]
[2 4]


In [None]:
a= numpy.array([
                [[1,2], 
                 [3,4]], 
                [[5,6], 
                 [7,8]]
               ])
print (a.max(axis=0))
print (a.max(axis=1))
print (a.max(axis=2))

[[5 6]
 [7 8]]
[[3 4]
 [7 8]]
[[2 4]
 [6 8]]


In [29]:
a= numpy.array([[1,2], 
                [3,4]])
print(a)
print (numpy.vstack((a,a))) 
print (numpy.hstack((a,a))) 
print (numpy.stack((a,a), axis=1))

[[1 2]
 [3 4]]
[[1 2]
 [3 4]
 [1 2]
 [3 4]]
[[1 2 1 2]
 [3 4 3 4]]
[[[1 2]
  [1 2]]

 [[3 4]
  [3 4]]]


## Properly check whether two numpy.ndarray s are the same

In [30]:
numpy.array_equal(a,a)

True

In [31]:
a==a

array([[ True,  True],
       [ True,  True]])

## broadcast
Note in the example below that `a` is a 2D numpy array while `b` is 1D. Mathematically, the product between the two is not defined. 

In [21]:
import numpy
a= numpy.array([[1,2,5], 
                [3,4,6]])
b = numpy.array([-1, 1, 0])

a*b


array([[-1,  2,  0],
       [-3,  4,  0]])

In [86]:
b=numpy.array(range(30))
print (b)
c = b.reshape(5,2,3)
print (c)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29]
[[[ 0  1  2]
  [ 3  4  5]]

 [[ 6  7  8]
  [ 9 10 11]]

 [[12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]]

 [[24 25 26]
  [27 28 29]]]


### Filter elements using broadcasted comparisons 

In [49]:
X = numpy.array(
    [1,0,1,1,0]
)
print (X==1)

[ True False  True  True False]


In [22]:
# ravel a numpy array 

a = numpy.array([[1,2,3], [4,5,6]])
print (a)
print (a.ravel())
print (a.flatten())

[[1 2 3]
 [4 5 6]]
[1 2 3 4 5 6]
[1 2 3 4 5 6]


In [28]:
# squeeze a numpy array

a = numpy.array([[[1,2,3], [4,5,6]]])
print (a)
print (a.shape) 
print (a.squeeze().shape)
print (a.squeeze())


[[[1 2 3]
  [4 5 6]]]
(1, 2, 3)
(2, 3)
[[1 2 3]
 [4 5 6]]


## Sliding over data 

See https://numpy.org/doc/stable/reference/generated/numpy.lib.stride_tricks.sliding_window_view.html

## Think matrixly

If you write for-loops when dealing with numpy arrays, something might be wrong. 

In [34]:
a=numpy.array([[1,2], [3,4]])

# Do not do this
row_sum = 0 
for i in range(len(a)):
    row_sum += a[i].max()

print (row_sum)


# Do this: 
sum(a.max(axis=1))

6


6

In [6]:
a=numpy.array(
    [
        [
            [1,2], 
            [3,4]
        ], 
        [
            [5,6],
            [7,8]
        ]
    ]
)

In [12]:
a.max(axis=1)

array([[3, 4],
       [7, 8]])

In [49]:
a=numpy.array([[1,2], [3,4], [5,6]])
numpy.inner(a,a)

array([[ 5, 11, 17],
       [11, 25, 39],
       [17, 39, 61]])

In [5]:
x = numpy.array(
    [1,2,3],
)
y = numpy.array(
    [4,5,6],
) 

# dot product of x and y 
print (numpy.dot(x,y))

# hadamard product of x and y
print (x*y)

32
[ 4 10 18]


# Einstein ops lib 

Appeared at ICLR 2022

NumPy has its own Einstein summation functions. But `einops` is more powerful.

In [39]:
X=numpy.array([[[1,2,3], [4,5,6]], 
               [[7,8,9], [10,11,12]], 
               [[13,14,15], [16,17,18]], 
               [[19,20,21], [22,23,24]]
              ])

In [34]:
import einops
einops.rearrange(X, 
            'i j k -> i (j k)')

array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12],
       [13, 14, 15, 16, 17, 18],
       [19, 20, 21, 22, 23, 24]])

In [36]:
einops.rearrange(X, 
        'i j k -> i (k j)')

array([[ 1,  4,  2,  5,  3,  6],
       [ 7, 10,  8, 11,  9, 12],
       [13, 16, 14, 17, 15, 18],
       [19, 22, 20, 23, 21, 24]])

In [38]:
# The beautiful way
print (numpy.einsum('ijk -> i', X))
print ("\n"*5)

# The ugly way
print (numpy.sum( 
          numpy.sum(X, axis=2), 
          axis=1
      ))

[ 21  57  93 129]






[ 21  57  93 129]
