# Python - intermediate

- profiling
- memory management
- threading
- asyncio
- socket
- regular expression

## Profiling

In [1]:
import cProfile
import functools

def fibo(n):
    """Return the n-th Fibonacci number (1-indexed) by plain recursion.

    Nothing is cached, so the same sub-results are recomputed many times;
    that makes this version a useful baseline when profiling. Values of n
    that never reach 1 or 2 recurse until RecursionError is raised.
    """
    if n in (1, 2):
        return 1
    return fibo(n - 2) + fibo(n - 1)

# with cache
# Maps n -> n-th Fibonacci number for every n computed so far.
cache = {}

def fibo_with_cache(n):
    """Return the n-th Fibonacci number (1-indexed), memoised in `cache`.

    Each value is computed at most once and then served from the
    module-level dict. The base cases F(1) = F(2) = 1 match `fibo`, so both
    functions return the same sequence.
    """
    if n in cache:
        return cache[n]
    if n == 1 or n == 2:
        v = 1
    else:
        v = fibo_with_cache(n - 2) + fibo_with_cache(n - 1)
    cache[n] = v
    return v

@functools.lru_cache(maxsize=50)
def fibo2(n):
    """Return the n-th Fibonacci number (1-indexed).

    The recursion is the same as in the naive version; the lru_cache
    decorator keeps up to 50 recent results so repeated calls are cheap.
    """
    return 1 if n in (1, 2) else fibo2(n - 2) + fibo2(n - 1)

# Profile the three implementations on the same input so their call counts
# and timings can be compared.
for statement in ('fibo(30)', 'fibo_with_cache(30)', 'fibo2(30)'):
    cProfile.run(statement)

         1664082 function calls (4 primitive calls) in 2.366 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
1664079/1    2.366    0.000    2.366    2.366 <ipython-input-1-35f1d2d7da4e>:4(fibo)
        1    0.000    0.000    2.366    2.366 <string>:1(<module>)
        1    0.000    0.000    2.366    2.366 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


         60 function calls (4 primitive calls) in 0.000 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     57/1    0.000    0.000    0.000    0.000 <ipython-input-1-35f1d2d7da4e>:14(fibo_with_cache)
        1    0.000    0.000    0.000    0.000 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}

## Memory management

Some of the behaviors below are implementation-dependent and are not part of the Python language specification.

### Details on objects

- mutable vs immutable objects
- replacing vs copying
- `is` vs `==`
- assignment statement as rebinding
- interning
- "call by sharing"
  - reference to an object is passed in
  - the parameter can be rebound inside the function, but the rebinding is not visible to the caller
  
details:

- Simple assignment does not create copies.
- Augmented assignment with += or *= creates new objects if the lefthand variable is bound to an immutable object, but may modify a mutable object in place.
- Assigning a new value to an existing variable does not change the object previously bound to it. This is called a rebinding: the variable is now bound to a different object. If that variable was the last reference to the previous object, that object will be garbage collected.
- Function parameters are passed as aliases, which means the function may change any mutable object received as an argument. There is no way to prevent this, except making local copies or using immutable objects (e.g., passing a tuple instead of a list).
- Using mutable objects as default values for function parameters is dangerous because if the parameters are changed in place, then the default is changed, affecting every future call that relies on the default.

### IDs

In [2]:
# CPython implementation detail: small integers (-5 through 256) are
# pre-allocated and shared, so their ids stay the same between runs, while
# values outside that range are ordinary objects created on demand.
print(id(-6))    # just below the cached range
print(id(-5))    # lowest cached value
print(id(1))
print(id(2-1))   # evaluates to 1, so this is the same object as above
print(id(256))   # highest cached value
print(id(257))   # just above the cached range

140621930257552
10914304
10914496
10914496
10922656
140621930257680


In [3]:
# Same statements run a second time: in the recorded output the ids of
# -5, 1 and 256 are unchanged, whereas the id of 257 differs.
print(id(-6))
print(id(-5))
print(id(1))
print(id(2-1))
print(id(256))
print(id(257))

140621930257552
10914304
10914496
10914496
10922656
140621930257456


In [4]:
# Every {} literal builds a new dict object, so the two ids need not match.
print(id({}))
print(id({}))

140621930106744
140621930106600


In [5]:
# Mutable: each {} literal creates a separate dict.
print(id({}))
print(id({}))
print(id({}))
print()
# Immutable: in the recorded output all three calls report the same id,
# i.e. the interpreter hands back one shared empty tuple.
print(id(()))
print(id(()))
print(id(()))

140621930044296
140621930044800
140621929948792

140622118191176
140622118191176
140622118191176


In [6]:
# Augmented assignment on an immutable vs. a mutable sequence.
a = (1, 2, 3)
print(id(a))
a += (4, 5)    # tuples are immutable: a new tuple is built and `a` is rebound
print(id(a))   # different id than before
b = [1, 2, 3]
print(id(b))
b += [4, 5]    # lists are mutable: the existing list is extended in place
print(id(b))   # same id as before

140621973653040
140621930046568
140621930952904
140621930952904


### Garbage collection

In [7]:
import sys
# sys.getrefcount() reports the number of references to its argument.
# The integer 1 is shared by the whole interpreter, which is why the
# recorded count is in the thousands.
print(sys.getrefcount(1))
a=1
print(sys.getrefcount(1))

3385
3385


In [8]:
# Empty class used only to create an object whose references can be counted.
class A:
    pass
a = A()
print(sys.getrefcount(a))   # `a` plus the temporary argument reference
b = a                       # a second name for the same instance, not a copy
print(sys.getrefcount(a))
# one for `a`, one for `b`, and one for an argument of sys.getrefcount()

2
3


In [9]:
import gc
# Report the state of the cyclic garbage collector.
print("gc.isenabled()", gc.isenabled())          # is automatic collection on?
print("gc.get_threshold()", gc.get_threshold())  # collection thresholds per generation
print("gc.get_count()", gc.get_count())          # current collection counts per generation

gc.isenabled() True
gc.get_threshold() (700, 10, 10)
gc.get_count() (365, 4, 3)


In [10]:
gc.collect()

0

In [11]:
print("gc.get_count()", gc.get_count())

gc.get_count() (379, 0, 0)


### Finalizer

In [12]:
import weakref

s1 = {1, 2, 3, 4}
s2 = s1    # second name bound to the same set; no copy is made

In [13]:
def bye():
    """Print a farewell message; used below as a finalizer callback."""
    print("Gone with the wind...")
# Arrange for bye() to be called when the set bound to s1 is destroyed.
ender = weakref.finalize(s1, bye)
ender.alive    # True as long as the callback has not run

True

In [14]:
del s1         # removes only the name `s1`; `s2` still refers to the set
ender.alive    # still True in the recorded output: the set is not gone yet

True

In [15]:
s2 = "spam"    # rebinding drops the last reference to the set, so bye() runs

Gone with the wind...


In [16]:
ender.alive

False

### Weak references

In [17]:
import sys
import weakref
import gc

def bye():
    """Print a farewell message; registered below as a finalizer callback."""
    print("Gone with the wind...")

my_object = {-1, -2, -5}
print(sys.getrefcount(my_object))
# Registering a finalizer does not add a strong reference: the count printed
# before and after is the same in the recorded output.
weakref.finalize(my_object, bye)
print(sys.getrefcount(my_object))

2
2


In [18]:
wref = weakref.ref(my_object)

In [19]:
# Creating the weak reference did not raise the reference count, and calling
# wref() yields the referent itself.
print(sys.getrefcount(my_object))
print(sys.getrefcount(wref()))

2
2


In [20]:
wref()

{-5, -2, -1}

In [21]:
# Both lines print the same id: wref() returns the object bound to my_object.
print(id(my_object))
print(id(wref()))

140621989054280
140621989054280


In [22]:
my_object = {1,2,3,4,5}    # rebind the name; the original set loses this reference

In [23]:
print(wref())

{-5, -1, -2}


In [24]:
gc.collect()
# In the plain Python interpreter, "Gone with the wind..." is printed here.
# NOTE(review): in the notebook the original set is presumably still held by
# IPython's output history (it was displayed as a cell result earlier), so
# the finalizer does not run - confirm.

0

In [25]:
wref() is None
# In the plain Python interpreter, "True" is printed.

False

## Socket programming

In [26]:
import socket
import ssl

In [27]:
HOST = 'example.com'  # The server's hostname or IP address
PORT = 80                # The port used by the server

In [28]:
IP = socket.gethostbyname(HOST)
print(IP)

93.184.216.34


In [29]:
# HTTP/1.1 GET request for the root document of example.com.
# HTTP requires every line to end with CRLF and the request line to come
# first, so the message is assembled from explicit lines instead of a
# triple-quoted literal (which would start with a blank line and use bare LF).
MESSAGE = b"\r\n".join([
    b"GET / HTTP/1.1",
    b"User-Agent: Mozilla/4.0 (compatible; MSIE5.01; Windows NT)",
    b"Host: example.com",
    b"Accept-Language: en-us",
    b"Accept-Encoding: identity",
    b"Connection: keep-alive",
    b"Cache-Control: private, max-age=0, no-cache",
    b"Pragma: no-cache",
    b"Expires: Wed, 21 Oct 2015 07:28:00 GMT",
    b"",    # the two empty items produce the blank line that ends the headers
    b"",
])

In [30]:
# Plain-TCP HTTP exchange: connect, send the request bytes, read the reply.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    s.connect((HOST, PORT))
    s.sendall(MESSAGE)
    # A single recv() returns at most this many bytes and may return fewer,
    # so a large response is not guaranteed to arrive in one call.
    data = s.recv(1024*1024)
    print(data)    # raw bytes, escapes visible

print(data.decode('UTF-8'))    # the same reply decoded to text

b'HTTP/1.1 200 OK\r\nAccept-Ranges: bytes\r\nCache-Control: max-age=604800\r\nContent-Type: text/html; charset=UTF-8\r\nDate: Wed, 20 Nov 2019 05:13:31 GMT\r\nEtag: "3147526947"\r\nExpires: Wed, 27 Nov 2019 05:13:31 GMT\r\nLast-Modified: Thu, 17 Oct 2019 07:18:26 GMT\r\nServer: ECS (sjc/4E73)\r\nVary: Accept-Encoding\r\nX-Cache: HIT\r\nContent-Length: 1256\r\n\r\n<!doctype html>\n<html>\n<head>\n    <title>Example Domain</title>\n\n    <meta charset="utf-8" />\n    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n    <meta name="viewport" content="width=device-width, initial-scale=1" />\n    <style type="text/css">\n    body {\n        background-color: #f0f0f2;\n        margin: 0;\n        padding: 0;\n        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;\n        \n    }\n    div {\n        width: 600px;\n        margin: 5em auto;\n        padding: 2em;\n        background-colo

In [31]:
import socket
import ssl

# HTTP/1.1 GET request for the root document of google.com.
# Lines are joined with CRLF as HTTP requires, and "Connection: close" asks
# the server to close the connection after the response so that the end of
# the response can be detected by reading until EOF.
MESSAGE = b"\r\n".join([
    b"GET / HTTP/1.1",
    b"User-Agent: Mozilla/4.0 (compatible; MSIE5.01; Windows NT)",
    b"Host: google.com",
    b"Accept-Language: en-us",
    b"Accept-Encoding: identity",
    b"Connection: close",
    b"Cache-Control: private, max-age=0, no-cache",
    b"Pragma: no-cache",
    b"Expires: Wed, 21 Oct 2015 07:28:00 GMT",
    b"",    # the two empty items produce the blank line that ends the headers
    b"",
])
HOST = "google.com"
PORT = 443         # The port used by the server

# Default context: certificate verification and hostname checking enabled.
context = ssl.create_default_context()

with socket.create_connection((HOST, PORT)) as sock:
    with context.wrap_socket(sock, server_hostname=HOST) as ssock:
        ssock.sendall(MESSAGE)
        # recv() may return only part of the response, so keep reading until
        # the server closes the connection (recv() then returns b"").
        chunks = []
        while True:
            chunk = ssock.recv(4096)
            if not chunk:
                break
            chunks.append(chunk)

data = b"".join(chunks)
print(data.decode('UTF-8'))

HTTP/1.1 301 Moved Permanently
Location: https://www.google.com/
Content-Type: text/html; charset=UTF-8
Date: Wed, 20 Nov 2019 05:13:32 GMT
Expires: Fri, 20 Dec 2019 05:13:32 GMT
Cache-Control: public, max-age=2592000
Server: gws
Content-Length: 220
X-XSS-Protection: 0
X-Frame-Options: SAMEORIGIN
Alt-Svc: quic=":443"; ma=2592000; v="46,43",h3-Q050=":443"; ma=2592000,h3-Q049=":443"; ma=2592000,h3-Q048=":443"; ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000

<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
<TITLE>301 Moved</TITLE></HEAD><BODY>
<H1>301 Moved</H1>
The document has moved
<A HREF="https://www.google.com/">here</A>.
</BODY></HTML>



## Threading

In [32]:
import concurrent.futures
import time

# One dummy work item per worker thread; `counter` is the shared total that
# the workers update.
N_THREADS = 5
counter = 0
fake_data = list(range(N_THREADS))

def increment_counter(fake_value):  # fake_value is never read in the body
    global counter
    for _ in range(1000000):
        counter += 1  # separate load, add and store on the global, no lock held

# Call increment_counter once per work item on a pool of N_THREADS threads;
# the with-block is left only after the pool has been shut down.
with concurrent.futures.ThreadPoolExecutor(max_workers=N_THREADS) as pool:
    pool.map(increment_counter, fake_data)

print(f"counter: {counter}")

counter: 2719992


In [33]:
import dis

dis.dis(increment_counter)

 10           0 SETUP_LOOP              24 (to 26)
              2 LOAD_GLOBAL              0 (range)
              4 LOAD_CONST               1 (1000000)
              6 CALL_FUNCTION            1
              8 GET_ITER
        >>   10 FOR_ITER                12 (to 24)
             12 STORE_FAST               1 (_)

 11          14 LOAD_GLOBAL              1 (counter)
             16 LOAD_CONST               2 (1)
             18 INPLACE_ADD
             20 STORE_GLOBAL             1 (counter)
             22 JUMP_ABSOLUTE           10
        >>   24 POP_BLOCK
        >>   26 LOAD_CONST               0 (None)
             28 RETURN_VALUE


In [34]:

import concurrent.futures
import threading
import time

# Guards every update of the shared `counter`.
lock = threading.Lock()

N_THREADS = 5
fake_data = list(range(N_THREADS))
counter = 0

def increment_counter(fake_value, iterations=1000000):
    """Add `iterations` to the global counter, one locked step at a time.

    Args:
        fake_value: Ignored; present only so the function can be used with
            executor.map(), which passes one item per call.
        iterations: Number of increments to perform (default 1000000).

    The lock is taken with a `with` statement so it is released even if the
    body raises, which a bare acquire()/release() pair would not guarantee.
    """
    global counter
    for _ in range(iterations):
        with lock:
            counter += 1

# Call increment_counter once per work item on a pool of N_THREADS threads;
# the with-block is left only after the pool has been shut down.
with concurrent.futures.ThreadPoolExecutor(max_workers=N_THREADS) as pool:
    pool.map(increment_counter, fake_data)

print(f"counter: {counter}")

counter: 5000000


In [35]:
import threading
import time
import logging

logging.basicConfig(level=logging.DEBUG,
                    format='(%(threadName)-9s) %(message)s',)

def consumer(cv):
    """Block on the condition `cv` until notified, logging each step.

    NOTE(review): wait() is called without checking any shared state, so a
    notification sent before this thread reaches wait() would be missed.
    """
    logging.debug('Consumer thread started ...')
    with cv:    # the condition's lock must be held to call wait()
        logging.debug('Consumer waiting ...')
        cv.wait()    # releases the lock while blocked, re-acquires on wake-up
        logging.debug('Consumer consumed the resource')

def producer(cv):
    """Wake every thread currently waiting on the condition `cv`.

    Args:
        cv: The threading.Condition shared with the consumer threads.
    """
    logging.debug('Producer thread started ...')
    with cv:    # the condition's lock must be held to notify
        logging.debug('Making resource available')
        logging.debug('Notifying to all consumers')
        # notify_all() is the supported spelling; notifyAll() is a
        # deprecated alias.
        cv.notify_all()

if __name__ == '__main__':
    # One Condition object is shared by both consumers and the producer.
    condition = threading.Condition()
    cs1 = threading.Thread(name='consumer1', target=consumer, args=(condition,))
    cs2 = threading.Thread(name='consumer2', target=consumer, args=(condition,))
    pd = threading.Thread(name='producer', target=producer, args=(condition,))

    # The consumers are started first, with a pause after each start,
    # presumably so that both are already waiting when the producer notifies.
    cs1.start()
    time.sleep(2)
    cs2.start()
    time.sleep(2)
    pd.start()

(consumer1) Consumer thread started ...
(consumer1) Consumer waiting ...
(consumer2) Consumer thread started ...
(consumer2) Consumer waiting ...
(producer ) Producer thread started ...
(producer ) Making resource available
(producer ) Notifying to all consumers
(consumer2) Consumer consumed the resource
(consumer1) Consumer consumed the resource


## asyncio

In [36]:
# apply nest_asyncio
# this is only for jupyter notebook environment
import nest_asyncio
nest_asyncio.apply()

In [37]:
import asyncio

counter = 0

async def increment_counter(id):
    """Print a message, sleep one second, bump the global counter, print again.

    Note that the parameter name `id` shadows the built-in id() inside this
    function.
    """
    global counter
    print(f"before sleep id={id}")
    await asyncio.sleep(1)    # suspends this coroutine so others can run
    counter += 1
    print(f"after sleep id={id}")

async def run_all():
    """Run ten increment_counter coroutines concurrently and wait for them."""
    pending = [increment_counter(i) for i in range(10)]
    await asyncio.gather(*pending)

print(f"before run counter={counter}")

# Drive run_all() to completion on the current event loop.
# NOTE(review): on Python 3.7+ asyncio.run(run_all()) is the usual entry
# point - confirm it behaves the same under nest_asyncio before switching.
loop = asyncio.get_event_loop()
loop.run_until_complete(run_all())

print(f"after run counter={counter}")


before run counter=0
before sleep id=8
before sleep id=5
before sleep id=0
before sleep id=3
before sleep id=6
before sleep id=7
before sleep id=2
before sleep id=4
before sleep id=1
before sleep id=9
after sleep id=8
after sleep id=5
after sleep id=0
after sleep id=3
after sleep id=6
after sleep id=7
after sleep id=2
after sleep id=4
after sleep id=1
after sleep id=9
after run counter=10


In [38]:
import asyncio

counter = 0

async def increment_counter(id):
    """Sleep one second, bump the global counter, then raise Exception(id)."""
    global counter
    print(f"before sleep id={id}")
    await asyncio.sleep(1)    # suspends this coroutine so others can run
    counter += 1
    raise Exception(id)
    print(f"after sleep id={id}")    # never reached: the raise above always fires

async def run_all():
    """Run ten increment_counter coroutines concurrently and report a failure.

    gather() is called without return_exceptions, so an exception raised by
    one of the coroutines propagates out of the await and is printed here.
    """
    try:
        await asyncio.gather(*[increment_counter(i) for i in range(10)])
    except Exception as e:
        # The coroutines raise a plain Exception, so nothing narrower can be
        # caught here.
        print("Got exception", e)

print(f"before run counter={counter}")

# Drive run_all() to completion on the current event loop. In the recorded
# output the loop returned after the first exception, before every coroutine
# had incremented the counter.
loop = asyncio.get_event_loop()
loop.run_until_complete(run_all())

print(f"after run counter={counter}")


before run counter=0
before sleep id=8
before sleep id=1
before sleep id=3
before sleep id=0
before sleep id=7
before sleep id=2
before sleep id=4
before sleep id=5
before sleep id=9
before sleep id=6
Got exception 8
after run counter=8


## Regular expressions

In [39]:
import re
text = "abc123,aaa bb cc"

In [40]:
print(re.compile(r"\w+").findall(text))

['abc123', 'aaa', 'bb', 'cc']


In [41]:
print(re.findall(r"\w+", text))

['abc123', 'aaa', 'bb', 'cc']


In [42]:
r2 = re.findall(r"^\w+", text)
print(r2)

['abc123']


In [43]:
email_pattern = r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"


def is_email(address):
    """Return True when the whole of `address` matches email_pattern.

    re.fullmatch() is used rather than re.match(): with match(), the `$`
    anchor also matches just before a trailing newline, so an address
    followed by a line break would be accepted.
    """
    return re.fullmatch(email_pattern, address) is not None


emails = [
    "user@example.com",
    "user@example@com",
]
for email in emails:
    if is_email(email):
        print(f"{email} is an email address.")
    else:
        print(f"{email} is NOT an email address.")

user@example.com is an email address.
user@example@com is NOT an email address.


In [44]:
korean_cell_phone_pattern = r"010-\d{4}-\d{4}"


def is_korean_cell_phone(num):
    """Return True when the whole of `num` matches korean_cell_phone_pattern.

    re.fullmatch() is required here: re.match() only anchors the start, so a
    valid prefix followed by extra characters (e.g. a fifth trailing digit)
    would be accepted.
    """
    return re.fullmatch(korean_cell_phone_pattern, num) is not None


phone_numbers = [
    "010-1234-5678",
    "010-123-456"
]
for num in phone_numbers:
    if is_korean_cell_phone(num):
        print(f"{num} is an phone number.")
    else:
        print(f"{num} is NOT an phone number.")

010-1234-5678 is an phone number.
010-123-456 is NOT an phone number.


In [45]:
import re

text = "My name is Paul. Nice to meet you."
# Capture the name with group 1 and reuse it twice in the replacement.
# The periods are escaped so they match a literal "." only; an unescaped
# "." would match any character.
text2 = re.sub(r"My name is (.+)\. Nice to meet you\.", r"Hello, \1. Good to see you, \1.", text)
print(text2)

Hello, Paul. Good to see you, Paul.


## References

- https://docs.python.org/3/library/profile.html
- https://realpython.com/python-memory-management/
- https://stackify.com/python-garbage-collection/
- https://realpython.com/python-sockets/
- https://realpython.com/intro-to-python-threading/
- https://realpython.com/python-concurrency/