#### Some tips and ideas to make python code run faster

Use built-in functions and libraries, they are tested and optimzied  
min, max, all, map
math.factorial, math.comb, math.fsum




https://medium.com/codex/say-goodbye-to-loops-in-python-and-welcome-vectorization-e4df66615a52   
https://www.kdnuggets.com/2021/06/make-python-code-run-incredibly-fast.html   
https://yanick-andrade.medium.com/enhancing-performance-in-python-a29bd06b7ad0  

In [1]:
# Use built-in functions and libraries, they are tested and optimzied
import string 
def upper_basic(n):
    newList = []
    for w in string.ascii_lowercase*n:
        newList.append(w.upper())        

In [2]:
%timeit upper_basic(1000)

4.12 ms ± 23.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [3]:
def upper_o2(n):
    newList = map(str.upper, string.ascii_lowercase*n)

In [72]:
%timeit upper_o2(1000)

1.22 µs ± 18.6 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [None]:
# performance difference: 2.95 ms vs 1.21 us

#### prefer comprehension list over loop

In [4]:
def list_loop(n):
    new_list = []
    for i in range(1, n):
        if i % 2 == 0:
            new_list.append(i**2)
            

In [5]:
%timeit list_loop(1001)

304 µs ± 7.03 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [6]:
def list_comp(n):
    new_list = [i**2 for i in range(1, n) if i%2 == 0]
    

In [76]:
%timeit list_comp(1001)

256 µs ± 350 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


#### use prop import

In [10]:
import math 
def import_module(n):
    for i in range(n):
        value = math.sqrt(n)

In [11]:
%timeit import_module(1001)

137 µs ± 2.21 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [12]:
del math

In [13]:
from math import sqrt 
def import_module_name(n):
    for i in range(n):
        value = sqrt(n)

In [14]:
%timeit import_module_name(1001)

89.8 µs ± 773 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [15]:
# preformance difference: 121 us vs 86 us

#### string concateation
+= operator creates new string
"".join is different and faster

In [16]:
def string_cat_1(n):
    s = ""
    for i in range(1, 101):
        s += string.ascii_letters
        

In [17]:
def string_cat_2(n):
    s = ""
    for i in range(1, 101):
        s.join(string.ascii_letters)

In [18]:
%timeit string_cat_1(101)


18.8 µs ± 106 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [19]:
%timeit string_cat_2(101)

129 µs ± 499 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


#### Use vectorization  
apply operations to all elements of an array in one go  
"for" loop manipulates one row at a time


In [20]:
def find_sum(n):
    total = 0
    for i in range(n):
        total += i

In [21]:
%timeit find_sum(1_000_000)

74.9 ms ± 1.22 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [22]:
import numpy as np
def find_sum_vector(n):
    total = 0
    total = np.sum(np.arange(n))

In [23]:
%timeit find_sum_vector(1_000_000)

2.08 ms ± 78.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [24]:
(67.8-1.84)/67.8

0.9728613569321534

#### create dataframe  
create a 5 million row x 4 columns filled with random values


In [25]:
import numpy as np 
import pandas as pd
df = pd.DataFrame(np.random.randint(0, 50, size=(5_000, 4)), columns=('a','b','c','d'))
df.shape

(5000, 4)

In [26]:
df.head()

Unnamed: 0,a,b,c,d
0,6,31,13,46
1,30,11,5,31
2,39,29,15,17
3,14,43,24,27
4,30,39,8,13


In [27]:
def create_new_col(df):
    for idx, row in df.iterrows():
        df.at[idx, 'ratio'] = 100 *  (row['d'] / (1+(row['c'])))

In [28]:
%timeit create_new_col(df)

482 ms ± 16.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [29]:
def create_new_vec(df):
    df['ratio'] = 100 *  (df['d'] / (1+(df['c'])))

In [30]:
%timeit create_new_vec(df)

513 µs ± 43.5 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [31]:
# preformance difference: 439 ms vs 501 ms

In [32]:
(439*1000 - 501) / 439

998.8587699316629

#### Dataframe if - else statement, create new column based on conditions

In [33]:
# use basic if / else statements
def df_if_new(df):
    for idx, row in df.iterrows():
        if row.a == 0:
            df.at[idx, 'e'] = row.d
        elif (row.a <= 25) & (row.a > 0):
            df.at[idx, 'e'] = (row.b) - (row.c)
        else:
            df.at[idx, 'e'] = row.b + row.c

In [34]:
%timeit df_if_new(df)

656 ms ± 28.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [35]:
# use vectorization
def df_if_vec(df):
    df['e'] = df['b'] + df['c']
    df.loc[df['a'] < 25, 'e'] = df['b'] - df['c']
    df.loc[df['a'] == 0, 'e'] = df['d']
    

In [36]:
%timeit df_if_vec(df)

2.03 ms ± 34.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [37]:
(598 - 2.02)/598

0.9966220735785953

#### Deep learning multi-linear regression calculations  
$$ y = m{_1}x{_1} + m{_2}x{_2} +m{_3}x{_3} +m{_4}x{_4} +m{_5}x{_1} + c $$

Use loop for million of rows of calculations is slow  
Vectorization is the  optimal solution

In [38]:
# create random data 
import numpy as np
m = np.random.rand(1,5)
n = np.random.rand(100000,5)
m.shape, n.shape


((1, 5), (100000, 5))

In [39]:
# use loop for calculations
import numpy as np

def loop_reg_sum(col, row):
    m = np.random.rand(1,col)
    n = np.random.rand(row,col)
    result = []
    for i in range(row):
        total = 0 
        for j in range(col):
            total += n[j][j]*m[0][j]
#         print(i, total)
        result.append(total)


In [40]:
%timeit loop_reg_sum(5, 100_000)

407 ms ± 7.12 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [41]:
# use vectorization 
def vec_reg_sum(col, row):
    m = np.random.rand(1,col)
    n = np.random.rand(row,col)    
    result = np.dot(n, m.T)
    

In [42]:
%timeit vec_reg_sum(5, 100_000)

6.45 ms ± 270 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [43]:
## the fastest way to loop in python
## sum the numbers from 0 to n-1 in different ways

In [44]:
# use while (this is slow)
def while_loop(n = 1_000_000):
    i = 0
    s = 0
    while i < n:    # python implementation
        s += i
        i +=  1 
    return s


In [45]:
# use for (better than  while)
def for_loop(n = 1_000_000):
    s = 0
    for i in range(n):     # c implementation
        s += i
        return s

In [46]:
# add unimpacted python operation to simulate python operation 
def for_loop_with_increment(n=1_000_000):
    s = 0
    for i in range(n):   # i increments in range
        s += i
        i += 1    # redundant python operation 
    return s 

In [47]:
# loop with test operation
def for_loop_with_test(n=1_000_000):
    s = 0
    for i in range(n):
        if i < n: pass   # redundant check, make it slow
        s += i
    return s

In [48]:
# with both redundant += and < operations
def for_loop_with_increment_and_test(n=1_000_000):
    s = 0
    for i in range(n):
        if i < n: pass    # make it slow
        i += 1    # make it slow
        s += i
    return s

In [49]:
# use built-in sum
def sum_range(n=1_000_000):
    return sum(range(n))

In [50]:
# use numpy (implemented in c, faster)
import numpy 
def sum_numpy(n=1_000_000):
    return numpy.sum(numpy.arange(n))    # this is a one c call, but a whole array is created in memory


In [51]:
# use math knowledge
def sum_math(n=1_000_000):
    return (n * (n-1)) // 2

In [52]:
import timeit

print('while loop\t\t', timeit.timeit(while_loop, number = 1))
print('for loop\t\t', timeit.timeit(for_loop, number = 1))
print('for_loop_with_increment\t\t', timeit.timeit(for_loop_with_increment, number = 1))
print('for_loop_with_test\t\t', timeit.timeit(for_loop_with_test, number = 1))
print('for_loop_with_increment_and_test\t\t', timeit.timeit(for_loop_with_increment_and_test, number = 1))
print('sum_range\t\t', timeit.timeit(sum_range, number = 1))
print('sum_numpy\t\t', timeit.timeit(sum_numpy, number = 1))
print('sum_math\t\t', timeit.timeit(sum_math, number = 1))

# python programming consideration
# use math formula
# use c implementation
# use built-in function, sum, map ... which loops for you
# for or while loop



while loop		 0.1154504309999993
for loop		 3.713000069183181e-06
for_loop_with_increment		 0.11514549399998941
for_loop_with_test		 0.0975002409999206
for_loop_with_increment_and_test		 0.1432448260000001
sum_range		 0.022303372999999738
sum_numpy		 0.005475752999927863
sum_math		 2.3169999394667684e-06


In [53]:
## memoization or cache to optimize
# useful for recursive functions, or operations used over and over again
# you don't want to repeat to calculate values again

In [54]:
# use cache dict
from time import perf_counter
from functools import wraps

def memoize(func):
    cache = {}
    
    @wraps(func)
    def wrapper(*args, **kwargs):
        key = str(args) + str(kwargs)
        if key not in cache:
            cache[key] = func(*args, **kwargs)
        return cache[key]
    return wrapper


In [55]:
# fibonacci using memoize 
def fibonacci_plain(n=100) -> int:
    if n < 2:
        return n
    return fibonacci_plain(n-1) + fibonacci_plain(n-2)

In [56]:
# no memoization call, very slow, cpu humming, 20 mins still running, killed this cell
start = perf_counter()
fibonacci_plain()
end = perf_counter()
print(end-start)

KeyboardInterrupt: 

In [57]:
# fibonacci using memoize 
@memoize
def fibonacci(n=1000) -> int:
    if n < 2:
        return n
    return fibonacci(n-1) + fibonacci(n-2)

In [58]:
# get result instantly
print('fibonacci with memorize\t\t', timeit.timeit(fibonacci, number = 1))

fibonacci with memorize		 0.0036856550000265997


In [59]:
# use memoize fib function
start = perf_counter()
fibonacci(1000)
end = perf_counter()
print(end-start)

7.581699992442736e-05


In [60]:
## httpx 
## asyncio
import requests 
import re
import time 

urls = {
    "1": "https://google.com",
    "2": "https://yahoo.com",
    "3": "https://microsoft.com",
    "4": "https://google.com",
    "5": "https://apple.com",
    "6": "https://ibm.com",
    "7": "https://amazon.com",
    "8": "https://twitter.com",
    "9": "https://tiktok.com",
    "10": "https://oracle.com",
    "11": "https://intel.com",
    "12": "https://tesla.com",
    "13": "https://nasa.com",
    "14": "https://ebay.com",
    "15": "https://wikipedia.com"
}


def count_words_in_web_page():
    outputs = []
    for url in urls.values():
        print(url)
        outputs = outputs + [requests.get(url).text]    # get text, did not use append, synchronous call
        #print(outputs)

    count_https = []
    count_http = []        
    for output in outputs:
        count_https += re.findall("https://", output)    # text processing, not use pre-compiled re
        count_http += re.findall("http://", output)
    
    print(len(count_https), len(count_http))
    
# index = 0
# while count_https[index]:
#     if index >= len(count_https):
#         break
#     index +=  1

start = time.perf_counter()
count_words_in_web_page()  
elapsed = time.perf_counter() - start
print(f'{elapsed:.2f} seconds')


https://google.com
https://yahoo.com
https://microsoft.com
https://google.com
https://apple.com
https://ibm.com
https://amazon.com
https://twitter.com
https://tiktok.com
https://oracle.com
https://intel.com
https://tesla.com
https://nasa.com
https://ebay.com
https://wikipedia.com
3071 732
10.50 seconds


In [72]:
# while loop is slow 
a = [1,2,3]
l = 0 
while a:
    if l >= len(a)-2:
        break
    a[l]
    l+=1

In [None]:
# let's do some profiling
import cProfile
import pstats 

with cProfile.Profile() as pr:
    count_words_in_web_page()
    
stats = pstats.Stats(pr)
stats.sort_stats(pstats.SortKey.TIME)    # which call takes most of the time, tottime
stats.print_stats()
stats.dump_stats(filename='output.prof')

In [None]:
!pip install snakeviz

In [76]:
!which snakeviz

/Users/user/opt/anaconda3/bin/snakeviz


In [77]:
!snakeviz ./output.prof

snakeviz web server started on 127.0.0.1:8080; enter Ctrl-C to exit
http://127.0.0.1:8080/snakeviz/%2FUsers%2Fuser%2FDocuments%2Frepo%2Fpyclopedia%2Fperformance_tricks%2Foutput.prof
^C

Bye!


In [78]:
## a improved version 
## using httpx and asyncio

# import requests 
import re
import time 

import httpx  # drop-in replacement for request, including asynchronous calls
import asyncio 


urls = {
    "1": "https://google.com",
    "2": "https://yahoo.com",
    "3": "https://microsoft.com",
    "4": "https://google.com",
    "5": "https://apple.com",
    "6": "https://ibm.com",
    "7": "https://amazon.com",
    "8": "https://twitter.com",
    "9": "https://tiktok.com",
    "10": "https://oracle.com",
    "11": "https://intel.com",
    "12": "https://tesla.com",
    "13": "https://nasa.com",
    "14": "https://ebay.com",
    "15": "https://wikipedia.com"
}

# mark as async
async def count_words_in_web_page_async():
    outputs = []
    
    async with httpx.AsyncClient() as client:
        tasks = (client.get(url) for url in urls.values())
        reqs = await asyncio.gather(*tasks)    # waits for task, but await till all donee
        
        outputs = [req.text for req in reqs]
        #print(outputs)
        
    count_https, count_http =[], []
    for output in outputs:
        count_https += re.findall("https://", output)    # text processing, not use pre-compiled re
        count_http += re.findall("http://", output)
#     print(count_https)
#     print(count_http)


start = time.perf_counter()
await (count_words_in_web_page_async())   # schedule func to run
# asyncio.run(count_words_in_web_page_async())   # for python>3.7 and ipython < 7.0
elapsed = time.perf_counter() - start
print(f'{elapsed:.2f} seconds')



1.03 seconds


In [None]:
# let's do some profiling
import cProfile
import pstats 

with cProfile.Profile() as pr:
    await count_words_in_web_page_async()
    
stats = pstats.Stats(pr)
stats.sort_stats(pstats.SortKey.TIME)    # which call takes most of the time, tottime
stats.print_stats()
stats.dump_stats(filename='output_async.prof')

In [80]:
# start a web page 
!snakeviz ./output_async.prof

snakeviz web server started on 127.0.0.1:8080; enter Ctrl-C to exit
http://127.0.0.1:8080/snakeviz/%2FUsers%2Fuser%2FDocuments%2Frepo%2Fpyclopedia%2Fperformance_tricks%2Foutput_async.prof
^C

Bye!


In [81]:
## HTTP requests with async and await

In [None]:
# conda install -c conda-forge unicorn
# somehow not working, keep it as reference
'''
pip install "fastapi[all]"
pip install "unicorn[standard]"

from fastapi import FastAPI
import string 
import random 

app = FastAPI()

@app.get('/')
async def index():
    num = ''.join(random.choices(string.ascii_lowercase, k=5))
    return {'data': num}

@app.get('/items/{item_id}')
async def read(item_id: int):
    return {'item_id': item_id}
'''

In [None]:
## copy / paste below to run a web sever 
## python main.py 
## vim main.py
## !curl http://127.0.0.1:5000/item

'''
(base) user-2:performance_tricks user$ cat main.py 
import random 
import string

from flask import Flask, json, request

api = Flask(__name__)

@api.route('/item')
def get_items():
    name = request.args.get('num')
    print(name)
    num = ''.join(random.choices(string.ascii_lowercase, k=5))
    return {'data': num, 'num': name}

if __name__ == '__main__':
    api.run(debug=True)
(base) user-2:performance_tricks user$ python main.py 

'''
    

In [None]:
# 1, basic http requests
def simple_request(nums):
    for num in nums:
        r = requests.get(f'http://127.0.0.1:5000/item?num={num}')
        #print(r.json())
        
start = perf_counter()
simple_request(range(1, 10000))
stop = perf_counter()
print("time taken:", stop-start)

In [None]:
#1a, use concurrent.future.ThreadPoolExecutor

import concurrent.futures

start = perf_counter()
def async_request(num):
#     r = requests.get(f'http://127.0.0.1:60000/item')
    r = requests.get(f'http://127.0.0.1:5000/item?num={num}')

#     print(r.json())

nums = range(1, 10000)
with concurrent.futures.ThreadPoolExecutor() as executor:
    executor.map(async_request, nums)
stop = perf_counter()
print("time taken:", stop - start)

In [None]:
## make async http server
## use fastapi and httpx

'''
pip install fastapi  
pip install uvicorn


(base) user-2:performance_tricks user$ cat main-httpx.py 
from typing import Union
import random, string

from fastapi import FastAPI

app = FastAPI()


@app.get("/")
async def read_root():
    return {"Hello": "World"}


@app.get("/items/{item_id}")
async def read_item(item_id: int, q: Union[str, None] = None):
    #return {"item_id": item_id, "q": q}
    print(item_id)
    num = ''.join(random.choices(string.ascii_lowercase, k=5))
    return {'data': num, 'num': item_id}
(base) user-2:performance_tricks user$ uvicorn main-httpx:app --reload --port 60000

'''

In [None]:
# 2, use concurrent.future.ThreadPoolExecutor

import concurrent.futures

start = perf_counter()
def async_request(num):
    r = requests.get(f'http://127.0.0.1:60000/items/{num}')
    #print(r.json())

nums = range(1, 10000)
with concurrent.futures.ThreadPoolExecutor() as executor:
    executor.map(async_request, nums)
stop = perf_counter()
print("time taken:", stop - start)

In [None]:
!pip install aiohttp

In [None]:
# 3, use asyncio, 
# run this program on terminal if jupyter notebook not work

"""
(base) user-2:performance_tricks user$ cat asyncio-server.py asyncio-client.py 
from typing import Union
import random, string

from fastapi import FastAPI

app = FastAPI()


@app.get("/")
async def read_root():
    return {"Hello": "World"}


@app.get("/items/{item_id}")
#def read_item(item_id: int, q: Union[str, None] = None):
async def read_item(item_id: int, q: Union[str, None] = None):
    #return {"item_id": item_id, "q": q}
    #print(item_id)
    num = ''.join(random.choices(string.ascii_lowercase, k=5))
    return {'data': num, 'num': item_id}
from time import perf_counter
import asyncio
import aiohttp

async def fetch(s,  url):
    async with s.get(f'http://127.0.0.1:60000/items/{url}') as r:
        if r.status != 200:
            r.raise_for_status()
        return await r.text()


async def fetch_all(s, urls):
    tasks = []
    for url in urls:
        task = asyncio.create_task(fetch(s, url))
        tasks.append(task)

    res = await asyncio.gather(*tasks)
    return res 


async def main():
    urls = range(1, 10000)
    async with aiohttp.ClientSession() as session:
        htmls = await fetch_all(session, urls)
       #         print(htmls)

if __name__ == '__main__':
    start = perf_counter()
    asyncio.run(main())
    stop = perf_counter()
    print('time taken:', stop - start)

(base) user-2:performance_tricks user$ 


(base) user-2:performance_tricks user$ uvicorn asyncio-server:app --reload --port 60000
INFO:     Will watch for changes in these directories: ['/Users/user/Documents/repo/pyclopedia/performance_tricks']
INFO:     Uvicorn running on http://127.0.0.1:60000 (Press CTRL+C to quit)
INFO:     Started reloader process [58792] using WatchFiles
INFO:     Started server process [58794]
INFO:     Waiting for application startup.
INFO:     Application startup complete.


(base) user-2:performance_tricks user$ python asyncio-client.py 
time taken: 5.840775378
(base) user-2:performance_tricks user$ 



(base) user-2:performance_tricks user$ python asyncio-client.py 
time taken: 5.799928292000001


"""

In [None]:
## use aiohttp server and client 
''' 

(base) user-2:performance_tricks user$ cat aiohttp-server.py  aiohttp-client.py 
from aiohttp import web
import random 
import string 

async def handle(request):
    name = request.match_info.get('name', "Anonymous")
    text = "Hello, " + name
    return web.Response(text=text)


#@app.get("/items/{item_id}")
#def read_item(item_id: int, q: Union[str, None] = None):
async def read_item(request):
    #return {"item_id": item_id, "q": q}
    #print(item_id)
    num = ''.join(random.choices(string.ascii_lowercase, k=5))
    return {'data': num, 'num': num}

app = web.Application()
app.add_routes([web.get('/', handle),
                web.get('/item', read_item),
                web.get('/{name}', handle)])


if __name__ == '__main__':
    web.run_app(app)
import aiohttp
import asyncio


from time import perf_counter
import asyncio
import aiohttp

async def fetch(s,  url):
    async with s.get(f'http://127.0.0.1:8080/') as r:
        if r.status != 200:
            r.raise_for_status()
        return await r.text()


async def fetch_all(s, urls):
    tasks = []
    for url in urls:
        task = asyncio.create_task(fetch(s, url))
        tasks.append(task)

    res = await asyncio.gather(*tasks)
    return res

'''
async def main():

    async with aiohttp.ClientSession() as session:
        async with session.get('http://127.0.0.1:8080/') as response:

            print("Status:", response.status)
            print("Content-type:", response.headers['content-type'])

            html = await response.text()
            print("Body:", html[:15], "...")
'''

async def main():
    urls = range(1, 10000)
    async with aiohttp.ClientSession() as session:
        htmls = await fetch_all(session, urls)
       #         print(htmls)


start = perf_counter()
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
stop = perf_counter()
print('time taken:', stop - start)



(base) user-2:performance_tricks user$ python aiohttp-server.py 
======== Running on http://0.0.0.0:8080 ========
(Press CTRL+C to quit)


(base) user-2:performance_tricks user$ python aiohttp-client.py 
time taken: 4.371056760999999
    
    
'''    

In [86]:
# use built-in function to speed up code
# slow way
words = ['a', 'b', 'c']
mylist = []

for word in words:
    mylist.append(word.upper())

mylist

['A', 'B', 'C']

In [87]:
# same as above, better way, 
# map(func, *iterables) -> map
# Return an iterator that applies a function to every item of iterable, yielding the results.
# Make an iterator that computes the function using arguments obtained from the iterable. 
mylist = map(str.upper, words)
list(mylist)

['A', 'B', 'C']

In [88]:
# starmap
# Used instead of map() when argument parameters are already grouped in tuples from a single iterable (the data has been “pre-zipped”). 
from itertools import starmap
nums = [(1,2), (3,4), (5,6)]

def multiply(x, y):
    return x * y 

m = starmap(multiply, nums)
print(list(m))

[2, 12, 30]


In [89]:
# starmap with lambda
from itertools import starmap

arr = [(1,2), (3,4), (5,6)]
res = starmap(lambda x, y: x+y, arr)
print(list(res))

[3, 7, 11]


In [None]:
# string concatenation, 
# don't use "+" creates a new string, then copy the  old string
# join() is faster than "+", 
#''.join("abc")

In [92]:
import time 
start = time.perf_counter()
a = ''
for i in range(100_000_000):
    a.join('a')
end = time.perf_counter()
print(end-start)

27.35930863199974


In [93]:
start = time.perf_counter()
a = ''
for i in range(100_000_000):
    a += 'a'
end = time.perf_counter()
print(end-start)    

28.80989407099969


In [94]:
# write a function to find the intersection of 2 unsorted  arrays
# https://www.youtube.com/watch?v=Bl47ETLB_8Y
# Python programmer vs C programmer (speedrun)
# use set functions 
a = [1,2,3,4,5]
b = [5,4,3,6,8]
c = list(set(a) & set(b))
print(*c, c)




3 4 5 [3, 4, 5]


In [95]:
def intersect(a, b):
    return  set(a) & set(b)

In [96]:
def inter(a,b):
    return set(a) & set(b)

# Does not remove duplicates (you could create a remove duplicate function)  
def interNaive(a,b):
    result = []
    for i in a:
        for j in b:
            if (i == j):
                result.append(i)
    return result
    
a = [ 8, 3, 1, 5]
b = [ 5, 3, 4, 6, 9]
print(f"The intersection is: {inter(a,b)}")
print(f"The intersection is: {interNaive(a,b)}")

The intersection is: {3, 5}
The intersection is: [3, 5]


In [97]:
## use dataclass to save coding time and maintenance
# frozen is immutable set for hashing, good for using key and dictionaries
# order like totalordering

import inspect 
from dataclasses import dataclass, field
from pprint import pprint
from typing import List

# automatically includes mamy base methods
@dataclass(frozen=True, order=True) 
class Account:
    account_id: int
    account_name: str = ""
    notes: List[str] = field(default_factory=list)   # to have class values
        
a = Account(100, 'Alice')
print(a)
# check with functions are implemented in the class
pprint(inspect.getmembers(Account, inspect.isfunction))

Account(account_id=100, account_name='Alice', notes=[])
[('__delattr__',
  <function __create_fn__.<locals>.__delattr__ at 0x7ff6092baa60>),
 ('__eq__', <function __create_fn__.<locals>.__eq__ at 0x7ff5e92e3dc0>),
 ('__ge__', <function __create_fn__.<locals>.__ge__ at 0x7ff6092ba820>),
 ('__gt__', <function __create_fn__.<locals>.__gt__ at 0x7ff6092bae50>),
 ('__hash__', <function __create_fn__.<locals>.__hash__ at 0x7ff6092bab80>),
 ('__init__', <function __create_fn__.<locals>.__init__ at 0x7ff5e92e3280>),
 ('__le__', <function __create_fn__.<locals>.__le__ at 0x7ff6092ba3a0>),
 ('__lt__', <function __create_fn__.<locals>.__lt__ at 0x7ff6092ba790>),
 ('__repr__', <function __create_fn__.<locals>.__repr__ at 0x7ff5e92e34c0>),
 ('__setattr__',
  <function __create_fn__.<locals>.__setattr__ at 0x7ff6092bac10>)]


In [98]:
# don't boiler code youself from scratch like below, it has high coding and maintenance cost

class BasicAccount:
    # init 
    def __init__(self, account_id: int, account_name:  str):
        self.__account_id: int = account_id
        self.__account_name = account_name
        

    @property
    def account_id(self):
        return self.__account_id
    
    @property
    def account_name(self):
        return self.__account_name
    # print out values 
    def __repr__(self):
        return f"{self.__class__.__name__}, {self.__account_id=}, {self.__account_name=}"
    
    # for comparision 
    def __eq__(self, other):
        if other.__class__ is self.__class__:
            return (self.__account_id, self.__account_name) == (other.__account_id, other.__account_name)
        else:
            return NotImplemented
        
    # make this class nice to have it
    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return  NotImplemented
        else:
            return not result 
        
    # need hash if we want to put this into dictionary
    # to make class hashable, we should maek it immutable
    def __hash__(self):
        return hash((self.__class__, self.__account_id, self.__account_name))
    
    # to sort the class, we need le, gt, ge methods below
    def __le__(self, other):
        if other.__class__ is self.__class__:
            return (self.account_id, self.account_name) <= (other.account_id, other, self.account_name)
        else:
            return NotImplemented
        
    def __gt__(self, other):
        if other.__class__ is self.__class__:
            return (self.account_id, self.account_name) > (other.account_id, other, self.account_name)
        else:
            return NotImplemented

    def __ge__(self, other):
        if other.__class__ is self.__class__:
            return (self.account_id, self.account_name) >= (other.account_id, other, self.account_name)
        else:
            return NotImplemented

        

In [99]:
acct = BasicAccount(10, 'Bob')
print(acct)


BasicAccount, self.__account_id=10, self.__account_name='Bob'
