In [23]:
## Sync Execution

In [24]:
import time 

def basic_func(x):
    """Classify ``x`` as 'zero', 'even' or 'odd' after a two-second pause.

    The ``time.sleep(2)`` call stands in for slow work so that the execution
    strategies compared in this notebook have something to wait on.
    """
    time.sleep(2)
    if x == 0:
        return 'zero'
    if x % 2 == 0:
        return 'even'
    return 'odd'

# Baseline: run the ten slow calls one after another and time the whole loop.
# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments the way a difference of time.time() values can.
starttime = time.perf_counter()

# Collect the output lines and join them once instead of growing a string
# with += on every iteration.
lines = []
for i in range(0, 10):
    y = i * i
    results = basic_func(y)  # each call blocks until basic_func returns
    lines.append(str(i) + ' squared results in a/an ' + results + ' number \n')
final_results = ''.join(lines)

print(final_results)
print('That took {} seconds'.format(time.perf_counter() - starttime))


0 squared results in a/an zero number 
1 squared results in a/an odd number 
2 squared results in a/an even number 
3 squared results in a/an odd number 
4 squared results in a/an even number 
5 squared results in a/an odd number 
6 squared results in a/an even number 
7 squared results in a/an odd number 
8 squared results in a/an even number 
9 squared results in a/an odd number 

That took 20.020225048065186 seconds


In [25]:
## Multiprocessing Execution

In [26]:
import time
import multiprocessing 

def basic_func(x):
    """Sleep for two seconds, then describe ``x`` as 'zero', 'even' or 'odd'."""
    time.sleep(2)
    label = 'odd'
    if x == 0:
        label = 'zero'
    elif x % 2 == 0:
        label = 'even'
    return label

def multiprocessing_func(x):
    """Print whether ``x`` squared is a zero, even or odd number.

    Squares ``x``, classifies the square with ``basic_func`` and prints one
    sentence describing the result. Nothing is returned.
    """
    parity = basic_func(x * x)
    message = '{} squared results in a/an {} number'.format(x, parity)
    print(message)
                                                
if __name__ == '__main__':
    # NOTE(review): with the "spawn" start method, child processes must be able
    # to import the target function; functions defined interactively in a
    # notebook may not satisfy that -- confirm on the platform you run this on.

    # perf_counter() is monotonic, so the elapsed time is not affected by
    # system clock adjustments the way a difference of time.time() values is.
    starttime = time.perf_counter()

    # Launch one worker process per input so the slow calls overlap.
    processes = []
    for i in range(0, 10):
        p = multiprocessing.Process(target=multiprocessing_func, args=(i,))
        processes.append(p)
        p.start()

    # Block until every worker has finished before stopping the clock.
    for process in processes:
        process.join()

    print('That took {} seconds'.format(time.perf_counter() - starttime))


0 squared results in a/an zero number
1 squared results in a/an odd number
2 squared results in a/an even number
3 squared results in a/an odd number
4 squared results in a/an even number
5 squared results in a/an odd number
6 squared results in a/an even number
7 squared results in a/an odd number
8 squared results in a/an even number
9 squared results in a/an odd number
That took 2.1266958713531494 seconds


In [27]:
## Dask Execution

In [28]:
import time 
from dask import delayed

def basic_func(x):
    """Pause for two seconds and return 'zero', 'even' or 'odd' for ``x``."""
    time.sleep(2)
    return 'zero' if x == 0 else ('even' if x % 2 == 0 else 'odd')

# perf_counter() is monotonic, so the elapsed time is not affected by system
# clock adjustments the way a difference of time.time() values is.
starttime = time.perf_counter()

# Wrapping basic_func makes each call record a task instead of running it.
delayed_basic_func = delayed(basic_func)

# Build one lazy output line per input. Concatenating a str with a Delayed
# yields another Delayed, so no call to basic_func has run at this point.
delayed_lines = []
for i in range(0, 10):
    y = i * i
    delayed_results = delayed_basic_func(y)  # or delayed(basic_func)(y)
    delayed_lines.append(str(i) + ' squared results in a/an ' + delayed_results + ' number \n')

# Join the lines lazily too, so final_results is always a Delayed (even for an
# empty list) rather than a str that only becomes one via operator overloading.
final_results = delayed(''.join)(delayed_lines)

# compute() executes the whole graph; the independent basic_func tasks are
# free to run concurrently.
print(final_results.compute())
print('That took {} seconds'.format(time.perf_counter() - starttime))



0 squared results in a/an zero number 
1 squared results in a/an odd number 
2 squared results in a/an even number 
3 squared results in a/an odd number 
4 squared results in a/an even number 
5 squared results in a/an odd number 
6 squared results in a/an even number 
7 squared results in a/an odd number 
8 squared results in a/an even number 
9 squared results in a/an odd number 

That took 2.017840623855591 seconds


In [None]:
## NumPy Arrays

In [29]:
%%time
import numpy as np 
# Draw a 20000 x 20000 matrix of normal samples (mean 10, std 0.1) eagerly in
# memory; as float64 that is roughly 3.2 GB. No seed is set in this cell, so
# the printed values differ from run to run.
x = np.random.normal(loc=10, scale=0.1, size=(20000, 20000))
# Column means: reduce over the row axis.
y = np.mean(x, axis=0)
print(y)


[ 9.99960599 10.0004581   9.99946486 ...  9.99990647  9.99971249
  9.99900106]
CPU times: user 38.7 s, sys: 6.53 s, total: 45.3 s
Wall time: 39.5 s


In [None]:
## Dask Arrays

In [4]:
%%time 
import dask.array as da 
# Same computation on a Dask array split into 1000 x 1000 chunks: a 20 x 20
# grid of chunks, about 8 MB each (assuming the default float64 dtype).
x = da.random.normal(loc=10, scale=0.1, size=(20000, 20000), chunks=(1000, 1000))
# Alternatively, wrap an existing NumPy array as a Dask array with
# x = da.from_array(nparray, chunks=(1000,))
# NumPy-style operations only describe the work; compute() executes it.
y = x.mean(axis=0)
print(y.compute())


[ 9.99952498  9.99901692 10.00107679 ... 10.00020655  9.99928667
  9.99890999]
CPU times: user 44.4 s, sys: 44.2 s, total: 1min 28s
Wall time: 2.32 s


In [None]:
## Pandas Dataframes

In [None]:
import os
from glob import glob
import pandas as pd

def mean_dep_delay_by_origin(filenames):
    """Return the mean ``DepDelay`` per ``Origin`` across several CSV files.

    Each file is read and reduced to per-origin sums and counts on its own, so
    only one file is held in memory at a time. The partial results are then
    combined by origin label.

    Parameters
    ----------
    filenames : iterable of str
        Paths of CSV files that each contain ``Origin`` and ``DepDelay`` columns.

    Returns
    -------
    pandas.Series
        Mean departure delay indexed by origin; empty when ``filenames`` is empty.
    """
    sums = []
    counts = []
    for fn in filenames:
        # Only the two columns used below are parsed.
        df = pd.read_csv(fn, usecols=['Origin', 'DepDelay'])
        by_origin = df.groupby('Origin')
        sums.append(by_origin.DepDelay.sum())
        counts.append(by_origin.DepDelay.count())

    if not sums:
        # No input files: return an empty result instead of dividing 0 by 0.
        return pd.Series(dtype='float64', name='DepDelay',
                         index=pd.Index([], name='Origin'))

    # Adding the Series with "+" aligns on the index and yields NaN for any
    # origin missing from one of the files; concatenating and re-grouping by
    # the index label adds up whatever partial results exist for each origin.
    total_delays = pd.concat(sums).groupby(level=0).sum()
    n_flights = pd.concat(counts).groupby(level=0).sum()
    return total_delays / n_flights


filenames = sorted(glob(os.path.join('data', 'nycflights', '*.csv')))

# Combine intermediates to get total mean-delay-per-origin
mean = mean_dep_delay_by_origin(filenames)
print(mean)

In [None]:
## Dask Dataframes

In [None]:
import dask.dataframe as dd
# One Dask dataframe over every CSV file matched by the glob pattern.
df = dd.read_csv("data/nycflights/*.csv")
# Describe the per-origin mean departure delay; evaluation is deferred until
# compute() is called.
mean = df.groupby("Origin")["DepDelay"].mean()
print(mean.compute())
Wall time: 2.58 s