In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
import time
import numpy as np
import parsl
from parsl import *
from ipyparallel import Client
import aws


# Parsl setup shared by every cell below: an IPyParallel-backed executor, the
# DataFlowKernel that the @App decorators are bound to, and the EC2 provider
# whose scale_out() is called by the benchmark cells.
# NOTE(review): DataFlowKernel is presumably supplied by `from parsl import *` — confirm.
executor = parsl.executors.ipp.IPyParallelExecutor()
dfk = DataFlowKernel(executor)
# NOTE(review): relative path, so it is looked up from the kernel's working directory — confirm the file is present there.
provider = aws.EC2Provider('providerconf.json')

In [None]:
import random

@App('python', dfk)
def mc_pi(n):
    """Parsl app: Monte Carlo hit ratio over ``n`` random points in the unit square.

    Draws ``n`` points ``(x, y)`` with ``random.random()`` and returns the
    fraction that satisfy ``x**2 + y**2 <= 1``. That fraction approximates
    pi/4 (not pi), so callers have to scale it themselves. Because of the
    ``@App`` decorator, callers in this notebook read the value through
    ``.result()`` on whatever the call returns.

    Raises ZeroDivisionError when ``n`` is 0.
    """
    # Imported inside the body; presumably so the name resolves wherever the
    # app actually executes — TODO confirm.
    import random
    hits = 0
    for _ in range(n):
        px, py = random.random(), random.random()
        hits += 1 if (px**2 + py**2) <= 1 else 0
    return hits/n

def mc_pi_serial(n):
    """Local (non-Parsl) Monte Carlo hit ratio over ``n`` random points.

    Draws ``n`` points ``(x, y)`` with ``random.random()`` and returns the
    fraction that satisfy ``x**2 + y**2 <= 1``, which approximates pi/4.

    Raises ZeroDivisionError when ``n`` is 0.
    """
    import random
    draw = random.random
    # Operands are evaluated left to right, so x is drawn before y for each point.
    inside = sum(1 for _ in range(n) if (draw()**2 + draw()**2) <= 1)
    return inside/n

In [None]:

# Baseline: time the local serial estimator on one million samples.
# NOTE(review): a name assigned inside a %timeit statement is usually not kept
# in the notebook namespace, so est_pi is probably unavailable afterwards — confirm.
%timeit est_pi = mc_pi_serial(1000000)


In [None]:
# Scaling benchmark: for each target cluster size, grow the cluster, then time
# 10 repetitions of 32 mc_pi tasks (250000 samples each) and record the mean
# wall-clock time per repetition in allres (one entry per cluster size).
clusterOptions = [1,2,4,8,16]
allres = []
for q, size in enumerate(clusterOptions):
    # Only the difference to the previous target size is requested.
    increase = (size - clusterOptions[q-1]) if q > 0 else 1
    provider.scale_out(increase)
    print(increase)
    # Fixed pause after scaling; presumably gives the new nodes time to come up — TODO confirm 180 s is enough.
    time.sleep(180)
    # Collected across ALL repetitions at this size. Resetting this list inside
    # the repetition loop would make the mean below cover only the last run.
    res = []
    for i in range(10):
        # short rest between repetitions
        time.sleep(0.5)
        begin = time.time()
        results = [mc_pi(n) for n in [250000]*32]
        # Each task returns a hit ratio that approximates pi/4, so pi is four
        # times the average ratio over all tasks.
        pi = 4 * sum([result.result() for result in results]) / len(results)
        end = time.time()
        total = (end-begin)
        print(pi,total)
        res.append(total)
    allres.append(np.mean(res))
print(allres)

In [None]:
@App('python', dfk)
def parsl_test_app(sleeptime, in_data, out_data_size=0):
    """Parsl app used as a synthetic task: sleep, then return random data.

    Args:
        sleeptime: seconds passed to ``time.sleep`` before returning.
        in_data: accepted but never read by the body.
        out_data_size: length of the returned array; converted with ``int()``.

    Returns:
        ``np.random.rand(int(out_data_size))``.
    """
    # Imported inside the body; presumably so the names resolve wherever the
    # app actually executes — TODO confirm.
    import os
    import time
    import numpy as np

    time.sleep(sleeptime)
    n_values = int(out_data_size)
    return np.random.rand(n_values)

def test_latency(n):
    """Submit ``n`` zero-sleep ``parsl_test_app`` tasks and time them.

    Args:
        n: number of tasks to submit; 0 is allowed and submits nothing.

    Returns:
        A ``(sent, rt)`` tuple of wall-clock seconds: ``sent`` is the time
        spent submitting all ``n`` tasks, ``rt`` is the time from the first
        submission until every submitted task has delivered its result.
    """
    tic = time.time()
    fus = [parsl_test_app(0,[5],55) for _ in range(n)]
    toc = time.time()
    # Wait on every future rather than only the last one submitted: with more
    # than one worker, an earlier task may still be running when the last one
    # finishes, which would make the round-trip time too short. Iterating also
    # avoids indexing an empty list when n == 0.
    for fu in fus:
        fu.result()
    tac = time.time()
    sent = toc-tic
    rt = tac-tic
    return sent, rt

In [None]:
# Task throughput as a function of cluster size.
# Original author note: "1, 2, 4, 8, 16 t2.small. Do c4.large"
# NOTE(review): this cell only ever scales OUT. If an earlier benchmark cell
# already grew the cluster, the sizes in clusterOptions may not be the real
# node counts; the disabled call below looks like an attempt to shrink first — confirm.
#provider.scale_in(10)
allRates = []
clusterOptions = [1,2,4,8,16]
for previous_size, size in zip([None] + clusterOptions[:-1], clusterOptions):
    # First step requests a single node; later steps request the difference
    # to the previous target size.
    if previous_size is None:
        increase = 1
    else:
        increase = size - previous_size
    print(increase)
    provider.scale_out(increase)
    # Fixed pause after scaling; presumably lets the new nodes come up — TODO confirm.
    time.sleep(180)
    rates = []
    # Seven runs of 300 tasks each at this cluster size.
    for n in [300]*7:
        # short rest between tests
        time.sleep(.5)
        s, rt = test_latency(n)
        submit_rate = n/s
        rates.append(submit_rate)
        # columns: task count, tasks submitted per second, tasks per second round trip
        print( "%4i %6.1f %6.1f" % (n, submit_rate, n/rt))
    # Only the submission rate is averaged and kept.
    allRates.append(np.mean(rates))

In [None]:
# Hard-coded measurements; presumably recorded from an earlier run of the
# benchmark cells. They replace whatever those cells computed in this session.
allres = [0.96068835258483887,
 0.47994661331176758,
 0.26649975776672363,
 0.2669832706451416,
 0.26372933387756348]
allRates = [1594.6386265752353, 1562.5417109307268, 1591.7190755992528, 1584.7605780185415, 1573.3746584762646]

# Submission rate versus node count. clusterOptions must already exist from
# one of the benchmark cells above.
fig, ax1 = plt.subplots()
ax1.plot(clusterOptions, allRates, color='blue')
ax1.set_ylabel("Tasks Submitted/sec", color="blue")
ax1.set_xlabel("Nodes", color="blue")
yticks = np.arange(500.,2000.,100.)
ax1.set_yticks(yticks)
# Axes has no set_ticklabels(); the y-axis variant is set_yticklabels().
ax1.set_yticklabels(yticks)
plt.show()


In [None]:
# Ask the EC2 provider for 8 more nodes (the benchmark cells above pass the
# number of nodes to add) — NOTE(review): confirm scale_out is incremental.
provider.scale_out(8)

In [None]:
import numpy as np
import multiprocessing as mp

# Seed NumPy's global RNG before drawing the sample set below.
np.random.seed(123)

# Generate random 2D-patterns
# 10000 draws from a 2-D Gaussian with zero mean and identity covariance.
mu_vec = np.array([0,0])
cov_mat = np.array([[1,0],[0,1]])
x_2Dgauss = np.random.multivariate_normal(mu_vec, cov_mat, 10000)

from scipy.stats import multivariate_normal
# Reference value: density of a distribution with the same mean and covariance,
# evaluated at the origin, for comparison with the Parzen-window estimates.
var = multivariate_normal(mean=[0,0], cov=[[1,0],[0,1]])
print('actual probability density:', var.pdf([0,0]))

@App('python', dfk)
def parzen_estimation(x_samples, point_x, h):
    """Parsl app: hypercube Parzen-window density estimate at ``point_x``.

    A sample is counted when every coordinate of ``(point_x - sample) / h``
    has absolute value at most 1/2, i.e. when it lies inside the hypercube of
    edge length ``h`` centred on ``point_x``. The estimate is the counted
    fraction divided by the window volume ``h**d``.

    Args:
        x_samples: sequence of 1-D sample vectors with ``d`` coordinates each
            (each one is indexed as ``sample[:, np.newaxis]``).
        point_x: query point as a ``(d, 1)`` column vector.
        h: window edge length.

    Returns:
        ``(h, estimate)``.

    Raises:
        ZeroDivisionError: if ``x_samples`` is empty.
    """
    # Imported inside the body; presumably so the name resolves wherever the
    # app actually executes — TODO confirm.
    import numpy as np
    k_n = 0
    for sample in x_samples:
        x_i = (point_x - sample[:,np.newaxis]) / (h)
        for component in x_i:
            if np.abs(component) > (1/2):
                break
        else: # for/else: runs only when no coordinate fell outside the window
            k_n += 1
    # The window volume is h**d, where d is the number of coordinates. For a
    # (d, 1) column vector that is shape[0]; shape[1] is always 1 and would
    # silently turn the volume into h**1.
    dim = point_x.shape[0]
    return (h, (k_n / len(x_samples)) / (h**dim))

def parzen_estimation_s(x_samples, point_x, h):
    """Local (non-Parsl) hypercube Parzen-window density estimate at ``point_x``.

    A sample is counted when every coordinate of ``(point_x - sample) / h``
    has absolute value at most 1/2, i.e. when it lies inside the hypercube of
    edge length ``h`` centred on ``point_x``. The estimate is the counted
    fraction divided by the window volume ``h**d``.

    Args:
        x_samples: sequence of 1-D sample vectors with ``d`` coordinates each
            (each one is indexed as ``sample[:, np.newaxis]``).
        point_x: query point as a ``(d, 1)`` column vector.
        h: window edge length.

    Returns:
        ``(h, estimate)``.

    Raises:
        ZeroDivisionError: if ``x_samples`` is empty.
    """
    k_n = 0
    for sample in x_samples:
        x_i = (point_x - sample[:,np.newaxis]) / (h)
        for component in x_i:
            if np.abs(component) > (1/2):
                break
        else: # for/else: runs only when no coordinate fell outside the window
            k_n += 1
    # The window volume is h**d, where d is the number of coordinates. For a
    # (d, 1) column vector that is shape[0]; shape[1] is always 1 and would
    # silently turn the volume into h**1.
    dim = point_x.shape[0]
    return (h, (k_n / len(x_samples)) / (h**dim))

def serial(samples, x, widths):
    """Run ``parzen_estimation_s`` locally, once per window width.

    Returns the list of its return values, in the order of ``widths``.
    """
    estimates = []
    for width in widths:
        estimates.append(parzen_estimation_s(samples, x, width))
    return estimates

def multiprocess(samples, x, widths):
    """Run the ``parzen_estimation`` Parsl app once per window width.

    All calls are issued first; only then is ``.result()`` read from each
    returned object, in submission order. The collected values are returned
    as a sorted list.
    """
    pending = []
    for width in widths:
        pending.append(parzen_estimation(samples, x, width))
    return sorted(task.result() for task in pending)

# First pass: estimate the density at the origin for a coarse range of window
# widths and print one line per width.
widths = np.arange(0.1, 1.3, 0.1)
point_x = np.array([[0],[0]])

results = multiprocess(x_2Dgauss, point_x, widths)

for r in results:
    print('h = %s, p(x) = %s' %(r[0], r[1]))


# Benchmark: 100 widths between 1.0 and 1.2 on a freshly drawn sample set.
widths = np.linspace(1.0, 1.2, 100)
import timeit

mu_vec = np.array([0,0])
cov_mat = np.array([[1,0],[0,1]])
n = 10000

x_2Dgauss = np.random.multivariate_normal(mu_vec, cov_mat, n)

# One serial timing followed by four timings of the Parsl version: five entries
# in total, matching the five bar labels hard-coded in plot_results().
benchmarks = []

benchmarks.append(timeit.Timer('serial(x_2Dgauss, point_x, widths)',
            'from __main__ import serial, x_2Dgauss, point_x, widths').timeit(number=1))

# A named loop variable is used on purpose: binding `_` at notebook top level
# would shadow IPython's last-output variable.
for parsl_run in range(4):
    benchmarks.append(timeit.Timer('multiprocess( x_2Dgauss, point_x, widths)',
                'from __main__ import multiprocess, x_2Dgauss, point_x, widths').timeit(number=1))

import platform

def print_sysinfo():

    print('\nPython version  :', platform.python_version())
    print('compiler        :', platform.python_compiler())

    print('\nsystem     :', platform.system())
    print('release    :', platform.release())
    print('machine    :', platform.machine())
    print('processor  :', platform.processor())
    print('CPU count  :', mp.cpu_count())
    print('interpreter:', platform.architecture()[0])
    print('\n\n')
from matplotlib import pyplot as plt
import numpy as np

def plot_results():
    """Draw a horizontal bar chart of the timings in the global ``benchmarks``.

    Each bar is one entry of ``benchmarks``; the text next to it is
    ``benchmarks[0] / entry`` formatted as a percentage, and a dashed vertical
    line marks ``benchmarks[0]``. The x-axis label also reads the global ``n``.

    NOTE(review): the five tick labels are hard-coded, so ``benchmarks`` is
    expected to hold exactly five timings — confirm against the benchmark cell.
    """
    labels = ['serial', '2', '3', '4', '6']
    upper_y = len(benchmarks)+0.5

    plt.figure(figsize=(10,8))

    # bars, one row per timing
    rows = np.arange(len(benchmarks))
    plt.yticks(rows, labels, fontsize=16)
    drawn = plt.barh(rows, benchmarks,
             align='center', alpha=0.4, color='g')

    # per-bar annotation: time of the first entry relative to this entry
    for bar, seconds in zip(drawn, benchmarks):
        ratio_text = '{0:.2%}'.format(benchmarks[0]/seconds)
        text_x = bar.get_width() + 2
        text_y = bar.get_y() + bar.get_height()/2
        plt.text(text_x, text_y, ratio_text,
                ha='center', va='bottom', fontsize=12)

    # axis labels, title and limits
    plt.xlabel('time in seconds for n=%s' %n, fontsize=14)
    plt.ylabel('number of processes', fontsize=14)
    plt.title('Serial vs. Multiprocessing via Parzen-window estimation', fontsize=18)
    plt.ylim([-1, upper_y])
    plt.xlim([0,max(benchmarks)*1.1])
    plt.vlines(benchmarks[0], -1, upper_y, linestyles='dashed')
    plt.grid()

    plt.show()


In [None]:
# Render the bar chart of the timings collected in `benchmarks`.
plot_results()