# Parallelized version

In [1]:
import math
import random
import numpy as np
import time
from functools import reduce

## Initializing spark

In [2]:
from pyspark import SparkConf, SparkContext

In [3]:
conf = SparkConf().setAppName("appName").setMaster("local[*]")
sc = SparkContext(conf=conf)

22/12/16 15:11:03 WARN Utils: Your hostname, Alexs-MacBook-Air.local resolves to a loopback address: 127.0.0.1; using 192.168.1.132 instead (on interface en0)
22/12/16 15:11:03 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


22/12/16 15:11:04 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


## Functions

### Auxiliar functions

In [4]:
def str_to_number_list(line):
    numbers = line.replace(" ","").split(",")
    numbers = [float(number) for number in numbers]
    
    # Separate X and y
    numbers = (numbers[1:], numbers[0])
    
    return numbers

In [5]:
def trainTestSplit(dataset):
    train = dataset.filter(lambda x: x[2]==1)
    train = train.map(lambda x: (x[0], x[1]))
    test = dataset.filter(lambda x: x[2]==0)
    test = test.map(lambda x: (x[0], x[1]))
    return (train, test)

In [6]:
def parallelAssign2cluster_slower(x):
    global c
    min_distance = np.Inf
    centroid = 0
    for it, cen in enumerate(c.value):
        d = 0
        for y,z in zip(x[0],cen):
            d += abs(y-z)
        if d <= min_distance:
            min_distance = d
            centroid = it
        
    return (centroid, x[0])

In [7]:
def parallelKMeans_slower(data, K, n_iter):
    
    data_len = sc.broadcast(data.count())
    
    global c
    c = data.takeSample(0, K)
    c = [datum[0] for datum in c]
    c = sc.broadcast(c)
    
    for iter in range(n_iter):
        data_asigned = data.map(parallelAssign2cluster_slower)
        c = sc.broadcast(data_asigned.reduceByKey(lambda x,y: [q+w for q,w in zip(x,y)]) \
            .map(lambda x: [a/data_len.value for a in x[1]]).collect())
    
    return c.value

### Mandatory functions

In [8]:
def parallelReadFile(filename):
    dataset = sc.textFile(filename)
    
    # Extract the header from the data
    header = dataset.first()
    dataset = dataset.filter(lambda row: row != header)
    
    dataset = dataset.map(str_to_number_list)
    return dataset.sample(False, 1)

In [9]:
def parallelAssign2cluster(x):
    global c
    min_distance = np.Inf
    centroid = 0
    for it, cen in enumerate(c.value):
        d = reduce(lambda x,y:abs(x)+abs(y), [y-z for y,z in zip(x[0],cen)])
        if d <= min_distance:
            min_distance = d
            centroid = it
        
    return (centroid, x[0])

In [10]:
def parallelKMeans(data, K, n_iter):
    
    data_len = sc.broadcast(data.count())
    
    global c
    c = data.takeSample(0, K)
    c = [datum[0] for datum in c]
    c = sc.broadcast(c)
    
    for iter in range(n_iter):
        data_asigned = data.map(parallelAssign2cluster)
        len_per_cluster = sc.broadcast(data_asigned.countByKey())
        c = sc.broadcast(data_asigned.reduceByKey(lambda x,y: [q+w for q,w in zip(x,y)]) \
            .map(lambda x: [a/len_per_cluster.value[x[0]] for a in x[1]]).collect())
    
    return c.value

## Testing

In [11]:
RDD_Xy = parallelReadFile("../data/tot_mnist_shuf.csv")
RDD_Xy.count()

                                                                                

70000

In [12]:
centroids = parallelKMeans(RDD_Xy, 3, 2)

                                                                                

In [13]:
elapsed_times = []

core_list = list(range(1,9,1))
cluster_list = [3,5,7,8,9,10,11]
functions = [parallelKMeans, parallelKMeans_slower]

for cores in core_list:
    for clusters in cluster_list:
        for Kmeans in functions:
            global sc
            sc.stop()
            conf = SparkConf().setAppName("appName").setMaster(f"local[{cores}]")
            sc = SparkContext(conf=conf)

            print(f"---------- Starting execution with {cores} cores and {clusters} clusters ----------")

            start = time.time()

            # ---------- Execution ----------

            RDD_Xy = parallelReadFile("../data/tot_mnist_shuf.csv")
            Kmeans(RDD_Xy, clusters, 3)

            # ---------- Execution ----------

            end = time.time()
            print(f"---------- Finished execution with {cores} cores and {clusters} clusters ----------")
            elapsed_time = end - start
            elapsed_times.append(
                {
                    "Number of Cores": cores,
                    "Number of Clusters": clusters,
                    "Function used": Kmeans,
                    "elapsed_time": elapsed_time
                }
            )
            print(f"Elapsed time for cores {cores} is {elapsed_time} seconds")

---------- Starting execution with 1 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 3 clusters ----------
Elapsed time for cores 1 is 279.1488049030304 seconds
---------- Starting execution with 1 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 3 clusters ----------
Elapsed time for cores 1 is 112.05107283592224 seconds
---------- Starting execution with 1 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 5 clusters ----------
Elapsed time for cores 1 is 429.0091631412506 seconds
---------- Starting execution with 1 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 5 clusters ----------
Elapsed time for cores 1 is 148.6441731452942 seconds
---------- Starting execution with 1 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 7 clusters ----------
Elapsed time for cores 1 is 569.6051461696625 seconds
---------- Starting execution with 1 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 7 clusters ----------
Elapsed time for cores 1 is 170.84679698944092 seconds
---------- Starting execution with 1 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 8 clusters ----------
Elapsed time for cores 1 is 615.4648699760437 seconds
---------- Starting execution with 1 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 8 clusters ----------
Elapsed time for cores 1 is 194.22378706932068 seconds
---------- Starting execution with 1 cores and 9 clusters ----------




22/12/16 16:12:50 WARN HeartbeatReceiver: Removing executor driver with no recent heartbeats: 750341 ms exceeds timeout 120000 ms
22/12/16 16:12:51 WARN SparkContext: Killing executors is not supported by current scheduler.


                                                                                

---------- Finished execution with 1 cores and 9 clusters ----------
Elapsed time for cores 1 is 1392.9751591682434 seconds
---------- Starting execution with 1 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 9 clusters ----------
Elapsed time for cores 1 is 207.80528092384338 seconds
---------- Starting execution with 1 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 10 clusters ----------
Elapsed time for cores 1 is 771.407557964325 seconds
---------- Starting execution with 1 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 10 clusters ----------
Elapsed time for cores 1 is 229.60308599472046 seconds
---------- Starting execution with 1 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 11 clusters ----------
Elapsed time for cores 1 is 845.0362830162048 seconds
---------- Starting execution with 1 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 1 cores and 11 clusters ----------
Elapsed time for cores 1 is 236.55388808250427 seconds
---------- Starting execution with 2 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 3 clusters ----------
Elapsed time for cores 2 is 173.09139013290405 seconds
---------- Starting execution with 2 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 3 clusters ----------
Elapsed time for cores 2 is 69.3160457611084 seconds
---------- Starting execution with 2 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 5 clusters ----------
Elapsed time for cores 2 is 256.0083341598511 seconds
---------- Starting execution with 2 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 5 clusters ----------
Elapsed time for cores 2 is 85.20837712287903 seconds
---------- Starting execution with 2 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 7 clusters ----------
Elapsed time for cores 2 is 309.87539291381836 seconds
---------- Starting execution with 2 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 7 clusters ----------
Elapsed time for cores 2 is 107.90942692756653 seconds
---------- Starting execution with 2 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 8 clusters ----------
Elapsed time for cores 2 is 355.1063039302826 seconds
---------- Starting execution with 2 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 8 clusters ----------
Elapsed time for cores 2 is 122.28446292877197 seconds
---------- Starting execution with 2 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 9 clusters ----------
Elapsed time for cores 2 is 3691.2535531520844 seconds
---------- Starting execution with 2 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 9 clusters ----------
Elapsed time for cores 2 is 118.28654384613037 seconds
---------- Starting execution with 2 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 10 clusters ----------
Elapsed time for cores 2 is 437.4584047794342 seconds
---------- Starting execution with 2 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 10 clusters ----------
Elapsed time for cores 2 is 136.08910012245178 seconds
---------- Starting execution with 2 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 11 clusters ----------
Elapsed time for cores 2 is 444.30815267562866 seconds
---------- Starting execution with 2 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 2 cores and 11 clusters ----------
Elapsed time for cores 2 is 147.78764605522156 seconds
---------- Starting execution with 3 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 3 clusters ----------
Elapsed time for cores 3 is 152.3842990398407 seconds
---------- Starting execution with 3 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 3 clusters ----------
Elapsed time for cores 3 is 63.698806047439575 seconds
---------- Starting execution with 3 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 5 clusters ----------
Elapsed time for cores 3 is 236.87196683883667 seconds
---------- Starting execution with 3 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 5 clusters ----------
Elapsed time for cores 3 is 84.26367378234863 seconds
---------- Starting execution with 3 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 7 clusters ----------
Elapsed time for cores 3 is 290.1574468612671 seconds
---------- Starting execution with 3 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 7 clusters ----------
Elapsed time for cores 3 is 98.41826105117798 seconds
---------- Starting execution with 3 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 8 clusters ----------
Elapsed time for cores 3 is 353.71015191078186 seconds
---------- Starting execution with 3 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 8 clusters ----------
Elapsed time for cores 3 is 105.4617669582367 seconds
---------- Starting execution with 3 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 9 clusters ----------
Elapsed time for cores 3 is 382.1815969944 seconds
---------- Starting execution with 3 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 9 clusters ----------
Elapsed time for cores 3 is 114.29436707496643 seconds
---------- Starting execution with 3 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 10 clusters ----------
Elapsed time for cores 3 is 390.7089629173279 seconds
---------- Starting execution with 3 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 10 clusters ----------
Elapsed time for cores 3 is 122.08105707168579 seconds
---------- Starting execution with 3 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 11 clusters ----------
Elapsed time for cores 3 is 455.59767174720764 seconds
---------- Starting execution with 3 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 3 cores and 11 clusters ----------
Elapsed time for cores 3 is 131.15866017341614 seconds
---------- Starting execution with 4 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 3 clusters ----------
Elapsed time for cores 4 is 101.42005825042725 seconds
---------- Starting execution with 4 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 3 clusters ----------
Elapsed time for cores 4 is 44.050820112228394 seconds
---------- Starting execution with 4 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 5 clusters ----------
Elapsed time for cores 4 is 159.6912260055542 seconds
---------- Starting execution with 4 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 5 clusters ----------
Elapsed time for cores 4 is 56.523762941360474 seconds
---------- Starting execution with 4 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 7 clusters ----------
Elapsed time for cores 4 is 225.11983633041382 seconds
---------- Starting execution with 4 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 7 clusters ----------
Elapsed time for cores 4 is 74.71573114395142 seconds
---------- Starting execution with 4 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 8 clusters ----------
Elapsed time for cores 4 is 271.8269989490509 seconds
---------- Starting execution with 4 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 8 clusters ----------
Elapsed time for cores 4 is 85.71294689178467 seconds
---------- Starting execution with 4 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 9 clusters ----------
Elapsed time for cores 4 is 296.2749547958374 seconds
---------- Starting execution with 4 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 9 clusters ----------
Elapsed time for cores 4 is 94.88893103599548 seconds
---------- Starting execution with 4 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 10 clusters ----------
Elapsed time for cores 4 is 325.954204082489 seconds
---------- Starting execution with 4 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 10 clusters ----------
Elapsed time for cores 4 is 95.01070094108582 seconds
---------- Starting execution with 4 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 11 clusters ----------
Elapsed time for cores 4 is 384.4370319843292 seconds
---------- Starting execution with 4 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 4 cores and 11 clusters ----------
Elapsed time for cores 4 is 128.55190801620483 seconds
---------- Starting execution with 5 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 3 clusters ----------
Elapsed time for cores 5 is 128.96499180793762 seconds
---------- Starting execution with 5 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 3 clusters ----------
Elapsed time for cores 5 is 52.5340211391449 seconds
---------- Starting execution with 5 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 5 clusters ----------
Elapsed time for cores 5 is 183.7079770565033 seconds
---------- Starting execution with 5 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 5 clusters ----------
Elapsed time for cores 5 is 65.25269293785095 seconds
---------- Starting execution with 5 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 7 clusters ----------
Elapsed time for cores 5 is 241.96842694282532 seconds
---------- Starting execution with 5 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 7 clusters ----------
Elapsed time for cores 5 is 81.39327716827393 seconds
---------- Starting execution with 5 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 8 clusters ----------
Elapsed time for cores 5 is 277.8914442062378 seconds
---------- Starting execution with 5 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 8 clusters ----------
Elapsed time for cores 5 is 90.06709456443787 seconds
---------- Starting execution with 5 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 9 clusters ----------
Elapsed time for cores 5 is 286.05998396873474 seconds
---------- Starting execution with 5 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 9 clusters ----------
Elapsed time for cores 5 is 97.21924185752869 seconds
---------- Starting execution with 5 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 10 clusters ----------
Elapsed time for cores 5 is 315.15162992477417 seconds
---------- Starting execution with 5 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 10 clusters ----------
Elapsed time for cores 5 is 93.41108393669128 seconds
---------- Starting execution with 5 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 11 clusters ----------
Elapsed time for cores 5 is 360.19727897644043 seconds
---------- Starting execution with 5 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 5 cores and 11 clusters ----------
Elapsed time for cores 5 is 99.38373398780823 seconds
---------- Starting execution with 6 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 3 clusters ----------
Elapsed time for cores 6 is 118.82088685035706 seconds
---------- Starting execution with 6 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 3 clusters ----------
Elapsed time for cores 6 is 52.29796290397644 seconds
---------- Starting execution with 6 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 5 clusters ----------
Elapsed time for cores 6 is 168.37629008293152 seconds
---------- Starting execution with 6 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 5 clusters ----------
Elapsed time for cores 6 is 59.401636838912964 seconds
---------- Starting execution with 6 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 7 clusters ----------
Elapsed time for cores 6 is 199.05544209480286 seconds
---------- Starting execution with 6 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 7 clusters ----------
Elapsed time for cores 6 is 76.7110869884491 seconds
---------- Starting execution with 6 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 8 clusters ----------
Elapsed time for cores 6 is 231.69025874137878 seconds
---------- Starting execution with 6 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 8 clusters ----------
Elapsed time for cores 6 is 77.91008305549622 seconds
---------- Starting execution with 6 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 9 clusters ----------
Elapsed time for cores 6 is 278.30171489715576 seconds
---------- Starting execution with 6 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 9 clusters ----------
Elapsed time for cores 6 is 82.29273772239685 seconds
---------- Starting execution with 6 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 10 clusters ----------
Elapsed time for cores 6 is 308.61515188217163 seconds
---------- Starting execution with 6 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 10 clusters ----------
Elapsed time for cores 6 is 96.78138303756714 seconds
---------- Starting execution with 6 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 11 clusters ----------
Elapsed time for cores 6 is 312.47967314720154 seconds
---------- Starting execution with 6 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 6 cores and 11 clusters ----------
Elapsed time for cores 6 is 104.27900314331055 seconds
---------- Starting execution with 7 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 3 clusters ----------
Elapsed time for cores 7 is 107.90338611602783 seconds
---------- Starting execution with 7 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 3 clusters ----------
Elapsed time for cores 7 is 43.31272602081299 seconds
---------- Starting execution with 7 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 5 clusters ----------
Elapsed time for cores 7 is 167.49843788146973 seconds
---------- Starting execution with 7 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 5 clusters ----------
Elapsed time for cores 7 is 75.41898012161255 seconds
---------- Starting execution with 7 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 7 clusters ----------
Elapsed time for cores 7 is 220.31863498687744 seconds
---------- Starting execution with 7 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 7 clusters ----------
Elapsed time for cores 7 is 75.25219106674194 seconds
---------- Starting execution with 7 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 8 clusters ----------
Elapsed time for cores 7 is 266.49133491516113 seconds
---------- Starting execution with 7 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 8 clusters ----------
Elapsed time for cores 7 is 84.30078673362732 seconds
---------- Starting execution with 7 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 9 clusters ----------
Elapsed time for cores 7 is 285.64027309417725 seconds
---------- Starting execution with 7 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 9 clusters ----------
Elapsed time for cores 7 is 93.1087818145752 seconds
---------- Starting execution with 7 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 10 clusters ----------
Elapsed time for cores 7 is 333.00839591026306 seconds
---------- Starting execution with 7 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 10 clusters ----------
Elapsed time for cores 7 is 105.56222891807556 seconds
---------- Starting execution with 7 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 11 clusters ----------
Elapsed time for cores 7 is 2362.358920097351 seconds
---------- Starting execution with 7 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 7 cores and 11 clusters ----------
Elapsed time for cores 7 is 92.79925417900085 seconds
---------- Starting execution with 8 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 8 cores and 3 clusters ----------
Elapsed time for cores 8 is 102.98227405548096 seconds
---------- Starting execution with 8 cores and 3 clusters ----------


                                                                                

---------- Finished execution with 8 cores and 3 clusters ----------
Elapsed time for cores 8 is 43.920774936676025 seconds
---------- Starting execution with 8 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 8 cores and 5 clusters ----------
Elapsed time for cores 8 is 168.0766899585724 seconds
---------- Starting execution with 8 cores and 5 clusters ----------


                                                                                

---------- Finished execution with 8 cores and 5 clusters ----------
Elapsed time for cores 8 is 58.64411807060242 seconds
---------- Starting execution with 8 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 8 cores and 7 clusters ----------
Elapsed time for cores 8 is 220.87429308891296 seconds
---------- Starting execution with 8 cores and 7 clusters ----------


                                                                                

---------- Finished execution with 8 cores and 7 clusters ----------
Elapsed time for cores 8 is 79.86669516563416 seconds
---------- Starting execution with 8 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 8 cores and 8 clusters ----------
Elapsed time for cores 8 is 4038.9029018878937 seconds
---------- Starting execution with 8 cores and 8 clusters ----------


                                                                                

---------- Finished execution with 8 cores and 8 clusters ----------
Elapsed time for cores 8 is 1914.1870930194855 seconds
---------- Starting execution with 8 cores and 9 clusters ----------


[Stage 11:>                                                         (0 + 4) / 4]

22/12/17 01:39:18 WARN HeartbeatReceiver: Removing executor driver with no recent heartbeats: 905820 ms exceeds timeout 120000 ms
22/12/17 01:39:18 WARN SparkContext: Killing executors is not supported by current scheduler.


                                                                                

---------- Finished execution with 8 cores and 9 clusters ----------
Elapsed time for cores 8 is 4048.4058270454407 seconds
---------- Starting execution with 8 cores and 9 clusters ----------


                                                                                

---------- Finished execution with 8 cores and 9 clusters ----------
Elapsed time for cores 8 is 986.861396074295 seconds
---------- Starting execution with 8 cores and 10 clusters ----------


[Stage 5:>                                                          (0 + 4) / 4]

22/12/17 02:27:25 WARN HeartbeatReceiver: Removing executor driver with no recent heartbeats: 906913 ms exceeds timeout 120000 ms
22/12/17 02:27:25 WARN SparkContext: Killing executors is not supported by current scheduler.


                                                                                

---------- Finished execution with 8 cores and 10 clusters ----------
Elapsed time for cores 8 is 4965.233966827393 seconds
---------- Starting execution with 8 cores and 10 clusters ----------


                                                                                

---------- Finished execution with 8 cores and 10 clusters ----------
Elapsed time for cores 8 is 1040.8048272132874 seconds
---------- Starting execution with 8 cores and 11 clusters ----------


                                                                                

---------- Finished execution with 8 cores and 11 clusters ----------
Elapsed time for cores 8 is 4871.529038906097 seconds
---------- Starting execution with 8 cores and 11 clusters ----------




---------- Finished execution with 8 cores and 11 clusters ----------
Elapsed time for cores 8 is 1996.0898070335388 seconds


                                                                                

In [14]:
elapsed_times

[{'Number of Cores': 1,
  'Number of Clusters': 3,
  'Function used': <function __main__.parallelKMeans(data, K, n_iter)>,
  'elapsed_time': 279.1488049030304},
 {'Number of Cores': 1,
  'Number of Clusters': 3,
  'Function used': <function __main__.parallelKMeans_slower(data, K, n_iter)>,
  'elapsed_time': 112.05107283592224},
 {'Number of Cores': 1,
  'Number of Clusters': 5,
  'Function used': <function __main__.parallelKMeans(data, K, n_iter)>,
  'elapsed_time': 429.0091631412506},
 {'Number of Cores': 1,
  'Number of Clusters': 5,
  'Function used': <function __main__.parallelKMeans_slower(data, K, n_iter)>,
  'elapsed_time': 148.6441731452942},
 {'Number of Cores': 1,
  'Number of Clusters': 7,
  'Function used': <function __main__.parallelKMeans(data, K, n_iter)>,
  'elapsed_time': 569.6051461696625},
 {'Number of Cores': 1,
  'Number of Clusters': 7,
  'Function used': <function __main__.parallelKMeans_slower(data, K, n_iter)>,
  'elapsed_time': 170.84679698944092},
 {'Number o