# Transform
## Profile 
Objective of this notebook is to determine the optimal number of partitions vis-a-vis dataset size, number of cores, and spark memory requirements

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
from deepcvr.data.transform import CommonFeatureTransformer, CoreFeatureTransformer

In [2]:
n_partitions = np.linspace(18,3000,20)
cores = 18
task_ids = np.arange(1,len(n_partitions)+1)
task_name = "CommonFeatureTransform"
filepath = "tests/data/preprocessed/train/cvr_common_features_train.csv"


In [3]:
df = pd.read_csv(filepath)
df.head()

Unnamed: 0,common_features_index,num_features,features_list
0,023a8f5b7b8a3348,1052,110_1414381141.09861110_1418460592.07944...
1,030dab7c09c9213d,748,150_1438980633.37304150_1439196042.03693...
2,05b3fd32a3e72c87,852,127_1434944021.09861127_1438180851.09861...
3,09ed88afc2780752,459,150_1439081542.19722150_1438815952.99987...
4,0b7a30a3cacee086,459,150_1439261452.6390612134386581.0122343...


In [4]:
def profile(task_id, task_name, cores, n_partitions):
    start = datetime.now()
    params = {'cores': cores, 'n_partitions': n_partitions}
    x4m = CommonFeatureTransformer(task_id, task_name, params)
    x4m.execute(df)
    end = datetime.now()
    duration = end - start
    duration = duration.total_seconds()
    d = {'task_id': task_id, 'n_partitions': n_partitions, 'partition_size': df.shape[0]/ n_partitions, 'elapsed': duration}
    profile = pd.DataFrame(data=d, index=[0])
    return profile

In [5]:
profiles = pd.DataFrame()
for i in range(20):
    p  = profile(task_id=i, task_name=task_name, cores=cores, n_partitions=n_partitions[i])
    profiles = pd.concat([profiles,p], axis=0)
profiles


INFO:operator:CommonFeatureTransformer.execute called with dict_values([0, 'CommonFeatureTransform', {'cores': 18, 'n_partitions': 18.0}, 18])
DEBUG:deepcvr.data.transform:	Create Spark context
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
DEBUG:py4j.java_gateway:GatewayClient.address is deprecated and will be removed in version 1.0. Use GatewayParameters instead.
DEBUG:py4j.clientserver:Command to send: A
dbd00688f8a55f42d0b153c587d720dc4decde06405e5a35ce2b047820ca0a95

DEBUG:py4j.clientserver:Answer received: !yv
DEBUG:py4j.clientserver:Command to send: j
i
rj
org.apache.spark.SparkConf
e

DEBUG:py4j.clientserver:Answer received: !yv
DEBUG:py4j.clientserver:Command to send: j
i
rj
org.apache.spark.api.java.*
e

DEBUG:py4j.clientserver:Answer received: !yv
DEBUG:py4j.clientserver:Command to send: j
i
rj
org.apache.spark.api.python.*
e

DEBUG:py4j.clientserver:Answer received: !yv
DEBUG:py4j.clientserve

	Task 0:	CommonFeatureTransform complete.	Duration: 22.59 seconds.


DEBUG:py4j.clientserver:Answer received: !yro91
DEBUG:py4j.clientserver:Command to send: i
org.apache.spark.api.python.PythonRDD
ro91
ro90
bTrue
bFalse
e

DEBUG:py4j.clientserver:Answer received: !yro92
DEBUG:py4j.clientserver:Command to send: c
o92
asJavaRDD
e

DEBUG:py4j.clientserver:Answer received: !yro93
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.api.python.SerDeUtil
pythonToJava
ro93
bTrue
e

DEBUG:py4j.clientserver:Answer received: !yro94
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.api.python.SerDeUtil
toJavaArray
ro94
e

DEBUG:py4j.clientserver:Answer received: !yro95
DEBUG:py4j.clientserver:Command to send: c
o95
rdd
e

DEBUG:py4j.clientserver:Answer received: !yro96
DEBUG:py4j.clientserver:Command to send: c
o23
applySchemaToPythonRDD
ro96
s{"fields":[{"metadata":{},"name":"common_features_index","nullable":true,"type":"string"},{"metadata":{},"name":"num_features","nullable":true,"type":"long"},{"metadata":{},"name":"features_list","nulla

	Task 1:	CommonFeatureTransform complete.	Duration: 8.2 seconds.


DEBUG:py4j.clientserver:Answer received: !yro146
DEBUG:py4j.clientserver:Command to send: c
o146
rdd
e

DEBUG:py4j.clientserver:Answer received: !yro147
DEBUG:py4j.clientserver:Command to send: c
o23
applySchemaToPythonRDD
ro147
s{"fields":[{"metadata":{},"name":"common_features_index","nullable":true,"type":"string"},{"metadata":{},"name":"num_features","nullable":true,"type":"long"},{"metadata":{},"name":"features_list","nullable":true,"type":"string"},{"metadata":{},"name":"partition","nullable":true,"type":"double"}],"type":"struct"}
e

DEBUG:py4j.clientserver:Answer received: !yro148
DEBUG:deepcvr.data.transform:	DataFrame created
DEBUG:py4j.clientserver:Command to send: r
u
functions
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.sql.functions
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.sql.functions
col
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.sql.functions
col
spartition
e

	Task 2:	CommonFeatureTransform complete.	Duration: 7.08 seconds.


DEBUG:py4j.clientserver:Command to send: c
o23
applySchemaToPythonRDD
ro198
s{"fields":[{"metadata":{},"name":"common_features_index","nullable":true,"type":"string"},{"metadata":{},"name":"num_features","nullable":true,"type":"long"},{"metadata":{},"name":"features_list","nullable":true,"type":"string"},{"metadata":{},"name":"partition","nullable":true,"type":"double"}],"type":"struct"}
e

DEBUG:py4j.clientserver:Answer received: !yro199
DEBUG:deepcvr.data.transform:	DataFrame created
DEBUG:py4j.clientserver:Command to send: r
u
functions
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.sql.functions
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.sql.functions
col
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.sql.functions
col
spartition
e

DEBUG:py4j.clientserver:Answer received: !yro200
DEBUG:py4j.clientserver:Command to send: r
u
PythonUtils
rj
e

DEBUG:py4j.clientserver:Answer received

	Task 3:	CommonFeatureTransform complete.	Duration: 7.45 seconds.


DEBUG:py4j.clientserver:Answer received: !yro249
DEBUG:py4j.clientserver:Command to send: c
o23
applySchemaToPythonRDD
ro249
s{"fields":[{"metadata":{},"name":"common_features_index","nullable":true,"type":"string"},{"metadata":{},"name":"num_features","nullable":true,"type":"long"},{"metadata":{},"name":"features_list","nullable":true,"type":"string"},{"metadata":{},"name":"partition","nullable":true,"type":"double"}],"type":"struct"}
e

DEBUG:py4j.clientserver:Answer received: !yro250
DEBUG:deepcvr.data.transform:	DataFrame created
DEBUG:py4j.clientserver:Command to send: r
u
functions
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.sql.functions
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.sql.functions
col
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.sql.functions
col
spartition
e

DEBUG:py4j.clientserver:Answer received: !yro251
DEBUG:py4j.clientserver:Command to send: r
u
PythonUt

	Task 4:	CommonFeatureTransform complete.	Duration: 7.17 seconds.


DEBUG:py4j.clientserver:Answer received: !yro300
DEBUG:py4j.clientserver:Command to send: c
o23
applySchemaToPythonRDD
ro300
s{"fields":[{"metadata":{},"name":"common_features_index","nullable":true,"type":"string"},{"metadata":{},"name":"num_features","nullable":true,"type":"long"},{"metadata":{},"name":"features_list","nullable":true,"type":"string"},{"metadata":{},"name":"partition","nullable":true,"type":"double"}],"type":"struct"}
e

DEBUG:py4j.clientserver:Answer received: !yro301
DEBUG:deepcvr.data.transform:	DataFrame created
DEBUG:py4j.clientserver:Command to send: r
u
functions
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.sql.functions
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.sql.functions
col
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.sql.functions
col
spartition
e

DEBUG:py4j.clientserver:Answer received: !yro302
DEBUG:py4j.clientserver:Command to send: r
u
PythonUt

	Task 5:	CommonFeatureTransform complete.	Duration: 6.83 seconds.


DEBUG:py4j.clientserver:Command to send: c
o23
applySchemaToPythonRDD
ro351
s{"fields":[{"metadata":{},"name":"common_features_index","nullable":true,"type":"string"},{"metadata":{},"name":"num_features","nullable":true,"type":"long"},{"metadata":{},"name":"features_list","nullable":true,"type":"string"},{"metadata":{},"name":"partition","nullable":true,"type":"double"}],"type":"struct"}
e

DEBUG:py4j.clientserver:Answer received: !yro352
DEBUG:deepcvr.data.transform:	DataFrame created
DEBUG:py4j.clientserver:Command to send: r
u
functions
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.sql.functions
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.sql.functions
col
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.sql.functions
col
spartition
e

DEBUG:py4j.clientserver:Answer received: !yro353
DEBUG:py4j.clientserver:Command to send: r
u
PythonUtils
rj
e

DEBUG:py4j.clientserver:Answer received

	Task 6:	CommonFeatureTransform complete.	Duration: 7.39 seconds.


DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.api.python.SerDeUtil
toJavaArray
ro400
e

DEBUG:py4j.clientserver:Answer received: !yro401
DEBUG:py4j.clientserver:Command to send: c
o401
rdd
e

DEBUG:py4j.clientserver:Answer received: !yro402
DEBUG:py4j.clientserver:Command to send: c
o23
applySchemaToPythonRDD
ro402
s{"fields":[{"metadata":{},"name":"common_features_index","nullable":true,"type":"string"},{"metadata":{},"name":"num_features","nullable":true,"type":"long"},{"metadata":{},"name":"features_list","nullable":true,"type":"string"},{"metadata":{},"name":"partition","nullable":true,"type":"double"}],"type":"struct"}
e

DEBUG:py4j.clientserver:Answer received: !yro403
DEBUG:deepcvr.data.transform:	DataFrame created
DEBUG:py4j.clientserver:Command to send: r
u
functions
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.sql.functions
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.sql.functions
col
e

DEBUG:py4j.clientserver:Answer re

	Task 7:	CommonFeatureTransform complete.	Duration: 7.05 seconds.


DEBUG:py4j.clientserver:Command to send: i
java.util.HashMap
e

DEBUG:py4j.clientserver:Answer received: !yao444
DEBUG:py4j.clientserver:Command to send: c
o444
put
sPYTHONHASHSEED
s0
e

DEBUG:py4j.clientserver:Answer received: !yn
DEBUG:py4j.clientserver:Command to send: i
java.util.ArrayList
e

DEBUG:py4j.clientserver:Answer received: !ylo445
DEBUG:py4j.clientserver:Command to send: i
java.util.ArrayList
e

DEBUG:py4j.clientserver:Answer received: !ylo446
DEBUG:py4j.clientserver:Command to send: i
org.apache.spark.api.python.PythonFunction
jgAWV6gMAAAAAAAAojB9weXNwYXJrLmNsb3VkcGlja2xlLmNsb3VkcGlja2xllIwNX2J1aWx0aW5fdHlwZZSTlIwKTGFtYmRhVHlwZZSFlFKUKGgCjAhDb2RlVHlwZZSFlFKUKEsCSwBLAEsCSwNLE0MOdAB0AYgAgwF8AYMCUwCUToWUjANtYXCUjBVmYWlsX29uX3N0b3BpdGVyYXRpb26UhpSMAV+UjAhpdGVyYXRvcpSGlIxML2hvbWUvam9obi9hbmFjb25kYTMvZW52cy9kZWVwY3ZyL2xpYi9weXRob24zLjgvc2l0ZS1wYWNrYWdlcy9weXNwYXJrL3JkZC5weZSMBGZ1bmOUTYABQwIAAZSMAWaUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjAdweXNwYXJrlIwIX19uYW1lX1+UjAtweXNwYXJrLnJkZJS

	Task 8:	CommonFeatureTransform complete.	Duration: 6.99 seconds.


DEBUG:py4j.clientserver:Answer received: !ylo496
DEBUG:py4j.clientserver:Command to send: i
java.util.ArrayList
e

DEBUG:py4j.clientserver:Answer received: !ylo497
DEBUG:py4j.clientserver:Command to send: i
org.apache.spark.api.python.PythonFunction
jgAWV6gMAAAAAAAAojB9weXNwYXJrLmNsb3VkcGlja2xlLmNsb3VkcGlja2xllIwNX2J1aWx0aW5fdHlwZZSTlIwKTGFtYmRhVHlwZZSFlFKUKGgCjAhDb2RlVHlwZZSFlFKUKEsCSwBLAEsCSwNLE0MOdAB0AYgAgwF8AYMCUwCUToWUjANtYXCUjBVmYWlsX29uX3N0b3BpdGVyYXRpb26UhpSMAV+UjAhpdGVyYXRvcpSGlIxML2hvbWUvam9obi9hbmFjb25kYTMvZW52cy9kZWVwY3ZyL2xpYi9weXRob24zLjgvc2l0ZS1wYWNrYWdlcy9weXNwYXJrL3JkZC5weZSMBGZ1bmOUTYABQwIAAZSMAWaUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjAdweXNwYXJrlIwIX19uYW1lX1+UjAtweXNwYXJrLnJkZJSMCF9fZmlsZV9flGgRdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMJHB5c3BhcmsuY2xvdWRwaWNrbGUuY2xvdWRwaWNrbGVfZmFzdJSMEl9mdW5jdGlvbl9zZXRzdGF0ZZSTlGgjfZR9lChoG2gSjAxfX3F1YWxuYW1lX1+UjBVSREQubWFwLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZ

	Task 9:	CommonFeatureTransform complete.	Duration: 7.09 seconds.


DEBUG:py4j.clientserver:Answer received: !ylo548
DEBUG:py4j.clientserver:Command to send: i
org.apache.spark.api.python.PythonFunction
jgAWV6gMAAAAAAAAojB9weXNwYXJrLmNsb3VkcGlja2xlLmNsb3VkcGlja2xllIwNX2J1aWx0aW5fdHlwZZSTlIwKTGFtYmRhVHlwZZSFlFKUKGgCjAhDb2RlVHlwZZSFlFKUKEsCSwBLAEsCSwNLE0MOdAB0AYgAgwF8AYMCUwCUToWUjANtYXCUjBVmYWlsX29uX3N0b3BpdGVyYXRpb26UhpSMAV+UjAhpdGVyYXRvcpSGlIxML2hvbWUvam9obi9hbmFjb25kYTMvZW52cy9kZWVwY3ZyL2xpYi9weXRob24zLjgvc2l0ZS1wYWNrYWdlcy9weXNwYXJrL3JkZC5weZSMBGZ1bmOUTYABQwIAAZSMAWaUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjAdweXNwYXJrlIwIX19uYW1lX1+UjAtweXNwYXJrLnJkZJSMCF9fZmlsZV9flGgRdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMJHB5c3BhcmsuY2xvdWRwaWNrbGUuY2xvdWRwaWNrbGVfZmFzdJSMEl9mdW5jdGlvbl9zZXRzdGF0ZZSTlGgjfZR9lChoG2gSjAxfX3F1YWxuYW1lX1+UjBVSREQubWFwLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgcjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOUaAUoaAgoSwFLAEsASwFLAUtTQwR8AFMAlGgKKYwBeJSFlGgRjAg8

	Task 10:	CommonFeatureTransform complete.	Duration: 7.0 seconds.


DEBUG:py4j.clientserver:Answer received: !yro607
DEBUG:deepcvr.data.transform:	DataFrame created
DEBUG:py4j.clientserver:Command to send: r
u
functions
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.sql.functions
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.sql.functions
col
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.sql.functions
col
spartition
e

DEBUG:py4j.clientserver:Answer received: !yro608
DEBUG:py4j.clientserver:Command to send: r
u
PythonUtils
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.api.python.PythonUtils
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.api.python.PythonUtils
toSeq
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: i
java.util.ArrayList
e

DEBUG:py4j.clientserver:Answer received: !ylo609
DEBUG:py4j.clientserver:Command to send: c
o609
add
ro608
e

DEBUG:py4j.clientserver:Answer rec

	Task 11:	CommonFeatureTransform complete.	Duration: 7.69 seconds.


DEBUG:py4j.clientserver:Answer received: !yro651
DEBUG:py4j.clientserver:Command to send: r
u
PythonRDD
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.api.python.PythonRDD
DEBUG:py4j.clientserver:Command to send: c
o646
rdd
e

DEBUG:py4j.clientserver:Answer received: !yro652
DEBUG:py4j.clientserver:Command to send: i
org.apache.spark.api.python.PythonRDD
ro652
ro651
bTrue
bFalse
e

DEBUG:py4j.clientserver:Answer received: !yro653
DEBUG:py4j.clientserver:Command to send: c
o653
asJavaRDD
e

DEBUG:py4j.clientserver:Answer received: !yro654
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.api.python.SerDeUtil
pythonToJava
ro654
bTrue
e

DEBUG:py4j.clientserver:Answer received: !yro655
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.api.python.SerDeUtil
toJavaArray
ro655
e

DEBUG:py4j.clientserver:Answer received: !yro656
DEBUG:py4j.clientserver:Command to send: c
o656
rdd
e

DEBUG:py4j.clientserver:Answer received: !yro657
DEBUG:py4j.clientse

	Task 12:	CommonFeatureTransform complete.	Duration: 7.22 seconds.


DEBUG:py4j.clientserver:Command to send: c
o707
rdd
e

DEBUG:py4j.clientserver:Answer received: !yro708
DEBUG:py4j.clientserver:Command to send: c
o23
applySchemaToPythonRDD
ro708
s{"fields":[{"metadata":{},"name":"common_features_index","nullable":true,"type":"string"},{"metadata":{},"name":"num_features","nullable":true,"type":"long"},{"metadata":{},"name":"features_list","nullable":true,"type":"string"},{"metadata":{},"name":"partition","nullable":true,"type":"double"}],"type":"struct"}
e

DEBUG:py4j.clientserver:Answer received: !yro709
DEBUG:deepcvr.data.transform:	DataFrame created
DEBUG:py4j.clientserver:Command to send: r
u
functions
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.sql.functions
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.sql.functions
col
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.sql.functions
col
spartition
e

DEBUG:py4j.clientserver:Answer received: !yro71

	Task 13:	CommonFeatureTransform complete.	Duration: 7.0 seconds.


DEBUG:py4j.clientserver:Command to send: c
o758
rdd
e

DEBUG:py4j.clientserver:Answer received: !yro759
DEBUG:py4j.clientserver:Command to send: c
o23
applySchemaToPythonRDD
ro759
s{"fields":[{"metadata":{},"name":"common_features_index","nullable":true,"type":"string"},{"metadata":{},"name":"num_features","nullable":true,"type":"long"},{"metadata":{},"name":"features_list","nullable":true,"type":"string"},{"metadata":{},"name":"partition","nullable":true,"type":"double"}],"type":"struct"}
e

DEBUG:py4j.clientserver:Answer received: !yro760
DEBUG:deepcvr.data.transform:	DataFrame created
DEBUG:py4j.clientserver:Command to send: r
u
functions
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.sql.functions
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.sql.functions
col
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.sql.functions
col
spartition
e

DEBUG:py4j.clientserver:Answer received: !yro76

	Task 14:	CommonFeatureTransform complete.	Duration: 7.96 seconds.


DEBUG:py4j.clientserver:Answer received: !ylo802
DEBUG:py4j.clientserver:Answer received: !yv
DEBUG:py4j.clientserver:Command to send: i
java.util.ArrayList
e

DEBUG:py4j.clientserver:Command to send: m
d
o795
e

DEBUG:py4j.clientserver:Answer received: !ylo803
DEBUG:py4j.clientserver:Answer received: !yv
DEBUG:py4j.clientserver:Command to send: i
org.apache.spark.api.python.PythonFunction
jgAWV6gMAAAAAAAAojB9weXNwYXJrLmNsb3VkcGlja2xlLmNsb3VkcGlja2xllIwNX2J1aWx0aW5fdHlwZZSTlIwKTGFtYmRhVHlwZZSFlFKUKGgCjAhDb2RlVHlwZZSFlFKUKEsCSwBLAEsCSwNLE0MOdAB0AYgAgwF8AYMCUwCUToWUjANtYXCUjBVmYWlsX29uX3N0b3BpdGVyYXRpb26UhpSMAV+UjAhpdGVyYXRvcpSGlIxML2hvbWUvam9obi9hbmFjb25kYTMvZW52cy9kZWVwY3ZyL2xpYi9weXRob24zLjgvc2l0ZS1wYWNrYWdlcy9weXNwYXJrL3JkZC5weZSMBGZ1bmOUTYABQwIAAZSMAWaUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjAdweXNwYXJrlIwIX19uYW1lX1+UjAtweXNwYXJrLnJkZJSMCF9fZmlsZV9flGgRdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMJHB5c3BhcmsuY2xvdWRwaWNrbGUuY2xvdWRwaWNrbGVfZmFzdJSMEl9mdW5jdGlvbl9zZXRzdGF0ZZSTlGgjfZR9lC

	Task 15:	CommonFeatureTransform complete.	Duration: 6.93 seconds.


DEBUG:py4j.clientserver:Command to send: c
o860
rdd
e

DEBUG:py4j.clientserver:Answer received: !yro861
DEBUG:py4j.clientserver:Command to send: c
o23
applySchemaToPythonRDD
ro861
s{"fields":[{"metadata":{},"name":"common_features_index","nullable":true,"type":"string"},{"metadata":{},"name":"num_features","nullable":true,"type":"long"},{"metadata":{},"name":"features_list","nullable":true,"type":"string"},{"metadata":{},"name":"partition","nullable":true,"type":"double"}],"type":"struct"}
e

DEBUG:py4j.clientserver:Answer received: !yro862
DEBUG:deepcvr.data.transform:	DataFrame created
DEBUG:py4j.clientserver:Command to send: r
u
functions
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.sql.functions
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.sql.functions
col
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.sql.functions
col
spartition
e

DEBUG:py4j.clientserver:Answer received: !yro86

	Task 16:	CommonFeatureTransform complete.	Duration: 7.26 seconds.


DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: c
o901
rdd
e

DEBUG:py4j.clientserver:Answer received: !yro902
DEBUG:py4j.clientserver:Command to send: c
o902
isBarrier
e

DEBUG:py4j.clientserver:Answer received: !ybfalse
DEBUG:py4j.clientserver:Command to send: r
u
SerDeUtil
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.api.python.SerDeUtil
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.api.python.SerDeUtil
pythonToJava
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: r
u
PythonUtils
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.api.python.PythonUtils
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.api.python.PythonUtils
getBroadcastThreshold
e

DEBUG:py4j.clientserver:Answer received: !ym
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.api.python.PythonUtils
getBroadcastThreshold
ro13
e

DEBUG:py4j.clientserver:Answer recei

	Task 17:	CommonFeatureTransform complete.	Duration: 8.14 seconds.


DEBUG:py4j.clientserver:Answer received: !yro961
DEBUG:py4j.clientserver:Command to send: c
z:org.apache.spark.api.python.SerDeUtil
toJavaArray
ro961
e

DEBUG:py4j.clientserver:Answer received: !yro962
DEBUG:py4j.clientserver:Command to send: c
o962
rdd
e

DEBUG:py4j.clientserver:Answer received: !yro963
DEBUG:py4j.clientserver:Command to send: c
o23
applySchemaToPythonRDD
ro963
s{"fields":[{"metadata":{},"name":"common_features_index","nullable":true,"type":"string"},{"metadata":{},"name":"num_features","nullable":true,"type":"long"},{"metadata":{},"name":"features_list","nullable":true,"type":"string"},{"metadata":{},"name":"partition","nullable":true,"type":"double"}],"type":"struct"}
e

DEBUG:py4j.clientserver:Answer received: !yro964
DEBUG:deepcvr.data.transform:	DataFrame created
DEBUG:py4j.clientserver:Command to send: r
u
functions
rj
e

DEBUG:py4j.clientserver:Answer received: !ycorg.apache.spark.sql.functions
DEBUG:py4j.clientserver:Command to send: r
m
org.apache.spark.sql.f

	Task 18:	CommonFeatureTransform complete.	Duration: 6.73 seconds.


DEBUG:py4j.clientserver:Answer received: !yn
DEBUG:py4j.clientserver:Command to send: i
java.util.ArrayList
e

DEBUG:py4j.clientserver:Answer received: !ylo1006
DEBUG:py4j.clientserver:Command to send: i
java.util.ArrayList
e

DEBUG:py4j.clientserver:Answer received: !ylo1007
DEBUG:py4j.clientserver:Command to send: i
org.apache.spark.api.python.PythonFunction
jgAWV6gMAAAAAAAAojB9weXNwYXJrLmNsb3VkcGlja2xlLmNsb3VkcGlja2xllIwNX2J1aWx0aW5fdHlwZZSTlIwKTGFtYmRhVHlwZZSFlFKUKGgCjAhDb2RlVHlwZZSFlFKUKEsCSwBLAEsCSwNLE0MOdAB0AYgAgwF8AYMCUwCUToWUjANtYXCUjBVmYWlsX29uX3N0b3BpdGVyYXRpb26UhpSMAV+UjAhpdGVyYXRvcpSGlIxML2hvbWUvam9obi9hbmFjb25kYTMvZW52cy9kZWVwY3ZyL2xpYi9weXRob24zLjgvc2l0ZS1wYWNrYWdlcy9weXNwYXJrL3JkZC5weZSMBGZ1bmOUTYABQwIAAZSMAWaUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjAdweXNwYXJrlIwIX19uYW1lX1+UjAtweXNwYXJrLnJkZJSMCF9fZmlsZV9flGgRdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMJHB5c3BhcmsuY2xvdWRwaWNrbGUuY2xvdWRwaWNrbGVfZmFzdJSMEl9mdW5jdGlvbl9zZXRzdGF0ZZSTlGgjfZR9lChoG2gSjAxfX3F1YWxuYW1lX1+UjBVS

	Task 19:	CommonFeatureTransform complete.	Duration: 7.15 seconds.


Unnamed: 0,task_id,n_partitions,partition_size,elapsed
0,0,18.0,40.56,22.65
0,1,174.95,4.17,8.26
0,2,331.89,2.2,7.14
0,3,488.84,1.49,7.51
0,4,645.79,1.13,7.24
0,5,802.74,0.91,6.89
0,6,959.68,0.76,7.45
0,7,1116.63,0.65,7.11
0,8,1273.58,0.57,7.05
0,9,1430.53,0.51,7.15


DEBUG:py4j.clientserver:Command to send: m
d
o1043
e

DEBUG:py4j.clientserver:Answer received: !yv
