In [1]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.ml.feature import RegexTokenizer
from pyspark.sql.functions import udf 
from pyspark.sql.types import IntegerType
from pyspark.ml.feature import StopWordsRemover
from pyspark.ml.feature import NGram
from pyspark.ml.feature import HashingTF, IDF
from pyspark.ml.feature import CountVectorizer, StringIndexer
from pyspark.ml.classification import LogisticRegression, DecisionTreeClassifier, RandomForestClassifier, NaiveBayes
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

## Load the data

In [2]:
spark = SparkSession.builder.appName("SMS").getOrCreate()

In [3]:
data = spark.read.format('text').load('SMSSpamCollection.txt')
split_col = pyspark.sql.functions.split(data['value'], '\t')
data = data.withColumn('label', split_col.getItem(0))
data = data.withColumn('text', split_col.getItem(1)).drop('value')

In [4]:
data.show(n=5)

+-----+--------------------+
|label|                text|
+-----+--------------------+
|  ham|Go until jurong p...|
|  ham|Ok lar... Joking ...|
| spam|Free entry in 2 a...|
|  ham|U dun say so earl...|
|  ham|Nah I don't think...|
+-----+--------------------+
only showing top 5 rows



## Tokenization

In [5]:
count_words = udf(lambda words: len(words), IntegerType())
tokenizer = RegexTokenizer(inputCol='text', outputCol='words', pattern='\\W')
regex_df = tokenizer.transform(data)
regex_tokenized_counts = regex_df.withColumn('freq', count_words('words'))
regex_tokenized_counts.show(n=5)

+-----+--------------------+--------------------+----+
|label|                text|               words|freq|
+-----+--------------------+--------------------+----+
|  ham|Go until jurong p...|[go, until, juron...|  20|
|  ham|Ok lar... Joking ...|[ok, lar, joking,...|   6|
| spam|Free entry in 2 a...|[free, entry, in,...|  33|
|  ham|U dun say so earl...|[u, dun, say, so,...|  11|
|  ham|Nah I don't think...|[nah, i, don, t, ...|  14|
+-----+--------------------+--------------------+----+
only showing top 5 rows



## Remove stopwords

In [6]:
remover = StopWordsRemover(inputCol='words', outputCol='tokens')
tokens_filtered = remover.transform(regex_tokenized_counts)
cleanDF= tokens_filtered.withColumn('count_tokens', count_words('tokens'))
cleanDF.show(n=5)

+-----+--------------------+--------------------+----+--------------------+------------+
|label|                text|               words|freq|              tokens|count_tokens|
+-----+--------------------+--------------------+----+--------------------+------------+
|  ham|Go until jurong p...|[go, until, juron...|  20|[go, jurong, poin...|          16|
|  ham|Ok lar... Joking ...|[ok, lar, joking,...|   6|[ok, lar, joking,...|           6|
| spam|Free entry in 2 a...|[free, entry, in,...|  33|[free, entry, 2, ...|          25|
|  ham|U dun say so earl...|[u, dun, say, so,...|  11|[u, dun, say, ear...|           9|
|  ham|Nah I don't think...|[nah, i, don, t, ...|  14|[nah, think, goes...|           7|
+-----+--------------------+--------------------+----+--------------------+------------+
only showing top 5 rows



## N-grams

In [7]:
ngram = NGram(n=2, inputCol='tokens', outputCol='2grams')
my_2ngrams =ngram.transform(cleanDF)
my_2ngrams.show(n=5)

+-----+--------------------+--------------------+----+--------------------+------------+--------------------+
|label|                text|               words|freq|              tokens|count_tokens|              2grams|
+-----+--------------------+--------------------+----+--------------------+------------+--------------------+
|  ham|Go until jurong p...|[go, until, juron...|  20|[go, jurong, poin...|          16|[go jurong, juron...|
|  ham|Ok lar... Joking ...|[ok, lar, joking,...|   6|[ok, lar, joking,...|           6|[ok lar, lar joki...|
| spam|Free entry in 2 a...|[free, entry, in,...|  33|[free, entry, 2, ...|          25|[free entry, entr...|
|  ham|U dun say so earl...|[u, dun, say, so,...|  11|[u, dun, say, ear...|           9|[u dun, dun say, ...|
|  ham|Nah I don't think...|[nah, i, don, t, ...|  14|[nah, think, goes...|           7|[nah think, think...|
+-----+--------------------+--------------------+----+--------------------+------------+--------------------+
only showi

## TF-IDF

In [8]:
tf = HashingTF(inputCol='tokens', outputCol='fs')
tf_df = tf.transform(my_2ngrams)
tf_df.select('fs').show(n=5)

+--------------------+
|                  fs|
+--------------------+
|(262144,[17222,26...|
|(262144,[122516,1...|
|(262144,[7958,122...|
|(262144,[28698,35...|
|(262144,[2710,259...|
+--------------------+
only showing top 5 rows



In [9]:
idf = IDF(inputCol='fs', outputCol='features')
idf_model = idf.fit(tf_df)
data = idf_model.transform(tf_df).select('label', 'features')
label_01 = udf(lambda lb: 1 if lb == 'ham' else 0, IntegerType()) #1 represents ham; 0 represents spam
data = data.withColumn('label', label_01('label'))
data.show()

+-----+--------------------+
|label|            features|
+-----+--------------------+
|    1|(262144,[17222,26...|
|    1|(262144,[122516,1...|
|    0|(262144,[7958,122...|
|    1|(262144,[28698,35...|
|    1|(262144,[2710,259...|
|    0|(262144,[8443,240...|
|    1|(262144,[2089,497...|
|    1|(262144,[38868,53...|
|    0|(262144,[34651,43...|
|    0|(262144,[10951,40...|
|    1|(262144,[6258,178...|
|    0|(262144,[7958,139...|
|    0|(262144,[7958,209...|
|    1|(262144,[55639,71...|
|    1|(262144,[81948,13...|
|    0|(262144,[66,7958,...|
|    1|(262144,[18910,63...|
|    1|(262144,[739,3511...|
|    1|(262144,[30913,61...|
|    0|(262144,[7958,415...|
+-----+--------------------+
only showing top 20 rows



## Classification

In [10]:
train, test = data.randomSplit([0.7, 0.3])

### LogisticRegression

In [11]:
lr = LogisticRegression()
gridBuilder = ParamGridBuilder().addGrid(lr.maxIter, [50, 100, 200]).addGrid(lr.regParam, [0, 1, 2]).build()
evaluator = BinaryClassificationEvaluator()

In [12]:
cv = CrossValidator(estimator=lr, 
                    estimatorParamMaps=gridBuilder, 
                    evaluator=evaluator)

In [13]:
cvm = cv.fit(train)
evaluator.evaluate(cvm.transform(test))

0.9947683489350155

### Decision Tree

In [16]:
stringIndexer = StringIndexer(inputCol="label", outputCol="indexed")
si_model = stringIndexer.fit(train)
train_st = si_model.transform(train)
dt = DecisionTreeClassifier(labelCol='indexed')

ERROR:root:Exception while sending command.
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/py4j/java_gateway.py", line 1028, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "/Users/Nicolas/anaconda3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
ConnectionResetError: [Errno 54] Connection reset by peer

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/py4j/java_gateway.py", line 883, in send_command
    response = connection.send_command(command)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/py4j/java_gateway.py", line 1040, in send_command
    "Error while receiving", e, proto.ERROR_ON_RECEIVE)
py4j.protocol.Py4JNetworkError: Error while receiving


Py4JError: org does not exist in the JVM

In [15]:
cvm = dt.fit(train_st)

Py4JJavaError: An error occurred while calling o2978.fit.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 967.0 failed 1 times, most recent failure: Lost task 0.0 in stage 967.0 (TID 939, localhost, executor driver): java.lang.OutOfMemoryError: Java heap space
	at java.nio.HeapByteBuffer.<init>(HeapByteBuffer.java:57)
	at java.nio.ByteBuffer.allocate(ByteBuffer.java:335)
	at org.apache.spark.storage.ShuffleBlockFetcherIterator$$anonfun$5.apply(ShuffleBlockFetcherIterator.scala:390)
	at org.apache.spark.storage.ShuffleBlockFetcherIterator$$anonfun$5.apply(ShuffleBlockFetcherIterator.scala:390)
	at org.apache.spark.util.io.ChunkedByteBufferOutputStream.allocateNewChunkIfNeeded(ChunkedByteBufferOutputStream.scala:87)
	at org.apache.spark.util.io.ChunkedByteBufferOutputStream.write(ChunkedByteBufferOutputStream.scala:75)
	at org.apache.spark.util.Utils$$anonfun$copyStream$1.apply$mcJ$sp(Utils.scala:342)
	at org.apache.spark.util.Utils$$anonfun$copyStream$1.apply(Utils.scala:327)
	at org.apache.spark.util.Utils$$anonfun$copyStream$1.apply(Utils.scala:327)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1337)
	at org.apache.spark.util.Utils$.copyStream(Utils.scala:348)
	at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:395)
	at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:59)
	at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
	at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at org.apache.spark.util.collection.ExternalAppendOnlyMap.insertAll(ExternalAppendOnlyMap.scala:154)
	at org.apache.spark.Aggregator.combineValuesByKey(Aggregator.scala:41)
	at org.apache.spark.shuffle.BlockStoreShuffleReader.read(BlockStoreShuffleReader.scala:89)
	at org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:105)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
	at org.apache.spark.scheduler.Task.run(Task.scala:108)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
	at scala.Option.foreach(Option.scala:257)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2043)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2062)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2087)
	at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
	at org.apache.spark.rdd.RDD.collect(RDD.scala:935)
	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$collectAsMap$1.apply(PairRDDFunctions.scala:746)
	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$collectAsMap$1.apply(PairRDDFunctions.scala:745)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
	at org.apache.spark.rdd.PairRDDFunctions.collectAsMap(PairRDDFunctions.scala:745)
	at org.apache.spark.ml.tree.impl.RandomForest$.findSplitsBySorting(RandomForest.scala:928)
	at org.apache.spark.ml.tree.impl.RandomForest$.findSplits(RandomForest.scala:906)
	at org.apache.spark.ml.tree.impl.RandomForest$.run(RandomForest.scala:118)
	at org.apache.spark.ml.classification.DecisionTreeClassifier.train(DecisionTreeClassifier.scala:116)
	at org.apache.spark.ml.classification.DecisionTreeClassifier.train(DecisionTreeClassifier.scala:45)
	at org.apache.spark.ml.Predictor.fit(Predictor.scala:118)
	at sun.reflect.GeneratedMethodAccessor109.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:280)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:214)
	at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.OutOfMemoryError: Java heap space
	at java.nio.HeapByteBuffer.<init>(HeapByteBuffer.java:57)
	at java.nio.ByteBuffer.allocate(ByteBuffer.java:335)
	at org.apache.spark.storage.ShuffleBlockFetcherIterator$$anonfun$5.apply(ShuffleBlockFetcherIterator.scala:390)
	at org.apache.spark.storage.ShuffleBlockFetcherIterator$$anonfun$5.apply(ShuffleBlockFetcherIterator.scala:390)
	at org.apache.spark.util.io.ChunkedByteBufferOutputStream.allocateNewChunkIfNeeded(ChunkedByteBufferOutputStream.scala:87)
	at org.apache.spark.util.io.ChunkedByteBufferOutputStream.write(ChunkedByteBufferOutputStream.scala:75)
	at org.apache.spark.util.Utils$$anonfun$copyStream$1.apply$mcJ$sp(Utils.scala:342)
	at org.apache.spark.util.Utils$$anonfun$copyStream$1.apply(Utils.scala:327)
	at org.apache.spark.util.Utils$$anonfun$copyStream$1.apply(Utils.scala:327)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1337)
	at org.apache.spark.util.Utils$.copyStream(Utils.scala:348)
	at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:395)
	at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:59)
	at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
	at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at org.apache.spark.util.collection.ExternalAppendOnlyMap.insertAll(ExternalAppendOnlyMap.scala:154)
	at org.apache.spark.Aggregator.combineValuesByKey(Aggregator.scala:41)
	at org.apache.spark.shuffle.BlockStoreShuffleReader.read(BlockStoreShuffleReader.scala:89)
	at org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:105)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
	at org.apache.spark.scheduler.Task.run(Task.scala:108)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)


----------------------------------------
Exception happened during processing of request from ('127.0.0.1', 50226)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/socketserver.py", line 317, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/Users/Nicolas/anaconda3/lib/python3.6/socketserver.py", line 348, in process_request
    self.finish_request(request, client_address)
  File "/Users/Nicolas/anaconda3/lib/python3.6/socketserver.py", line 361, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/Users/Nicolas/anaconda3/lib/python3.6/socketserver.py", line 696, in __init__
    self.handle()
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/accumulators.py", line 235, in handle
    num_updates = read_int(self.rfile)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/serializers.py", line 577, in read_int
    raise EOFError
EOFError
---------------

In [14]:
evaluator.evaluate(cvm.transform(test))

----------------------------------------
Exception happened during processing of request from ('127.0.0.1', 64109)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/socketserver.py", line 317, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/Users/Nicolas/anaconda3/lib/python3.6/socketserver.py", line 348, in process_request
    self.finish_request(request, client_address)
  File "/Users/Nicolas/anaconda3/lib/python3.6/socketserver.py", line 361, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/Users/Nicolas/anaconda3/lib/python3.6/socketserver.py", line 696, in __init__
    self.handle()
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/accumulators.py", line 235, in handle
    num_updates = read_int(self.rfile)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/serializers.py", line 577, in read_int
    raise EOFError
EOFError
---------------

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

--- Logging error ---
ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMa

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:64094)
Traceback (most recent call last):
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-24f7ad0102e4>", line 7, in <module>
    cvm = cv.fit(train)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 64, in fit
    return self._fit(dataset)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/tuning.py", line 232, in _fit
    models = est.fit(train, epm)
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in fit
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/Users/Nicolas/anaconda3/lib/python3.6/site-packages/pyspark/ml/base.py", line 59, in <listcomp>
    return [self.fit(dataset, paramMap) for paramMap in params]
  File "/

Py4JNetworkError: An error occurred while trying to connect to the Java server (127.0.0.1:64094)