#### Dependencies
____

In [16]:
from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import BinaryClassificationEvaluator, MulticlassClassificationEvaluator

#### Create Session
____

In [3]:
# Get (or reuse) the Spark session for this notebook, registered under the
# application name 'logr3'.
spark = (
    SparkSession.builder
    .appName('logr3')
    .getOrCreate()
)

#### Load Data
___

In [4]:
# Read the sample dataset with the 'libsvm' reader, then preview the first
# five rows.
data = (
    spark.read
    .format('libsvm')
    .load('resources/sample_libsvm_data.txt')
)
data.show(5)

+-----+--------------------+
|label|            features|
+-----+--------------------+
|  0.0|(692,[127,128,129...|
|  1.0|(692,[158,159,160...|
|  1.0|(692,[124,125,126...|
|  1.0|(692,[152,153,154...|
|  1.0|(692,[151,152,153...|
+-----+--------------------+
only showing top 5 rows



#### Split Training & Test Data
_____

In [12]:
# Split roughly 70% / 30% into training and test sets. The seed is fixed so
# that re-running the notebook on the same input yields the same split (and
# therefore the same downstream metrics) instead of a new random one each run.
train, test = data.randomSplit([0.7, 0.3], seed=42)

#### Create Logistic Regression Model
____

In [13]:
# Fit a logistic regression estimator, constructed with no explicit
# arguments, on the training split.
model = (
    LogisticRegression()
    .fit(train)
)

#### Evaluate Model 
_____

In [15]:
# Score the fitted model on the held-out test split, then preview the true
# label next to the predicted label for the first ten rows.
result = model.evaluate(test)
(
    result.predictions
    .select('label', 'prediction')
    .show(10)
)

+-----+----------+
|label|prediction|
+-----+----------+
|  0.0|       0.0|
|  0.0|       0.0|
|  0.0|       0.0|
|  0.0|       0.0|
|  0.0|       0.0|
|  0.0|       0.0|
|  0.0|       0.0|
|  0.0|       0.0|
|  0.0|       0.0|
|  0.0|       0.0|
+-----+----------+
only showing top 10 rows



#### Model Evaluator
_____

In [18]:
# Name the metric explicitly rather than relying on the evaluator's default,
# so the number displayed below is unambiguously the area under the ROC curve.
evaluator = BinaryClassificationEvaluator(metricName='areaUnderROC')
evaluator.evaluate(result.predictions)  # area under the ROC curve

1.0