# Fully connected neural network

In [12]:
import pyspark
from pyspark.sql import SparkSession
# Get (or reuse) the SparkSession for this notebook, registered under the
# application name 'FCNN', and keep a handle on its SparkContext.
spark = (
    SparkSession.builder
    .appName('FCNN')
    .getOrCreate()
)
sc = spark.sparkContext

In [21]:
from pyspark.ml.classification import MultilayerPerceptronClassifier
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# Single seed used for both the random train/test split and the classifier.
SEED = 1234

# Load the sample multiclass data set (libsvm format)
data = spark.read.format("libsvm")\
    .load("/data/mllib/sample_multiclass_classification_data.txt")

# Split the data into train (60%) and test (40%)
splits = data.randomSplit([0.6, 0.4], seed=SEED)
train, test = splits

# specify layers for the neural network:
# input layer of size 4 (features), three intermediate layers of size 5, 7 and 5,
# and output layer of size 3 (classes)
layers = [4, 5, 7, 5, 3]

# create the trainer and set its parameters
trainer = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=128, seed=SEED)

# train the model
model = trainer.fit(train)

# compute accuracy on the held-out test set
result = model.transform(test)
predictionAndLabels = result.select("prediction", "label")
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test set accuracy = " + str(accuracy))

Test set accuracy = 0.8095238095238095


In [9]:
??MultilayerPerceptronClassifier

Init signature:
MultilayerPerceptronClassifier(
    featuresCol='features',
    labelCol='label',
    predictionCol='prediction',
    maxIter=100,
    tol=1e-06,
    seed=None,
    layers=None,
    blockSize=128,
    stepSize=0.03,
    solver='l-bfgs',
    initialWeights=None,

In [20]:
# Number of rows in the full loaded data set `data` (i.e. before it is
# split into train and test).
data.count()

150

In [11]:
# Shut down Spark when the notebook is finished. Stopping through the
# SparkSession stops the underlying SparkContext (`sc`) as well, rather than
# leaving a session object that points at a stopped context.
spark.stop()

Further reading: [How to train your neural networks in parallel with Keras and Apache Spark](https://towardsdatascience.com/how-to-train-your-neural-networks-in-parallel-with-keras-and-apache-spark-ea8a3f48cae6)