In [6]:
from pyspark.sql import SparkSession
from pyspark.ml.feature import Tokenizer,StopWordsRemover,Word2Vec
from pyspark.ml.classification import MultilayerPerceptronClassifier
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.tuning import TrainValidationSplit,ParamGridBuilder
from pyspark.sql.functions import UserDefinedFunction
from pyspark.sql.types import StringType

def removePunctuation(text):
    text = text.replace('.','')
    text = text.replace(',','')
    text = text.replace(';','')
    text = text.replace(':','')
    text = text.replace('ç','c')
    text = text.replace('ş','s')
    return text

spark = SparkSession.builder.getOrCreate()
textDF = spark.read.option('delimiter','\t').option('inferSchema','true').csv('datasets/movie_turkish_train.txt')
textDF = textDF.withColumnRenamed('_c0','orig')
textDF = textDF.withColumnRenamed('_c1','label')

myUDF = UserDefinedFunction(removePunctuation,StringType())

textDF = textDF.withColumn('Text',myUDF('orig'))


#textDF.show()
tokenizer = Tokenizer(inputCol='Text',outputCol='tokenized')
textDF = tokenizer.transform(textDF)

trStopWords = StopWordsRemover.loadDefaultStopWords('turkish')
sRemover = StopWordsRemover(inputCol='tokenized',outputCol='removed',stopWords=trStopWords)
textDF = sRemover.transform(textDF)

vSize = 3

vectorizer = Word2Vec(inputCol='removed',outputCol='features',vectorSize=vSize,windowSize=5,
                     maxIter=10)
textDF = vectorizer.fit(textDF).transform(textDF)

textDF = textDF.select('features','label')
trainDF, testDF = textDF.randomSplit([0.75,0.25],seed=123) 

mlpClassifier= MultilayerPerceptronClassifier(layers=[vSize,5,2])
model = mlpClassifier.fit(trainDF)

resultDF = model.transform(testDF)

eva = BinaryClassificationEvaluator()
successRate = eva.evaluate(resultDF)
print("Accuracy : ",successRate)
resultDF.show()

Accuracy :  0.800204425204425
+--------------------+-----+--------------------+--------------------+----------+
|            features|label|       rawPrediction|         probability|prediction|
+--------------------+-----+--------------------+--------------------+----------+
|[-0.6466463591371...|    0|[2.04106785944923...|[0.97041137738077...|       0.0|
|[-0.4951508641242...|    1|[-0.2567318771885...|[0.20177500399048...|       1.0|
|[-0.4108232321838...|    0|[1.99559202612476...|[0.96540562119095...|       0.0|
|[-0.4054810334928...|    0|[3.89905350282082...|[0.99928114021809...|       0.0|
|[-0.3880660108157...|    0|[2.61265551075986...|[0.98979449242936...|       0.0|
|[-0.3618335368541...|    0|[2.17231231739861...|[0.97498249595396...|       0.0|
|[-0.3441782164076...|    0|[1.52437469364426...|[0.90949084951578...|       0.0|
|[-0.3435934005039...|    0|[2.35039542525020...|[0.98234302427214...|       0.0|
|[-0.3370022118091...|    1|[-1.9218941759476...|[0.00709849580268..