In [None]:
import config
import pyspark
import vaderSentiment.vaderSentiment

In [None]:
# Obtain the Spark session for this notebook: reuse the active one if it
# exists, otherwise build a new one with the builder's default settings.
spark = (
    pyspark.sql.SparkSession
    .builder
    .getOrCreate()
)

In [None]:
# Connector settings for reading the source collection; every value comes
# from the project-local `config` module.
# NOTE(review): user and password are interpolated into the URI verbatim.
# MongoDB URIs require reserved characters (e.g. '@', ':', '/') in credentials
# to be percent-encoded -- confirm `config` stores them already encoded.
read_options = {
    'database': f'{config.MongoDb.database}',
    'collection': f'{config.MongoDb.readCollection}',
    'connection.uri': f'mongodb://{config.MongoDb.user}:{config.MongoDb.password}@{config.MongoDb.host}:{config.MongoDb.port}',
}

# Load the collection into a Spark DataFrame through the 'mongodb' data source.
df = spark.read.format('mongodb').options(**read_options).load()

# Preview the first 10 rows as a quick sanity check of the load.
df.show(10)

In [None]:
# Single VADER analyzer instance; it is referenced from inside the UDF below,
# so it is captured by the UDF's closure.
sentimentIntensityAnalyzer = vaderSentiment.vaderSentiment.SentimentIntensityAnalyzer()

@pyspark.sql.functions.udf(
    returnType= pyspark.sql.types.MapType(
        pyspark.sql.types.StringType(), 
        pyspark.sql.types.FloatType()
))
def sentiment(text2):
    """Score one piece of text with VADER and return the polarity scores.

    Args:
        text2: The text to analyse, or None when the source column is null.

    Returns:
        The mapping produced by ``polarity_scores`` (string keys to float
        scores), exposed to Spark as ``map<string, float>``; None (a null map)
        when ``text2`` is None.
    """
    # Spark hands null column values to Python UDFs as None, and
    # polarity_scores() raises on None, which would fail the whole job.
    # Propagate the null instead so rows without text get null scores.
    if text2 is None:
        return None
    return sentimentIntensityAnalyzer.polarity_scores(text2)


In [None]:
# Apply the `sentiment` UDF to the 'text' column and keep the resulting map
# alongside every existing column under the name 'sentiment'.
sentiment_column = sentiment('text').alias('sentiment')
df = df.select('*', sentiment_column)

# Preview the first 10 rows, now including the new column.
df.show(10)

In [None]:
# Print the DataFrame's schema (column names and types) for inspection.
df.printSchema()

In [None]:
# Map each key of the 'sentiment' map column to the name of the top-level
# column it is unpacked into. Dict order fixes the output column order.
score_aliases = {
    'neg': 'negative',
    'neu': 'neutral',
    'pos': 'positive',
    'compound': 'compound',
}
sentiment_map = pyspark.sql.functions.col('sentiment')
score_columns = [
    sentiment_map.getItem(map_key).alias(column_name)
    for map_key, column_name in score_aliases.items()
]

# Keep every existing column and append one column per score.
df = df.select('*', *score_columns)

# Preview the first 10 rows with the unpacked scores.
df.show(10)

In [None]:
# Remove the raw text and the intermediate map column; the individual score
# columns and all remaining columns are kept.
columns_to_drop = ('text', 'sentiment')
df = df.drop(*columns_to_drop)

In [None]:
# Connector settings for writing the result collection; every value comes
# from the project-local `config` module.
# NOTE(review): user and password are interpolated into the URI verbatim.
# MongoDB URIs require reserved characters (e.g. '@', ':', '/') in credentials
# to be percent-encoded -- confirm `config` stores them already encoded.
write_options = {
    'database': f'{config.MongoDb.database}',
    'collection': f'{config.MongoDb.writeCollection}',
    'connection.uri': f'mongodb://{config.MongoDb.user}:{config.MongoDb.password}@{config.MongoDb.host}:{config.MongoDb.port}',
}

# Persist the DataFrame through the 'mongodb' data source. Save mode
# 'overwrite' replaces existing data in the target rather than appending.
writer = df.write.format('mongodb').options(**write_options).mode('overwrite')
writer.save()