In [0]:
# Read every column of silver_agoda_reviews_details through Spark SQL.
source_query = "SELECT * FROM silver_dataprocessing.default.silver_agoda_reviews_details"
df_sentiment = spark.sql(source_query)

In [0]:
%pip install nltk

import nltk
# Fetch the 'vader_lexicon' NLTK resource before the VADER analyzer is built
# further down in this cell (presumably the analyzer loads it — confirm).
nltk.download('vader_lexicon')

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from pyspark.sql.functions import udf, when
from pyspark.sql.types import *

# One analyzer instance shared by the whole notebook; sentiment_analyzer_scores()
# calls its polarity_scores() method for every piece of text.
analyser = SentimentIntensityAnalyzer()

# define a function to get the sentiment scores
def sentiment_analyzer_scores(sentence):
    """Score one piece of text with the module-level VADER ``analyser``.

    Args:
        sentence: Text handed unchanged to ``analyser.polarity_scores``.

    Returns:
        Whatever ``analyser.polarity_scores`` returns for ``sentence``; the
        caller in this notebook reads its ``'compound'`` entry.
    """
    return analyser.polarity_scores(sentence)
  
# sentiment analysis using VADER
def vader_analyze_review(df, text_col='content_no_emojis', pos_threshold=0.05, neg_threshold=-0.05):
  """Attach VADER scores and a sentiment label to every row of ``df``.

  Three columns are added (or replaced if they already exist):

  * ``vader_scores`` - map of string -> float produced by
    ``sentiment_analyzer_scores`` for the text column.
  * ``compound``     - the ``'compound'`` entry of ``vader_scores``.
  * ``sentiment``    - ``'Positive'`` when ``compound >= pos_threshold``,
    ``'Negative'`` when ``compound <= neg_threshold``, ``'Neutral'`` otherwise.

  Args:
    df: Spark DataFrame containing the column named by ``text_col``.
    text_col: Name of the column holding the text to score. Defaults to
      ``'content_no_emojis'``, the column this notebook has always used.
    pos_threshold: Lowest compound score labelled ``'Positive'``.
    neg_threshold: Highest compound score labelled ``'Negative'``.

  Returns:
    A new DataFrame with the three columns described above.

  Raises:
    ValueError: If ``neg_threshold`` is greater than ``pos_threshold``; the
      two bands would overlap and the label would depend on check order.
  """
  if neg_threshold > pos_threshold:
    raise ValueError("neg_threshold must not be greater than pos_threshold")

  # Each cell is passed through str() before scoring, so non-string values are
  # scored by their text form.
  # NOTE(review): a missing value arriving as None is scored as the literal
  # text 'None' - confirm that is intended if the column can contain nulls.
  sentiment_analyzer_udf = udf(
      lambda x: sentiment_analyzer_scores(str(x)),
      MapType(StringType(), FloatType()),
  )

  # Full score map for each row.
  df = df.withColumn('vader_scores', sentiment_analyzer_udf(df[text_col]))

  # Pull out the single score used for classification.
  df = df.withColumn('compound', df['vader_scores']['compound'])

  # Bucket the compound score; the Positive check runs first, then Negative.
  df = df.withColumn(
      'sentiment',
      when(df['compound'] >= pos_threshold, 'Positive')
      .when(df['compound'] <= neg_threshold, 'Negative')
      .otherwise('Neutral'),
  )

  return df

# Score the reviews, then drop the intermediate VADER columns so that only the
# final 'sentiment' label is kept, and show the result.
df_sentiment = vader_analyze_review(df_sentiment).drop('vader_scores', 'compound')
display(df_sentiment)

In [0]:
# Save the labelled reviews as a Delta table in overwrite mode, with
# overwriteSchema enabled.
sentiment_writer = (
    df_sentiment.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
sentiment_writer.saveAsTable("silver_dataprocessing.default.silver_agoda_reviews_details_2")

In [0]:
%python
# Give the listed account SELECT access to the scored reviews table.
grant_statement = "GRANT SELECT ON TABLE silver_dataprocessing.default.silver_agoda_reviews_details_2 TO `23105612@siswa365.um.edu.my`"
spark.sql(grant_statement)

DataFrame[]

In [0]:
%sql
-- Preview the userName, content_no_emojis, language and sentiment columns of the scored table.
SELECT
  userName,
  content_no_emojis,
  language,
  sentiment
FROM silver_dataprocessing.default.silver_agoda_reviews_details_2