# News Sentiment Analysis


### Read the News Table from the Lakehouse DB

In [1]:
# Load every column of the latest-news table from the lakehouse database
# and show it in the notebook.
news_query = "SELECT * FROM bing_lake_db.tbl_latest_news"
df = spark.sql(news_query)
display(df)

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 3, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 2c59c359-2f5b-4df6-8f52-52ea655ef757)

### Import SynapseML

In [2]:
import synapse.ml.core
from synapse.ml.services import AnalyzeText

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 4, Finished, Available, Finished)

### Configure SynapseML

In [3]:
# Instantiate the AnalyzeText transformer and configure it step by step:
# read text from `description`, run the "SentimentAnalysis" kind, and write
# the output to `response` and the error column to `error`.
model = AnalyzeText()
model = model.setTextCol("description")
model = model.setKind("SentimentAnalysis")
model = model.setOutputCol("response")
model = model.setErrorCol("error")

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 5, Finished, Available, Finished)

### Apply the model

In [4]:
# Apply the configured AnalyzeText model to the news DataFrame. The model was
# set up to read `description` and to use `response` / `error` as its output
# and error columns.
result = model.transform(df)

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 6, Finished, Available, Finished)

In [5]:
# Preview the transformed DataFrame returned by the model.
display(result)

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 7, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, e6efcc0e-8e38-44e2-816c-2fdb4ec72162)

### Grab sentiment from the response

In [6]:
# Add a `sentiment` column taken from the nested field
# response -> documents -> sentiment of the model output.
from pyspark.sql.functions import col

sentiment_field = "response.documents.sentiment"
sentiment_df = result.withColumn("sentiment", col(sentiment_field))

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 8, Finished, Available, Finished)

In [7]:
# Preview the DataFrame with the newly added `sentiment` column.
display(sentiment_df)

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 9, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, de0586fd-5a9a-4d3a-8f3a-7ab973eb83cf)

### Remove unwanted columns

In [8]:
# The `sentiment` column has already been extracted, so the raw model output
# and the error column are dropped here.
# NOTE(review): `error` is discarded without being inspected - confirm that
# rows with a populated `error` do not need to be handled first.
unwanted_columns = ("error", "response")
sentiment_df_final = sentiment_df.drop(*unwanted_columns)

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 10, Finished, Available, Finished)

In [9]:
# Preview the DataFrame after `error` and `response` have been dropped.
display(sentiment_df_final)

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 11, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 4abf4b49-3b87-4f12-abb5-795792af6384)

In [10]:
from pyspark.sql.functions import col, to_date

# Convert `datePublished` to a date column using the "dd-MMM-yyyy" pattern.
#
# This cell reassigns `sentiment_df_final` from itself, so it must be safe to
# run more than once: if the column is already a date, re-parsing it with the
# "dd-MMM-yyyy" pattern would no longer match its text form. Skip the
# conversion in that case so a re-run leaves the data untouched.
date_published_type = dict(sentiment_df_final.dtypes)["datePublished"]

if date_published_type != "date":
    sentiment_df_final = sentiment_df_final.withColumn(
        "datePublished",
        to_date(col("datePublished"), "dd-MMM-yyyy"),
    )

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 12, Finished, Available, Finished)

### Type 1 Merge (upsert into the sentiment table)

In [11]:
from pyspark.sql.utils import AnalysisException

# Type 1 upsert (overwrite in place, no history) of the scored news into the
# Delta table, keyed on `url`.
table_name = 'bing_lake_db.tbl_sentiment_analysis'

# Check for the table explicitly instead of inferring "already exists" from an
# AnalysisException raised by the write: that exception type is also raised for
# unrelated analysis errors, which would otherwise be reported as
# "Table Already Exists" and routed into the MERGE branch.
if not spark.catalog.tableExists(table_name):

    # First run: create the table from the current batch.
    sentiment_df_final.write.format("delta").saveAsTable(table_name)

else:

    print("Table Already Exists")

    # Expose the batch to Spark SQL so the MERGE statement can reference it.
    sentiment_df_final.createOrReplaceTempView("vw_sentiment_df_final")

    # Rows are matched on `url`. A matched row is updated only when at least
    # one tracked column differs. `<=>` is null-safe equality, so a change
    # from or to NULL counts as a difference (a plain `<>` evaluates to NULL
    # in that case and the update would be skipped).
    # NOTE(review): MERGE assumes each `url` appears at most once in the
    # source batch - confirm, or de-duplicate before merging.
    spark.sql(f"""  MERGE INTO {table_name} target_table
                    USING vw_sentiment_df_final source_view

                    ON source_view.url = target_table.url

                    WHEN MATCHED AND (
                    NOT (source_view.title <=> target_table.title) OR
                    NOT (source_view.description <=> target_table.description) OR
                    NOT (source_view.category <=> target_table.category) OR
                    NOT (source_view.image <=> target_table.image) OR
                    NOT (source_view.provider <=> target_table.provider) OR
                    NOT (source_view.datePublished <=> target_table.datePublished)
                    )

                    THEN UPDATE SET *

                    WHEN NOT MATCHED THEN INSERT *

                """)

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 13, Finished, Available, Finished)

In [12]:
# Show the in-memory batch that was just written or merged. This displays
# `sentiment_df_final`, not the contents of the Delta table.
display(sentiment_df_final)

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 14, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 2823f5d9-e799-475f-b82b-e198363a4ec4)

In [13]:
%%sql

-- Total number of rows currently stored in the sentiment analysis table.
SELECT COUNT(*)
FROM bing_lake_db.tbl_sentiment_analysis

StatementMeta(, b9a60bc6-a20e-4434-a15f-d4c741ea818a, 15, Finished, Available, Finished)

<Spark SQL result set with 1 rows and 1 fields>