

![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/SENTIMENT_EN.ipynb)




# **Find sentiment in text for Vaccine topic**

## 1. Colab Setup

In [None]:
# Install java
!apt-get update -qq
!apt-get install -y openjdk-8-jdk-headless -qq > /dev/null
!java -version

# Install pyspark
!pip install --ignore-installed -q pyspark==2.4.4

# Install Sparknlp
!pip install --ignore-installed spark-nlp

openjdk version "1.8.0_282"
OpenJDK Runtime Environment (build 1.8.0_282-8u282-b08-0ubuntu1~18.04-b08)
OpenJDK 64-Bit Server VM (build 25.282-b08, mixed mode)
[K     |████████████████████████████████| 215.7MB 66kB/s 
[K     |████████████████████████████████| 204kB 13.6MB/s 
[?25h  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
Collecting spark-nlp
[?25l  Downloading https://files.pythonhosted.org/packages/1b/d9/44fd438e15fa9a02c0e3b3ca9eaffc509fc626592f7a03ce05d8f156d448/spark_nlp-2.7.5-py2.py3-none-any.whl (139kB)
[K     |████████████████████████████████| 143kB 4.1MB/s 
[?25hInstalling collected packages: spark-nlp
Successfully installed spark-nlp-2.7.5


In [None]:
import pandas as pd
import numpy as np
import os
# Point JAVA_HOME at the OpenJDK 8 directory and put its bin/ first on PATH.
# Presumably this makes the JVM started by Spark resolve to the Java 8 install
# from the setup cell - confirm the path matches the runtime image.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["PATH"] = os.environ["JAVA_HOME"] + "/bin:" + os.environ["PATH"]
import json
from pyspark.ml import Pipeline
from pyspark.ml import PipelineModel
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
# NOTE(review): the two star imports supply DocumentAssembler,
# UniversalSentenceEncoder and SentimentDLModel used in the pipeline cell.
from sparknlp.annotator import *
from sparknlp.base import *
import sparknlp
from sparknlp.pretrained import PretrainedPipeline

In [None]:
# Ask the user to pick a local file through Colab's upload helper.
# The returned object is indexed by file name in the loading cell below.
from google.colab import files
uploaded = files.upload()

Saving COVID19Tweets_HA_final_v2.csv to COVID19Tweets_HA_final_v2.csv


In [None]:
# Importing vaccine dataset - this dataset contains only tweets matching key words for this topic
import io

# `uploaded` is keyed by the name of the file the user picked, which is not
# necessarily 'Vaccine_DataFrame.csv' (the recorded run uploaded a differently
# named CSV). Prefer the expected name, otherwise fall back to the first
# uploaded file instead of failing with a KeyError.
if not uploaded:
    raise ValueError("No file was uploaded; re-run the upload cell and select the vaccine tweets CSV.")
csv_name = 'Vaccine_DataFrame.csv' if 'Vaccine_DataFrame.csv' in uploaded else next(iter(uploaded))
tweet_df = pd.read_csv(io.BytesIO(uploaded[csv_name]))

In [None]:
# Drop the first column by position (presumably a saved index column - confirm against the CSV).
# NOTE(review): this rebinds tweet_df from itself, so re-running the cell drops one more column each time.
tweet_df = tweet_df.iloc[:,1:]

In [None]:
# Keep just the tweet text column; the trailing expression reports its shape.
text_list = tweet_df.loc[:, 'text']
text_list.shape

(34081,)

## 2. Start Spark Session

In [None]:
# Create the Spark session through Spark NLP's helper; `spark` is used below to build DataFrames.
spark = sparknlp.start()

## 3. Select the DL model and re-run cells below

In [None]:
# Name of the pretrained SentimentDL model that the pipeline cell loads; change it and re-run the cells below.
MODEL_NAME='sentimentdl_use_twitter'

## 4. Define Spark NLP pipeline

In [None]:
# Stage 1: read the raw "text" column and emit it as the "document" annotation column.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: pretrained Universal Sentence Encoder, turning each document into "sentence_embeddings".
use = (
    UniversalSentenceEncoder.pretrained(name="tfhub_use", lang="en")
    .setInputCols(["document"])
    .setOutputCol("sentence_embeddings")
)

# Stage 3: pretrained sentiment classifier (chosen via MODEL_NAME) reading those embeddings.
sentimentdl = (
    SentimentDLModel.pretrained(name=MODEL_NAME, lang="en")
    .setInputCols(["sentence_embeddings"])
    .setOutputCol("sentiment")
)

# Chain the three stages in dependency order.
nlpPipeline = Pipeline(stages=[documentAssembler, use, sentimentdl])


tfhub_use download started this may take some time.
Approximate size to download 923.7 MB
[OK!]
sentimentdl_use_twitter download started this may take some time.
Approximate size to download 11.4 MB
[OK!]


## 5. Run the pipeline

In [None]:
# Fit against a one-row placeholder frame that only carries the expected "text" column;
# the annotator stages above are loaded pretrained, so no real data is passed to fit().
empty_df = spark.createDataFrame([[""]]).toDF("text")
pipelineModel = nlpPipeline.fit(empty_df)

# Hand the tweet texts to Spark as a single-column frame and annotate them.
df = spark.createDataFrame(text_list.to_frame(name="text"))
result = pipelineModel.transform(df)

## 6. Visualize results

In [None]:

# Zip each row's document texts with its sentiment labels, explode to one row per pair,
# then print the two fields side by side without truncating the text.
(
    result
    .select(F.explode(F.arrays_zip('document.result', 'sentiment.result')).alias("cols"))
    .select(
        F.expr("cols['0']").alias("document"),
        F.expr("cols['1']").alias("sentiment"),
    )
    .show(truncate=False)
)

+---------------------------------------------------------------------------------------------------------------------------------+---------+
|document                                                                                                                         |sentiment|
+---------------------------------------------------------------------------------------------------------------------------------+---------+
|I m watching one of my facebook followers in Brazil get her vaccine shot  she posted the video COVID19                           |negative |
| A domestic digital vaccine permit could easily be used to   Restrict access to services   Limit no  of check ins to             |negative |
| No libertarian would ever dream of giving the abhorrent concept of domestic  vaccine passports  any credence whatsoever         |negative |
| Rep   and I agree  with less than 6  of COVID19 vaccines going to Black Floridians   must                                       |negative |
| The 

In [None]:
# Bare expression: the cell output is the DataFrame's repr, i.e. its column names and types.
result

DataFrame[text: string, document: array<struct<annotatorType:string,begin:int,end:int,result:string,metadata:map<string,string>,embeddings:array<float>>>, sentence_embeddings: array<struct<annotatorType:string,begin:int,end:int,result:string,metadata:map<string,string>,embeddings:array<float>>>, sentiment: array<struct<annotatorType:string,begin:int,end:int,result:string,metadata:map<string,string>,embeddings:array<float>>>]

In [None]:
# Convert the whole Spark result into a pandas DataFrame.
# NOTE(review): this carries every column of `result`, including the annotation struct
# columns (document, sentence_embeddings, sentiment); consider selecting only the
# needed fields first if memory or CSV readability is a concern.
result_df = result.toPandas()

In [None]:
# Exporting CSV for further analysis
# Writes result_df to 'twitter_sentiment.csv' in the working directory (index column
# included, since to_csv is called with defaults), then passes that file to Colab's
# download helper.
from google.colab import files
result_df.to_csv('twitter_sentiment.csv') 
files.download('twitter_sentiment.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>