In [6]:
! pip install -q pyspark==3.1.2 spark-nlp

[0m

In [7]:
import sparknlp
spark = sparknlp.start()

from sparknlp.base import *
from sparknlp.annotator import *

from pyspark.ml import Pipeline
from sparknlp.pretrained import PretrainedPipeline

#import statements
import pandas as pd
import matplotlib.pyplot as plt

from pyspark.ml.feature import Bucketizer

from pyspark.sql.functions import *

import pyspark
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
spark.conf.set("spark.sql.caseSensitive", "true")

In [8]:
spark = SparkSession.builder.enableHiveSupport().appName('AmazonDataEmotion').getOrCreate()
sc = spark.sparkContext

In [9]:
path = 'gs://smbigdata1/main_final.csv'
df6 = spark.read.csv(path)

In [10]:
df6 = df6.withColumnRenamed("_c0", "uniqueID") \
.withColumnRenamed("_c1", "productID") \
.withColumnRenamed("_c2", "overall") \
.withColumnRenamed("_c3", "reviewText") \
.withColumnRenamed("_c4", "reviewTime") \
.withColumnRenamed("_c5", "reviewerID") \
.withColumnRenamed("_c6", "summary") \
.withColumnRenamed("_c7", "unixReviewTime") \
.withColumnRenamed("_c8", "verified") \
.withColumnRenamed("_c9", "Category") \
.withColumnRenamed("_c10", "brand") \
.withColumnRenamed("_c11", "date") \
.withColumnRenamed("_c12", "price") \
.withColumnRenamed("_c13", "rank") \
.withColumnRenamed("_c14", "title") \
.withColumnRenamed("_c15", "timestamp") \
.withColumnRenamed("_c16", "year") \
.withColumnRenamed("_c17", "month") \
.withColumnRenamed("_c18", "count")

In [11]:
nlp_df = df6.select("uniqueID", "reviewText")

In [12]:
nlp_df.show(20)

+--------------------+--------------------+
|            uniqueID|          reviewText|
+--------------------+--------------------+
|B00001OGXKA3SKEKM...|The wig was a wil...|
|B00001OGXKA3BE6HC...|Lots of fun and g...|
|B00001OGXKA20E4R0...|The wig was actua...|
|B00001OGXKA1ZQJ9B...|My son dressed up...|
|B00001OGXKA4KUVVA...|The pants are rid...|
|B00001OGXKAGD0SXS...|Teeth weren't rea...|
|B00001OGXKA3GSDXX...|         Great stuff|
|B00001OGXKA1DN6FU...|I bought this to ...|
|B00001OGXKA3876S0...|            Garbage!|
|B00001OGXKA3CNG3K...|Wig was great! Ot...|
|B00001OGXKA3G2F9E...|A must to complim...|
|B00001OGXKA3EIPA6...|Wig is too dark, ...|
|B00001OGXKA1KKKRC...|Did the job. Won ...|
|B00001OGXKA1CMXTU...|Ok product but cheap|
|B00001OGXKA2F7ZMK...|Wig sucks.  The t...|
|B00001OGXKA2RPC3J...|Excellent deal fo...|
|B00001OGXKA1ADQD7...|the teeth made th...|
|B00001OGXKA3B7BSL...|The wig was awful...|
|B00001OGXKA2ICSFE...|       Looking Good!|
|B00001OGXKA122X0T...|Very cheap

## Emotion Analysis

In [13]:
MODEL_NAME='classifierdl_use_emotion'

documentAssembler = DocumentAssembler()\
    .setInputCol("reviewText")\
    .setOutputCol("document")
    
use = UniversalSentenceEncoder.pretrained(name="tfhub_use")\
 .setInputCols(["document"])\
 .setOutputCol("sentence_embeddings")


sentimentdl = ClassifierDLModel.pretrained(name=MODEL_NAME)\
    .setInputCols(["sentence_embeddings"])\
    .setOutputCol("sentiment")

nlpPipeline = Pipeline(
      stages = [
          documentAssembler,
          use,
          sentimentdl
      ])


tfhub_use download started this may take some time.
Approximate size to download 923.7 MB
[ | ]tfhub_use download started this may take some time.
Approximate size to download 923.7 MB
[ | ]Download done! Loading the resource.
[ / ]

                                                                                

[ | ]

2022-03-14 20:21:32.552021: I external/org_tensorflow/tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-03-14 20:21:33.215143: I external/org_tensorflow/tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2299995000 Hz


[OK!]
classifierdl_use_emotion download started this may take some time.
Approximate size to download 21.3 MB
[ | ]classifierdl_use_emotion download started this may take some time.
Approximate size to download 21.3 MB
Download done! Loading the resource.
[OK!]


In [14]:
empty_df = spark.createDataFrame([['']]).toDF("reviewText")

pipelineModel = nlpPipeline.fit(nlp_df)

result = pipelineModel.transform(nlp_df)

In [15]:
result.show()

[Stage 7:>                                                          (0 + 1) / 1]

+--------------------+--------------------+--------------------+--------------------+--------------------+
|            uniqueID|          reviewText|            document| sentence_embeddings|           sentiment|
+--------------------+--------------------+--------------------+--------------------+--------------------+
|B00001OGXKA3SKEKM...|The wig was a wil...|[{document, 0, 20...|[{sentence_embedd...|[{category, 0, 20...|
|B00001OGXKA3BE6HC...|Lots of fun and g...|[{document, 0, 97...|[{sentence_embedd...|[{category, 0, 97...|
|B00001OGXKA20E4R0...|The wig was actua...|[{document, 0, 23...|[{sentence_embedd...|[{category, 0, 23...|
|B00001OGXKA1ZQJ9B...|My son dressed up...|[{document, 0, 96...|[{sentence_embedd...|[{category, 0, 96...|
|B00001OGXKA4KUVVA...|The pants are rid...|[{document, 0, 31...|[{sentence_embedd...|[{category, 0, 31...|
|B00001OGXKAGD0SXS...|Teeth weren't rea...|[{document, 0, 51...|[{sentence_embedd...|[{category, 0, 51...|
|B00001OGXKA3GSDXX...|         Great 

                                                                                

In [16]:
final= result.select(F.explode(F.arrays_zip('document.result', 'sentiment.result')).alias("cols")) \
.select(F.expr("cols['0']").alias("document"),
        F.expr("cols['1']").alias("sentiment"))

In [17]:
final.show()

[Stage 8:>                                                          (0 + 1) / 1]

+--------------------+---------+
|            document|sentiment|
+--------------------+---------+
|The wig was a wil...|  sadness|
|Lots of fun and g...|  sadness|
|The wig was actua...|  sadness|
|My son dressed up...| surprise|
|The pants are rid...| surprise|
|Teeth weren't rea...|  sadness|
|         Great stuff|      joy|
|I bought this to ...|  sadness|
|            Garbage!|     fear|
|Wig was great! Ot...| surprise|
|A must to complim...| surprise|
|Wig is too dark, ...|  sadness|
|Did the job. Won ...| surprise|
|Ok product but cheap|  sadness|
|Wig sucks.  The t...|  sadness|
|Excellent deal fo...|      joy|
|the teeth made th...| surprise|
|The wig was awful...|      joy|
|       Looking Good!| surprise|
|Very cheap lookin...| surprise|
+--------------------+---------+
only showing top 20 rows



                                                                                

In [16]:
final.show(truncate=False)

+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+
|document                                                                                                                                                                                                                                                          

## Sentiment Analysis

In [18]:
MODEL_NAME='sentimentdl_use_imdb'

documentAssembler = DocumentAssembler()\
    .setInputCol("reviewText")\
    .setOutputCol("document")
    
use = UniversalSentenceEncoder.pretrained(name="tfhub_use", lang="en")\
 .setInputCols(["document"])\
 .setOutputCol("sentence_embeddings")


sentimentdl = SentimentDLModel.pretrained(name=MODEL_NAME, lang="en")\
    .setInputCols(["sentence_embeddings"])\
    .setOutputCol("sentiment")

nlpPipeline = Pipeline(
      stages = [
          documentAssembler,
          use,
          sentimentdl
      ])

tfhub_use download started this may take some time.
Approximate size to download 923.7 MB
[OK!]
sentimentdl_use_imdb download started this may take some time.
Approximate size to download 12 MB
[ | ]sentimentdl_use_imdb download started this may take some time.
Approximate size to download 12 MB
[ / ]Download done! Loading the resource.


22/03/14 20:23:16 WARN org.apache.hadoop.util.concurrent.ExecutorHelper: Thread (Thread[GetFileInfo #1,5,main]) interrupted: 
java.lang.InterruptedException
	at com.google.common.util.concurrent.AbstractFuture.get(AbstractFuture.java:510)
	at com.google.common.util.concurrent.FluentFuture$TrustedFuture.get(FluentFuture.java:88)
	at org.apache.hadoop.util.concurrent.ExecutorHelper.logThrowableFromAfterExecute(ExecutorHelper.java:48)
	at org.apache.hadoop.util.concurrent.HadoopThreadPoolExecutor.afterExecute(HadoopThreadPoolExecutor.java:90)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1157)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:750)

[ â€” ]

                                                                                

[OK!]


In [19]:
empty_df = spark.createDataFrame([['']]).toDF("reviewText")

pipelineModel = nlpPipeline.fit(nlp_df)

result = pipelineModel.transform(nlp_df)

In [20]:
result.show()

+--------------------+--------------------+--------------------+--------------------+--------------------+
|            uniqueID|          reviewText|            document| sentence_embeddings|           sentiment|
+--------------------+--------------------+--------------------+--------------------+--------------------+
|B00001OGXKA3SKEKM...|The wig was a wil...|[{document, 0, 20...|[{sentence_embedd...|[{category, 0, 20...|
|B00001OGXKA3BE6HC...|Lots of fun and g...|[{document, 0, 97...|[{sentence_embedd...|[{category, 0, 97...|
|B00001OGXKA20E4R0...|The wig was actua...|[{document, 0, 23...|[{sentence_embedd...|[{category, 0, 23...|
|B00001OGXKA1ZQJ9B...|My son dressed up...|[{document, 0, 96...|[{sentence_embedd...|[{category, 0, 96...|
|B00001OGXKA4KUVVA...|The pants are rid...|[{document, 0, 31...|[{sentence_embedd...|[{category, 0, 31...|
|B00001OGXKAGD0SXS...|Teeth weren't rea...|[{document, 0, 51...|[{sentence_embedd...|[{category, 0, 51...|
|B00001OGXKA3GSDXX...|         Great 

                                                                                

In [21]:
final= result.select(F.explode(F.arrays_zip('document.result', 'sentiment.result')).alias("cols")) \
.select(F.expr("cols['0']").alias("document"),
        F.expr("cols['1']").alias("sentiment"))

In [22]:
final.show()

+--------------------+---------+
|            document|sentiment|
+--------------------+---------+
|The wig was a wil...|      neg|
|Lots of fun and g...|      pos|
|The wig was actua...|      pos|
|My son dressed up...|      pos|
|The pants are rid...|      pos|
|Teeth weren't rea...|      neg|
|         Great stuff|      pos|
|I bought this to ...|      neg|
|            Garbage!|      neg|
|Wig was great! Ot...|      pos|
|A must to complim...|      pos|
|Wig is too dark, ...|      neg|
|Did the job. Won ...|      pos|
|Ok product but cheap|      neg|
|Wig sucks.  The t...|      neg|
|Excellent deal fo...|      pos|
|the teeth made th...|      neg|
|The wig was awful...|      neg|
|       Looking Good!|      pos|
|Very cheap lookin...|      neg|
+--------------------+---------+
only showing top 20 rows



In [23]:
final.show(2, truncate=False)

+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+
|document                                                                                                                                                                                                 |sentiment|
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+
|The wig was a wild mess, one of the screws was missing from the glasses (I had to buy another pair) and the teeth didn't fit. Basically all that was usable was the necklace, which wasn't great looking.|neg      |
|Lots of fun and great addition to the costume.  Wish it would just come with the costume, however.                                             