In [None]:
import os
# Find the latest version of spark 3.2  from http://www.apache.org/dist/spark/ and enter as the spark version
# For example:
# spark_version = 'spark-3.2.2'
spark_version = 'spark-3.<enter version>'

os.environ['SPARK_VERSION']=spark_version

# Install Spark and Java
!apt-get update
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q http://www.apache.org/dist/spark/$SPARK_VERSION/$SPARK_VERSION-bin-hadoop2.7.tgz
!tar xf $SPARK_VERSION-bin-hadoop2.7.tgz
!pip install -q findspark

# Set Environment Variables
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = f"/content/{spark_version}-bin-hadoop2.7"

# Start a SparkSession
import findspark
findspark.init()

In [24]:
# Create the SparkSession for this notebook, or reuse one that already exists
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName("StopWords")
    .getOrCreate()
)

In [25]:
from pyspark.ml.feature import Tokenizer, StopWordsRemover

In [26]:
# Load the poem sentiment CSV hosted in a public S3 bucket
from pyspark import SparkFiles

url = "https://2u-data-curriculum-team.s3.amazonaws.com/dataviz-classroom/v1.1/22-big-data/day_2/poem_sentiment.csv"

# Register the remote file with Spark, then read it back by file name
spark.sparkContext.addFile(url)
df = spark.read.csv(
    SparkFiles.get("poem_sentiment.csv"),
    sep=",",
    header=True,
)

# Preview the loaded DataFrame
df.show()

+---+--------------------+---------+
| id|                text|sentiment|
+---+--------------------+---------+
|  0|to water, cloudli...|        0|
|  1|shall yet be glad...|        1|
|  2|on its windy site...|        0|
|  3|(if haply the dar...|       -1|
|  4|jehovah, jove, or...|        0|
|  5|when the brow is ...|       -1|
|  6|taking and giving...|        1|
|  7|press hard the ho...|       -1|
|  8|his head is bowed...|        0|
|  9|with england if t...|        0|
| 10|turn in the door ...|        0|
| 11|and ever the rock...|       -1|
| 12|that to the next ...|        0|
| 13|and all the honor...|        0|
| 14|a level space of ...|        0|
| 15|from his lady's w...|        0|
| 16|in three distingu...|        0|
| 17|a orn'ment o' sac...|        0|
| 18|ef 'twarn't for s...|        0|
| 19|for ever, if that...|        0|
+---+--------------------+---------+
only showing top 20 rows



In [27]:
# Configure a tokenizer that reads the "text" column and writes its tokens to "Words"
sentiment_data = Tokenizer(
    inputCol="text",
    outputCol="Words",
)

In [28]:
# Run the tokenizer over the poems; the result gains a "Words" column
sentiment = sentiment_data.transform(df)

# Preview the tokenized DataFrame
sentiment.show()

+---+--------------------+---------+--------------------+
| id|                text|sentiment|               Words|
+---+--------------------+---------+--------------------+
|  0|to water, cloudli...|        0|[to, water,, clou...|
|  1|shall yet be glad...|        1|[shall, yet, be, ...|
|  2|on its windy site...|        0|[on, its, windy, ...|
|  3|(if haply the dar...|       -1|[(if, haply, the,...|
|  4|jehovah, jove, or...|        0|[jehovah,, jove,,...|
|  5|when the brow is ...|       -1|[when, the, brow,...|
|  6|taking and giving...|        1|[taking, and, giv...|
|  7|press hard the ho...|       -1|[press, hard, the...|
|  8|his head is bowed...|        0|[his, head, is, b...|
|  9|with england if t...|        0|[with, england, i...|
| 10|turn in the door ...|        0|[turn, in, the, d...|
| 11|and ever the rock...|       -1|[and, ever, the, ...|
| 12|that to the next ...|        0|[that, to, the, n...|
| 13|and all the honor...|        0|[and, all, the, h...|
| 14|a level s

In [29]:
# Configure a stop-word remover that reads "Words" and writes the result to "filtered"
remover = StopWordsRemover(
    inputCol="Words",
    outputCol="filtered",
)

In [30]:
# Apply the stop-word remover to the tokenized poems; adds the "filtered" column
newFrame = remover.transform(sentiment)

# Preview the DataFrame with the new column
newFrame.show()

+---+--------------------+---------+--------------------+--------------------+
| id|                text|sentiment|               Words|            filtered|
+---+--------------------+---------+--------------------+--------------------+
|  0|to water, cloudli...|        0|[to, water,, clou...|[water,, cloudlik...|
|  1|shall yet be glad...|        1|[shall, yet, be, ...|[shall, yet, glad...|
|  2|on its windy site...|        0|[on, its, windy, ...|[windy, site, upl...|
|  3|(if haply the dar...|       -1|[(if, haply, the,...|[(if, haply, dark...|
|  4|jehovah, jove, or...|        0|[jehovah,, jove,,...|[jehovah,, jove,,...|
|  5|when the brow is ...|       -1|[when, the, brow,...|[brow, cold, marb...|
|  6|taking and giving...|        1|[taking, and, giv...|[taking, giving, ...|
|  7|press hard the ho...|       -1|[press, hard, the...|[press, hard, hos...|
|  8|his head is bowed...|        0|[his, head, is, b...|[head, bowed., th...|
|  9|with england if t...|        0|[with, england, 

In [31]:
# Display only the stop-word-filtered tokens, without truncating the column
(
    newFrame
    .select("filtered")
    .show(truncate=False)
)

+-------------------------------------------------------------+
|filtered                                                     |
+-------------------------------------------------------------+
|[water,, cloudlike, bush, afar,]                             |
|[shall, yet, glad, him,, shall, bless]                       |
|[windy, site, uplifting, gabled, roof, palisade,]            |
|[(if, haply, dark, fate]                                     |
|[jehovah,, jove,, lord!]                                     |
|[brow, cold, marble, stone,]                                 |
|[taking, giving, radiance,, slopes]                          |
|[press, hard, hostile, towers!]                              |
|[head, bowed., thinks, men, kings.]                          |
|[england, day, go, hard,]                                    |
|[turn, door, turn]                                           |
|[ever, rocks', disdain;]                                     |
|[next, may, resign, roome]             