In [1]:
import os
import json
import boto3
import sklearn
import socket
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
from pyspark.sql import functions as F


print('user:', os.environ['JUPYTERHUB_SERVICE_PREFIX'])

def uiWebUrl(self):
    from urllib.parse import urlparse
    web_url = self._jsc.sc().uiWebUrl().get()
    port = urlparse(web_url).port
    return "{}proxy/{}/jobs/".format(os.environ['JUPYTERHUB_SERVICE_PREFIX'], port)

# small fix to enable UI views
SparkContext.uiWebUrl = property(uiWebUrl)

# spark configurtion in local regime 
conf = SparkConf().set('spark.master', 'local[*]').set('spark.driver.memory', '8g')

#some needed objects
sc = SparkContext(conf=conf)
spark = SparkSession(sc)
spark

user: /user/st057275/


# Dataframe

In [2]:
result_prefix = "malyutin_demo_hw1"

filepath = "file:///home/jovyan/shared/lectures_folder/84-0.txt"
from pyspark.sql.functions import monotonically_increasing_id

dataframe = sc.textFile(f"{filepath}")\
    .map(lambda x: (x,))\
    .toDF()\
    .select(F.col("_1").alias("text"))\
    .withColumn("id", monotonically_increasing_id())

dataframe.show()

+--------------------+---+
|                text| id|
+--------------------+---+
|The Project Guten...|  0|
|                    |  1|
|This eBook is for...|  2|
|most other parts ...|  3|
|whatsoever. You m...|  4|
|of the Project Gu...|  5|
|www.gutenberg.org...|  6|
|will have to chec...|  7|
|   using this eBook.|  8|
|                    |  9|
| Title: Frankenstein| 10|
|       or, The Mo...| 11|
|                    | 12|
|Author: Mary Woll...| 13|
|                    | 14|
|Release Date: 31,...| 15|
|[Most recently up...| 16|
|                    | 17|
|   Language: English| 18|
|                    | 19|
+--------------------+---+
only showing top 20 rows



In [3]:
from pyspark.sql.functions import udf
from pyspark.sql.types import ArrayType, StringType
import string
import re

def process_string(data):
    """
    basic preprocessing function:
    - removes punctuation
    - lower
    - split by space
    """
    punct_removed = re.sub(r'[^\w\s]','',data)
    words = punct_removed.lower().split(" ")
    
    
    return list(filter(lambda x: len(x) > 0, words))

# spark udf -- user defined function (~ mapper)

process_string_udf = udf(lambda z: process_string(z), ArrayType(StringType()))

In [4]:
documents = dataframe\
    .select(process_string_udf(F.col("text")).alias("document"))\
    .where(F.size(F.col("document")) > 1)\
    .withColumn("doc_id", monotonically_increasing_id())


documents.show()

+--------------------+------+
|            document|doc_id|
+--------------------+------+
|[the, project, gu...|     0|
|[this, ebook, is,...|     1|
|[most, other, par...|     2|
|[whatsoever, you,...|     3|
|[of, the, project...|     4|
|[wwwgutenbergorg,...|     5|
|[will, have, to, ...|     6|
|[using, this, ebook]|     7|
|[title, frankenst...|     8|
|[or, the, modern,...|     9|
|[author, mary, wo...|    10|
|[release, date, 3...|    11|
|[most, recently, ...|    12|
| [language, english]|    13|
|[character, set, ...|    14|
|[produced, by, ju...|    15|
|[further, correct...|    16|
|[start, of, the, ...|    17|
|[or, the, modern,...|    18|
|[by, mary, wollst...|    19|
+--------------------+------+
only showing top 20 rows



# tf

In [6]:
tokenswithtf = documents.select('doc_id', F.explode(F.col("document")).alias('token'), 'document')\
    .groupBy('document','doc_id','token').count()\
    .withColumnRenamed("count", 'tf')\
    .orderBy('doc_id','token')


tokenswithtf.show()

+--------------------+------+--------------+---+
|            document|doc_id|         token| tf|
+--------------------+------+--------------+---+
|[the, project, gu...|     0|            by|  1|
|[the, project, gu...|     0|         ebook|  1|
|[the, project, gu...|     0|  frankenstein|  1|
|[the, project, gu...|     0|        godwin|  1|
|[the, project, gu...|     0|     gutenberg|  1|
|[the, project, gu...|     0|          mary|  1|
|[the, project, gu...|     0|            of|  1|
|[the, project, gu...|     0|       project|  1|
|[the, project, gu...|     0|       shelley|  1|
|[the, project, gu...|     0|           the|  1|
|[the, project, gu...|     0|wollstonecraft|  1|
|[this, ebook, is,...|     1|           and|  1|
|[this, ebook, is,...|     1|        anyone|  1|
|[this, ebook, is,...|     1|      anywhere|  1|
|[this, ebook, is,...|     1|         ebook|  1|
|[this, ebook, is,...|     1|           for|  1|
|[this, ebook, is,...|     1|            in|  1|
|[this, ebook, is,..

# doc frequency 

In [7]:
from pyspark.sql.functions import countDistinct
tokenswithdf = tokenswithtf\
    .groupBy("token")\
    .agg(countDistinct("doc_id").alias ('df'))

In [9]:
tokenswithdf.orderBy(F.col("df").desc()).show()

+-----+----+
|token|  df|
+-----+----+
|  the|3282|
|  and|2702|
|   of|2435|
|    i|2354|
|   to|1896|
|   my|1534|
|    a|1310|
|   in|1126|
| that| 971|
|  was| 948|
|   me| 792|
| with| 694|
|  but| 681|
|  had| 649|
|which| 554|
|  you| 549|
|   he| 545|
|   it| 533|
|  not| 519|
|  for| 505|
+-----+----+
only showing top 20 rows



# inversed document frequency 

In [10]:
import math
from pyspark.sql.functions import log10

print((dataframe.count(), len(dataframe.columns)))
   

(7743, 2)


In [12]:
tokenswithIDF = tokenswithdf.withColumn('idf', log10(7744/(tokenswithdf.df)))
tokenswithIDF.show()

+-----------+---+------------------+
|      token| df|               idf|
+-----------+---+------------------+
|       some|145|1.7275973420653623|
|      those| 91|1.9299239519792437|
|        art|  7|3.0438673042860804|
|   tortured|  4| 3.286905352972375|
|     waters| 11| 2.847572659142112|
|        few| 62|2.0965736548020835|
|       hope| 49|2.1987692642718235|
|     voyage| 15| 2.712874085244656|
|      still| 65|2.0760519876574817|
|  arguments|  7|3.0438673042860804|
|  destitute|  1|3.8889653443003374|
|    barrier|  4| 3.286905352972375|
|transaction|  1|3.8889653443003374|
|   ignominy|  4| 3.286905352972375|
|     online|  4| 3.286905352972375|
|     travel|  4| 3.286905352972375|
|  connected|  7|3.0438673042860804|
|     doubts|  1|3.8889653443003374|
|     spared|  5|3.1899953399643186|
|    embrace|  1|3.8889653443003374|
+-----------+---+------------------+
only showing top 20 rows



 # tf - idf

In [13]:
tfidf = tokenswithtf\
    .join(tokenswithIDF, on = ("token"), how = "left")\
    .withColumn("tf_idf", F.col("idf") * F.col("tf"))\
    .drop('document', 'doc_id')\

tfidf.show()

+-------------+---+---+------------------+------------------+
|        token| tf| df|               idf|            tf_idf|
+-------------+---+---+------------------+------------------+
| accumulation|  1|  2| 3.587935348636356| 3.587935348636356|
| accumulation|  1|  2| 3.587935348636356| 3.587935348636356|
|apprehensions|  1|  2| 3.587935348636356| 3.587935348636356|
|apprehensions|  1|  2| 3.587935348636356| 3.587935348636356|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|          art|  1|  7|3.0438673042860804|3.0438673042860804|
|          art|  1|  7|3.0438673042860804|3.0438673042860804|
|       

# RDD

In [14]:
rddText = sc.textFile(f"{filepath}").repartition(1).zipWithIndex()
rddText.take(20)

[('The Project Gutenberg eBook of Frankenstein, by Mary Wollstonecraft (Godwin) Shelley',
  0),
 ('', 1),
 ('This eBook is for the use of anyone anywhere in the United States and', 2),
 ('most other parts of the world at no cost and with almost no restrictions',
  3),
 ('whatsoever. You may copy it, give it away or re-use it under the terms', 4),
 ('of the Project Gutenberg License included with this eBook or online at', 5),
 ('www.gutenberg.org. If you are not located in the United States, you', 6),
 ('will have to check the laws of the country where you are located before',
  7),
 ('using this eBook.', 8),
 ('', 9),
 ('Title: Frankenstein', 10),
 ('       or, The Modern Prometheus', 11),
 ('', 12),
 ('Author: Mary Wollstonecraft (Godwin) Shelley', 13),
 ('', 14),
 ('Release Date: 31, 1993 [eBook #84]', 15),
 ('[Most recently updated: November 13, 2020]', 16),
 ('', 17),
 ('Language: English', 18),
 ('', 19)]

In [15]:
rdd2 = rddText.map(lambda line:line[0].lower().split(" ")).filter(lambda line:len(line[0])>0).zipWithIndex()

In [16]:
rdd2.take(19)

[(['the',
   'project',
   'gutenberg',
   'ebook',
   'of',
   'frankenstein,',
   'by',
   'mary',
   'wollstonecraft',
   '(godwin)',
   'shelley'],
  0),
 (['this',
   'ebook',
   'is',
   'for',
   'the',
   'use',
   'of',
   'anyone',
   'anywhere',
   'in',
   'the',
   'united',
   'states',
   'and'],
  1),
 (['most',
   'other',
   'parts',
   'of',
   'the',
   'world',
   'at',
   'no',
   'cost',
   'and',
   'with',
   'almost',
   'no',
   'restrictions'],
  2),
 (['whatsoever.',
   'you',
   'may',
   'copy',
   'it,',
   'give',
   'it',
   'away',
   'or',
   're-use',
   'it',
   'under',
   'the',
   'terms'],
  3),
 (['of',
   'the',
   'project',
   'gutenberg',
   'license',
   'included',
   'with',
   'this',
   'ebook',
   'or',
   'online',
   'at'],
  4),
 (['www.gutenberg.org.',
   'if',
   'you',
   'are',
   'not',
   'located',
   'in',
   'the',
   'united',
   'states,',
   'you'],
  5),
 (['will',
   'have',
   'to',
   'check',
   'the',
   'laws',


In [17]:
rdd3 = rdd2.map(lambda x: (x[1], x[0])).flatMapValues(lambda x: x).collect()
rdd3 

[(0, 'the'),
 (0, 'project'),
 (0, 'gutenberg'),
 (0, 'ebook'),
 (0, 'of'),
 (0, 'frankenstein,'),
 (0, 'by'),
 (0, 'mary'),
 (0, 'wollstonecraft'),
 (0, '(godwin)'),
 (0, 'shelley'),
 (1, 'this'),
 (1, 'ebook'),
 (1, 'is'),
 (1, 'for'),
 (1, 'the'),
 (1, 'use'),
 (1, 'of'),
 (1, 'anyone'),
 (1, 'anywhere'),
 (1, 'in'),
 (1, 'the'),
 (1, 'united'),
 (1, 'states'),
 (1, 'and'),
 (2, 'most'),
 (2, 'other'),
 (2, 'parts'),
 (2, 'of'),
 (2, 'the'),
 (2, 'world'),
 (2, 'at'),
 (2, 'no'),
 (2, 'cost'),
 (2, 'and'),
 (2, 'with'),
 (2, 'almost'),
 (2, 'no'),
 (2, 'restrictions'),
 (3, 'whatsoever.'),
 (3, 'you'),
 (3, 'may'),
 (3, 'copy'),
 (3, 'it,'),
 (3, 'give'),
 (3, 'it'),
 (3, 'away'),
 (3, 'or'),
 (3, 're-use'),
 (3, 'it'),
 (3, 'under'),
 (3, 'the'),
 (3, 'terms'),
 (4, 'of'),
 (4, 'the'),
 (4, 'project'),
 (4, 'gutenberg'),
 (4, 'license'),
 (4, 'included'),
 (4, 'with'),
 (4, 'this'),
 (4, 'ebook'),
 (4, 'or'),
 (4, 'online'),
 (4, 'at'),
 (5, 'www.gutenberg.org.'),
 (5, 'if'),
 (5, 

In [18]:
rdd4= sc.parallelize(rdd3)
rdd4

ParallelCollectionRDD[155] at readRDDFromFile at PythonRDD.scala:262

In [19]:
map1=rdd4.flatMap(lambda x: [((x[0],i),1) for i in x[1].split()])
map1.take(6)

[((0, 'the'), 1),
 ((0, 'project'), 1),
 ((0, 'gutenberg'), 1),
 ((0, 'ebook'), 1),
 ((0, 'of'), 1),
 ((0, 'frankenstein,'), 1)]

In [20]:
reduce=map1.reduceByKey(lambda x,y:x+y)
reduce.take(5)

[((0, 'project'), 1),
 ((1, 'and'), 1),
 ((2, 'of'), 1),
 ((4, 'this'), 1),
 ((4, 'online'), 1)]

# tf

In [21]:
tf=reduce.map(lambda x: (x[0][1],(x[0][0],x[1])))
tf.take(5)

[('project', (0, 1)),
 ('and', (1, 1)),
 ('of', (2, 1)),
 ('this', (4, 1)),
 ('online', (4, 1))]

In [22]:
map3=reduce.map(lambda x: (x[0][1],(x[0][0],x[1],1)))
map3.collect()


[('project', (0, 1, 1)),
 ('and', (1, 1, 1)),
 ('of', (2, 1, 1)),
 ('this', (4, 1, 1)),
 ('online', (4, 1, 1)),
 ('at', (4, 1, 1)),
 ('you', (5, 2, 1)),
 ('have', (6, 1, 1)),
 ('country', (6, 1, 1)),
 ('date:', (10, 1, 1)),
 ('project', (16, 1, 1)),
 ('1', (20, 1, 1)),
 ('mrs.', (21, 1, 1)),
 ('will', (23, 1, 1)),
 ('am', (28, 1, 1)),
 ('as', (28, 1, 1)),
 ('a', (29, 1, 1)),
 ('do', (30, 1, 1)),
 ('i', (34, 1, 1)),
 ('of', (34, 1, 1)),
 ('trust', (39, 1, 1)),
 ('the', (41, 1, 1)),
 ('only', (46, 1, 1)),
 ('world', (48, 1, 1)),
 ('visited,', (49, 1, 1)),
 ('and', (49, 1, 1)),
 ('imprinted', (49, 1, 1)),
 ('of', (50, 1, 1)),
 ('are', (50, 2, 1)),
 ('all', (51, 1, 1)),
 ('me', (51, 1, 1)),
 ('conjectures', (54, 1, 1)),
 ('inestimable', (55, 1, 1)),
 ('reach', (57, 1, 1)),
 ('reflections', (60, 1, 1)),
 ('of', (66, 1, 1)),
 ('day', (71, 1, 1)),
 ('my', (71, 1, 1)),
 ('own', (76, 1, 1)),
 ('that', (77, 1, 1)),
 ('a', (77, 1, 1)),
 ('failure', (79, 1, 1)),
 ('i', (82, 2, 1)),
 ('day', (87, 1

In [23]:
map4=map3.map(lambda x:(x[0],x[1][2]))
map4.collect()

[('project', 1),
 ('and', 1),
 ('of', 1),
 ('this', 1),
 ('online', 1),
 ('at', 1),
 ('you', 1),
 ('have', 1),
 ('country', 1),
 ('date:', 1),
 ('project', 1),
 ('1', 1),
 ('mrs.', 1),
 ('will', 1),
 ('am', 1),
 ('as', 1),
 ('a', 1),
 ('do', 1),
 ('i', 1),
 ('of', 1),
 ('trust', 1),
 ('the', 1),
 ('only', 1),
 ('world', 1),
 ('visited,', 1),
 ('and', 1),
 ('imprinted', 1),
 ('of', 1),
 ('are', 1),
 ('all', 1),
 ('me', 1),
 ('conjectures', 1),
 ('inestimable', 1),
 ('reach', 1),
 ('reflections', 1),
 ('of', 1),
 ('day', 1),
 ('my', 1),
 ('own', 1),
 ('that', 1),
 ('a', 1),
 ('failure', 1),
 ('i', 1),
 ('day', 1),
 ('my', 1),
 ('whaler,', 1),
 ('own', 1),
 ('would', 1),
 ('emergencies', 1),
 ('raise', 1),
 ('for', 1),
 ('stagecoach.', 1),
 ('the', 1),
 ('to', 1),
 ('ship', 1),
 ('i', 1),
 ('intend', 1),
 ('may', 1),
 ('march,', 1),
 ('taken', 1),
 ('i', 1),
 ('on', 1),
 ('can', 1),
 ('which', 1),
 ('have', 1),
 ('no', 1),
 ('endeavour', 1),
 ('shall', 1),
 ('but', 1),
 ('could', 1),
 ('w

# df

In [24]:
tokenswithdf=map4.reduceByKey(lambda x,y:x+y)
tokenswithdf.collect()

[('date:', 1),
 ('but', 660),
 ('passionate', 3),
 ('letter', 21),
 ('incidents', 6),
 ('vessel,', 1),
 ('“to', 5),
 ('creature', 27),
 ('burst', 8),
 ('like', 57),
 ('evils', 2),
 ('them.', 17),
 ('other', 83),
 ('eldest', 2),
 ('result', 3),
 ('adventure.', 1),
 ('governments,', 2),
 ('gentleness.', 1),
 ('circumstances', 10),
 ('study;', 2),
 ('nights', 4),
 ('yet', 137),
 ('frame', 6),
 ('reception', 2),
 ('resolved,', 4),
 ('unremitting', 3),
 ('pale', 4),
 ('exception', 2),
 ('work', 50),
 ('senses', 5),
 ('house;', 3),
 ('returns', 2),
 ('set', 18),
 ('progress,', 2),
 ('mountains,', 12),
 ('imagination', 14),
 ('behaviour', 2),
 ('boy,', 2),
 ('gates', 5),
 ('unhappy', 10),
 ('believe', 25),
 ('rely', 3),
 ('out', 33),
 ('again', 64),
 ('themselves', 8),
 ('native', 28),
 ('castles', 4),
 ('glittering', 3),
 ('unearthly', 3),
 ('ugliness', 1),
 ('condemned.', 2),
 ('urged', 11),
 ('walked', 15),
 ('herb', 1),
 ('fire,', 5),
 ('cottagers,', 8),
 ('sometimes', 49),
 ('once', 28),

In [25]:
rddText.count()

7743

# idf 

In [26]:
import math
from pyspark.sql.functions import *
idf=tokenswithdf.map(lambda x: (x[0], math.log10(7744/x[1])))
idf.collect()

[('date:', 3.8889653443003374),
 ('but', 1.0694214087584686),
 ('passionate', 3.4118440895806748),
 ('letter', 2.566746049566418),
 ('incidents', 3.1108140939166935),
 ('vessel,', 3.8889653443003374),
 ('“to', 3.1899953399643186),
 ('creature', 2.45760158014135),
 ('burst', 2.9858753573083936),
 ('like', 2.133090488627846),
 ('evils', 3.587935348636356),
 ('them.', 2.6585164229220632),
 ('other', 1.9698872519242634),
 ('eldest', 3.587935348636356),
 ('result', 3.4118440895806748),
 ('adventure.', 3.8889653443003374),
 ('governments,', 3.587935348636356),
 ('gentleness.', 3.8889653443003374),
 ('circumstances', 2.8889653443003374),
 ('study;', 3.587935348636356),
 ('nights', 3.286905352972375),
 ('yet', 1.7522447771439305),
 ('frame', 3.1108140939166935),
 ('reception', 3.587935348636356),
 ('resolved,', 3.286905352972375),
 ('unremitting', 3.4118440895806748),
 ('pale', 3.286905352972375),
 ('exception', 3.587935348636356),
 ('work', 2.1899953399643186),
 ('senses', 3.1899953399643186)

In [27]:
rdd_fin=tf.join(idf)
rdd_fin.collect()

[('passionate', ((218, 1), 3.4118440895806748)),
 ('passionate', ((638, 1), 3.4118440895806748)),
 ('passionate', ((2271, 1), 3.4118440895806748)),
 ('letter', ((234, 1), 2.566746049566418)),
 ('letter', ((1480, 1), 2.566746049566418)),
 ('letter', ((1720, 1), 2.566746049566418)),
 ('letter', ((1473, 1), 2.566746049566418)),
 ('letter', ((1681, 1), 2.566746049566418)),
 ('letter', ((4750, 1), 2.566746049566418)),
 ('letter', ((6078, 1), 2.566746049566418)),
 ('letter', ((5351, 1), 2.566746049566418)),
 ('letter', ((1483, 1), 2.566746049566418)),
 ('letter', ((5339, 1), 2.566746049566418)),
 ('letter', ((265, 1), 2.566746049566418)),
 ('letter', ((4409, 1), 2.566746049566418)),
 ('letter', ((1718, 1), 2.566746049566418)),
 ('letter', ((1782, 1), 2.566746049566418)),
 ('letter', ((5254, 1), 2.566746049566418)),
 ('letter', ((5302, 1), 2.566746049566418)),
 ('letter', ((239, 1), 2.566746049566418)),
 ('letter', ((20, 1), 2.566746049566418)),
 ('letter', ((5252, 1), 2.566746049566418)),
 (

# tf-idf

In [28]:
rdd=rdd_fin.map(lambda x: (x[1][0][0],(x[0],x[1][0][1],x[1][1],x[1][0][1]*x[1][1]))).sortByKey()
rdd.collect()

[(0, ('by', 1, 1.2141042035625258, 1.2141042035625258)),
 (0, ('shelley', 1, 3.4118440895806748, 3.4118440895806748)),
 (0, ('mary', 1, 3.4118440895806748, 3.4118440895806748)),
 (0, ('wollstonecraft', 1, 3.4118440895806748, 3.4118440895806748)),
 (0, ('gutenberg', 1, 2.566746049566418, 2.566746049566418)),
 (0, ('(godwin)', 1, 3.4118440895806748, 3.4118440895806748)),
 (0, ('the', 1, 0.3778844987612188, 0.3778844987612188)),
 (0, ('frankenstein,', 1, 2.9858753573083936, 2.9858753573083936)),
 (0, ('ebook', 1, 3.1108140939166935, 3.1108140939166935)),
 (0, ('of', 1, 0.5056886938926869, 0.5056886938926869)),
 (0, ('project', 1, 2.019733624569361, 2.019733624569361)),
 (1, ('this', 1, 1.2869053529723748, 1.2869053529723748)),
 (1, ('united', 1, 2.6336928391970313, 2.6336928391970313)),
 (1, ('anyone', 1, 3.1108140939166935, 3.1108140939166935)),
 (1, ('for', 1, 1.1961184250231072, 1.1961184250231072)),
 (1, ('is', 1, 1.4220977239462278, 1.4220977239462278)),
 (1, ('use', 1, 2.65851642292

In [29]:
rdd=rdd.map(lambda x: (x[0],x[1][0],x[1][1],x[1][2],x[1][3]))
rdd.toDF(["DocumentId","Token","TF","IDF","TF-IDF"]).show()

+----------+--------------+---+------------------+------------------+
|DocumentId|         Token| TF|               IDF|            TF-IDF|
+----------+--------------+---+------------------+------------------+
|         0|            by|  1|1.2141042035625258|1.2141042035625258|
|         0|       shelley|  1|3.4118440895806748|3.4118440895806748|
|         0|          mary|  1|3.4118440895806748|3.4118440895806748|
|         0|wollstonecraft|  1|3.4118440895806748|3.4118440895806748|
|         0|     gutenberg|  1| 2.566746049566418| 2.566746049566418|
|         0|      (godwin)|  1|3.4118440895806748|3.4118440895806748|
|         0|           the|  1|0.3778844987612188|0.3778844987612188|
|         0| frankenstein,|  1|2.9858753573083936|2.9858753573083936|
|         0|         ebook|  1|3.1108140939166935|3.1108140939166935|
|         0|            of|  1|0.5056886938926869|0.5056886938926869|
|         0|       project|  1| 2.019733624569361| 2.019733624569361|
|         1|        