In [1]:
import os
import json
import boto3
import sklearn
import socket
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
from pyspark.sql import functions as F


print('user:', os.environ['JUPYTERHUB_SERVICE_PREFIX'])

def uiWebUrl(self):
    from urllib.parse import urlparse
    web_url = self._jsc.sc().uiWebUrl().get()
    port = urlparse(web_url).port
    return "{}proxy/{}/jobs/".format(os.environ['JUPYTERHUB_SERVICE_PREFIX'], port)

# small fix to enable UI views
SparkContext.uiWebUrl = property(uiWebUrl)

# spark configurtion in local regime 
conf = SparkConf().set('spark.master', 'local[*]').set('spark.driver.memory', '8g')

#some needed objects
sc = SparkContext(conf=conf)
spark = SparkSession(sc)
spark

user: /user/st057275/


# dataframe

In [2]:
result_prefix = "malyutin_demo_hw1"

filepath = "file:///home/jovyan/shared/lectures_folder/84-0.txt"
from pyspark.sql.functions import monotonically_increasing_id

dataframe = sc.textFile(f"{filepath}")\
    .map(lambda x: (x,))\
    .toDF()\
    .select(F.col("_1").alias("text"))\
    .withColumn("id", monotonically_increasing_id())

dataframe.show()

+--------------------+---+
|                text| id|
+--------------------+---+
|The Project Guten...|  0|
|                    |  1|
|This eBook is for...|  2|
|most other parts ...|  3|
|whatsoever. You m...|  4|
|of the Project Gu...|  5|
|www.gutenberg.org...|  6|
|will have to chec...|  7|
|   using this eBook.|  8|
|                    |  9|
| Title: Frankenstein| 10|
|       or, The Mo...| 11|
|                    | 12|
|Author: Mary Woll...| 13|
|                    | 14|
|Release Date: 31,...| 15|
|[Most recently up...| 16|
|                    | 17|
|   Language: English| 18|
|                    | 19|
+--------------------+---+
only showing top 20 rows



In [3]:
from pyspark.sql.functions import udf
from pyspark.sql.types import ArrayType, StringType
import string
import re

def process_string(data):
    """
    basic preprocessing function:
    - removes punctuation
    - lower
    - split by space
    """
    punct_removed = re.sub(r'[^\w\s]','',data)
    words = punct_removed.lower().split(" ")
    
    
    return list(filter(lambda x: len(x) > 0, words))

# spark udf -- user defined function (~ mapper)

process_string_udf = udf(lambda z: process_string(z), ArrayType(StringType()))

In [4]:
documents = dataframe\
    .select(process_string_udf(F.col("text")).alias("document"))\
    .where(F.size(F.col("document")) > 1)\
    .withColumn("doc_id", monotonically_increasing_id())


documents.show()

+--------------------+------+
|            document|doc_id|
+--------------------+------+
|[the, project, gu...|     0|
|[this, ebook, is,...|     1|
|[most, other, par...|     2|
|[whatsoever, you,...|     3|
|[of, the, project...|     4|
|[wwwgutenbergorg,...|     5|
|[will, have, to, ...|     6|
|[using, this, ebook]|     7|
|[title, frankenst...|     8|
|[or, the, modern,...|     9|
|[author, mary, wo...|    10|
|[release, date, 3...|    11|
|[most, recently, ...|    12|
| [language, english]|    13|
|[character, set, ...|    14|
|[produced, by, ju...|    15|
|[further, correct...|    16|
|[start, of, the, ...|    17|
|[or, the, modern,...|    18|
|[by, mary, wollst...|    19|
+--------------------+------+
only showing top 20 rows



# tf

In [5]:
tokenswithtf = documents.select('doc_id', F.explode(F.col("document")).alias('token'), 'document')\
    .groupBy('document','doc_id','token').count()\
    .withColumnRenamed("count", 'tf')\
    .orderBy('doc_id','token')


tokenswithtf.show()

+--------------------+------+--------------+---+
|            document|doc_id|         token| tf|
+--------------------+------+--------------+---+
|[the, project, gu...|     0|            by|  1|
|[the, project, gu...|     0|         ebook|  1|
|[the, project, gu...|     0|  frankenstein|  1|
|[the, project, gu...|     0|        godwin|  1|
|[the, project, gu...|     0|     gutenberg|  1|
|[the, project, gu...|     0|          mary|  1|
|[the, project, gu...|     0|            of|  1|
|[the, project, gu...|     0|       project|  1|
|[the, project, gu...|     0|       shelley|  1|
|[the, project, gu...|     0|           the|  1|
|[the, project, gu...|     0|wollstonecraft|  1|
|[this, ebook, is,...|     1|           and|  1|
|[this, ebook, is,...|     1|        anyone|  1|
|[this, ebook, is,...|     1|      anywhere|  1|
|[this, ebook, is,...|     1|         ebook|  1|
|[this, ebook, is,...|     1|           for|  1|
|[this, ebook, is,...|     1|            in|  1|
|[this, ebook, is,..

# doc frequency

In [6]:
from pyspark.sql.functions import countDistinct
tokenswithdf = tokenswithtf\
    .groupBy("token")\
    .agg(countDistinct("doc_id").alias ('df'))

In [7]:
tokenswithdf.orderBy(F.col("df").desc()).show()

+-----+----+
|token|  df|
+-----+----+
|  the|3282|
|  and|2702|
|   of|2435|
|    i|2354|
|   to|1896|
|   my|1534|
|    a|1310|
|   in|1126|
| that| 971|
|  was| 948|
|   me| 792|
| with| 694|
|  but| 681|
|  had| 649|
|which| 554|
|  you| 549|
|   he| 545|
|   it| 533|
|  not| 519|
|  for| 505|
+-----+----+
only showing top 20 rows



# idf

In [8]:
import math
from pyspark.sql.functions import log10

print((dataframe.count(), len(dataframe.columns)))

(7743, 2)


In [9]:
tokenswithIDF = tokenswithdf.withColumn('idf', log10(7744/(tokenswithdf.df)))
tokenswithIDF.show()

+-----------+---+------------------+
|      token| df|               idf|
+-----------+---+------------------+
|       some|145|1.7275973420653623|
|      those| 91|1.9299239519792437|
|        art|  7|3.0438673042860804|
|   tortured|  4| 3.286905352972375|
|     waters| 11| 2.847572659142112|
|        few| 62|2.0965736548020835|
|       hope| 49|2.1987692642718235|
|     voyage| 15| 2.712874085244656|
|      still| 65|2.0760519876574817|
|  arguments|  7|3.0438673042860804|
|  destitute|  1|3.8889653443003374|
|    barrier|  4| 3.286905352972375|
|transaction|  1|3.8889653443003374|
|   ignominy|  4| 3.286905352972375|
|     online|  4| 3.286905352972375|
|     travel|  4| 3.286905352972375|
|  connected|  7|3.0438673042860804|
|     doubts|  1|3.8889653443003374|
|     spared|  5|3.1899953399643186|
|    embrace|  1|3.8889653443003374|
+-----------+---+------------------+
only showing top 20 rows



# tf-idf 

In [10]:
tfidf = tokenswithtf\
    .join(tokenswithIDF, on = ("token"), how = "left")\
    .withColumn("tf_idf", F.col("idf") * F.col("tf"))\
    .drop('document', 'doc_id')\

tfidf.show()

+-------------+---+---+------------------+------------------+
|        token| tf| df|               idf|            tf_idf|
+-------------+---+---+------------------+------------------+
| accumulation|  1|  2| 3.587935348636356| 3.587935348636356|
| accumulation|  1|  2| 3.587935348636356| 3.587935348636356|
|apprehensions|  1|  2| 3.587935348636356| 3.587935348636356|
|apprehensions|  1|  2| 3.587935348636356| 3.587935348636356|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|    arguments|  1|  7|3.0438673042860804|3.0438673042860804|
|          art|  1|  7|3.0438673042860804|3.0438673042860804|
|          art|  1|  7|3.0438673042860804|3.0438673042860804|
|       

# rdd

In [11]:
rddText = sc.textFile(f"{filepath}").repartition(1).zipWithIndex().repartition(5)



rddText.take(20)

[(' Letter 2', 40),
 (' Letter 3', 41),
 (' Letter 4', 42),
 (' Chapter 1', 43),
 (' Chapter 2', 44),
 (' Chapter 3', 45),
 (' Chapter 4', 46),
 (' Chapter 5', 47),
 (' Chapter 6', 48),
 (' Chapter 7', 49),
 ('Inspirited by this wind of promise, my daydreams become more fervent', 90),
 ('and vivid. I try in vain to be persuaded that the pole is the seat of', 91),
 ('frost and desolation; it ever presents itself to my imagination as the',
  92),
 ('region of beauty and delight. There, Margaret, the sun is for ever', 93),
 ('visible, its broad disk just skirting the horizon and diffusing a', 94),
 ('perpetual splendour. There—for with your leave, my sister, I will put', 95),
 ('some trust in preceding navigators—there snow and frost are banished;', 96),
 ('and, sailing over a calm sea, we may be wafted to a land surpassing in',
  97),
 ('wonders and in beauty every region hitherto discovered on the habitable',
  98),
 ('globe. Its productions and features may be without example, as the',

In [12]:
rdd2 = rddText.map(lambda line:line[0].lower().split(" "))
abc = rdd2.map(lambda l: [re.sub(r"[^a-zA-Z$]+", " ", x) for x in l])
rdd2 = abc.filter(lambda line:len(line[0])>0).zipWithIndex()
rdd2.take(15)

[(['inspirited',
   'by',
   'this',
   'wind',
   'of',
   'promise ',
   'my',
   'daydreams',
   'become',
   'more',
   'fervent'],
  0),
 (['and',
   'vivid ',
   'i',
   'try',
   'in',
   'vain',
   'to',
   'be',
   'persuaded',
   'that',
   'the',
   'pole',
   'is',
   'the',
   'seat',
   'of'],
  1),
 (['frost',
   'and',
   'desolation ',
   'it',
   'ever',
   'presents',
   'itself',
   'to',
   'my',
   'imagination',
   'as',
   'the'],
  2),
 (['region',
   'of',
   'beauty',
   'and',
   'delight ',
   'there ',
   'margaret ',
   'the',
   'sun',
   'is',
   'for',
   'ever'],
  3),
 (['visible ',
   'its',
   'broad',
   'disk',
   'just',
   'skirting',
   'the',
   'horizon',
   'and',
   'diffusing',
   'a'],
  4),
 (['perpetual',
   'splendour ',
   'there for',
   'with',
   'your',
   'leave ',
   'my',
   'sister ',
   'i',
   'will',
   'put'],
  5),
 (['some',
   'trust',
   'in',
   'preceding',
   'navigators there',
   'snow',
   'and',
   'frost',
   

In [16]:
rdd2.count()

6653

In [17]:
rdd3 = rdd2.map(lambda x: (x[1], x[0])).flatMapValues(lambda x: x).collect()
rdd3

[(0, 'inspirited'),
 (0, 'by'),
 (0, 'this'),
 (0, 'wind'),
 (0, 'of'),
 (0, 'promise '),
 (0, 'my'),
 (0, 'daydreams'),
 (0, 'become'),
 (0, 'more'),
 (0, 'fervent'),
 (1, 'and'),
 (1, 'vivid '),
 (1, 'i'),
 (1, 'try'),
 (1, 'in'),
 (1, 'vain'),
 (1, 'to'),
 (1, 'be'),
 (1, 'persuaded'),
 (1, 'that'),
 (1, 'the'),
 (1, 'pole'),
 (1, 'is'),
 (1, 'the'),
 (1, 'seat'),
 (1, 'of'),
 (2, 'frost'),
 (2, 'and'),
 (2, 'desolation '),
 (2, 'it'),
 (2, 'ever'),
 (2, 'presents'),
 (2, 'itself'),
 (2, 'to'),
 (2, 'my'),
 (2, 'imagination'),
 (2, 'as'),
 (2, 'the'),
 (3, 'region'),
 (3, 'of'),
 (3, 'beauty'),
 (3, 'and'),
 (3, 'delight '),
 (3, 'there '),
 (3, 'margaret '),
 (3, 'the'),
 (3, 'sun'),
 (3, 'is'),
 (3, 'for'),
 (3, 'ever'),
 (4, 'visible '),
 (4, 'its'),
 (4, 'broad'),
 (4, 'disk'),
 (4, 'just'),
 (4, 'skirting'),
 (4, 'the'),
 (4, 'horizon'),
 (4, 'and'),
 (4, 'diffusing'),
 (4, 'a'),
 (5, 'perpetual'),
 (5, 'splendour '),
 (5, 'there for'),
 (5, 'with'),
 (5, 'your'),
 (5, 'leave '

In [18]:
rdd4= sc.parallelize(rdd3)

In [19]:
map1=rdd4.flatMap(lambda x: [((x[0],i),1) for i in x[1].split()])
map1.take(6)

[((0, 'inspirited'), 1),
 ((0, 'by'), 1),
 ((0, 'this'), 1),
 ((0, 'wind'), 1),
 ((0, 'of'), 1),
 ((0, 'promise'), 1)]

In [20]:
reduce=map1.reduceByKey(lambda x,y:x+y)
reduce.take(5)

[((0, 'inspirited'), 1),
 ((0, 'more'), 1),
 ((1, 'and'), 1),
 ((1, 'vain'), 1),
 ((1, 'to'), 1)]

# tf

In [21]:
tf=reduce.map(lambda x: (x[0][1],(x[0][0],x[1])))
tf.take(5)

[('inspirited', (0, 1)),
 ('more', (0, 1)),
 ('and', (1, 1)),
 ('vain', (1, 1)),
 ('to', (1, 1))]

In [22]:
map3=reduce.map(lambda x: (x[0][1],(x[0][0],x[1],1)))
map3.collect()


[('inspirited', (0, 1, 1)),
 ('more', (0, 1, 1)),
 ('and', (1, 1, 1)),
 ('vain', (1, 1, 1)),
 ('to', (1, 1, 1)),
 ('ever', (2, 1, 1)),
 ('perpetual', (5, 1, 1)),
 ('there', (6, 1, 1)),
 ('frost', (6, 1, 1)),
 ('banished', (6, 1, 1)),
 ('surpassing', (7, 1, 1)),
 ('be', (9, 1, 1)),
 ('the', (9, 1, 1)),
 ('six', (11, 1, 1)),
 ('dedicated', (12, 1, 1)),
 ('myself', (12, 1, 1)),
 ('great', (13, 1, 1)),
 ('sea', (14, 1, 1)),
 ('often', (15, 1, 1)),
 ('to', (17, 1, 1)),
 ('and', (17, 1, 1)),
 ('of', (18, 1, 1)),
 ('mrs', (21, 1, 1)),
 ('whale', (25, 1, 1)),
 ('integrity', (30, 1, 1)),
 ('youth', (31, 1, 1)),
 ('is', (32, 1, 1)),
 ('and', (33, 1, 1)),
 ('i', (34, 2, 1)),
 ('you', (37, 1, 1)),
 ('have', (38, 1, 1)),
 ('for', (39, 1, 1)),
 ('poets', (40, 1, 1)),
 ('well', (47, 1, 1)),
 ('and', (49, 1, 1)),
 ('dogs', (50, 1, 1)),
 ('it', (55, 1, 1)),
 ('his', (56, 1, 1)),
 ('attention', (57, 1, 1)),
 ('own', (60, 1, 1)),
 ('a', (61, 1, 1)),
 ('trifling', (65, 1, 1)),
 ('lighted', (65, 1, 1)),
 (

In [23]:
map4=map3.map(lambda x:(x[0],x[1][2]))
map4.collect()

[('inspirited', 1),
 ('more', 1),
 ('and', 1),
 ('vain', 1),
 ('to', 1),
 ('ever', 1),
 ('perpetual', 1),
 ('there', 1),
 ('frost', 1),
 ('banished', 1),
 ('surpassing', 1),
 ('be', 1),
 ('the', 1),
 ('six', 1),
 ('dedicated', 1),
 ('myself', 1),
 ('great', 1),
 ('sea', 1),
 ('often', 1),
 ('to', 1),
 ('and', 1),
 ('of', 1),
 ('mrs', 1),
 ('whale', 1),
 ('integrity', 1),
 ('youth', 1),
 ('is', 1),
 ('and', 1),
 ('i', 1),
 ('you', 1),
 ('have', 1),
 ('for', 1),
 ('poets', 1),
 ('well', 1),
 ('and', 1),
 ('dogs', 1),
 ('it', 1),
 ('his', 1),
 ('attention', 1),
 ('own', 1),
 ('a', 1),
 ('trifling', 1),
 ('lighted', 1),
 ('up', 1),
 ('of', 1),
 ('sweetness', 1),
 ('i', 1),
 ('he', 1),
 ('had', 1),
 ('rawness', 1),
 ('someone', 1),
 ('my', 1),
 ('journal', 1),
 ('what', 1),
 ('occurrence', 1),
 ('except', 1),
 ('yet', 1),
 ('conciliating', 1),
 ('they', 1),
 ('that', 1),
 ('hands', 1),
 ('quivered', 1),
 ('drunk', 1),
 ('reveal', 1),
 ('cup', 1),
 ('failing', 1),
 ('for', 1),
 ('august', 1)

# df

In [24]:
tokenswithdf=map4.reduceByKey(lambda x,y:x+y)
tokenswithdf.collect()

[('yet', 151),
 ('conciliating', 2),
 ('afford', 10),
 ('but', 681),
 ('native', 28),
 ('sixth', 1),
 ('gates', 5),
 ('clerval', 58),
 ('confessed', 5),
 ('sometimes', 57),
 ('boat', 17),
 ('oppressed', 7),
 ('where', 81),
 ('let', 27),
 ('other', 90),
 ('latter', 13),
 ('again', 80),
 ('roarings', 2),
 ('far', 39),
 ('perceived', 26),
 ('continued', 34),
 ('something', 17),
 ('horrible', 15),
 ('endow', 1),
 ('destroying', 4),
 ('deletions', 1),
 ('gutenberg', 85),
 ('work', 72),
 ('enlightened', 3),
 ('imagination', 23),
 ('enveloped', 3),
 ('once', 31),
 ('believe', 33),
 ('senses', 8),
 ('like', 60),
 ('insufficient', 1),
 ('fuel', 2),
 ('conversed', 3),
 ('family', 31),
 ('domes', 2),
 ('mighty', 13),
 ('charge', 14),
 ('devoted', 14),
 ('leaf', 1),
 ('came', 42),
 ('length', 27),
 ('roses', 1),
 ('creature', 44),
 ('singular', 3),
 ('acquainted', 20),
 ('victim', 16),
 ('s', 93),
 ('suggested', 2),
 ('good', 37),
 ('montal', 1),
 ('conception', 6),
 ('miseries', 9),
 ('task', 19)

In [25]:
rdd2.count()

6653

# idf

In [26]:
import math
from pyspark.sql.functions import *
idf=tokenswithdf.map(lambda x: (x[0], math.log10(6654/x[1])))
idf.collect()

[('yet', 1.6441058492396343),
 ('conciliating', 3.5220528008688223),
 ('afford', 2.8230827965328036),
 ('but', 0.9899356846200186),
 ('native', 2.3759247651905846),
 ('sixth', 3.8230827965328036),
 ('gates', 3.124112792196785),
 ('clerval', 2.0596548029698662),
 ('confessed', 3.124112792196785),
 ('sometimes', 2.067207940860312),
 ('boat', 2.59263387515453),
 ('oppressed', 2.9779847565185467),
 ('where', 1.914597777654154),
 ('let', 2.3917190323738167),
 ('other', 1.8688402870934788),
 ('latter', 2.709139444225967),
 ('again', 1.91999280954086),
 ('roarings', 3.5220528008688223),
 ('far', 2.2320181895063045),
 ('perceived', 2.408109448561986),
 ('continued', 2.2916038794905487),
 ('something', 2.59263387515453),
 ('horrible', 2.6469915374771222),
 ('endow', 3.8230827965328036),
 ('destroying', 3.2210228052048415),
 ('deletions', 3.8230827965328036),
 ('gutenberg', 1.893663870818511),
 ('work', 1.9657503001015353),
 ('enlightened', 3.3459615418131414),
 ('imagination', 2.461354960515211

In [27]:
rdd_fin=tf.join(idf)
rdd_fin.collect()

[('conciliating', ((74, 1), 3.5220528008688223)),
 ('conciliating', ((4585, 1), 3.5220528008688223)),
 ('native', ((234, 1), 2.3759247651905846)),
 ('native', ((826, 1), 2.3759247651905846)),
 ('native', ((4186, 1), 2.3759247651905846)),
 ('native', ((4330, 1), 2.3759247651905846)),
 ('native', ((4739, 1), 2.3759247651905846)),
 ('native', ((4849, 1), 2.3759247651905846)),
 ('native', ((5649, 1), 2.3759247651905846)),
 ('native', ((6545, 1), 2.3759247651905846)),
 ('native', ((3710, 1), 2.3759247651905846)),
 ('native', ((3726, 1), 2.3759247651905846)),
 ('native', ((359, 1), 2.3759247651905846)),
 ('native', ((2983, 1), 2.3759247651905846)),
 ('native', ((1004, 1), 2.3759247651905846)),
 ('native', ((3276, 1), 2.3759247651905846)),
 ('native', ((866, 1), 2.3759247651905846)),
 ('native', ((1154, 1), 2.3759247651905846)),
 ('native', ((971, 1), 2.3759247651905846)),
 ('native', ((3995, 1), 2.3759247651905846)),
 ('native', ((5355, 1), 2.3759247651905846)),
 ('native', ((128, 1), 2.3759

# tf-idf

In [28]:
rdd=rdd_fin.map(lambda x: (x[1][0][0],(x[0],x[1][0][1],x[1][1],x[1][0][1]*x[1][1]))).sortByKey()
rdd.collect()

[(0, ('by', 1, 1.1436548999206848, 1.1436548999206848)),
 (0, ('wind', 1, 2.3917190323738167, 2.3917190323738167)),
 (0, ('this', 1, 1.1806182762906823, 1.1806182762906823)),
 (0, ('become', 1, 2.304568856654916, 2.304568856654916)),
 (0, ('my', 1, 0.6366915808373105, 0.6366915808373105)),
 (0, ('more', 1, 1.618962813876879, 1.618962813876879)),
 (0, ('daydreams', 1, 3.5220528008688223, 3.5220528008688223)),
 (0, ('of', 1, 0.4390880070910708, 0.4390880070910708)),
 (0, ('fervent', 1, 3.5220528008688223, 3.5220528008688223)),
 (0, ('promise', 1, 2.4806601157105974, 2.4806601157105974)),
 (0, ('inspirited', 1, 3.3459615418131414, 3.3459615418131414)),
 (1, ('vivid', 1, 3.3459615418131414, 3.3459615418131414)),
 (1, ('persuaded', 1, 2.9779847565185467, 2.9779847565185467)),
 (1, ('i', 1, 0.4499868094540767, 0.4499868094540767)),
 (1, ('that', 1, 0.8349699562644518, 0.8349699562644518)),
 (1, ('seat', 1, 3.124112792196785, 3.124112792196785)),
 (1, ('vain', 1, 2.709139444225967, 2.70913944

In [29]:
rdd=rdd.map(lambda x: (x[0],x[1][0],x[1][1],x[1][2],x[1][3]))
rdd.toDF(["DocumentId","Token","TF","IDF","TF-IDF"]).show()

+----------+----------+---+------------------+------------------+
|DocumentId|     Token| TF|               IDF|            TF-IDF|
+----------+----------+---+------------------+------------------+
|         0|        by|  1|1.1436548999206848|1.1436548999206848|
|         0|      wind|  1|2.3917190323738167|2.3917190323738167|
|         0|      this|  1|1.1806182762906823|1.1806182762906823|
|         0|    become|  1| 2.304568856654916| 2.304568856654916|
|         0|        my|  1|0.6366915808373105|0.6366915808373105|
|         0|      more|  1| 1.618962813876879| 1.618962813876879|
|         0| daydreams|  1|3.5220528008688223|3.5220528008688223|
|         0|        of|  1|0.4390880070910708|0.4390880070910708|
|         0|   fervent|  1|3.5220528008688223|3.5220528008688223|
|         0|   promise|  1|2.4806601157105974|2.4806601157105974|
|         0|inspirited|  1|3.3459615418131414|3.3459615418131414|
|         1|     vivid|  1|3.3459615418131414|3.3459615418131414|
|         