In [1]:
from pyspark.sql import SparkSession

In [2]:
from dotenv import load_dotenv
import os
load_dotenv()
key_filepath = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")


In [3]:
spark = SparkSession.builder \
    .master("local[8]") \
    .appName("US Accidents") \
    .config("spark.hadoop.fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem") \
    .config("spark.hadoop.google.cloud.auth.service.account.json.keyfile",key_filepath  ) \
    .config("spark.hadoop.google.cloud.auth.service.account.enable", "true") \
    .getOrCreate()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


24/04/30 22:59:58 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [35]:
spark.conf.set("spark.sql.legacy.timeParserPolicy", "LEGACY")

In [4]:

# Read Parquet files from GCS
parquet_files = [
                "gs://us-accidents-bucket/us_accidents_data/Start_Year=2016/933c14c388864f19a17c514e311a69b1-0.parquet",
                "gs://us-accidents-bucket/us_accidents_data/Start_Year=2017/933c14c388864f19a17c514e311a69b1-0.parquet",
                "gs://us-accidents-bucket/us_accidents_data/Start_Year=2018/933c14c388864f19a17c514e311a69b1-0.parquet",
                "gs://us-accidents-bucket/us_accidents_data/Start_Year=2019/933c14c388864f19a17c514e311a69b1-0.parquet",
                "gs://us-accidents-bucket/us_accidents_data/Start_Year=2020/933c14c388864f19a17c514e311a69b1-0.parquet",
                "gs://us-accidents-bucket/us_accidents_data/Start_Year=2021/933c14c388864f19a17c514e311a69b1-0.parquet",
                "gs://us-accidents-bucket/us_accidents_data/Start_Year=2022/933c14c388864f19a17c514e311a69b1-0.parquet",
                "gs://us-accidents-bucket/us_accidents_data/Start_Year=2023/933c14c388864f19a17c514e311a69b1-0.parquet"
                ]

df_list = [spark.read.option("header", "true").option("inferSchema", "true").parquet(file) for file in parquet_files]

merged_df = df_list[0]
for df in df_list[1:]:
    merged_df = merged_df.unionAll(df)



                                                                                

In [5]:
merged_df.show()

24/04/30 23:00:18 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


[Stage 8:>                                                          (0 + 1) / 1]

+----+-------+--------+-------------------+-------------------+-----------------+------------------+------------+--------------------+--------------------+------------+----------+-----+-------+--------------+-------------+-----------+------------+--------------+--------------+---------------+-----------------+--------+-------+-------+---------------+--------------+--------------+
|  ID| Source|Severity|         Start_Time|           End_Time|        Start_Lat|         Start_Lng|Distance(mi)|         Description|              Street|        City|    County|State|Country|Temperature(F)|Wind_Chill(F)|Humidity(%)|Pressure(in)|Visibility(mi)|Wind_Direction|Wind_Speed(mph)|Weather_Condition|Crossing|Railway|Station|Traffic_Calming|Traffic_Signal|Sunrise_Sunset|
+----+-------+--------+-------------------+-------------------+-----------------+------------------+------------+--------------------+--------------------+------------+----------+-----+-------+--------------+-------------+-----------+

                                                                                

# Cleaning the data

### Steps:
> Removing unimportnant columns

> Renaming the columns

> Convert the values of the columns into easy handled values

In [6]:
# Iterate through each column in the DataFrame
#for col in merged_df.columns:
    # Get the unique values for the current column
    #unique_values = merged_df.select(col).distinct().collect()
    
    # Print the column name and its unique values
    #print(f"Column: {col}, has {len(unique_values)} distinct values")
    # for val in unique_values:
    #     print(val[col])
    # print()

In [7]:
# List of column names to drop
columns_to_drop = ['Country']

# Drop the unimportant columns
cleaned_df = merged_df.drop(*columns_to_drop)


In [8]:
from pyspark.sql.functions import to_timestamp

# Convert "Start_Time" and "End_Time" columns to datetime format
cleaned_df = cleaned_df.withColumn("Start_Time", to_timestamp(cleaned_df["Start_Time"], "yyyy-MM-dd HH:mm:ss"))
cleaned_df = cleaned_df.withColumn("End_Time", to_timestamp(cleaned_df["End_Time"], "yyyy-MM-dd HH:mm:ss"))


In [9]:
from pyspark.sql.functions import col

# Convert "Distance(mi)" column from miles to meters
cleaned_df = cleaned_df.withColumn("Distance(m)", col("Distance(mi)") * 1609.34)

# Drop the original "Distance(mi)" column if needed
cleaned_df = cleaned_df.drop("Distance(mi)")


In [37]:
from pyspark.sql.types import IntegerType

# Convert "Severity" column from string to integer
cleaned_df = cleaned_df.withColumn("Severity", cleaned_df["Severity"].cast(IntegerType()))


In [38]:
# TODO: If the rows that have at least one null value remove them, and if they are large number 
# replace the null with the average of its column
cleaned_df = cleaned_df.dropna()

In [39]:
cleaned_df.count()

                                                                                

5656839

## We have dropped about 2 millions records because of nulls :)

In [40]:
# Create a new column called Duration = End_Time - Start_Time
# To indicate the duration of the accident
from pyspark.sql.functions import col, unix_timestamp

# Convert "Start_Time" and "End_Time" columns to Unix timestamp (seconds since the epoch)
cleaned_df = cleaned_df.withColumn("Start_Time_unix", unix_timestamp("Start_Time"))
cleaned_df = cleaned_df.withColumn("End_Time_unix", unix_timestamp("End_Time"))

# Calculate the duration (in seconds)
cleaned_df = cleaned_df.withColumn("Duration", col("End_Time_unix") - col("Start_Time_unix"))


In [41]:
cleaned_df.show()

[Stage 19:>                                                         (0 + 1) / 1]

+----+-------+--------+-------------------+-------------------+---------+------------------+--------------------+--------------------+------------+----------+-----+--------------+-------------+-----------+------------+--------------+--------------+---------------+-----------------+--------+-------+-------+---------------+--------------+--------------+-----------+---------------+-------------+--------+
|  ID| Source|Severity|         Start_Time|           End_Time|Start_Lat|         Start_Lng|         Description|              Street|        City|    County|State|Temperature(F)|Wind_Chill(F)|Humidity(%)|Pressure(in)|Visibility(mi)|Wind_Direction|Wind_Speed(mph)|Weather_Condition|Crossing|Railway|Station|Traffic_Calming|Traffic_Signal|Sunrise_Sunset|Distance(m)|Start_Time_unix|End_Time_unix|Duration|
+----+-------+--------+-------------------+-------------------+---------+------------------+--------------------+--------------------+------------+----------+-----+--------------+-----------

                                                                                

In [42]:
cleaned_df.columns

['ID',
 'Source',
 'Severity',
 'Start_Time',
 'End_Time',
 'Start_Lat',
 'Start_Lng',
 'Description',
 'Street',
 'City',
 'County',
 'State',
 'Temperature(F)',
 'Wind_Chill(F)',
 'Humidity(%)',
 'Pressure(in)',
 'Visibility(mi)',
 'Wind_Direction',
 'Wind_Speed(mph)',
 'Weather_Condition',
 'Crossing',
 'Railway',
 'Station',
 'Traffic_Calming',
 'Traffic_Signal',
 'Sunrise_Sunset',
 'Distance(m)',
 'Start_Time_unix',
 'End_Time_unix',
 'Duration']

# Predictive Analytics

1. [classification]➔ Predicting the severity of an accident based on the
factors involved.

2. [Regression]➔ Predicting accident Duration as indicator of impact on
traffic flow.

## Classification

We will discard some columns that will not affect directly in predicting the severity

- ID
- Description

Also the severity will be dropped from features and set as the target variable 

In [43]:
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.classification import LogisticRegression

In [44]:
dropped_columns = ["Description", "ID"]

classification_df = cleaned_df.drop(*dropped_columns)

In [45]:
from pyspark.ml.feature import VectorAssembler, OneHotEncoder

# Define the columns to be included in the feature vector
input_columns = ['Source', 'Start_Time', 'End_Time', 'Street', 'City', 'County', 'State', 'Wind_Direction', 'Weather_Condition', 'Sunrise_Sunset']

# Apply one-hot encoding to string index columns
encoders = [OneHotEncoder(inputCol=column+"_index", outputCol=column+"_encoded") for column in input_columns if classification_df.schema[column].dataType == "string"]

# Assemble the remaining columns (numeric columns) into a vector
numeric_columns = [column for column in classification_df.columns if column not in input_columns]
assembler = VectorAssembler(inputCols=numeric_columns, outputCol="numeric_features")

# Combine all features into a single vector
final_assembler = VectorAssembler(inputCols=[column+"_encoded" for column in input_columns if classification_df.schema[column].dataType == "string"] + ["numeric_features"], outputCol="features")

# Pipeline stages
stages =  encoders + [assembler, final_assembler]

# Create a pipeline
from pyspark.ml import Pipeline
pipeline = Pipeline(stages=stages)

# Fit the pipeline to the data
pipeline_model = pipeline.fit(classification_df)

# Transform the data
output = pipeline_model.transform(classification_df)


In [49]:
# Show the features column
finalised_data = output.select("Severity","features")
finalised_data.show()

[Stage 23:>                                                         (0 + 1) / 1]

+--------+--------------------+
|Severity|            features|
+--------+--------------------+
|       2|[2.0,39.063148,-8...|
|       3|[3.0,39.747753,-8...|
|       2|[2.0,39.627781,-8...|
|       3|[3.0,40.10059,-82...|
|       2|[2.0,39.758274,-8...|
|       3|[3.0,39.770382,-8...|
|       3|[3.0,40.10059,-82...|
|       3|[3.0,39.952812,-8...|
|       3|[3.0,39.932709,-8...|
|       2|[2.0,39.79076,-84...|
|       2|[2.0,39.972038,-8...|
|       2|[2.0,39.752174,-8...|
|       2|[2.0,39.740669,-8...|
|       2|[2.0,39.790703,-8...|
|       2|[2.0,40.052509,-8...|
|       2|[2.0,39.773346,-8...|
|       2|[2.0,39.628288,-8...|
|       3|[3.0,40.023487,-8...|
|       2|[2.0,39.761379,-8...|
|       2|[2.0,40.158024,-8...|
+--------+--------------------+
only showing top 20 rows



                                                                                

In [50]:
train, test = finalised_data.randomSplit([0.7, 0.3])

In [56]:
lr = LogisticRegression(labelCol="Severity")
lr_model = lr.fit(train)

# Model evaluation
predictions = lr_model.transform(test)
predictions.show()



24/04/30 23:58:30 WARN MemoryStore: Not enough space to cache rdd_812_44 in memory! (computed 17.0 MiB so far)
24/04/30 23:58:31 WARN BlockManager: Persisting block rdd_812_44 to disk instead.




24/04/30 23:58:31 WARN MemoryStore: Not enough space to cache rdd_812_42 in memory! (computed 17.0 MiB so far)
24/04/30 23:58:31 WARN BlockManager: Persisting block rdd_812_42 to disk instead.




24/04/30 23:58:48 WARN MemoryStore: Not enough space to cache rdd_812_49 in memory! (computed 17.0 MiB so far)
24/04/30 23:58:48 WARN BlockManager: Persisting block rdd_812_49 to disk instead.
24/04/30 23:58:48 WARN MemoryStore: Not enough space to cache rdd_812_49 in memory! (computed 17.0 MiB so far)




24/04/30 23:58:49 WARN MemoryStore: Not enough space to cache rdd_812_54 in memory! (computed 17.0 MiB so far)
24/04/30 23:58:49 WARN BlockManager: Persisting block rdd_812_54 to disk instead.
24/04/30 23:58:49 WARN MemoryStore: Not enough space to cache rdd_812_54 in memory! (computed 17.0 MiB so far)


                                                                                

24/04/30 23:58:53 WARN MemoryStore: Not enough space to cache rdd_812_19 in memory! (computed 2.6 MiB so far)
24/04/30 23:58:53 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:58:53 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:58:53 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_16 in memory.
24/04/30 23:58:53 WARN MemoryStore: Not enough space to cache rdd_812_16 in memory! (computed 384.0 B so far)
24/04/30 23:58:53 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 9.7 MiB so far)
24/04/30 23:58:53 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:58:53 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 3.5 MiB so far)
24/04/30 23:58:53 WARN MemoryStore: Failed to

                                                                                

24/04/30 23:58:54 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:58:54 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:58:54 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:58:54 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:58:54 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:58:54 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:58:54 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 1024.6 KiB so far)
24/04/30 23:58:54 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 3.5 MiB so far)
24/04/30 23:58:54 WARN MemoryStore: Failed

                                                                                

24/04/30 23:58:55 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 1024.6 KiB so far)
24/04/30 23:58:55 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:58:55 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_21 in memory.
24/04/30 23:58:55 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:58:55 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 384.0 B so far)
24/04/30 23:58:55 WARN MemoryStore: Not enough space to cache rdd_812_27 in memory! (computed 384.0 B so far)
24/04/30 23:58:55 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:58:55 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:58:55

                                                                                

24/04/30 23:58:55 WARN MemoryStore: Not enough space to cache rdd_812_54 in memory! (computed 1024.6 KiB so far)
24/04/30 23:58:55 WARN MemoryStore: Not enough space to cache rdd_812_49 in memory! (computed 17.0 MiB so far)
24/04/30 23:58:56 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_20 in memory.
24/04/30 23:58:56 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_21 in memory.
24/04/30 23:58:56 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:58:56 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 384.0 B so far)
24/04/30 23:58:56 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 384.0 B so far)
24/04/30 23:58:56 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:58:5

                                                                                

24/04/30 23:58:57 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:58:57 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_21 in memory.
24/04/30 23:58:57 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:58:57 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:58:57 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 384.0 B so far)
24/04/30 23:58:57 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:58:57 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 1024.6 KiB so far)
24/04/30 23:58:57 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:58:57 WARN MemoryStore: Failed

                                                                                

24/04/30 23:58:58 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:58:58 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:58:58 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 1024.6 KiB so far)
24/04/30 23:58:58 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:58:58 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:58:58 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:58:58 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:58:58 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 1024.6 KiB so far)
24/04/30 23:58:58 WARN MemoryStore: Not

                                                                                

24/04/30 23:58:59 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:58:59 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 1024.6 KiB so far)
24/04/30 23:58:59 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_21 in memory.
24/04/30 23:58:59 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:58:59 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 384.0 B so far)
24/04/30 23:58:59 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:58:59 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:58:59 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:58:59 WARN MemoryStore: Failed



24/04/30 23:59:00 WARN MemoryStore: Not enough space to cache rdd_812_49 in memory! (computed 17.0 MiB so far)
24/04/30 23:59:00 WARN MemoryStore: Not enough space to cache rdd_812_54 in memory! (computed 1024.6 KiB so far)


                                                                                

24/04/30 23:59:00 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:00 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:00 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:59:00 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:00 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:00 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:00 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:00 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_28 in memory.
24/04/30 23:59:00 WARN MemoryStore: Fai

                                                                                

24/04/30 23:59:01 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:01 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 3.5 MiB so far)
24/04/30 23:59:01 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:01 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:01 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:01 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:59:01 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:01 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_28 in memory.
24/04/30 23:59:01

                                                                                

24/04/30 23:59:01 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:01 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:59:02 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:02 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:02 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_28 in memory.
24/04/30 23:59:02 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:02 WARN MemoryStore: Not enough space to cache rdd_812_28 in memory! (computed 384.0 B so far)
24/04/30 23:59:02 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:02

                                                                                

24/04/30 23:59:02 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_21 in memory.
24/04/30 23:59:02 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 384.0 B so far)
24/04/30 23:59:02 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:02 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:02 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:59:02 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:59:02 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:02 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:02 WARN MemoryStore: Failed

                                                                                

24/04/30 23:59:03 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:59:03 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:03 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:03 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_21 in memory.
24/04/30 23:59:03 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:03 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:03 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:03 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 1024.6 KiB 



24/04/30 23:59:03 WARN MemoryStore: Not enough space to cache rdd_812_54 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:03 WARN MemoryStore: Not enough space to cache rdd_812_49 in memory! (computed 17.0 MiB so far)


                                                                                

24/04/30 23:59:04 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:04 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_21 in memory.
24/04/30 23:59:04 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:04 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 384.0 B so far)
24/04/30 23:59:04 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:04 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:04 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:59:04 WARN MemoryStore: Not enough space to cache rdd_812_27 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:04 WARN MemoryStore: Fai

                                                                                

24/04/30 23:59:04 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:04 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:04 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:59:04 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:04 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:04 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:04 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:04 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:04

                                                                                

24/04/30 23:59:05 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:05 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:59:05 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:05 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:59:05 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:05 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:05 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:59:05 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:05 WARN MemoryStore: Failed

                                                                                

24/04/30 23:59:06 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 3.5 MiB so far)
24/04/30 23:59:06 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:06 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:06 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:06 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:06 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:59:06 WARN MemoryStore: Not enough space to cache rdd_812_27 in memory! (computed 384.0 B so far)
24/04/30 23:59:06 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:06 WARN MemoryStore: Not en



24/04/30 23:59:06 WARN MemoryStore: Not enough space to cache rdd_812_54 in memory! (computed 1024.6 KiB so far)


                                                                                

24/04/30 23:59:07 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:07 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:07 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:07 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:07 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:07 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:59:07 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_28 in memory.
24/04/30 23:59:07 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 3.5 MiB so far)
24/04/30 23:59:07

                                                                                

24/04/30 23:59:07 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:07 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:59:07 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:07 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:59:08 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:08 WARN MemoryStore: Not enough space to cache rdd_812_27 in memory! (computed 384.0 B so far)
24/04/30 23:59:08 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:08 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:08 WARN MemoryStore: Failed

                                                                                

24/04/30 23:59:08 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:08 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:08 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:08 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:08 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:08 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:08 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:59:08 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_28 in 

                                                                                

24/04/30 23:59:09 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:09 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:09 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_21 in memory.
24/04/30 23:59:09 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:09 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:09 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 384.0 B so far)
24/04/30 23:59:09 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:09 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:09

                                                                                

24/04/30 23:59:10 WARN MemoryStore: Not enough space to cache rdd_812_49 in memory! (computed 17.0 MiB so far)
24/04/30 23:59:10 WARN MemoryStore: Not enough space to cache rdd_812_54 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:10 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:10 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:59:10 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:10 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:10 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_20 in memory.
24/04/30 23:59:10 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:1

                                                                                

24/04/30 23:59:11 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:11 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_21 in memory.
24/04/30 23:59:11 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:11 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 384.0 B so far)
24/04/30 23:59:11 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:11 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:11 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:59:11 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:59:11 WARN MemoryStore: Not en

                                                                                

24/04/30 23:59:11 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:11 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:11 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 3.5 MiB so far)
24/04/30 23:59:11 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:11 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:11 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:11 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:59:11 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:11 WA

                                                                                

24/04/30 23:59:12 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:59:12 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:12 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:12 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:12 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:12 WARN MemoryStore: Not enough space to cache rdd_812_27 in memory! (computed 384.0 B so far)
24/04/30 23:59:12 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:12 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:12 WARN MemoryStore: Failed



24/04/30 23:59:13 WARN MemoryStore: Not enough space to cache rdd_812_49 in memory! (computed 17.0 MiB so far)
24/04/30 23:59:13 WARN MemoryStore: Not enough space to cache rdd_812_54 in memory! (computed 1024.6 KiB so far)


                                                                                

24/04/30 23:59:13 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:13 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:13 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:13 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:59:13 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:13 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:13 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_20 in memory.
24/04/30 23:59:13 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 384.0 B so far)
24/04/30 23:59:13 WA

                                                                                

24/04/30 23:59:14 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:59:14 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:14 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:59:14 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:14 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:14 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:14 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:14 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:59:14 WARN MemoryStore: Failed

                                                                                

24/04/30 23:59:14 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:14 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:59:14 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:14 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_20 in memory.
24/04/30 23:59:14 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:14 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 384.0 B so far)
24/04/30 23:59:14 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:14 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 13.2 MiB so far)
24/04/30 23:59:14 WARN MemoryStore: Faile

                                                                                

24/04/30 23:59:15 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:15 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:15 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:15 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_21 in memory.
24/04/30 23:59:15 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:15 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 384.0 B so far)
24/04/30 23:59:15 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:15 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_28 in mem

                                                                                

24/04/30 23:59:16 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:16 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:16 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:16 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:16 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 3.5 MiB so far)
24/04/30 23:59:16 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:59:16 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:16 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_28 in memory.
24/04/30 23:59:16

                                                                                

24/04/30 23:59:17 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:17 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:17 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:17 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_20 in memory.
24/04/30 23:59:17 WARN MemoryStore: Not enough space to cache rdd_812_27 in memory! (computed 384.0 B so far)
24/04/30 23:59:17 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:59:17 WARN MemoryStore: Not enough space to cache rdd_812_26 in memory! (computed 384.0 B so far)
24/04/30 23:59:17 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 384.0 B so far)
24/04/30 23:59:17 WA

                                                                                

24/04/30 23:59:18 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 3.5 MiB so far)
24/04/30 23:59:18 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:18 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:18 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_25 in memory.
24/04/30 23:59:18 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:18 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:18 WARN MemoryStore: Not enough space to cache rdd_812_25 in memory! (computed 384.0 B so far)
24/04/30 23:59:18 WARN MemoryStore: Not enough space to cache rdd_812_27 in memory! (computed 384.0 B so far)
24/04/30 23:59:18



24/04/30 23:59:18 WARN MemoryStore: Not enough space to cache rdd_812_54 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:19 WARN MemoryStore: Not enough space to cache rdd_812_49 in memory! (computed 17.0 MiB so far)


                                                                                

24/04/30 23:59:19 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 3.7 MiB so far)
24/04/30 23:59:19 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_21 in memory.
24/04/30 23:59:19 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 384.0 B so far)
24/04/30 23:59:19 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:19 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:19 WARN MemoryStore: Not enough space to cache rdd_812_27 in memory! (computed 384.0 B so far)
24/04/30 23:59:19 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_28 in memory.
24/04/30 23:59:19 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_26 in memory.
24/04/30 23:59:19

                                                                                

24/04/30 23:59:19 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_23 in memory.
24/04/30 23:59:19 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_24 in memory.
24/04/30 23:59:19 WARN MemoryStore: Not enough space to cache rdd_812_23 in memory! (computed 384.0 B so far)
24/04/30 23:59:19 WARN MemoryStore: Not enough space to cache rdd_812_21 in memory! (computed 3.5 MiB so far)
24/04/30 23:59:19 WARN MemoryStore: Not enough space to cache rdd_812_20 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:20 WARN MemoryStore: Not enough space to cache rdd_812_24 in memory! (computed 384.0 B so far)
24/04/30 23:59:20 WARN MemoryStore: Not enough space to cache rdd_812_28 in memory! (computed 1024.6 KiB so far)
24/04/30 23:59:20 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_812_27 in memory.
24/04/30 23:59:20 WARN MemoryStore: Fai

[Stage 182:>                                                        (0 + 1) / 1]

+--------+--------------------+--------------------+--------------------+----------+
|Severity|            features|       rawPrediction|         probability|prediction|
+--------+--------------------+--------------------+--------------------+----------+
|       1|[1.0,40.018669,-8...|[-10.067093438968...|[2.79305112923801...|       1.0|
|       1|[1.0,41.395805,-8...|[-10.067056006610...|[7.23698203409434...|       1.0|
|       2|[2.0,33.293147999...|[-10.067154738848...|[4.13890213494106...|       2.0|
|       2|[2.0,33.625645,-1...|[-10.067293628552...|[7.96058156083586...|       2.0|
|       2|[2.0,33.729725,-1...|[-10.067274645071...|[1.24733813811372...|       2.0|
|       2|[2.0,33.736885,-1...|[-10.067205355280...|[9.14869983834938...|       2.0|
|       2|[2.0,33.738293,-1...|[-10.067271319605...|[1.52789055326399...|       2.0|
|       2|[2.0,33.738293,-1...|[-10.067273478523...|[1.33199129685503...|       2.0|
|       2|[2.0,33.743271,-1...|[-10.067223127613...|[8.8415779759

                                                                                

In [57]:
# Statistical summary of predictions
predictions.describe().show()



+-------+-------------------+-------------------+
|summary|           Severity|         prediction|
+-------+-------------------+-------------------+
|  count|            1696469|            1696469|
|   mean| 2.1614123217105647| 2.1614123217105647|
| stddev|0.45605204821128736|0.45605204821128736|
|    min|                  1|                1.0|
|    max|                  4|                4.0|
+-------+-------------------+-------------------+



                                                                                

In [58]:
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# Define the evaluator
evaluator = MulticlassClassificationEvaluator(labelCol="Severity", predictionCol="prediction", metricName="accuracy")

# Calculate accuracy
accuracy = evaluator.evaluate(predictions)
print("Accuracy:", accuracy)

# Other metrics: precision, recall, F1-score
precision = evaluator.evaluate(predictions, {evaluator.metricName: "weightedPrecision"})
recall = evaluator.evaluate(predictions, {evaluator.metricName: "weightedRecall"})
f1_score = evaluator.evaluate(predictions, {evaluator.metricName: "f1"})
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1_score)

# Area under the ROC curve (AUC)
from pyspark.ml.evaluation import BinaryClassificationEvaluator

binary_evaluator = BinaryClassificationEvaluator(labelCol="Severity", rawPredictionCol="prediction")
auc = binary_evaluator.evaluate(predictions)
print("Area under the ROC curve (AUC):", auc)


                                                                                

Accuracy: 1.0


                                                                                

Precision: 1.0
Recall: 1.0
F1-score: 1.0




Area under the ROC curve (AUC): 1.0


                                                                                