In [1]:
import findspark
findspark.init('/home/ductien/spark-3.3.2-bin-hadoop3')
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql import *
import mysql.connector

In [2]:
# Build (or reuse) the Spark session named "MySQL_loader" and register the
# MySQL Connector/J jar through spark.jars.
spark = (
    SparkSession.builder
    .appName("MySQL_loader")
    .config("spark.jars", "./mysql-connector-j-8.0.32.jar")
    .getOrCreate()
)

23/04/28 11:27:26 WARN Utils: Your hostname, DT-Kubuntu resolves to a loopback address: 127.0.1.1; using 192.168.207.69 instead (on interface wlp3s0)
23/04/28 11:27:26 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
23/04/28 11:27:27 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [3]:
# Load ./CleanedDatasets/Mark2017 as CSV: first row is the header, column types are inferred.
mark_2017_df = spark.read.options(header=True, inferSchema=True).csv("./CleanedDatasets/Mark2017")

                                                                                

In [4]:
# Load ./CleanedDatasets/Mark_2018_2022 as CSV: first row is the header, column types are inferred.
mark_18_22_df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("./CleanedDatasets/Mark_2018_2022")
)

                                                                                

In [5]:
# Load ./CleanedDatasets/cleaned_uni_mark as CSV: first row is the header, column types are inferred.
benchmark_df = spark.read.options(header=True, inferSchema=True).csv("./CleanedDatasets/cleaned_uni_mark")

In [4]:
from pyspark.sql.functions import col

<h1>Spark DataFrame to MySQL database</h1>

In [6]:
import os
from dotenv import load_dotenv
# Load the local .env file, then read the database settings from the environment.
# Any variable that is not set comes back as None.
load_dotenv(".env")
dbName, dbUser, password, host, port = (
    os.environ.get(key)
    for key in ("DB_NAME", "DB_USER", "DB_PASSWORD", "DB_HOST", "DB_PORT")
)

<h3>Load to province table</h3>

In [7]:
# Expose the 2017 marks as the SQL view PROVINCE. createOrReplaceTempView keeps
# this cell re-runnable; createTempView raises once the view already exists.
mark_2017_df.createOrReplaceTempView("PROVINCE")
# One row per distinct (province_code, province) pair in that view.
provinceDF = spark.sql("SELECT DISTINCT province_code, province FROM PROVINCE")

In [8]:
# Print provinceDF's schema (column names, types, nullability).
provinceDF.printSchema()

root
 |-- province_code: integer (nullable = true)
 |-- province: string (nullable = true)



In [9]:
# Order the province rows by their numeric code.
provinceDF = provinceDF.orderBy("province_code")

In [10]:
# Count the rows currently in provinceDF (before any rows are added by hand).
provinceDF.count()

                                                                                

61

In [11]:
from pyspark.sql.functions import col

In [12]:
# Append a hand-written row for province code 39 ('phuyen').
# NOTE(review): presumably this province is absent from the 2017 data - confirm.
newRow = spark.createDataFrame(data=[(39, 'phuyen')], schema=provinceDF.columns)
provinceDF = provinceDF.union(newRow)

In [13]:
# Append a hand-written row for province code 45 ('ninhthuan').
# NOTE(review): presumably this province is absent from the 2017 data - confirm.
newRow = spark.createDataFrame(data=[(45, 'ninhthuan')], schema=provinceDF.columns)
provinceDF = provinceDF.union(newRow)

In [14]:
# Re-sort by code so the appended rows fall into place.
provinceDF = provinceDF.orderBy('province_code')

In [15]:
# Rename both columns; column order is unchanged.
provinceDF = (
    provinceDF
    .withColumnRenamed('province_code', 'provinceCode')
    .withColumnRenamed('province', 'ProvinceName')
)

In [16]:
# Print the schema again to check the renamed columns and their types.
provinceDF.printSchema()

root
 |-- provinceCode: long (nullable = true)
 |-- ProvinceName: string (nullable = true)



In [17]:
# Count the rows after the two manual additions.
provinceDF.count()

                                                                                

63

In [44]:
# Append the province rows to the DimProvince table over JDBC.
# The URL is built from the DB_HOST / DB_PORT / DB_NAME settings read from the
# environment (they were loaded but never used); the previously hard-coded
# values remain the fallback when a setting is missing.
jdbcUrl = f"jdbc:mysql://{host or 'localhost'}:{port or '3306'}/{dbName or 'equivalent_score'}"
provinceDF.write.format('jdbc').options(
    url = jdbcUrl,
    # Connector/J 8.x driver class; 'com.mysql.jdbc.Driver' is the deprecated legacy name.
    driver = 'com.mysql.cj.jdbc.Driver',
    dbtable = "DimProvince",
    user = dbUser,
    password = password
).mode('append').save()

<h3>Load to Year table</h3>

In [40]:
from pyspark.sql import Row

In [41]:
# One Row per year from 2018 through 2022 (inclusive), in ascending order.
yearDF = spark.createDataFrame([Row(year = y) for y in range(2018, 2023)])

In [42]:
# Display the rows of yearDF.
yearDF.show()

+----+
|year|
+----+
|2018|
|2019|
|2020|
|2021|
|2022|
+----+



In [45]:
# Append the year rows to the DimYear table over JDBC.
# The URL is built from the DB_HOST / DB_PORT / DB_NAME settings read from the
# environment (they were loaded but never used); the previously hard-coded
# values remain the fallback when a setting is missing.
jdbcUrl = f"jdbc:mysql://{host or 'localhost'}:{port or '3306'}/{dbName or 'equivalent_score'}"
yearDF.write.format('jdbc').options(
    url = jdbcUrl,
    # Connector/J 8.x driver class; 'com.mysql.jdbc.Driver' is the deprecated legacy name.
    driver = 'com.mysql.cj.jdbc.Driver',
    dbtable = "DimYear",
    user = dbUser,
    password = password
).mode('append').save()

<h2>Load to DimUniversity</h2>

In [18]:
# Expose the benchmark data as the SQL view UNI. createOrReplaceTempView keeps
# this cell re-runnable; createTempView raises once the view already exists.
benchmark_df.createOrReplaceTempView('UNI')

In [20]:
# Print benchmark_df's schema (column names, types, nullability).
benchmark_df.printSchema()

root
 |-- uni_code: string (nullable = true)
 |-- uni_name: string (nullable = true)
 |-- year: integer (nullable = true)
 |-- major_code: string (nullable = true)
 |-- major_name: string (nullable = true)
 |-- benchmark: double (nullable = true)
 |-- subject_group: string (nullable = true)



In [46]:
# Select every column of the UNI view under new CamelCase aliases
# (uni_code -> UniCode, ..., year -> Year).
# NOTE(review): the aliases presumably match the target table's column names - confirm.
uniDF = spark.sql("SELECT uni_code AS UniCode, uni_name AS UniName, \
                   major_code AS MajorCode, major_name AS MajorName, \
                   subject_group AS SubjectGroup, benchmark AS BenchMark, year AS Year FROM UNI")

In [48]:
from pyspark.sql.functions import count, when, isnan, isnull

In [49]:
# For each column, count the rows whose value is NULL or NaN.
uniDF.select(*[count(when(isnull(c) | isnan(c), c)).alias(c) for c in uniDF.columns]).show()

+-------+-------+---------+---------+------------+---------+----+
|UniCode|UniName|MajorCode|MajorName|SubjectGroup|BenchMark|Year|
+-------+-------+---------+---------+------------+---------+----+
|      0|      0|       37|        0|           0|        0|   0|
+-------+-------+---------+---------+------------+---------+----+



                                                                                

In [50]:
# Drop every row that has a null in any of uniDF's columns.
uniDF = uniDF.na.drop(subset=uniDF.columns)

In [51]:
# Repeat the per-column NULL/NaN count to confirm the drop left none behind.
uniDF.select(*[count(when(isnull(c) | isnan(c), c)).alias(c) for c in uniDF.columns]).show()

+-------+-------+---------+---------+------------+---------+----+
|UniCode|UniName|MajorCode|MajorName|SubjectGroup|BenchMark|Year|
+-------+-------+---------+---------+------------+---------+----+
|      0|      0|        0|        0|           0|        0|   0|
+-------+-------+---------+---------+------------+---------+----+



In [58]:
from pyspark.sql.functions import max

In [60]:
# Keep only rows whose BenchMark is at most 32.75.
# NOTE(review): 32.75 is an unexplained cutoff - document where it comes from.
uniDF = uniDF.where(col('BenchMark') <= 32.75)

In [67]:
# Show the length of the longest major_code value in the UNI view.
spark.sql('SELECT max(length(major_code)) FROM UNI').show()

+-----------------------+
|max(length(major_code))|
+-----------------------+
|                     25|
+-----------------------+



                                                                                

In [69]:
# Count the rows currently in uniDF.
uniDF.count()

34734

In [68]:
# Append the university/major benchmark rows to the University table over JDBC.
# The URL is built from the DB_HOST / DB_PORT / DB_NAME settings read from the
# environment (they were loaded but never used); the previously hard-coded
# values remain the fallback when a setting is missing.
jdbcUrl = f"jdbc:mysql://{host or 'localhost'}:{port or '3306'}/{dbName or 'equivalent_score'}"
uniDF.write.format('jdbc').options(
    url = jdbcUrl,
    # Connector/J 8.x driver class; 'com.mysql.jdbc.Driver' is the deprecated legacy name.
    driver = 'com.mysql.cj.jdbc.Driver',
    dbtable = "University",
    user = dbUser,
    password = password
).mode('append').save()

                                                                                

<h1>Load to FactScore</h1>

In [70]:
# Expose the 2018-2022 marks as the SQL view student1822. createOrReplaceTempView
# keeps this cell re-runnable; createTempView raises once the view already exists.
mark_18_22_df.createOrReplaceTempView('student1822')

In [71]:
# Print mark_18_22_df's schema (column names, types, nullability).
mark_18_22_df.printSchema()

root
 |-- student_id: integer (nullable = true)
 |-- mathematics: double (nullable = true)
 |-- literature: double (nullable = true)
 |-- english: double (nullable = true)
 |-- physics: double (nullable = true)
 |-- chemistry: double (nullable = true)
 |-- biology: double (nullable = true)
 |-- history: double (nullable = true)
 |-- geography: double (nullable = true)
 |-- civic_education: double (nullable = true)
 |-- Year: integer (nullable = true)
 |-- province_code: integer (nullable = true)
 |-- combined_natural_sciences: double (nullable = true)
 |-- combined_social_sciences: double (nullable = true)



In [72]:
# Select the per-subject marks, province code and year from the student1822 view,
# renaming several columns (student_id -> studentID, mathematics -> math,
# civic_education -> civil, province_code -> ProvinceCode, Year -> year).
# The two combined_* columns of the view are not selected.
student1822 = spark.sql("SELECT student_id as studentID, literature, mathematics as math, \
                         english, physics, chemistry, biology, history, geography, \
                         civic_education as civil, province_code as ProvinceCode, Year as year FROM student1822")

In [73]:
# Remove rows that are exact duplicates across all columns.
student1822 = student1822.dropDuplicates()

In [74]:
# Number of different ProvinceCode values present in student1822.
student1822.select('ProvinceCode').dropDuplicates().count()

                                                                                

63

In [8]:
# Distinct province_code values taken directly from the student1822 view.
# NOTE(review): this cell's execution count is out of sequence with its neighbours;
# it only works once the student1822 view has been registered.
distinctProv = spark.sql("SELECT DISTINCT province_code FROM student1822")

In [None]:
# Count the distinct province codes (this cell has no recorded output).
distinctProv.count()

In [75]:
# Count the rows currently in student1822.
student1822.count()

                                                                                

3815783

In [76]:
# For each column, count the rows whose value is NULL or NaN.
student1822.select(*[count(when(isnull(c) | isnan(c), c)).alias(c) for c in student1822.columns]).show()



+---------+----------+----+-------+-------+---------+-------+-------+---------+-------+------------+----+
|studentID|literature|math|english|physics|chemistry|biology|history|geography|  civil|ProvinceCode|year|
+---------+----------+----+-------+-------+---------+-------+-------+---------+-------+------------+----+
|        0|         0|   0|      0|2327577|  2327932|2329854|1450340|  1452611|1459756|           0|   0|
+---------+----------+----+-------+-------+---------+-------+-------+---------+-------+------------+----+



                                                                                

In [79]:
# Show the highest literature mark. `max` here is pyspark.sql.functions.max
# (imported in an earlier cell), not the Python builtin.
student1822.agg(max(col('literature'))).show()



+---------------+
|max(literature)|
+---------------+
|           10.0|
+---------------+



                                                                                

In [82]:
# Append the student score rows to the FactScore table over JDBC.
# The URL is built from the DB_HOST / DB_PORT / DB_NAME settings read from the
# environment (they were loaded but never used); the previously hard-coded
# values remain the fallback when a setting is missing.
#
# NOTE(review): the recorded run of this cell aborted with a MySQL error
# "Error writing file '/tmp/...' (OS errno 28 - No space left on device)".
# That is a disk-space problem on the database host, so free space (or move
# MySQL's temporary directory to a larger volume) before re-running. Writing
# in smaller partitions may also reduce temporary-file usage - unverified.
jdbcUrl = f"jdbc:mysql://{host or 'localhost'}:{port or '3306'}/{dbName or 'equivalent_score'}"
student1822.write.format('jdbc').options(
    url = jdbcUrl,
    # Connector/J 8.x driver class; 'com.mysql.jdbc.Driver' is the deprecated legacy name.
    driver = 'com.mysql.cj.jdbc.Driver',
    dbtable = "FactScore",
    user = dbUser,
    password = password
).mode('append').save()

[Stage 166:>                                                      (0 + 16) / 17]

23/04/28 12:31:06 ERROR Executor: Exception in task 9.0 in stage 166.0 (TID 573)
java.sql.BatchUpdateException: Error writing file '/tmp/MLfd=70' (OS errno 28 - No space left on device)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:490)
	at com.mysql.cj.util.Util.handleNewInstance(Util.java:192)
	at com.mysql.cj.util.Util.getInstance(Util.java:167)
	at com.mysql.cj.util.Util.getInstance(Util.java:174)
	at com.mysql.cj.jdbc.exceptions.SQLError.createBatchUpdateException(SQLError.java:224)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeBatchSerially(ClientPreparedStatement.java:816)
	at com.mysql.cj.jdbc.ClientPreparedStatement.

[Stage 166:>                                                      (0 + 11) / 17]

Py4JJavaError: An error occurred while calling o611.save.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 9 in stage 166.0 failed 1 times, most recent failure: Lost task 9.0 in stage 166.0 (TID 573) (192.168.207.69 executor driver): java.sql.BatchUpdateException: Error writing file '/tmp/MLfd=70' (OS errno 28 - No space left on device)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:490)
	at com.mysql.cj.util.Util.handleNewInstance(Util.java:192)
	at com.mysql.cj.util.Util.getInstance(Util.java:167)
	at com.mysql.cj.util.Util.getInstance(Util.java:174)
	at com.mysql.cj.jdbc.exceptions.SQLError.createBatchUpdateException(SQLError.java:224)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeBatchSerially(ClientPreparedStatement.java:816)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeBatchInternal(ClientPreparedStatement.java:418)
	at com.mysql.cj.jdbc.StatementImpl.executeBatch(StatementImpl.java:795)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.savePartition(JdbcUtils.scala:708)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$saveTable$1(JdbcUtils.scala:868)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$saveTable$1$adapted(JdbcUtils.scala:867)
	at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2(RDD.scala:1011)
	at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2$adapted(RDD.scala:1011)
	at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2278)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:136)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.sql.SQLException: Error writing file '/tmp/MLfd=70' (OS errno 28 - No space left on device)
	at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:129)
	at com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeInternal(ClientPreparedStatement.java:916)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeUpdateInternal(ClientPreparedStatement.java:1061)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeBatchSerially(ClientPreparedStatement.java:795)
	... 16 more

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2672)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2608)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2607)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2607)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1182)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1182)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1182)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2860)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:952)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2238)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2259)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2278)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2303)
	at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$1(RDD.scala:1011)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:406)
	at org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:1009)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.saveTable(JdbcUtils.scala:867)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:70)
	at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:47)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:98)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:109)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:169)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:95)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:584)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:176)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:584)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:560)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:116)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:860)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:390)
	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:363)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:247)
	at jdk.internal.reflect.GeneratedMethodAccessor188.invoke(Unknown Source)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.sql.BatchUpdateException: Error writing file '/tmp/MLfd=70' (OS errno 28 - No space left on device)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:490)
	at com.mysql.cj.util.Util.handleNewInstance(Util.java:192)
	at com.mysql.cj.util.Util.getInstance(Util.java:167)
	at com.mysql.cj.util.Util.getInstance(Util.java:174)
	at com.mysql.cj.jdbc.exceptions.SQLError.createBatchUpdateException(SQLError.java:224)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeBatchSerially(ClientPreparedStatement.java:816)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeBatchInternal(ClientPreparedStatement.java:418)
	at com.mysql.cj.jdbc.StatementImpl.executeBatch(StatementImpl.java:795)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.savePartition(JdbcUtils.scala:708)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$saveTable$1(JdbcUtils.scala:868)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$saveTable$1$adapted(JdbcUtils.scala:867)
	at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2(RDD.scala:1011)
	at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2$adapted(RDD.scala:1011)
	at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2278)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:136)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	... 1 more
Caused by: java.sql.SQLException: Error writing file '/tmp/MLfd=70' (OS errno 28 - No space left on device)
	at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:129)
	at com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeInternal(ClientPreparedStatement.java:916)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeUpdateInternal(ClientPreparedStatement.java:1061)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeBatchSerially(ClientPreparedStatement.java:795)
	... 16 more


[Stage 166:>                                                       (0 + 3) / 17]

23/04/28 12:31:07 WARN TaskSetManager: Lost task 6.0 in stage 166.0 (TID 570) (192.168.207.69 executor driver): TaskKilled (Stage cancelled)
23/04/28 12:31:07 WARN TaskSetManager: Lost task 8.0 in stage 166.0 (TID 572) (192.168.207.69 executor driver): TaskKilled (Stage cancelled)
23/04/28 12:31:07 WARN TaskSetManager: Lost task 1.0 in stage 166.0 (TID 565) (192.168.207.69 executor driver): TaskKilled (Stage cancelled)
23/04/28 12:31:07 WARN TaskSetManager: Lost task 3.0 in stage 166.0 (TID 567) (192.168.207.69 executor driver): TaskKilled (Stage cancelled)
23/04/28 12:31:07 WARN TaskSetManager: Lost task 4.0 in stage 166.0 (TID 568) (192.168.207.69 executor driver): TaskKilled (Stage cancelled)
23/04/28 12:31:07 WARN TaskSetManager: Lost task 7.0 in stage 166.0 (TID 571) (192.168.207.69 executor driver): TaskKilled (Stage cancelled)
23/04/28 12:31:07 WARN TaskSetManager: Lost task 11.0 in stage 166.0 (TID 575) (192.168.207.69 executor driver): TaskKilled (Stage cancelled)
23/04/28 12: