# 05 - Feature Engineering

CrÃ©ation des features pour le Machine Learning (Silver â†’ Silver_ML).

## Configuration

In [1]:
from pyspark.sql.functions import col, lag, avg, stddev, row_number, when, sqrt, pow, lit, min as spark_min, broadcast
from pyspark.sql.window import Window
from config import get_s3_path, create_spark_session

SILVER_PATH = get_s3_path("silver", "flights")
SILVER_ML_PATH = get_s3_path("silver", "flights_ml")
AIRPORTS_CSV = "./data/airports.csv"

spark = create_spark_session("FeatureEngineering")

print(f"âœ… Input:  {SILVER_PATH}")
print(f"âœ… Output: {SILVER_ML_PATH}")

âœ… Configuration chargÃ©e depuis .env
:: loading settings :: url = jar:file:/opt/conda/lib/python3.12/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /home/jovyan/.ivy2/cache
The jars for the packages stored in: /home/jovyan/.ivy2/jars
org.apache.hadoop#hadoop-aws added as a dependency
com.amazonaws#aws-java-sdk-bundle added as a dependency
org.apache.spark#spark-hadoop-cloud_2.12 added as a dependency
io.delta#delta-spark_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-cb51a625-9040-4372-9cd6-2f77d02498ab;1.0
	confs: [default]
	found org.apache.hadoop#hadoop-aws;3.3.4 in central
	found com.amazonaws#aws-java-sdk-bundle;1.12.262 in central
	found org.wildfly.openssl#wildfly-openssl;1.0.7.Final in central
	found org.apache.spark#spark-hadoop-cloud_2.12;3.5.3 in central
	found org.apache.hadoop#hadoop-client-runtime;3.3.4 in central
	found org.apache.hadoop#hadoop-client-api;3.3.4 in central
	found org.xerial.snappy#snappy-java;1.1.10.5 in central
	found org.slf4j#slf4j-api;2.0.7 in central
	found commons-logging#commons-logging;1.1.3 in central
	found com.google.c

âœ… Spark Session 'FeatureEngineering' configurÃ©e
âœ… Input:  s3a://datalake/silver/flights
âœ… Output: s3a://datalake/silver/flights_ml


## Lecture et nettoyage

In [2]:
df = spark.read.format("delta").load(SILVER_PATH)

df_clean = df \
    .filter(col("icao24").isNotNull()) \
    .filter(col("altitude_meters").between(-500, 15000)) \
    .filter(col("velocity_kmh").between(0, 1200))

print(f"ðŸ“Š {df_clean.count():,} lignes aprÃ¨s nettoyage")

26/01/23 15:41:53 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties
26/01/23 15:42:02 ERROR NonFateSharingFuture: Failed to get result from future  
scala.runtime.NonLocalReturnControl
26/01/23 15:42:03 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.

ðŸ“Š 569,971 lignes aprÃ¨s nettoyage


                                                                                

## Features temporelles (Window Functions)

In [3]:
window_aircraft = Window.partitionBy("icao24").orderBy("event_timestamp")

df_temporal = df_clean \
    .withColumn("prev_altitude", lag("altitude_meters", 1).over(window_aircraft)) \
    .withColumn("prev_velocity", lag("velocity_kmh", 1).over(window_aircraft)) \
    .withColumn("altitude_change", col("altitude_meters") - col("prev_altitude")) \
    .withColumn("velocity_change", col("velocity_kmh") - col("prev_velocity")) \
    .withColumn("observation_rank", row_number().over(window_aircraft))

print("âœ… Features temporelles crÃ©Ã©es")

âœ… Features temporelles crÃ©Ã©es


## Jointure avec les aÃ©roports

In [4]:
df_airports = spark.read.option("header", "true").csv(AIRPORTS_CSV).select(
    col("ident").alias("airport_icao"),
    col("name").alias("airport_name"),
    col("iso_country").alias("airport_country"),
    col("latitude_deg").cast("double").alias("airport_lat"),
    col("longitude_deg").cast("double").alias("airport_lon")
).filter(col("type").isin("large_airport", "medium_airport"))

df_on_ground = df_temporal.filter(col("on_ground") == True)
df_in_flight = df_temporal.filter(col("on_ground") == False)

df_with_airports = df_on_ground.crossJoin(broadcast(df_airports)).withColumn(
    "dist", sqrt(pow(col("latitude") - col("airport_lat"), 2) + pow(col("longitude") - col("airport_lon"), 2))
)

w = Window.partitionBy("icao24", "event_timestamp")
df_closest = df_with_airports.withColumn("min_dist", spark_min("dist").over(w)) \
    .filter(col("dist") == col("min_dist")) \
    .drop("dist", "min_dist", "airport_lat", "airport_lon")

df_enriched = df_closest.unionByName(
    df_in_flight.withColumn("airport_icao", lit(None))
                .withColumn("airport_name", lit(None))
                .withColumn("airport_country", lit(None)),
    allowMissingColumns=True
)

print("âœ… Jointure aÃ©roports rÃ©alisÃ©e")
print("\nðŸ“Š Top 10 des aÃ©roports avec le plus d'avions au sol (dÃ©collage/atterrissage) :")
df_closest.groupBy("airport_icao", "airport_name", "airport_country") \
    .count() \
    .orderBy("count", ascending=False) \
    .show(10, truncate=False)

âœ… Jointure aÃ©roports rÃ©alisÃ©e

ðŸ“Š Top 10 des aÃ©roports avec le plus d'avions au sol (dÃ©collage/atterrissage) :




+------------+-------------------------------------------------+---------------+-----+
|airport_icao|airport_name                                     |airport_country|count|
+------------+-------------------------------------------------+---------------+-----+
|CYYZ        |Toronto Pearson International Airport            |CA             |603  |
|LSZH        |ZÃ¼rich Airport                                   |CH             |389  |
|VECC        |Netaji Subhash Chandra Bose International Airport|IN             |138  |
|EDDF        |Frankfurt Airport                                |DE             |63   |
|EDDS        |Stuttgart Airport                                |DE             |58   |
|EYVI        |Vilnius International Airport                    |LT             |29   |
|MGGT        |La Aurora International Airport                  |GT             |26   |
|EPRZ        |RzeszÃ³w-Jasionka Airport                         |PL             |24   |
|OTHH        |Hamad International Airport

                                                                                

## Features rolling window

In [5]:
rolling_window = Window.partitionBy("icao24").orderBy("event_timestamp").rowsBetween(-5, 0)

df_rolling = df_enriched \
    .withColumn("rolling_avg_altitude", avg("altitude_meters").over(rolling_window)) \
    .withColumn("rolling_std_altitude", stddev("altitude_meters").over(rolling_window)) \
    .withColumn("rolling_avg_velocity", avg("velocity_kmh").over(rolling_window))

print("âœ… Features rolling crÃ©Ã©es")
print("\nðŸ“Š AperÃ§u des features rolling (5 premiers avions) :")
df_rolling.select(
    "icao24", "event_timestamp", "altitude_meters", 
    "rolling_avg_altitude", "rolling_std_altitude", "rolling_avg_velocity"
).show(10, truncate=False)

âœ… Features rolling crÃ©Ã©es

ðŸ“Š AperÃ§u des features rolling (5 premiers avions) :


[Stage 34:>                                                         (0 + 1) / 1]

+------+-------------------+---------------+--------------------+--------------------+--------------------+
|icao24|event_timestamp    |altitude_meters|rolling_avg_altitude|rolling_std_altitude|rolling_avg_velocity|
+------+-------------------+---------------+--------------------+--------------------+--------------------+
|008a8d|2026-01-23 14:54:04|2247.9         |2247.89990234375    |NULL                |179.35              |
|008a8d|2026-01-23 14:54:24|2255.52        |2251.7099609375     |5.388236536717413   |179.28              |
|008a8d|2026-01-23 14:54:35|2255.52        |2252.97998046875    |4.399476709459619   |179.5666666666667   |
|008a8d|2026-01-23 14:54:55|2255.52        |2253.614990234375   |3.81005859375       |180.2775            |
|008a8d|2026-01-23 14:55:07|2255.52        |2253.99599609375    |3.407820005552902   |180.25              |
|008a8d|2026-01-23 14:55:26|2255.52        |2254.25             |3.1108998149311824  |180.61              |
|008a8d|2026-01-23 14:55:47|

                                                                                

## Label flight_phase

In [7]:
df_ml = df_rolling.withColumn(
    "flight_phase",
    when(col("on_ground") == True, "GROUND")
    .when((col("altitude_change") > 50) & (col("altitude_meters") < 3000), "TAKEOFF")
    .when(col("altitude_change") > 20, "CLIMB")
    .when(col("altitude_change").between(-20, 20) & (col("altitude_meters") > 8000), "CRUISE")
    .when(col("altitude_change") < -20, "DESCENT")
    .otherwise("TRANSITION")
)

print("ðŸ“Š Distribution :")
df_ml.groupBy("flight_phase").count().orderBy("count", ascending=False).show()

ðŸ“Š Distribution :


26/01/23 15:47:17 ERROR NonFateSharingFuture: Failed to get result from future  
scala.runtime.NonLocalReturnControl

+------------+------+
|flight_phase| count|
+------------+------+
|      CRUISE|297038|
|  TRANSITION|175330|
|     DESCENT|124610|
|       CLIMB| 83066|
|     TAKEOFF| 23894|
|      GROUND|  1709|
+------------+------+



                                                                                

## Sauvegarde

In [8]:
df_ml.write.format("delta").mode("overwrite").save(SILVER_ML_PATH)

print(f"âœ… {df_ml.count():,} lignes sauvegardÃ©es")

26/01/23 15:50:40 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
26/01/23 15:51:25 ERROR NonFateSharingFuture: Failed to get result from future  
scala.runtime.NonLocalReturnControl
                                                                                

Py4JJavaError: An error occurred while calling o249.save.
: io.delta.exceptions.ConcurrentAppendException: Files were added to the root of the table by a concurrent update. Please try the operation again.
Conflicting commit: {"timestamp":1769183332701,"operation":"WRITE","operationParameters":{"mode":Append,"partitionBy":[]},"readVersion":46,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"10","numOutputRows":"18069","numOutputBytes":"839292"},"engineInfo":"Apache-Spark/3.5.3 Delta-Lake/3.0.0","txnId":"80f77428-dad0-43db-b394-0102db3580d0"}
Refer to https://docs.delta.io/latest/concurrency-control.html for more details.
	at org.apache.spark.sql.delta.DeltaErrorsBase.concurrentAppendException(DeltaErrors.scala:2293)
	at org.apache.spark.sql.delta.DeltaErrorsBase.concurrentAppendException$(DeltaErrors.scala:2284)
	at org.apache.spark.sql.delta.DeltaErrors$.concurrentAppendException(DeltaErrors.scala:3039)
	at org.apache.spark.sql.delta.ConflictChecker.$anonfun$checkForAddedFilesThatShouldHaveBeenReadByCurrentTxn$1(ConflictChecker.scala:291)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.sql.delta.ConflictChecker.recordTime(ConflictChecker.scala:485)
	at org.apache.spark.sql.delta.ConflictChecker.checkForAddedFilesThatShouldHaveBeenReadByCurrentTxn(ConflictChecker.scala:262)
	at org.apache.spark.sql.delta.ConflictChecker.checkConflicts(ConflictChecker.scala:140)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.checkForConflictsAgainstVersion(OptimisticTransaction.scala:1784)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.checkForConflictsAgainstVersion$(OptimisticTransaction.scala:1774)
	at org.apache.spark.sql.delta.OptimisticTransaction.checkForConflictsAgainstVersion(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$checkForConflicts$4(OptimisticTransaction.scala:1763)
	at scala.runtime.java8.JFunction1$mcVJ$sp.apply(JFunction1$mcVJ$sp.java:23)
	at scala.collection.immutable.NumericRange.foreach(NumericRange.scala:75)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$checkForConflicts$1(OptimisticTransaction.scala:1759)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile(DeltaLogging.scala:140)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile$(DeltaLogging.scala:138)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordFrameProfile(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.metering.DeltaLogging.$anonfun$recordDeltaOperationInternal$1(DeltaLogging.scala:133)
	at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:128)
	at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:117)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordOperation(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperationInternal(DeltaLogging.scala:132)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:122)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:112)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordDeltaOperation(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.checkForConflicts(OptimisticTransaction.scala:1738)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.checkForConflicts$(OptimisticTransaction.scala:1730)
	at org.apache.spark.sql.delta.OptimisticTransaction.checkForConflicts(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$doCommitRetryIteratively$4(OptimisticTransaction.scala:1571)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile(DeltaLogging.scala:140)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile$(DeltaLogging.scala:138)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordFrameProfile(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.metering.DeltaLogging.$anonfun$recordDeltaOperationInternal$1(DeltaLogging.scala:133)
	at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:128)
	at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:117)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordOperation(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperationInternal(DeltaLogging.scala:132)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:122)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:112)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordDeltaOperation(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$doCommitRetryIteratively$3(OptimisticTransaction.scala:1569)
	at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:158)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$doCommitRetryIteratively$2(OptimisticTransaction.scala:1565)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile(DeltaLogging.scala:140)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile$(DeltaLogging.scala:138)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordFrameProfile(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.metering.DeltaLogging.$anonfun$recordDeltaOperationInternal$1(DeltaLogging.scala:133)
	at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:128)
	at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:117)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordOperation(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperationInternal(DeltaLogging.scala:132)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:122)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:112)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordDeltaOperation(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$doCommitRetryIteratively$1(OptimisticTransaction.scala:1565)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.lockCommitIfEnabled(OptimisticTransaction.scala:1543)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.doCommitRetryIteratively(OptimisticTransaction.scala:1559)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.doCommitRetryIteratively$(OptimisticTransaction.scala:1555)
	at org.apache.spark.sql.delta.OptimisticTransaction.doCommitRetryIteratively(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.liftedTree1$1(OptimisticTransaction.scala:1064)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$commitImpl$1(OptimisticTransaction.scala:992)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile(DeltaLogging.scala:140)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile$(DeltaLogging.scala:138)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordFrameProfile(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.metering.DeltaLogging.$anonfun$recordDeltaOperationInternal$1(DeltaLogging.scala:133)
	at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:128)
	at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:117)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordOperation(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperationInternal(DeltaLogging.scala:132)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:122)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:112)
	at org.apache.spark.sql.delta.OptimisticTransaction.recordDeltaOperation(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.commitImpl(OptimisticTransaction.scala:989)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.commitImpl$(OptimisticTransaction.scala:984)
	at org.apache.spark.sql.delta.OptimisticTransaction.commitImpl(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.commitIfNeeded(OptimisticTransaction.scala:946)
	at org.apache.spark.sql.delta.OptimisticTransactionImpl.commitIfNeeded$(OptimisticTransaction.scala:942)
	at org.apache.spark.sql.delta.OptimisticTransaction.commitIfNeeded(OptimisticTransaction.scala:141)
	at org.apache.spark.sql.delta.commands.WriteIntoDelta.$anonfun$run$1(WriteIntoDelta.scala:106)
	at org.apache.spark.sql.delta.commands.WriteIntoDelta.$anonfun$run$1$adapted(WriteIntoDelta.scala:96)
	at org.apache.spark.sql.delta.DeltaLog.withNewTransaction(DeltaLog.scala:240)
	at org.apache.spark.sql.delta.commands.WriteIntoDelta.run(WriteIntoDelta.scala:96)
	at org.apache.spark.sql.delta.sources.DeltaDataSource.createRelation(DeltaDataSource.scala:200)
	at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:142)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:869)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:391)
	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:307)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:243)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:569)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:840)
