# Unit 3: INSERT INTO COW tables

In this unit, we will learn INSERT operations into COW tables.<br>


This unit takes about 5 minutes to complete.

In [1]:
from pyspark.sql.functions import lit
from functools import reduce
from pyspark.sql.types import LongType
import pyspark.sql.functions as F
from datetime import datetime

### Initialize Spark Session

In [2]:
spark = SparkSession.builder \
  .appName("Hudi-Learning-Unit-05-PySpark") \
  .master("yarn")\
  .enableHiveSupport()\
  .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.hudi.catalog.HoodieCatalog") \
  .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension") \
  .getOrCreate()

23/07/30 02:12:35 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [3]:
spark

### Declare & define variables

In [4]:
PROJECT_ID_OUTPUT=!gcloud config get-value core/project
PROJECT_ID=PROJECT_ID_OUTPUT[0]

PROJECT_NBR_OUTPUT=!gcloud projects describe $PROJECT_ID --format="value(projectNumber)"
PROJECT_NBR=PROJECT_NBR_OUTPUT[0]

print(f"Project ID is {PROJECT_ID}")
print(f"Project Number is {PROJECT_NBR}")

PERSIST_TO_BUCKET = f"gs://gaia_data_bucket-{PROJECT_NBR}"
HUDI_COW_BASE_GCS_URI = f"{PERSIST_TO_BUCKET}/nyc-taxi-trips-hudi-cow"
DATABASE_NAME = "taxi_db"
COW_TABLE_NAME = "nyc_taxi_trips_hudi_cow"
TRIP_DATE="2020-01-30"


Project ID is apache-hudi-lab
Project Number is 623600433888


## 1. [HUDI INSERT FEATURE] Insert into CoW table

### 1.1. Determine trip ID to clone

In [5]:
ORIGINAL_TRIP_ID=spark.sql(f"select trip_id  from {DATABASE_NAME}.{COW_TABLE_NAME} WHERE trip_date=\"{TRIP_DATE}\" LIMIT 1").collect()[0][0]
print(f"ID of the trip cloned: {ORIGINAL_TRIP_ID}")

ivysettings.xml file not found in HIVE_HOME or HIVE_CONF_DIR,/etc/hive/conf.dist/ivysettings.xml will be used
23/07/30 02:12:40 WARN GhfsStorageStatistics: Detected potential high latency for operation op_open. latencyMs=125; previousMaxLatencyMs=0; operationCount=1; context=gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/.hoodie/hoodie.properties
[Stage 3:>                                                          (0 + 1) / 1]

ID of the trip cloned: 60129542187


                                                                                

### 1.2. Review the Hudi metadata

In [6]:
!gsutil cat $HUDI_COW_BASE_GCS_URI/.hoodie/hoodie.properties

#Properties saved on 2023-07-29T06:08:44.414701Z
#Sat Jul 29 06:08:44 UTC 2023
hoodie.compaction.payload.class=org.apache.hudi.common.model.OverwriteWithLatestAvroPayload
hoodie.table.type=MERGE_ON_READ
hoodie.table.metadata.partitions=files
hoodie.table.precombine.field=pickup_datetime
hoodie.table.partition.fields=trip_date
hoodie.archivelog.folder=archived
hoodie.table.create.schema={"type"\:"record","name"\:"topLevelRecord","fields"\:[{"name"\:"_hoodie_commit_time","type"\:["string","null"]},{"name"\:"_hoodie_commit_seqno","type"\:["string","null"]},{"name"\:"_hoodie_record_key","type"\:["string","null"]},{"name"\:"_hoodie_partition_path","type"\:["string","null"]},{"name"\:"_hoodie_file_name","type"\:["string","null"]},{"name"\:"taxi_type","type"\:["string","null"]},{"name"\:"trip_year","type"\:["int","null"]},{"name"\:"trip_month","type"\:["int","null"]},{"name"\:"trip_day","type"\:["int","null"]},{"name"\:"trip_hour","type"\:["int","null"]},{"name"\:"trip_minute","type"\:["int",

### 1.3. Study the data

#### a) Layout and size

In [7]:
# File system layout, files, types & counts by types, byte sizes
!gsutil ls -alh $HUDI_COW_BASE_GCS_URI/trip_date=$TRIP_DATE/

     373 B  2023-07-30T01:59:44Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/.hoodie_partition_metadata.parquet#1690682384882374  metageneration=1
  4.28 MiB  2023-07-30T01:59:44Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/227545de-46ed-488d-b47a-0ee72d38243e-0_228-38-12650_20230729055218169.parquet#1690682384910365  metageneration=1
  4.27 MiB  2023-07-30T01:59:44Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/350cf38c-df1d-4b7a-b388-b544ee406a5f-0_229-38-12659_20230729055218169.parquet#1690682384887569  metageneration=1
805.84 KiB  2023-07-30T01:59:44Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/36802c74-35eb-4664-aa56-931f9e04c368-0_230-38-12661_20230729055218169.parquet#1690682384933498  metageneration=1
TOTAL: 4 objects, 9788217 bytes (9.33 MiB)


#### b) Record count

In [8]:
print(f"Trip Date: {TRIP_DATE}")
spark.sql(f"SELECT count(*) as trip_count from {DATABASE_NAME}.{COW_TABLE_NAME} where trip_date=\"{TRIP_DATE}\"").show()

Trip Date: 2020-01-30




+----------+
|trip_count|
+----------+
|    257927|
+----------+



                                                                                

### 1.4. Create a record / taxi trip that we will use for our insert trial
We'll grab a record and change the hour of pickup and dropoff to be 5 hours later:<br>

#### 1.4.1. Generate a new trip ID to use for the record

In [9]:
NEW_TRIP_ID=spark.sql(f"select max(trip_id) as max_trip_id from {DATABASE_NAME}.{COW_TABLE_NAME} WHERE trip_date=\"{TRIP_DATE}\"").collect()[0][0] + 1
print(f"New trip ID is: {NEW_TRIP_ID}")



New trip ID is: 309237772005


                                                                                

#### 1.4.2. Identify a record to use that we will morph and insert

In [10]:
# This query returns exactly one record
candidateTripDFCow=spark.sql(f"SELECT * FROM {DATABASE_NAME}.{COW_TABLE_NAME} WHERE trip_date=\"{TRIP_DATE}\" AND trip_id={ORIGINAL_TRIP_ID}")

In [11]:
candidateTripDFCow.show(truncate=False)

23/07/30 02:13:15 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

+-------------------+-----------------------+------------------+----------------------+-----------------------------------------------------------------------------+---------+---------+----------+--------+---------+-----------+---------+-------------------+-------------------+-----------------+---------+------------------+-------------------+---------------+-------------+------------+-----------+-----------+-----------+------------+---------------------+------------+-----------------+--------------------+---------+---------+--------------+------------------------+--------------------+-----------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno   |_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name                                                            |taxi_type|trip_year|trip_month|trip_day|trip_hour|trip_minute|vendor_id|pickup_datetime    |dropoff_datetime   |store_and_forward|rate_code|pickup_location_id|dropoff_location_id|passenger_count|trip_distance|fare_amount

In [12]:
candidateTripDFCow.printSchema

<bound method DataFrame.printSchema of DataFrame[_hoodie_commit_time: string, _hoodie_commit_seqno: string, _hoodie_record_key: string, _hoodie_partition_path: string, _hoodie_file_name: string, taxi_type: string, trip_year: int, trip_month: int, trip_day: int, trip_hour: int, trip_minute: int, vendor_id: string, pickup_datetime: timestamp, dropoff_datetime: timestamp, store_and_forward: string, rate_code: string, pickup_location_id: string, dropoff_location_id: string, passenger_count: bigint, trip_distance: decimal(38,9), fare_amount: decimal(38,9), surcharge: decimal(38,9), mta_tax: decimal(38,9), tip_amount: decimal(38,9), tolls_amount: decimal(38,9), improvement_surcharge: decimal(10,0), total_amount: decimal(38,9), payment_type_code: string, congestion_surcharge: decimal(10,0), trip_type: string, ehail_fee: decimal(10,0), partition_date: date, distance_between_service: decimal(38,9), time_between_service: bigint, trip_id: bigint, trip_date: string]>

In [13]:
insertTripDFCow = candidateTripDFCow.withColumn('pickup_datetime', candidateTripDFCow.pickup_datetime + F.expr('INTERVAL 5 HOURS')) \
                                    .withColumn('dropoff_datetime', candidateTripDFCow.dropoff_datetime + F.expr('INTERVAL 5 HOURS')) \
                                    .withColumn('trip_hour', candidateTripDFCow.trip_hour + 5) \
                                    .withColumn('trip_id', lit(NEW_TRIP_ID)) \
                                    .drop("_hoodie_commit_time") \
                                    .drop("_hoodie_commit_seqno") \
                                    .drop("_hoodie_record_key") \
                                    .drop("_hoodie_partition_path") \
                                    .drop("_hoodie_file_name")

In [14]:
insertTripDFCow.show(truncate=False)



+---------+---------+----------+--------+---------+-----------+---------+-------------------+-------------------+-----------------+---------+------------------+-------------------+---------------+-------------+------------+-----------+-----------+-----------+------------+---------------------+------------+-----------------+--------------------+---------+---------+--------------+------------------------+--------------------+------------+----------+
|taxi_type|trip_year|trip_month|trip_day|trip_hour|trip_minute|vendor_id|pickup_datetime    |dropoff_datetime   |store_and_forward|rate_code|pickup_location_id|dropoff_location_id|passenger_count|trip_distance|fare_amount |surcharge  |mta_tax    |tip_amount |tolls_amount|improvement_surcharge|total_amount|payment_type_code|congestion_surcharge|trip_type|ehail_fee|partition_date|distance_between_service|time_between_service|trip_id     |trip_date |
+---------+---------+----------+--------+---------+-----------+---------+-------------------+---

                                                                                

In [15]:
# Original record
spark.sql(f"SELECT trip_id,taxi_type,vendor_id,pickup_datetime,dropoff_datetime,pickup_location_id,dropoff_location_id,trip_date " \
          f" FROM {DATABASE_NAME}.{COW_TABLE_NAME} "\
          f" WHERE trip_date=\"{TRIP_DATE}\" AND trip_id={ORIGINAL_TRIP_ID}") \
        .show(truncate=False)

+-----------+---------+---------+-------------------+-------------------+------------------+-------------------+----------+
|trip_id    |taxi_type|vendor_id|pickup_datetime    |dropoff_datetime   |pickup_location_id|dropoff_location_id|trip_date |
+-----------+---------+---------+-------------------+-------------------+------------------+-------------------+----------+
|60129542187|yellow   |2        |2020-01-30 14:58:56|2020-01-30 15:08:10|260               |138                |2020-01-30|
+-----------+---------+---------+-------------------+-------------------+------------------+-------------------+----------+



In [16]:
# The record we want to insert - note its pickup_datetime and dropoff_datetime are different
insertTripDFCow.select("trip_id","taxi_type","vendor_id","pickup_datetime","dropoff_datetime","pickup_location_id","dropoff_location_id","trip_date") \
               .show(truncate=False)

+------------+---------+---------+-------------------+-------------------+------------------+-------------------+----------+
|trip_id     |taxi_type|vendor_id|pickup_datetime    |dropoff_datetime   |pickup_location_id|dropoff_location_id|trip_date |
+------------+---------+---------+-------------------+-------------------+------------------+-------------------+----------+
|309237772005|yellow   |2        |2020-01-30 19:58:56|2020-01-30 20:08:10|260               |138                |2020-01-30|
+------------+---------+---------+-------------------+-------------------+------------------+-------------------+----------+



In [17]:
# The full record we will insert
insertTripDFCow.show(truncate=False)

                                                                                

+---------+---------+----------+--------+---------+-----------+---------+-------------------+-------------------+-----------------+---------+------------------+-------------------+---------------+-------------+------------+-----------+-----------+-----------+------------+---------------------+------------+-----------------+--------------------+---------+---------+--------------+------------------------+--------------------+------------+----------+
|taxi_type|trip_year|trip_month|trip_day|trip_hour|trip_minute|vendor_id|pickup_datetime    |dropoff_datetime   |store_and_forward|rate_code|pickup_location_id|dropoff_location_id|passenger_count|trip_distance|fare_amount |surcharge  |mta_tax    |tip_amount |tolls_amount|improvement_surcharge|total_amount|payment_type_code|congestion_surcharge|trip_type|ehail_fee|partition_date|distance_between_service|time_between_service|trip_id     |trip_date |
+---------+---------+----------+--------+---------+-----------+---------+-------------------+---

### 1.4. Insert the record

In [18]:
hudi_options = {
            'hoodie.database.name': DATABASE_NAME,
            'hoodie.table.name': COW_TABLE_NAME,
            'hoodie.datasource.write.table.name': COW_TABLE_NAME,
            'hoodie.datasource.write.table.type': 'COPY_ON_WRITE',
            'hoodie.datasource.write.recordkey.field': 'trip_id',
            'hoodie.datasource.write.partitionpath.field': 'trip_date',
            'hoodie.datasource.write.precombine.field': 'pickup_datetime',
            'hoodie.datasource.write.hive_style_partitioning': 'true',
            'hoodie.partition.metafile.use.base.format': 'true', 
            'hoodie.datasource.write.drop.partition.columns': 'true',
            'hoodie.datasource.write.operation': 'insert'   
        }

In [19]:
# Insert in append mode to the base path
insertTripDFCow.write.format("hudi"). \
                options(**hudi_options). \
                mode("append"). \
                save(HUDI_COW_BASE_GCS_URI)

23/07/30 02:13:28 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=147; previousMaxLatencyMs=0; operationCount=1; context=gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/.hoodie/20230730021326494.deltacommit.requested
23/07/30 02:13:34 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=143; previousMaxLatencyMs=101; operationCount=3; context=gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/.hoodie/20230730021326494.deltacommit.inflight
23/07/30 02:13:43 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=153; previousMaxLatencyMs=0; operationCount=1; context=gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/.hoodie/metadata/.hoodie/.temp/20230730021326494
23/07/30 02:13:45 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=219; previousMaxLate

In [20]:
# Refresh Hive Metsatore Metadata
spark.sql(f"REFRESH TABLE {DATABASE_NAME}.{COW_TABLE_NAME};").show(truncate=False)

++
||
++
++



In [21]:
# Run a count to ensure that the record count increased
# It should be one more than before (count before insert is captured further up in this notebook)
spark.sql(f"SELECT COUNT(*) as trip_count FROM {DATABASE_NAME}.{COW_TABLE_NAME} WHERE trip_date=\"{TRIP_DATE}\"").show(truncate=False)

                                                                                

+----------+
|trip_count|
+----------+
|257928    |
+----------+



In [22]:
# We started off a certain number of parquets, because its CoW and small data, we should see just another file
!gsutil ls -alh $HUDI_COW_BASE_GCS_URI/trip_date=$TRIP_DATE

     373 B  2023-07-30T01:59:44Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/.hoodie_partition_metadata.parquet#1690682384882374  metageneration=1
  4.28 MiB  2023-07-30T01:59:44Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/227545de-46ed-488d-b47a-0ee72d38243e-0_228-38-12650_20230729055218169.parquet#1690682384910365  metageneration=1
  4.27 MiB  2023-07-30T01:59:44Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/350cf38c-df1d-4b7a-b388-b544ee406a5f-0_229-38-12659_20230729055218169.parquet#1690682384887569  metageneration=1
802.47 KiB  2023-07-30T02:13:36Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/36802c74-35eb-4664-aa56-931f9e04c368-0_0-24-2902_20230730021326494.parquet#1690683216785315  metageneration=1
805.84 KiB  2023-07-30T01:59:44Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/36802c74-35eb-4664-aa56-

In [23]:
# Lets ensure the original record is still there
spark.sql(f"SELECT trip_id,taxi_type,trip_year,trip_month,trip_day,vendor_id,pickup_datetime,dropoff_datetime,pickup_location_id," \
          "dropoff_location_id,trip_date "\
          f"FROM {DATABASE_NAME}.{COW_TABLE_NAME} " \
          f" WHERE trip_date=\"{TRIP_DATE}\" AND trip_id={ORIGINAL_TRIP_ID}") \
         .show(truncate=False)


+-----------+---------+---------+----------+--------+---------+-------------------+-------------------+------------------+-------------------+----------+
|trip_id    |taxi_type|trip_year|trip_month|trip_day|vendor_id|pickup_datetime    |dropoff_datetime   |pickup_location_id|dropoff_location_id|trip_date |
+-----------+---------+---------+----------+--------+---------+-------------------+-------------------+------------------+-------------------+----------+
|60129542187|yellow   |2020     |1         |30      |2        |2020-01-30 14:58:56|2020-01-30 15:08:10|260               |138                |2020-01-30|
+-----------+---------+---------+----------+--------+---------+-------------------+-------------------+------------------+-------------------+----------+



In [24]:
# New record inserted
spark.sql(f"SELECT _hoodie_commit_time,_hoodie_commit_seqno,_hoodie_file_name,taxi_type,trip_year,trip_month,trip_day,vendor_id,pickup_datetime,dropoff_datetime," \
          "pickup_location_id,dropoff_location_id,trip_date "\
          f"FROM {DATABASE_NAME}.{COW_TABLE_NAME} "\
          f"WHERE trip_date=\"{TRIP_DATE}\" AND trip_id={NEW_TRIP_ID}") \
        .show(truncate=False)

+-------------------+-------------------------+--------------------------------------------------------------------------+---------+---------+----------+--------+---------+-------------------+-------------------+------------------+-------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno     |_hoodie_file_name                                                         |taxi_type|trip_year|trip_month|trip_day|vendor_id|pickup_datetime    |dropoff_datetime   |pickup_location_id|dropoff_location_id|trip_date |
+-------------------+-------------------------+--------------------------------------------------------------------------+---------+---------+----------+--------+---------+-------------------+-------------------+------------------+-------------------+----------+
|20230730021326494  |20230730021326494_0_12141|36802c74-35eb-4664-aa56-931f9e04c368-0_0-24-2902_20230730021326494.parquet|yellow   |2020     |1         |30      |2        |2020-01-30 19:58:56|2020-01-30 20:08:10

The record is in file listed under _hoodie_file_name 

### 2. [HUDI DEDUPE FEATURE] Hudi dedupes on inserts based on record key and using precombine field for latest record
Lets insert the new record yet again and observe what happens

In [25]:
insertTripDFCow.write.format("hudi"). \
                options(**hudi_options). \
                mode("append"). \
                save(HUDI_COW_BASE_GCS_URI)

                                                                                

In [26]:
# Refresh Hive Metsatore Metadata
spark.sql(f"REFRESH TABLE {DATABASE_NAME}.{COW_TABLE_NAME};").show(truncate=False)

++
||
++
++



In [27]:
# Run a count to ensure that the record count increased
# Original count was captured further above after an insert - we should not see a chance in the count 
spark.sql(f"SELECT COUNT(*) as trip_count FROM {DATABASE_NAME}.{COW_TABLE_NAME} WHERE trip_date=\"{TRIP_DATE}\"").show(truncate=False)

23/07/30 02:14:16 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=110; previousMaxLatencyMs=70; operationCount=4959; context=gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/36802c74-35eb-4664-aa56-931f9e04c368-0_0-56-5807_20230730021359744.parquet
                                                                                

+----------+
|trip_count|
+----------+
|257928    |
+----------+



In [28]:
# We should see an additional parquet files, because Hudi deduped and persisted, but eliminated the dupe
!gsutil ls -alh $HUDI_COW_BASE_GCS_URI/trip_date=$TRIP_DATE

     373 B  2023-07-30T01:59:44Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/.hoodie_partition_metadata.parquet#1690682384882374  metageneration=1
  4.28 MiB  2023-07-30T01:59:44Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/227545de-46ed-488d-b47a-0ee72d38243e-0_228-38-12650_20230729055218169.parquet#1690682384910365  metageneration=1
  4.27 MiB  2023-07-30T01:59:44Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/350cf38c-df1d-4b7a-b388-b544ee406a5f-0_229-38-12659_20230729055218169.parquet#1690682384887569  metageneration=1
802.47 KiB  2023-07-30T02:13:36Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/36802c74-35eb-4664-aa56-931f9e04c368-0_0-24-2902_20230730021326494.parquet#1690683216785315  metageneration=1
802.47 KiB  2023-07-30T02:14:07Z  gs://gaia_data_bucket-623600433888/nyc-taxi-trips-hudi-cow/trip_date=2020-01-30/36802c74-35eb-4664-aa56-

In [29]:
# Lets query the record we attempted to insert a second time
spark.sql(f"SELECT _hoodie_commit_time,_hoodie_commit_seqno,_hoodie_file_name,taxi_type,trip_year,trip_month,trip_day,vendor_id,pickup_datetime,dropoff_datetime," \
          "pickup_location_id,dropoff_location_id "\
          f"FROM {DATABASE_NAME}.{COW_TABLE_NAME} "\
          f"WHERE trip_date=\"{TRIP_DATE}\" AND trip_id={NEW_TRIP_ID}") \
        .show(truncate=False)


+-------------------+-------------------------+--------------------------------------------------------------------------+---------+---------+----------+--------+---------+-------------------+-------------------+------------------+-------------------+
|_hoodie_commit_time|_hoodie_commit_seqno     |_hoodie_file_name                                                         |taxi_type|trip_year|trip_month|trip_day|vendor_id|pickup_datetime    |dropoff_datetime   |pickup_location_id|dropoff_location_id|
+-------------------+-------------------------+--------------------------------------------------------------------------+---------+---------+----------+--------+---------+-------------------+-------------------+------------------+-------------------+
|20230730021359744  |20230730021359744_0_12141|36802c74-35eb-4664-aa56-931f9e04c368-0_0-56-5807_20230730021359744.parquet|yellow   |2020     |1         |30      |2        |2020-01-30 19:58:56|2020-01-30 20:08:10|260               |138          

Note that a new file got created, and our record is  within it. (The old file containing the record is still there and useful for time travel)

In [30]:
%%javascript
Jupyter.notebook.session.delete();

<IPython.core.display.Javascript object>