In [1]:
from pyspark.sql import SparkSession

In [2]:
import pyspark.sql.functions as F
import pyspark.sql.types as T

In [3]:
from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip

builder = SparkSession.builder \
    .appName("jsondata") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")

spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [4]:
#Same Operations on a Larger Schema
path='C:/Users/acer/Downloads/2024-7-20-15-26-1.json'
df=spark.read.format("json").load(path)
df.show()

+--------------------+--------------------+--------------------+--------------------+
|                Body|     EnqueuedTimeUtc|          Properties|    SystemProperties|
+--------------------+--------------------+--------------------+--------------------+
|{mn=TVgwMDAx:sn=M...|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{{"scope":"device...|
|{mn=TVgwMDAx:sn=M...|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{{"scope":"device...|
|{mn=TVgwMDAx:sn=M...|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{{"scope":"device...|
|{mn=TVgwMDAx:sn=M...|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{{"scope":"device...|
|{mn=TVgwMDAy:sn=M...|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{{"scope":"device...|
|{mn=TVgwMDAy:sn=M...|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{{"scope":"device...|
|{mn=TVgwMDAy:sn=M...|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{{"scope":"device...|
|{mn=TVgwMDAy:sn=M...|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{{"scope":"device...|
|{mn=TVgwMDAz:sn=M...|2024-08-20T09:56:...|{rp.mib/dev

In [5]:
from pyspark.sql.types import *

schema = StructType([
    StructField("EnqueuedTimeUtc", StringType(), True),
    StructField("Properties", StructType([
        StructField("appTopic", StringType(), True),
        StructField("relatedGroupId", StringType(), True)
    ]), True),
    StructField("SystemProperties", StructType([
        StructField("connectionDeviceId", StringType(), True),
        StructField("connectionAuthMethod", StringType(), True),
        StructField("connectionDeviceGenerationId", StringType(), True),
        StructField("contentType", StringType(), True),
        StructField("contentEncoding", StringType(), True),
        StructField("enqueuedTime", StringType(), True)
    ]), True),
    StructField("Body", StructType([
        StructField("type", StringType(), True),
        StructField("mnsn", StringType(), True),
        StructField("timestamp", LongType(), True),
        StructField("scheduleName", StringType(), True),
        StructField("smsProperties", StructType([
            StructField("interface", StructType([
                StructField("ethernetList", ArrayType(StructType([
                    StructField("id", StringType(), True),
                    StructField("type", StringType(), True),
                    StructField("address", StringType(), True)
                ])), True),
                StructField("ipList", ArrayType(StructType([
                    StructField("address", StringType(), True),
                    StructField("defaultRoute", StringType(), True),
                    StructField("subnetMask", StringType(), True),
                    StructField("ethernetId", StringType(), True)
                ])), True)
            ]), True),
            StructField("device", StructType([
                StructField("modelName", StringType(), True),
                StructField("address", StringType(), True),
                StructField("maintenanceCodeList", ArrayType(StringType()), True),
                StructField("serialId", StringType(), True),
                StructField("familyName", StringType(), True),
                StructField("location", StructType([
                    StructField("address", StringType(), True)
                ]), True),
                StructField("serviceCodeList", ArrayType(StringType()), True),
                StructField("statusSet", StructType([
                    StructField("standby", StringType(), True),
                    StructField("paperJam", StringType(), True),
                    StructField("stackerNotInstalled", StringType(), True),
                    StructField("paperLow", StringType(), True),
                    StructField("paperEmpty", StringType(), True),
                    StructField("offline", StringType(), True),
                    StructField("specifiedOutputTrayMissing", StringType(), True),
                    StructField("nearOverduePreventMaintenance", StringType(), True),
                    StructField("overduePreventMaintenance", StringType(), True),
                    StructField("warmUp", StringType(), True),
                    StructField("coverOpen", StringType(), True),
                    StructField("outputTrayFull", StringType(), True),
                    StructField("tonerLow", StringType(), True),
                    StructField("allOutputTrayFull", StringType(), True),
                    StructField("printerWarning", StringType(), True),
                    StructField("outputTrayNearFull", StringType(), True),
                    StructField("printing", StringType(), True),
                    StructField("inputTrayMissing", StringType(), True),
                    StructField("printerError", StringType(), True),
                    StructField("accountLimit", StringType(), True),
                    StructField("specifiedInputTrayEmpty", StringType(), True),
                    StructField("outputTrayMissing", StringType(), True),
                    StructField("online", StringType(), True),
                    StructField("specifiedInputTrayMissing", StringType(), True),
                    StructField("markerSupplyMissing", StringType(), True),
                    StructField("tonerEmpty", StringType(), True)
                ]), True),
                StructField("counter", StructType([
                    StructField("TYPE", StructType([
                        StructField("lifeCount", StructType([
                            StructField("unit", StringType(), True),
                            StructField("type", StringType(), True),
                            StructField("value", LongType(), True)
                        ]), True)
                    ]), True)
                ]), True),
                StructField("statusRawValue", StringType(), True),
                StructField("friendlyName", StringType(), True)
            ]), True)
        ]), True)
    ]), True)
])


In [6]:
schema_df2 = spark.read.schema(schema).json(path)

In [7]:
schema_df2.show()

+--------------------+--------------------+--------------------+--------------------+
|     EnqueuedTimeUtc|          Properties|    SystemProperties|                Body|
+--------------------+--------------------+--------------------+--------------------+
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{a0ba43aa-f3c1-42...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{5b98aa47-2d23-4b...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{a0ba43aa-f3c1-42...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{5b98aa47-2d23-4b...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e

In [8]:
schema_df2.printSchema()

root
 |-- EnqueuedTimeUtc: string (nullable = true)
 |-- Properties: struct (nullable = true)
 |    |-- appTopic: string (nullable = true)
 |    |-- relatedGroupId: string (nullable = true)
 |-- SystemProperties: struct (nullable = true)
 |    |-- connectionDeviceId: string (nullable = true)
 |    |-- connectionAuthMethod: string (nullable = true)
 |    |-- connectionDeviceGenerationId: string (nullable = true)
 |    |-- contentType: string (nullable = true)
 |    |-- contentEncoding: string (nullable = true)
 |    |-- enqueuedTime: string (nullable = true)
 |-- Body: struct (nullable = true)
 |    |-- type: string (nullable = true)
 |    |-- mnsn: string (nullable = true)
 |    |-- timestamp: long (nullable = true)
 |    |-- scheduleName: string (nullable = true)
 |    |-- smsProperties: struct (nullable = true)
 |    |    |-- interface: struct (nullable = true)
 |    |    |    |-- ethernetList: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 | 

In [34]:
schema_df2.select("Body.scheduleName").distinct().collect()

[Row(scheduleName='deviceStatus'),
 Row(scheduleName='basicUpdate'),
 Row(scheduleName='counterUpdate'),
 Row(scheduleName='suppliesUpdate')]

In [30]:
schema_df2.select('Body').show(truncate=False)

+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|Body                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         

In [11]:
schedule_names = [row["scheduleName"] for row in schema_df2.select("Body.scheduleName").distinct().collect()]
schedule_names

['deviceStatus', 'basicUpdate', 'counterUpdate', 'suppliesUpdate']

In [12]:
from delta.tables import DeltaTable
from pyspark.sql.functions import *

In [13]:
schedule_df = schema_df2.filter(col("Body.scheduleName") == 'basicUpdate')
schedule_df.show()

+--------------------+--------------------+--------------------+--------------------+
|     EnqueuedTimeUtc|          Properties|    SystemProperties|                Body|
+--------------------+--------------------+--------------------+--------------------+
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e0b7-4e...|{smsProps, mn=TVg...|
|2024-08-20T09:56:...|{rp.mib/dev/96d1f...|{96d1f469-e

In [15]:
from pyspark.sql import functions as F

flatschedule_df = schedule_df.select(
    F.col("EnqueuedTimeUtc").alias("EnqueuedTimeUtc"),
    F.col("Properties.appTopic").alias("Properties_appTopic"),
    F.col("Properties.relatedGroupId").alias("Properties_relatedGroupId"),
    F.col("SystemProperties.connectionDeviceId").alias("SystemProperties_connectionDeviceId"),
    F.col("SystemProperties.connectionAuthMethod").alias("SystemProperties_connectionAuthMethod"),
    F.col("SystemProperties.connectionDeviceGenerationId").alias("SystemProperties_connectionDeviceGenerationId"),
    F.col("SystemProperties.contentType").alias("SystemProperties_contentType"),
    F.col("SystemProperties.contentEncoding").alias("SystemProperties_contentEncoding"),
    F.col("SystemProperties.enqueuedTime").alias("SystemProperties_enqueuedTime"),
    F.col("Body.type").alias("Body_type"),
    F.col("Body.mnsn").alias("Body_mnsn"),
    F.col("Body.timestamp").alias("Body_timestamp"),
    F.col("Body.scheduleName").alias("Body_scheduleName"),
    F.col("Body.smsProperties.interface.ethernetList").alias("Body_smsProperties_interface_ethernetList"),
    F.col("Body.smsProperties.interface.ipList").alias("Body_smsProperties_interface_ipList"),
    F.col("Body.smsProperties.device.modelName").alias("Body_smsProperties_device_modelName"),
    F.col("Body.smsProperties.device.address").alias("Body_smsProperties_device_address"),
    F.col("Body.smsProperties.device.maintenanceCodeList").alias("Body_smsProperties_device_maintenanceCodeList"),
    F.col("Body.smsProperties.device.serialId").alias("Body_smsProperties_device_serialId"),
    F.col("Body.smsProperties.device.familyName").alias("Body_smsProperties_device_familyName"),
    F.col("Body.smsProperties.device.location.address").alias("Body_smsProperties_device_location_address"),
    F.col("Body.smsProperties.device.serviceCodeList").alias("Body_smsProperties_device_serviceCodeList"),
    F.col("Body.smsProperties.device.statusSet.standby").alias("Body_smsProperties_device_statusSet_standby"),
    F.col("Body.smsProperties.device.statusSet.paperJam").alias("Body_smsProperties_device_statusSet_paperJam"),
    F.col("Body.smsProperties.device.statusSet.stackerNotInstalled").alias("Body_smsProperties_device_statusSet_stackerNotInstalled"),
    F.col("Body.smsProperties.device.statusSet.paperLow").alias("Body_smsProperties_device_statusSet_paperLow"),
    F.col("Body.smsProperties.device.statusSet.paperEmpty").alias("Body_smsProperties_device_statusSet_paperEmpty"),
    F.col("Body.smsProperties.device.statusSet.offline").alias("Body_smsProperties_device_statusSet_offline"),
    F.col("Body.smsProperties.device.statusSet.coverOpen").alias("Body_smsProperties_device_statusSet_coverOpen"),
    F.col("Body.smsProperties.device.statusSet.outputTrayFull").alias("Body_smsProperties_device_statusSet_outputTrayFull"),
    F.col("Body.smsProperties.device.statusSet.tonerLow").alias("Body_smsProperties_device_statusSet_tonerLow"),
    F.col("Body.smsProperties.device.statusSet.printerError").alias("Body_smsProperties_device_statusSet_printerError"),
    F.col("Body.smsProperties.device.statusSet.tonerEmpty").alias("Body_smsProperties_device_statusSet_tonerEmpty"),
    F.col("Body.smsProperties.device.counter.TYPE.lifeCount.unit").alias("Body_smsProperties_device_counter_TYPE_lifeCount_unit"),
    F.col("Body.smsProperties.device.counter.TYPE.lifeCount.type").alias("Body_smsProperties_device_counter_TYPE_lifeCount_type"),
    F.col("Body.smsProperties.device.counter.TYPE.lifeCount.value").alias("Body_smsProperties_device_counter_TYPE_lifeCount_value"),
    F.col("Body.smsProperties.device.statusRawValue").alias("Body_smsProperties_device_statusRawValue"),
    F.col("Body.smsProperties.device.friendlyName").alias("Body_smsProperties_device_friendlyName")
)


In [16]:
flatschedule_df.show()
flatschedule_df.count()

+--------------------+--------------------+-------------------------+-----------------------------------+-------------------------------------+---------------------------------------------+----------------------------+--------------------------------+-----------------------------+---------+--------------------+--------------+-----------------+-----------------------------------------+-----------------------------------+-----------------------------------+---------------------------------+---------------------------------------------+----------------------------------+------------------------------------+------------------------------------------+-----------------------------------------+-------------------------------------------+--------------------------------------------+-------------------------------------------------------+--------------------------------------------+----------------------------------------------+-------------------------------------------+-----------------------

500

In [17]:
latest_data=flatschedule_df.sort(flatschedule_df.Body_timestamp.desc()).limit(1)
latest_data.show()

+--------------------+--------------------+-------------------------+-----------------------------------+-------------------------------------+---------------------------------------------+----------------------------+--------------------------------+-----------------------------+---------+--------------------+--------------+-----------------+-----------------------------------------+-----------------------------------+-----------------------------------+---------------------------------+---------------------------------------------+----------------------------------+------------------------------------+------------------------------------------+-----------------------------------------+-------------------------------------------+--------------------------------------------+-------------------------------------------------------+--------------------------------------------+----------------------------------------------+-------------------------------------------+-----------------------

In [18]:
remaining_data=flatschedule_df.filter(~(flatschedule_df['Body_timestamp']==1724147813655))
remaining_data.show()
remaining_data.count()

+--------------------+--------------------+-------------------------+-----------------------------------+-------------------------------------+---------------------------------------------+----------------------------+--------------------------------+-----------------------------+---------+--------------------+--------------+-----------------+-----------------------------------------+-----------------------------------+-----------------------------------+---------------------------------+---------------------------------------------+----------------------------------+------------------------------------+------------------------------------------+-----------------------------------------+-------------------------------------------+--------------------------------------------+-------------------------------------------------------+--------------------------------------------+----------------------------------------------+-------------------------------------------+-----------------------

499

In [19]:
table_path=f"./message/basicUpdate2"

In [20]:
latest_data.write.format('delta').mode('overwrite').save(table_path)

In [21]:
from delta.tables import *

In [36]:
dt=DeltaTable.forPath(spark, table_path)
check=dt.toDF()
check.select('Body_mnsn','Body_timestamp').show()

+--------------------+--------------+
|           Body_mnsn|Body_timestamp|
+--------------------+--------------+
|mn=TVgwMDAx:sn=MD...| 1724147812588|
|mn=TVgwMDAy:sn=MD...| 1724147812591|
|mn=TVgwMDAz:sn=MD...| 1724147812610|
|mn=TVgwMDA0:sn=MD...| 1724147812612|
|mn=TVgwMDA1:sn=MD...| 1724147812619|
|mn=TVgwMDA2:sn=MD...| 1724147812622|
|mn=TVgwMDA3:sn=MD...| 1724147812624|
|mn=TVgwMDA4:sn=MD...| 1724147812626|
|mn=TVgwMDA5:sn=MD...| 1724147812628|
|mn=TVgwMDEw:sn=MD...| 1724147812633|
|mn=TVgwMDEx:sn=MD...| 1724147812636|
|mn=TVgwMDEy:sn=MD...| 1724147812639|
|mn=TVgwMDEz:sn=MD...| 1724147812641|
|mn=TVgwMDE0:sn=MD...| 1724147812642|
|mn=TVgwMDE1:sn=MD...| 1724147812644|
|mn=TVgwMDE2:sn=MD...| 1724147812646|
|mn=TVgwMDE3:sn=MD...| 1724147812649|
|mn=TVgwMDE4:sn=MD...| 1724147812652|
|mn=TVgwMDE5:sn=MD...| 1724147812654|
|mn=TVgwMDIw:sn=MD...| 1724147812655|
+--------------------+--------------+
only showing top 20 rows



In [38]:
remaining_data.filter(remaining_data['Body_mnsn'].isNull()).show()

+---------------+-------------------+-------------------------+-----------------------------------+-------------------------------------+---------------------------------------------+----------------------------+--------------------------------+-----------------------------+---------+---------+--------------+-----------------+-----------------------------------------+-----------------------------------+-----------------------------------+---------------------------------+---------------------------------------------+----------------------------------+------------------------------------+------------------------------------------+-----------------------------------------+-------------------------------------------+--------------------------------------------+-------------------------------------------------------+--------------------------------------------+----------------------------------------------+-------------------------------------------+----------------------------------------

In [39]:
from pyspark.sql import functions as F

cond = (F.col("t.Body_mnsn") == F.col("s.Body_mnsn"))

dt.alias("t").merge(
    remaining_data.alias("s"),
    cond
).whenMatchedUpdate(
    set={
        "Body_timestamp": "s.Body_timestamp",
        "Body_scheduleName": "s.Body_scheduleName",
        "EnqueuedTimeUtc": "s.EnqueuedTimeUtc",
        "Properties_appTopic": "s.Properties_appTopic",
        "Properties_relatedGroupId": "s.Properties_relatedGroupId",
        "SystemProperties_connectionDeviceId": "s.SystemProperties_connectionDeviceId",
        "SystemProperties_connectionAuthMethod": "s.SystemProperties_connectionAuthMethod",
        "SystemProperties_connectionDeviceGenerationId": "s.SystemProperties_connectionDeviceGenerationId",
        "SystemProperties_contentType": "s.SystemProperties_contentType",
        "SystemProperties_contentEncoding": "s.SystemProperties_contentEncoding",
        "SystemProperties_enqueuedTime": "s.SystemProperties_enqueuedTime",
        "Body_type": "s.Body_type",
        "Body_smsProperties_device_modelName": "s.Body_smsProperties_device_modelName",
        "Body_smsProperties_device_address": "s.Body_smsProperties_device_address",
        "Body_smsProperties_device_maintenanceCodeList": "s.Body_smsProperties_device_maintenanceCodeList",
        "Body_smsProperties_device_serialId": "s.Body_smsProperties_device_serialId",
        "Body_smsProperties_device_familyName": "s.Body_smsProperties_device_familyName",
        "Body_smsProperties_device_location_address": "s.Body_smsProperties_device_location_address",
        "Body_smsProperties_device_serviceCodeList": "s.Body_smsProperties_device_serviceCodeList",
        "Body_smsProperties_device_statusSet_standby": "s.Body_smsProperties_device_statusSet_standby",
        "Body_smsProperties_device_statusSet_paperJam": "s.Body_smsProperties_device_statusSet_paperJam",
        "Body_smsProperties_device_statusSet_tonerLow": "s.Body_smsProperties_device_statusSet_tonerLow",
        "Body_smsProperties_device_statusSet_printerError": "s.Body_smsProperties_device_statusSet_printerError",
        "Body_smsProperties_device_statusRawValue": "s.Body_smsProperties_device_statusRawValue",
        "Body_smsProperties_device_friendlyName": "s.Body_smsProperties_device_friendlyName"
    }
).whenNotMatchedInsert(
    values={
        "Body_type": "s.Body_type",
        "Body_mnsn": "s.Body_mnsn",
        "Body_timestamp": "s.Body_timestamp",
        "Body_scheduleName": "s.Body_scheduleName",
        "EnqueuedTimeUtc": "s.EnqueuedTimeUtc",
        "Properties_appTopic": "s.Properties_appTopic",
        "Properties_relatedGroupId": "s.Properties_relatedGroupId",
        "SystemProperties_connectionDeviceId": "s.SystemProperties_connectionDeviceId",
        "SystemProperties_connectionAuthMethod": "s.SystemProperties_connectionAuthMethod",
        "SystemProperties_connectionDeviceGenerationId": "s.SystemProperties_connectionDeviceGenerationId",
        "SystemProperties_contentType": "s.SystemProperties_contentType",
        "SystemProperties_contentEncoding": "s.SystemProperties_contentEncoding",
        "SystemProperties_enqueuedTime": "s.SystemProperties_enqueuedTime",
        "Body_smsProperties_device_modelName": "s.Body_smsProperties_device_modelName",
        "Body_smsProperties_device_address": "s.Body_smsProperties_device_address",
        "Body_smsProperties_device_maintenanceCodeList": "s.Body_smsProperties_device_maintenanceCodeList",
        "Body_smsProperties_device_serialId": "s.Body_smsProperties_device_serialId",
        "Body_smsProperties_device_familyName": "s.Body_smsProperties_device_familyName",
        "Body_smsProperties_device_location_address": "s.Body_smsProperties_device_location_address",
        "Body_smsProperties_device_serviceCodeList": "s.Body_smsProperties_device_serviceCodeList",
        "Body_smsProperties_device_statusSet_standby": "s.Body_smsProperties_device_statusSet_standby",
        "Body_smsProperties_device_statusSet_paperJam": "s.Body_smsProperties_device_statusSet_paperJam",
        "Body_smsProperties_device_statusSet_tonerLow": "s.Body_smsProperties_device_statusSet_tonerLow",
        "Body_smsProperties_device_statusSet_printerError": "s.Body_smsProperties_device_statusSet_printerError",
        "Body_smsProperties_device_statusRawValue": "s.Body_smsProperties_device_statusRawValue",
        "Body_smsProperties_device_friendlyName": "s.Body_smsProperties_device_friendlyName"
    }
).execute()


In [40]:
df1=spark.read.format("delta").load("./message/basicUpdate")
df1.show()

+---------+--------------------+--------------+-----------------+--------------------+--------------------+-----------------------------------+-------------------------------------+---------------------------------------------+----------------------------+--------------------------------+-----------------------------+
|Body_type|           Body_mnsn|Body_timestamp|Body_scheduleName|     EnqueuedTimeUtc| Properties_appTopic|SystemProperties_connectionDeviceId|SystemProperties_connectionAuthMethod|SystemProperties_connectionDeviceGenerationId|SystemProperties_contentType|SystemProperties_contentEncoding|SystemProperties_enqueuedTime|
+---------+--------------------+--------------+-----------------+--------------------+--------------------+-----------------------------------+-------------------------------------+---------------------------------------------+----------------------------+--------------------------------+-----------------------------+
| smsProps|mn=TVgwMDA0:sn=MD...| 1724147

In [27]:
df1.select('Body_mnsn').distinct().count()

500