#### **from_json**

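- `from_json` parses a column containing a **JSON string** into a **StructType or MapType** column, according to the schema you supply. Strings that cannot be parsed produce null values.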

#### **How to convert JSON string to StructType?**

1) JSON String
2) Nested Structure
3) CSV to StructType to JSON string to StructType

**1) JSON String**

In [0]:
from pyspark.sql.functions import from_json, lit, col
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
import pyspark.sql.functions as f

**Ex 01**

In [0]:
# Ex 01 input: each row is (id, JSON document stored as a plain string, label).
data = [
    (123, '''{"Name":"Kiran", "Dept": "Admin", "Tech": "SAP", "age": 32, "Level": "High"}''', "Sony"),
    (124, '''{"Name":"kishore", "Dept": "IT", "Tech": "ORACLE", "age": 42, "Level": "Medium"}''', "kumar"),
    (125, '''{"Name":"karan", "Dept": "Testing", "Tech": "SQL", "age": 38, "Level": "Low"}''', "sharma"),
    (126, '''{"Name":"sharath", "Dept": "Prod", "Tech": "ANALYSIS", "age": 40, "Level": "Medium"}''', "roy"),
    (127, '''{"Name":"sayan", "Dept": "Sales", "Tech": "DATA SCIENCE", "age": 45, "Level": "Top"}''', "gupta"),
]

# Column names only -- the JSON text lives in the "Format" column.
schema = ("id", "Format", "Name")

df_ex01 = spark.createDataFrame(data, schema=schema)
display(df_ex01)

id,Format,Name
123,"{""Name"":""Kiran"", ""Dept"": ""Admin"", ""Tech"": ""SAP"", ""age"": 32, ""Level"": ""High""}",Sony
124,"{""Name"":""kishore"", ""Dept"": ""IT"", ""Tech"": ""ORACLE"", ""age"": 42, ""Level"": ""Medium""}",kumar
125,"{""Name"":""karan"", ""Dept"": ""Testing"", ""Tech"": ""SQL"", ""age"": 38, ""Level"": ""Low""}",sharma
126,"{""Name"":""sharath"", ""Dept"": ""Prod"", ""Tech"": ""ANALYSIS"", ""age"": 40, ""Level"": ""Medium""}",roy
127,"{""Name"":""sayan"", ""Dept"": ""Sales"", ""Tech"": ""DATA SCIENCE"", ""age"": 45, ""Level"": ""Top""}",gupta


In [0]:
# Schema of the JSON documents held in the "Format" column.
# All fields are nullable; "age" is the only integer field.
schema = (
    StructType()
    .add("Name", StringType(), True)
    .add("Dept", StringType(), True)
    .add("Tech", StringType(), True)
    .add("age", IntegerType(), True)
    .add("Level", StringType(), True)
)

In [0]:
# Keep the raw JSON text next to its parsed struct so the two can be compared.
df_ex01 = df_ex01.select(
    col('Format'),
    from_json(col('Format'), schema).alias("parsed_json"),
)
display(df_ex01)

Format,parsed_json
"{""Name"":""Kiran"", ""Dept"": ""Admin"", ""Tech"": ""SAP"", ""age"": 32, ""Level"": ""High""}","List(Kiran, Admin, SAP, 32, High)"
"{""Name"":""kishore"", ""Dept"": ""IT"", ""Tech"": ""ORACLE"", ""age"": 42, ""Level"": ""Medium""}","List(kishore, IT, ORACLE, 42, Medium)"
"{""Name"":""karan"", ""Dept"": ""Testing"", ""Tech"": ""SQL"", ""age"": 38, ""Level"": ""Low""}","List(karan, Testing, SQL, 38, Low)"
"{""Name"":""sharath"", ""Dept"": ""Prod"", ""Tech"": ""ANALYSIS"", ""age"": 40, ""Level"": ""Medium""}","List(sharath, Prod, ANALYSIS, 40, Medium)"
"{""Name"":""sayan"", ""Dept"": ""Sales"", ""Tech"": ""DATA SCIENCE"", ""age"": 45, ""Level"": ""Top""}","List(sayan, Sales, DATA SCIENCE, 45, Top)"


**Ex 02**

In [0]:
from pyspark.sql import SparkSession

# `dff` is a JSON array of records; each "address" value is itself a JSON
# document embedded as a single-quoted string.
dff = """[
    {"first_name": "kiran", "age": 32, "address": '{"city": "Baroda", "country": "india", "state": "Rj"}'},
    {"first_name": "Micheal", "age": 25, "address": '{"city": "Nasik", "country": "india", "state": "UP"}'},
    {"first_name": "Prakash", "age": 32, "address": '{"city": "Hyderabad", "country": "india", "state": "TS"}'},
    {"first_name": "Ritesh", "age": 25, "address": '{"city": "Bangalore", "country": "india", "state": "KA"}'},
    {"first_name": "Vaibhav", "age": 32, "address": '{"city": "Delhi", "country": "india", "state": "DL"}'},
    {"first_name": "Goerge", "age": 25, "address": '{"city": "Chennai", "country": "india", "state": "TN"}'}        
    ]"""

# Wrap the whole document in a one-element RDD ...
rdd = spark.sparkContext.parallelize([dff])

# ... and hand that RDD to the JSON reader to obtain a DataFrame.
df_ex02 = spark.read.option("multiLine", True).json(rdd)

display(df_ex02)

address,age,first_name
"{""city"": ""Baroda"", ""country"": ""india"", ""state"": ""Rj""}",32,kiran
"{""city"": ""Nasik"", ""country"": ""india"", ""state"": ""UP""}",25,Micheal
"{""city"": ""Hyderabad"", ""country"": ""india"", ""state"": ""TS""}",32,Prakash
"{""city"": ""Bangalore"", ""country"": ""india"", ""state"": ""KA""}",25,Ritesh
"{""city"": ""Delhi"", ""country"": ""india"", ""state"": ""DL""}",32,Vaibhav
"{""city"": ""Chennai"", ""country"": ""india"", ""state"": ""TN""}",25,Goerge


In [0]:
# Schema of the embedded "address" documents: three nullable string fields.
schema1 = StructType(
    [
        StructField(field_name, StringType(), True)
        for field_name in ("city", "country", "state")
    ]
)

In [0]:
# Show the raw address string alongside the struct parsed from it.
df_ex02 = df_ex02.select(
    f.col('address'),
    f.from_json('address', schema1).alias("address_json"),
)
display(df_ex02)

address,address_json
"{""city"": ""Baroda"", ""country"": ""india"", ""state"": ""Rj""}","List(Baroda, india, Rj)"
"{""city"": ""Nasik"", ""country"": ""india"", ""state"": ""UP""}","List(Nasik, india, UP)"
"{""city"": ""Hyderabad"", ""country"": ""india"", ""state"": ""TS""}","List(Hyderabad, india, TS)"
"{""city"": ""Bangalore"", ""country"": ""india"", ""state"": ""KA""}","List(Bangalore, india, KA)"
"{""city"": ""Delhi"", ""country"": ""india"", ""state"": ""DL""}","List(Delhi, india, DL)"
"{""city"": ""Chennai"", ""country"": ""india"", ""state"": ""TN""}","List(Chennai, india, TN)"


**Ex 03**

In [0]:
# Ex 03 input: one-column rows, each holding an address document as JSON text.
# Single-quoted Python literals avoid escaping the JSON double quotes.
data = [
    ('{"city":"Baroda", "country":"india", "state":"RJ"}',),
    ('{"city":"Nasik", "country":"india", "state":"UP"}',),
    ('{"city":"Hyderabad", "country":"india", "state":"TS"}',),
    ('{"city":"Bangalore", "country":"india", "state":"KA"}',),
]

df_ex03 = spark.createDataFrame(data, schema=["json_data"])
display(df_ex03)

json_data
"{""city"":""Baroda"", ""country"":""india"", ""state"":""RJ""}"
"{""city"":""Nasik"", ""country"":""india"", ""state"":""UP""}"
"{""city"":""Hyderabad"", ""country"":""india"", ""state"":""TS""}"
"{""city"":""Bangalore"", ""country"":""india"", ""state"":""KA""}"


In [0]:
# Schema of the "json_data" documents: three nullable string fields.
schema2 = (
    StructType()
    .add("city", StringType(), True)
    .add("country", StringType(), True)
    .add("state", StringType(), True)
)

In [0]:
# Parse "json_data" with schema2, the schema defined for this example, so the
# cell does not depend on schema1 still being around from Ex 02.
df_ex03 = df_ex03.select(
    'json_data',
    f.from_json(f.col('json_data'), schema2).alias("json_data_json"),
)
display(df_ex03)

json_data,json_data_json
"{""city"":""Baroda"", ""country"":""india"", ""state"":""RJ""}","List(Baroda, india, RJ)"
"{""city"":""Nasik"", ""country"":""india"", ""state"":""UP""}","List(Nasik, india, UP)"
"{""city"":""Hyderabad"", ""country"":""india"", ""state"":""TS""}","List(Hyderabad, india, TS)"
"{""city"":""Bangalore"", ""country"":""india"", ""state"":""KA""}","List(Bangalore, india, KA)"


**2) Nested Structure**

In [0]:
# Sample data showcasing varying JSON structures
data = [("1", '{"name": "John Doe", "age": 30, "country": "USA", "First_Name": "Rakesh", "Last_Name": "Kumar"}'),
        ("2", '{"city": "New York", "country": "USA", "zipcode": "10001", "Tech": {"domain": "database", "designation": "data engineer", "Exp": 6, "Platform": "Azure"}}'),
        ("3", '{"product": "Laptop", "brand": "Dell", "specs": {"RAM": "16GB", "Storage": "512GB SSD", "OS": "windows"}}'),
        ("4", '{"First_Name": "Rakesh", "country": "IND", "zipcode": "560103", "specs": {"RAM": "16GB", "Storage": "512GB SSD", "OS": "windows"}, "Tech": {"domain": "database", "designation": "data engineer"}}'),
        ("5", '{"Last_Name": "Kumar", "country": "UK", "zipcode": "571323", "city": "SWEDEN", "age": 30, , "Tech": {"domain": "database", "designation": "data engineer"}}'),
        ("6", '{"name": "paul", "age": 35, "city": "SWEDEN", "country": "UK", "zipcode": "571323", "First_Name": "Sourabh", "Last_Name": "Kumar", "product": "Desktop", "brand": "HP", "specs": {"RAM": "32GB", "Storage": "512GB SSD", "OS": "windows"}}')
        ]

# Creating the DataFrame
df_ex02 = spark.createDataFrame(data, ["id", "json_string"])
display(df_ex02)

id,json_string
1,"{""name"": ""John Doe"", ""age"": 30, ""country"": ""USA"", ""First_Name"": ""Rakesh"", ""Last_Name"": ""Kumar""}"
2,"{""city"": ""New York"", ""country"": ""USA"", ""zipcode"": ""10001"", ""Tech"": {""domain"": ""database"", ""designation"": ""data engineer"", ""Exp"": 6, ""Platform"": ""Azure""}}"
3,"{""product"": ""Laptop"", ""brand"": ""Dell"", ""specs"": {""RAM"": ""16GB"", ""Storage"": ""512GB SSD"", ""OS"": ""windows""}}"
4,"{""First_Name"": ""Rakesh"", ""country"": ""IND"", ""zipcode"": ""560103"", ""specs"": {""RAM"": ""16GB"", ""Storage"": ""512GB SSD"", ""OS"": ""windows""}, ""Tech"": {""domain"": ""database"", ""designation"": ""data engineer""}}"
5,"{""Last_Name"": ""Kumar"", ""country"": ""UK"", ""zipcode"": ""571323"", ""city"": ""SWEDEN"", ""age"": 30, , ""Tech"": {""domain"": ""database"", ""designation"": ""data engineer""}}"
6,"{""name"": ""paul"", ""age"": 35, ""city"": ""SWEDEN"", ""country"": ""UK"", ""zipcode"": ""571323"", ""First_Name"": ""Sourabh"", ""Last_Name"": ""Kumar"", ""product"": ""Desktop"", ""brand"": ""HP"", ""specs"": {""RAM"": ""32GB"", ""Storage"": ""512GB SSD"", ""OS"": ""windows""}}"


In [0]:
# Define the schema for the JSON data
schema = StructType([StructField("name", StringType(), True),
                     StructField("age", IntegerType(), True),
                     StructField("city", StringType(), True),
                     StructField("country", StringType(), True),
                     StructField("zipcode", StringType(), True),
                     StructField("product", StringType(), True),
                     StructField("brand", StringType(), True),
                     StructField("specs", StructType([StructField("RAM", StringType(), True),
                                                      StructField("Storage", StringType(), True),
                                                      StructField("OS", StringType(), True)
                                                      ])
                                 ),
                    StructField("Tech", StructType([StructField("domain", StringType(), True),
                                                    StructField("designation", StringType(), True),
                                                    StructField("Exp", IntegerType(), True),
                                                    StructField("Platform", IntegerType(), True)
                                                    ])
                                ),
                     StructField("First_Name", StringType(), True),
                     StructField("Last_Name", StringType(), True)
                     ])

# Apply the from_json function to convert the JSON string column to a struct type
df_ex03 = df_ex02.withColumn("json_struct", from_json(df_ex02["json_string"], schema))

# Display the resulting DataFrame
display(df_ex03)

id,json_string,json_struct
1,"{""name"": ""John Doe"", ""age"": 30, ""country"": ""USA"", ""First_Name"": ""Rakesh"", ""Last_Name"": ""Kumar""}","List(John Doe, 30, null, USA, null, null, null, null, null, Rakesh, Kumar)"
2,"{""city"": ""New York"", ""country"": ""USA"", ""zipcode"": ""10001"", ""Tech"": {""domain"": ""database"", ""designation"": ""data engineer"", ""Exp"": 6, ""Platform"": ""Azure""}}","List(null, null, New York, USA, 10001, null, null, null, List(database, data engineer, 6, null), null, null)"
3,"{""product"": ""Laptop"", ""brand"": ""Dell"", ""specs"": {""RAM"": ""16GB"", ""Storage"": ""512GB SSD"", ""OS"": ""windows""}}","List(null, null, null, null, null, Laptop, Dell, List(16GB, 512GB SSD, windows), null, null, null)"
4,"{""First_Name"": ""Rakesh"", ""country"": ""IND"", ""zipcode"": ""560103"", ""specs"": {""RAM"": ""16GB"", ""Storage"": ""512GB SSD"", ""OS"": ""windows""}, ""Tech"": {""domain"": ""database"", ""designation"": ""data engineer""}}","List(null, null, null, IND, 560103, null, null, List(16GB, 512GB SSD, windows), List(database, data engineer, null, null), Rakesh, null)"
5,"{""Last_Name"": ""Kumar"", ""country"": ""UK"", ""zipcode"": ""571323"", ""city"": ""SWEDEN"", ""age"": 30, , ""Tech"": {""domain"": ""database"", ""designation"": ""data engineer""}}","List(null, null, null, null, null, null, null, null, null, null, null)"
6,"{""name"": ""paul"", ""age"": 35, ""city"": ""SWEDEN"", ""country"": ""UK"", ""zipcode"": ""571323"", ""First_Name"": ""Sourabh"", ""Last_Name"": ""Kumar"", ""product"": ""Desktop"", ""brand"": ""HP"", ""specs"": {""RAM"": ""32GB"", ""Storage"": ""512GB SSD"", ""OS"": ""windows""}}","List(paul, 35, SWEDEN, UK, 571323, Desktop, HP, List(32GB, 512GB SSD, windows), null, Sourabh, Kumar)"


**3) CSV to StructType to JSON string to StructType**

In [0]:
# Load the sample CSV: the first line is the header, and column types are
# inferred from the data.
df_json = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("dbfs:/FileStore/tables/to_json.csv")
)
display(df_json.limit(10))

Id,Nick_Name,First_Name,Last_Name,Age,Type,Description,Commodity_Index,Sensex_Category,Label_Type,Effective_Date,Start_Date,End_Date,Currency,Ticket,Name,Sex
1,admin,John,Victor,30,Grade1,Baleno,DISCOUNT,Top,average,6-Feb-23,14-Jan-23,6-Feb-23,INR,A/5 21171,"Braund, Mr. Owen Harris",male
2,everest,Paul,Irish,35,Grade2,Engine_Base,DISCOUNT,Top,average,6-Feb-23,14-Jan-23,6-Feb-23,INR,PC 17599,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female
3,moon,Erram,Rammohan,29,Enginner1,Baleno,DISCOUNT,Top,average,8-Jan-24,7-Oct-23,8-Jan-24,INR,STON/O2. 3101282,"Heikkinen, Miss. Laina",female
4,service,Stalin,Rajesh,40,Minister,Engine_Base,DISCOUNT,Top,average,8-Jan-24,7-Oct-23,8-Jan-24,INR,113803,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female
5,Builder,Golla,Rajasekar,43,Builder,Suzuki Swift,DISCOUNT,Top,average,6-Mar-23,7-Feb-23,6-Mar-23,INR,373450,"Allen, Mr. William Henry",male
6,Drinker,Karjala,Hari,33,Army,Suzuki Swift,DISCOUNT,Top,average,6-Mar-23,7-Feb-23,6-Mar-23,INR,330877,"Moran, Mr. James",male
7,Army,Koyi,Damodar,37,Bettalian,Wagon R,DISCOUNT,Top,average,6-Jan-25,9-Jan-24,6-Jan-25,INR,17463,"McCarthy, Mr. Timothy J",male
8,Marketing,Vemparla,Harish,55,Manager,Engine_Base,DISCOUNT,Top,average,6-Jan-25,9-Jan-24,6-Jan-25,INR,349909,"Palsson, Master. Gosta Leonard",male
9,Politician,Devineni,Umesh,58,Senior,Creta,DISCOUNT,Top,average,6-Apr-23,7-Mar-23,6-Apr-23,INR,347742,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female
10,Minister,Ponguru,Narayana,56,Education,Brezza,DISCOUNT,Top,average,6-Apr-23,7-Mar-23,6-Apr-23,INR,237736,"Nasser, Mrs. Nicholas (Adele Achem)",female


In [0]:
# Select all columns and structure them into a single column named 'pp_msg'
df_stru = df_json.select(f.struct('*').alias('sales_msg')).distinct()
display(df_stru.limit(10))

sales_msg
"List(1, admin, John, Victor, 30, Grade1, Baleno, DISCOUNT, Top, average, 6-Feb-23, 14-Jan-23, 6-Feb-23, INR, A/5 21171, Braund, Mr. Owen Harris, male)"
"List(7, Army, Koyi, Damodar, 37, Bettalian, Wagon R, DISCOUNT, Top, average, 6-Jan-25, 9-Jan-24, 6-Jan-25, INR, 17463, McCarthy, Mr. Timothy J, male)"
"List(2, everest, Paul, Irish, 35, Grade2, Engine_Base, DISCOUNT, Top, average, 6-Feb-23, 14-Jan-23, 6-Feb-23, INR, PC 17599, Cumings, Mrs. John Bradley (Florence Briggs Thayer), female)"
"List(5, Builder, Golla, Rajasekar, 43, Builder, Suzuki Swift, DISCOUNT, Top, average, 6-Mar-23, 7-Feb-23, 6-Mar-23, INR, 373450, Allen, Mr. William Henry, male)"
"List(3, moon, Erram, Rammohan, 29, Enginner1, Baleno, DISCOUNT, Top, average, 8-Jan-24, 7-Oct-23, 8-Jan-24, INR, STON/O2. 3101282, Heikkinen, Miss. Laina, female)"
"List(4, service, Stalin, Rajesh, 40, Minister, Engine_Base, DISCOUNT, Top, average, 8-Jan-24, 7-Oct-23, 8-Jan-24, INR, 113803, Futrelle, Mrs. Jacques Heath (Lily May Peel), female)"
"List(8, Marketing, Vemparla, Harish, 55, Manager, Engine_Base, DISCOUNT, Top, average, 6-Jan-25, 9-Jan-24, 6-Jan-25, INR, 349909, Palsson, Master. Gosta Leonard, male)"
"List(6, Drinker, Karjala, Hari, 33, Army, Suzuki Swift, DISCOUNT, Top, average, 6-Mar-23, 7-Feb-23, 6-Mar-23, INR, 330877, Moran, Mr. James, male)"
"List(9, Politician, Devineni, Umesh, 58, Senior, Creta, DISCOUNT, Top, average, 6-Apr-23, 7-Mar-23, 6-Apr-23, INR, 347742, Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg), female)"
"List(10, Minister, Ponguru, Narayana, 56, Education, Brezza, DISCOUNT, Top, average, 6-Apr-23, 7-Mar-23, 6-Apr-23, INR, 237736, Nasser, Mrs. Nicholas (Adele Achem), female)"


In [0]:
# Convert the 'sales_msg' column to JSON string
df_final = df_stru.withColumn('sales_msg_json', f.to_json(f.col('sales_msg')))
display(df_final.limit(10))

sales_msg,sales_msg_json
"List(1, admin, John, Victor, 30, Grade1, Baleno, DISCOUNT, Top, average, 6-Feb-23, 14-Jan-23, 6-Feb-23, INR, A/5 21171, Braund, Mr. Owen Harris, male)","{""Id"":1,""Nick_Name"":""admin"",""First_Name"":""John"",""Last_Name"":""Victor"",""Age"":30,""Type"":""Grade1"",""Description"":""Baleno"",""Commodity_Index"":""DISCOUNT"",""Sensex_Category"":""Top"",""Label_Type"":""average"",""Effective_Date"":""6-Feb-23"",""Start_Date"":""14-Jan-23"",""End_Date"":""6-Feb-23"",""Currency"":""INR"",""Ticket"":""A/5 21171"",""Name"":""Braund, Mr. Owen Harris"",""Sex"":""male""}"
"List(7, Army, Koyi, Damodar, 37, Bettalian, Wagon R, DISCOUNT, Top, average, 6-Jan-25, 9-Jan-24, 6-Jan-25, INR, 17463, McCarthy, Mr. Timothy J, male)","{""Id"":7,""Nick_Name"":""Army"",""First_Name"":""Koyi"",""Last_Name"":""Damodar"",""Age"":37,""Type"":""Bettalian"",""Description"":""Wagon R"",""Commodity_Index"":""DISCOUNT"",""Sensex_Category"":""Top"",""Label_Type"":""average"",""Effective_Date"":""6-Jan-25"",""Start_Date"":""9-Jan-24"",""End_Date"":""6-Jan-25"",""Currency"":""INR"",""Ticket"":""17463"",""Name"":""McCarthy, Mr. Timothy J"",""Sex"":""male""}"
"List(2, everest, Paul, Irish, 35, Grade2, Engine_Base, DISCOUNT, Top, average, 6-Feb-23, 14-Jan-23, 6-Feb-23, INR, PC 17599, Cumings, Mrs. John Bradley (Florence Briggs Thayer), female)","{""Id"":2,""Nick_Name"":""everest"",""First_Name"":""Paul"",""Last_Name"":""Irish"",""Age"":35,""Type"":""Grade2"",""Description"":""Engine_Base"",""Commodity_Index"":""DISCOUNT"",""Sensex_Category"":""Top"",""Label_Type"":""average"",""Effective_Date"":""6-Feb-23"",""Start_Date"":""14-Jan-23"",""End_Date"":""6-Feb-23"",""Currency"":""INR"",""Ticket"":""PC 17599"",""Name"":""Cumings, Mrs. John Bradley (Florence Briggs Thayer)"",""Sex"":""female""}"
"List(5, Builder, Golla, Rajasekar, 43, Builder, Suzuki Swift, DISCOUNT, Top, average, 6-Mar-23, 7-Feb-23, 6-Mar-23, INR, 373450, Allen, Mr. William Henry, male)","{""Id"":5,""Nick_Name"":""Builder"",""First_Name"":""Golla"",""Last_Name"":""Rajasekar"",""Age"":43,""Type"":""Builder"",""Description"":""Suzuki Swift"",""Commodity_Index"":""DISCOUNT"",""Sensex_Category"":""Top"",""Label_Type"":""average"",""Effective_Date"":""6-Mar-23"",""Start_Date"":""7-Feb-23"",""End_Date"":""6-Mar-23"",""Currency"":""INR"",""Ticket"":""373450"",""Name"":""Allen, Mr. William Henry"",""Sex"":""male""}"
"List(3, moon, Erram, Rammohan, 29, Enginner1, Baleno, DISCOUNT, Top, average, 8-Jan-24, 7-Oct-23, 8-Jan-24, INR, STON/O2. 3101282, Heikkinen, Miss. Laina, female)","{""Id"":3,""Nick_Name"":""moon"",""First_Name"":""Erram"",""Last_Name"":""Rammohan"",""Age"":29,""Type"":""Enginner1"",""Description"":""Baleno"",""Commodity_Index"":""DISCOUNT"",""Sensex_Category"":""Top"",""Label_Type"":""average"",""Effective_Date"":""8-Jan-24"",""Start_Date"":""7-Oct-23"",""End_Date"":""8-Jan-24"",""Currency"":""INR"",""Ticket"":""STON/O2. 3101282"",""Name"":""Heikkinen, Miss. Laina"",""Sex"":""female""}"
"List(4, service, Stalin, Rajesh, 40, Minister, Engine_Base, DISCOUNT, Top, average, 8-Jan-24, 7-Oct-23, 8-Jan-24, INR, 113803, Futrelle, Mrs. Jacques Heath (Lily May Peel), female)","{""Id"":4,""Nick_Name"":""service"",""First_Name"":""Stalin"",""Last_Name"":""Rajesh"",""Age"":40,""Type"":""Minister"",""Description"":""Engine_Base"",""Commodity_Index"":""DISCOUNT"",""Sensex_Category"":""Top"",""Label_Type"":""average"",""Effective_Date"":""8-Jan-24"",""Start_Date"":""7-Oct-23"",""End_Date"":""8-Jan-24"",""Currency"":""INR"",""Ticket"":""113803"",""Name"":""Futrelle, Mrs. Jacques Heath (Lily May Peel)"",""Sex"":""female""}"
"List(8, Marketing, Vemparla, Harish, 55, Manager, Engine_Base, DISCOUNT, Top, average, 6-Jan-25, 9-Jan-24, 6-Jan-25, INR, 349909, Palsson, Master. Gosta Leonard, male)","{""Id"":8,""Nick_Name"":""Marketing"",""First_Name"":""Vemparla"",""Last_Name"":""Harish"",""Age"":55,""Type"":""Manager"",""Description"":""Engine_Base"",""Commodity_Index"":""DISCOUNT"",""Sensex_Category"":""Top"",""Label_Type"":""average"",""Effective_Date"":""6-Jan-25"",""Start_Date"":""9-Jan-24"",""End_Date"":""6-Jan-25"",""Currency"":""INR"",""Ticket"":""349909"",""Name"":""Palsson, Master. Gosta Leonard"",""Sex"":""male""}"
"List(6, Drinker, Karjala, Hari, 33, Army, Suzuki Swift, DISCOUNT, Top, average, 6-Mar-23, 7-Feb-23, 6-Mar-23, INR, 330877, Moran, Mr. James, male)","{""Id"":6,""Nick_Name"":""Drinker"",""First_Name"":""Karjala"",""Last_Name"":""Hari"",""Age"":33,""Type"":""Army"",""Description"":""Suzuki Swift"",""Commodity_Index"":""DISCOUNT"",""Sensex_Category"":""Top"",""Label_Type"":""average"",""Effective_Date"":""6-Mar-23"",""Start_Date"":""7-Feb-23"",""End_Date"":""6-Mar-23"",""Currency"":""INR"",""Ticket"":""330877"",""Name"":""Moran, Mr. James"",""Sex"":""male""}"
"List(9, Politician, Devineni, Umesh, 58, Senior, Creta, DISCOUNT, Top, average, 6-Apr-23, 7-Mar-23, 6-Apr-23, INR, 347742, Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg), female)","{""Id"":9,""Nick_Name"":""Politician"",""First_Name"":""Devineni"",""Last_Name"":""Umesh"",""Age"":58,""Type"":""Senior"",""Description"":""Creta"",""Commodity_Index"":""DISCOUNT"",""Sensex_Category"":""Top"",""Label_Type"":""average"",""Effective_Date"":""6-Apr-23"",""Start_Date"":""7-Mar-23"",""End_Date"":""6-Apr-23"",""Currency"":""INR"",""Ticket"":""347742"",""Name"":""Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)"",""Sex"":""female""}"
"List(10, Minister, Ponguru, Narayana, 56, Education, Brezza, DISCOUNT, Top, average, 6-Apr-23, 7-Mar-23, 6-Apr-23, INR, 237736, Nasser, Mrs. Nicholas (Adele Achem), female)","{""Id"":10,""Nick_Name"":""Minister"",""First_Name"":""Ponguru"",""Last_Name"":""Narayana"",""Age"":56,""Type"":""Education"",""Description"":""Brezza"",""Commodity_Index"":""DISCOUNT"",""Sensex_Category"":""Top"",""Label_Type"":""average"",""Effective_Date"":""6-Apr-23"",""Start_Date"":""7-Mar-23"",""End_Date"":""6-Apr-23"",""Currency"":""INR"",""Ticket"":""237736"",""Name"":""Nasser, Mrs. Nicholas (Adele Achem)"",""Sex"":""female""}"


In [0]:
# Schema used to read the JSON strings back into a struct.
# Only the first twelve keys of each message are listed (Id .. Start_Date),
# all declared with nullable=False.
schema = StructType(
    [
        StructField(field_name, field_type(), False)
        for field_name, field_type in [
            ('Id', IntegerType),
            ('Nick_Name', StringType),
            ('First_Name', StringType),
            ('Last_Name', StringType),
            ('Age', IntegerType),
            ('Type', StringType),
            ('Description', StringType),
            ('Commodity_Index', StringType),
            ('Sensex_Category', StringType),
            ('Label_Type', StringType),
            ('Effective_Date', StringType),
            ('Start_Date', StringType),
        ]
    ]
)

In [0]:
# Apply the from_json function on the JSON string column
df_final = df_final.select(f.from_json(f.col('sales_msg_json'), schema).alias('kafka_msg'))
display(df_final.limit(10))

kafka_msg
"List(1, admin, John, Victor, 30, Grade1, Baleno, DISCOUNT, Top, average, 6-Feb-23, 14-Jan-23)"
"List(7, Army, Koyi, Damodar, 37, Bettalian, Wagon R, DISCOUNT, Top, average, 6-Jan-25, 9-Jan-24)"
"List(2, everest, Paul, Irish, 35, Grade2, Engine_Base, DISCOUNT, Top, average, 6-Feb-23, 14-Jan-23)"
"List(5, Builder, Golla, Rajasekar, 43, Builder, Suzuki Swift, DISCOUNT, Top, average, 6-Mar-23, 7-Feb-23)"
"List(3, moon, Erram, Rammohan, 29, Enginner1, Baleno, DISCOUNT, Top, average, 8-Jan-24, 7-Oct-23)"
"List(4, service, Stalin, Rajesh, 40, Minister, Engine_Base, DISCOUNT, Top, average, 8-Jan-24, 7-Oct-23)"
"List(8, Marketing, Vemparla, Harish, 55, Manager, Engine_Base, DISCOUNT, Top, average, 6-Jan-25, 9-Jan-24)"
"List(6, Drinker, Karjala, Hari, 33, Army, Suzuki Swift, DISCOUNT, Top, average, 6-Mar-23, 7-Feb-23)"
"List(9, Politician, Devineni, Umesh, 58, Senior, Creta, DISCOUNT, Top, average, 6-Apr-23, 7-Mar-23)"
"List(10, Minister, Ponguru, Narayana, 56, Education, Brezza, DISCOUNT, Top, average, 6-Apr-23, 7-Mar-23)"
