In [0]:
from pyspark.sql import functions as F  
from pyspark.sql.types import StringType, StructType, StructField


In [0]:
# Read multiple CSV files and record each row's source file in new columns.

path = r'dbfs:/FileStore/shared_uploads/ayushmaurya15398@gmail.com/multiple_files/*.csv'

# One read is enough. inferSchema is not enabled here, so every CSV column
# is loaded as a string; header=True takes column names from the first line.
df = spark.read.csv(path, header=True)

# input_file_name() gives the full path of the file each row was read from.
# 'state' is the bare file name: keep the last '/'-separated segment, then
# take the part before the first '.' (raw string so the regex '\.' is not
# treated as a Python escape sequence).
columns_mapping = {
    'state': F.split(F.substring_index(F.input_file_name(), '/', -1), r'\.')[0],
    'file_path': F.input_file_name(),
}
df = df.withColumns(columns_mapping)
df.show(truncate=False)

+----------+---------+---+------+---------+--------------------------------------------------------------------------------------+
|first_name|last_name|age|sex   |state    |file_path                                                                             |
+----------+---------+---+------+---------+--------------------------------------------------------------------------------------+
|Rahul     |Kumar    |28 |Male  |karnataka|dbfs:/FileStore/shared_uploads/ayushmaurya15398@gmail.com/multiple_files/karnataka.csv|
|Priya     |Sharma   |35 |Female|karnataka|dbfs:/FileStore/shared_uploads/ayushmaurya15398@gmail.com/multiple_files/karnataka.csv|
|Suresh    |Patil    |42 |Male  |karnataka|dbfs:/FileStore/shared_uploads/ayushmaurya15398@gmail.com/multiple_files/karnataka.csv|
|Amit      |Sharma   |28 |Male  |rajasthan|dbfs:/FileStore/shared_uploads/ayushmaurya15398@gmail.com/multiple_files/rajasthan.csv|
|Neha      |Verma    |35 |Female|rajasthan|dbfs:/FileStore/shared_uploads/ayushmaur

In [0]:
# Union DataFrames with different schemas

data1 = [("John", 25, "USA"),
         ("Alice", 30, "Canada"),
         ("Bob", 22, "UK")]
columns1 = ["Name", "Age", "Country"]
df1 = spark.createDataFrame(data1, columns1)

data2 = [("Mike", "Brazil"),
         ("Sara", "Australia")]
columns2 = ["Name", "Country"]
df2 = spark.createDataFrame(data2, columns2)

# Add the columns df2 lacks so it matches df1's schema. Each one is a null
# cast to df1's declared type, so df2 never carries an untyped (void) column.
# Iterating df1.columns (not a set difference) keeps the order deterministic.
# Note: on Spark 3.1+, df1.unionByName(df2, allowMissingColumns=True) does
# the same fill-with-nulls in a single call.
missing_columns = [name for name in df1.columns if name not in df2.columns]
for name in missing_columns:
    df2 = df2.withColumn(name, F.lit(None).cast(df1.schema[name].dataType))

# unionByName matches columns by name, so df2's column order does not matter.
union_df = df1.unionByName(df2)

union_df.show()


+-----+----+---------+
| Name| Age|  Country|
+-----+----+---------+
| John|  25|      USA|
|Alice|  30|   Canada|
|  Bob|  22|       UK|
| Mike|null|   Brazil|
| Sara|null|Australia|
+-----+----+---------+



In [0]:
# mergeSchema=true example in PySpark: step 1, create the table from df1.

# df1 has three columns (Name, Age, Country).
data1 = [
    ("John", 25, "USA"),
    ("Alice", 30, "Canada"),
    ("Bob", 22, "UK"),
]
columns1 = ["Name", "Age", "Country"]
df1 = spark.createDataFrame(data1, schema=columns1)

# df2 has no Age column; it is rebuilt here so it carries only Name/Country.
data2 = [
    ("Mike", "Brazil"),
    ("Sara", "Australia"),
]
columns2 = ["Name", "Country"]
df2 = spark.createDataFrame(data2, schema=columns2)

# No save mode is set, so the writer's default applies.
# NOTE(review): the default mode presumably errors if merge_table already
# exists, so re-running this cell needs the table dropped first. Confirm.
(
    df1.write
    .option("mergeSchema", "true")
    .saveAsTable("merge_table")
)

In [0]:
%sql
-- Show every row and column currently stored in merge_table.
SELECT * FROM merge_table

Name,Age,Country
Alice,30,Canada
John,25,USA
Bob,22,UK


In [0]:
# Append df2 to the existing table with the mergeSchema option enabled.
(
    df2.write
    .mode("append")
    .option("mergeSchema", "true")
    .saveAsTable("merge_table")
)

In [0]:
%sql
-- Show every row and column currently stored in merge_table.
SELECT * FROM merge_table

Name,Age,Country
Alice,30.0,Canada
John,25.0,USA
Bob,22.0,UK
Sara,,Australia
Mike,,Brazil


<h2>Struct column</h2>

In [0]:
# Build a Struct column (similar to Python's `dict()`) – F.struct(*cols).
# The struct's fields take the names of the source columns.
df = df.withColumn('my_struct', F.struct('first_name', 'state'))
df.show()

# Pull individual fields back out of the struct by key – col.getField(str)
(
    df
    .withColumn('first_name_name', F.col('my_struct').getField('first_name'))
    .withColumn('first_name_state', F.col('my_struct').getField('state'))
    .show()
)

+----------+---------+---+------+---------+--------------------+-------------------+
|first_name|last_name|age|   sex|    state|           file_path|          my_struct|
+----------+---------+---+------+---------+--------------------+-------------------+
|     Rahul|    Kumar| 28|  Male|karnataka|dbfs:/FileStore/s...| {Rahul, karnataka}|
|     Priya|   Sharma| 35|Female|karnataka|dbfs:/FileStore/s...| {Priya, karnataka}|
|    Suresh|    Patil| 42|  Male|karnataka|dbfs:/FileStore/s...|{Suresh, karnataka}|
|      Amit|   Sharma| 28|  Male|rajasthan|dbfs:/FileStore/s...|  {Amit, rajasthan}|
|      Neha|    Verma| 35|Female|rajasthan|dbfs:/FileStore/s...|  {Neha, rajasthan}|
|    Rajesh|    Singh| 42|  Male|rajasthan|dbfs:/FileStore/s...|{Rajesh, rajasthan}|
|   Sandeep|  Mohanty| 28|  Male|   odisha|dbfs:/FileStore/s...|  {Sandeep, odisha}|
|  Priyanka|     Sahu| 35|Female|   odisha|dbfs:/FileStore/s...| {Priyanka, odisha}|
+----------+---------+---+------+---------+--------------------+-