In [0]:
# Prepare data.
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
# Sample rows, one tuple per person:
#   (comma-delimited full name, id as text, gender code, salary)
# NOTE(review): the last row has an empty id and a salary of -1 — presumably
# stand-ins for missing values; confirm before relying on them downstream.
Data = [
    ("James,A,Smith", "36636", "M", 3000),
    ("Michael,Rose,Jones", "40288", "M", 4000),
    ("Robert, B,Williams", "42114", "M", 4000),
    ("Maria,Anne,Jones", "39192", "F", 4000),
    ("Jen,Mary,Brown", "", "F", -1),
]
# Explicit schema for the sample rows: three string columns followed by an
# integer salary. Every field is declared nullable.
Schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in (
        ("name", StringType()),
        ("id", StringType()),
        ("gender", StringType()),
        ("salary", IntegerType()),
    )
])

In [0]:
# Build the Spark DataFrame from the in-memory rows using the explicit schema,
# then render it in the notebook.
df = spark.createDataFrame(Data, Schema)
df.display()

name,id,gender,salary
"James,A,Smith",36636.0,M,3000
"Michael,Rose,Jones",40288.0,M,4000
"Robert, B,Williams",42114.0,M,4000
"Maria,Anne,Jones",39192.0,F,4000
"Jen,Mary,Brown",,F,-1


In [0]:
# Split the comma-delimited "name" column into an array column "NameArray".
from pyspark.sql.functions import split,col

# split() interprets its pattern as a regular expression. Matching optional
# whitespace on both sides of the comma keeps stray spaces out of the parts:
# "Robert, B,Williams" yields ["Robert", "B", "Williams"] rather than
# ["Robert", " B", "Williams"].
NAME_DELIMITER_PATTERN = r"\s*,\s*"

df_name = df.select(
    "*",
    split(col("name"), NAME_DELIMITER_PATTERN).alias("NameArray"),
)
df_name.display()

name,id,gender,salary,NameArray
"James,A,Smith",36636.0,M,3000,"List(James, A, Smith)"
"Michael,Rose,Jones",40288.0,M,4000,"List(Michael, Rose, Jones)"
"Robert, B,Williams",42114.0,M,4000,"List(Robert, B, Williams)"
"Maria,Anne,Jones",39192.0,F,4000,"List(Maria, Anne, Jones)"
"Jen,Mary,Brown",,F,-1,"List(Jen, Mary, Brown)"


In [0]:
# Project the three positional entries of NameArray into their own columns
# (FirstName, MiddleName, LastName), then discard the intermediate array.
name_parts = df_name["NameArray"]
final_df = (
    df_name
    .withColumn("FirstName", name_parts.getItem(0))
    .withColumn("MiddleName", name_parts.getItem(1))
    .withColumn("LastName", name_parts.getItem(2))
    .drop("NameArray")
)
final_df.display()

name,id,gender,salary,FirstName,MiddleName,LastName
"James,A,Smith",36636.0,M,3000,James,A,Smith
"Michael,Rose,Jones",40288.0,M,4000,Michael,Rose,Jones
"Robert, B,Williams",42114.0,M,4000,Robert,B,Williams
"Maria,Anne,Jones",39192.0,F,4000,Maria,Anne,Jones
"Jen,Mary,Brown",,F,-1,Jen,Mary,Brown
