In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import DoubleType, IntegerType

In [0]:
# Obtain the notebook's SparkSession: getOrCreate() reuses an already-running
# session when there is one, otherwise it starts a new one under this app name.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

In [0]:
# Sample rows. Every field, including the numeric-looking and boolean-looking
# ones, is deliberately a Python str so that the cells below have something to
# cast.
simpleData = [
    ("James", "34", "true", "M", "3000.6089"),
    ("Michael", "33", "true", "F", "3300.8067"),
    ("Robert", "37", "false", "M", "5000.5034"),
]

# Column names, in the same order as the fields of each tuple above.
columns = [
    "firstname",
    "age",
    "isGraduated",
    "gender",
    "salary",
]

In [0]:
# Build the DataFrame from the sample tuples; `columns` supplies only the
# names, so the column types are inferred from the (all-string) values.
df = spark.createDataFrame(simpleData, schema=columns)

# Inspect the starting point: schema first, then the rows without truncation.
df.printSchema()
df.show(truncate=False)

root
 |-- firstname: string (nullable = true)
 |-- age: string (nullable = true)
 |-- isGraduated: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: string (nullable = true)

+---------+---+-----------+------+---------+
|firstname|age|isGraduated|gender|salary   |
+---------+---+-----------+------+---------+
|James    |34 |true       |M     |3000.6089|
|Michael  |33 |true       |F     |3300.8067|
|Robert   |37 |false      |M     |5000.5034|
+---------+---+-----------+------+---------+



In [0]:
# NOTE: importing `round` here shadows Python's built-in round() for the rest
# of the notebook.
from pyspark.sql.functions import col, round, expr

# Three equivalent ways to cast `salary` from string to double with
# withColumn(). Each call yields a new DataFrame whose schema is printed;
# the result is not assigned, so `df` itself keeps the string column.

# 1. Column accessed as an attribute, target type given by its SQL name.
df.withColumn(
    "salary",
    df.salary.cast('double'),
).printSchema()

# 2. Column accessed as an attribute, target type given as a DataType object.
df.withColumn(
    "salary",
    df.salary.cast(DoubleType()),
).printSchema()

# 3. Column referenced through col(), target type given by its SQL name.
df.withColumn(
    "salary",
    col("salary").cast('double'),
).printSchema()

# Optional variant (left disabled): cast, then round to 2 decimal places.
# show() returns None, so printSchema() cannot be chained onto it -- keep the
# intermediate DataFrame and call the two methods separately:
#   salary_rounded = df.withColumn("salary", round(df.salary.cast(DoubleType()), 2))
#   salary_rounded.show(truncate=False)
#   salary_rounded.printSchema()


root
 |-- firstname: string (nullable = true)
 |-- age: string (nullable = true)
 |-- isGraduated: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: double (nullable = true)

root
 |-- firstname: string (nullable = true)
 |-- age: string (nullable = true)
 |-- isGraduated: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: double (nullable = true)

root
 |-- firstname: string (nullable = true)
 |-- age: string (nullable = true)
 |-- isGraduated: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: double (nullable = true)



In [0]:
# Same cast expressed as a SQL fragment inside selectExpr(); only the three
# listed columns are kept in the result.
df.selectExpr(
    "firstname",
    "isGraduated",
    "cast(salary as double) salary",
).printSchema()

# Same cast again in plain Spark SQL: expose `df` as a temporary view, then
# query it. DOUBLE(salary) is the function-style spelling of the cast.
df.createOrReplaceTempView("CastExample")
spark.sql(
    "SELECT firstname,isGraduated,DOUBLE(salary) as salary from CastExample"
).printSchema()

root
 |-- firstname: string (nullable = true)
 |-- isGraduated: string (nullable = true)
 |-- salary: double (nullable = true)

root
 |-- firstname: string (nullable = true)
 |-- isGraduated: string (nullable = true)
 |-- salary: double (nullable = true)

