In [0]:
# Demo: renaming DataFrame columns with withColumnRenamed().
from pyspark.sql import SparkSession

# Obtain a session through the builder's getOrCreate().
spark = (
    SparkSession.builder
    .appName('pyspark - example join')
    .getOrCreate()
)

# Sample rows; each tuple's fields line up with `columns` below.
data = [
    ('Ram', '1991-04-01', 'M', 3000),
    ('Mike', '2000-05-19', 'M', 4000),
    ('Rohini', '1978-09-05', 'M', 4000),
    ('Maria', '1967-12-01', 'F', 4000),
    ('Jenis', '1980-02-17', 'F', 1200),
]

# Column labels, in the same order as the tuple fields.
columns = ["Name", "DOB", "Gender", "salary"]

# Build the DataFrame from the rows, taking column names from `columns`.
df = spark.createDataFrame(data, schema=columns)

# Single rename: DOB -> "date of birth". The result is shown directly and
# `df` is not reassigned, so the next statement starts from the original names.
df.withColumnRenamed("DOB", "date of birth").show()

# Several renames: chain one withColumnRenamed() call per column.
(
    df
    .withColumnRenamed("DOB", "date of birth")
    .withColumnRenamed("Name", "personname")
    .show()
)


+------+-------------+------+------+
|  Name|date of birth|Gender|salary|
+------+-------------+------+------+
|   Ram|   1991-04-01|     M|  3000|
|  Mike|   2000-05-19|     M|  4000|
|Rohini|   1978-09-05|     M|  4000|
| Maria|   1967-12-01|     F|  4000|
| Jenis|   1980-02-17|     F|  1200|
+------+-------------+------+------+

+----------+-------------+------+------+
|personname|date of birth|Gender|salary|
+----------+-------------+------+------+
|       Ram|   1991-04-01|     M|  3000|
|      Mike|   2000-05-19|     M|  4000|
|    Rohini|   1978-09-05|     M|  4000|
|     Maria|   1967-12-01|     F|  4000|
|     Jenis|   1980-02-17|     F|  1200|
+----------+-------------+------+------+



In [0]:
# Demo: renaming columns through SQL expressions with selectExpr().
from pyspark.sql import SparkSession

# Obtain a session through the builder's getOrCreate().
spark = (
    SparkSession.builder
    .appName('pyspark - example join')
    .getOrCreate()
)

# Sample rows; each tuple's fields line up with `columns` below.
data = [
    ('Ram', '1991-04-01', 'M', 3000),
    ('Mike', '2000-05-19', 'M', 4000),
    ('Rohini', '1978-09-05', 'M', 4000),
    ('Maria', '1967-12-01', 'F', 4000),
    ('Jenis', '1980-02-17', 'F', 1200),
]

# Column labels, in the same order as the tuple fields.
columns = ["Name", "DOB", "Gender", "salary"]

# Build the DataFrame from the rows, taking column names from `columns`.
df = spark.createDataFrame(data, schema=columns)

# "<column> as <alias>" renames a column; a bare column name keeps it as-is.
# The result's columns follow the order of the expressions given here.
# NOTE: `data` is rebound from the list of rows to the selected DataFrame.
data = df.selectExpr(
    "Gender as category",
    "DOB",
    "Name as name",
    "salary",
)

data.show()


+--------+----------+------+------+
|category|       DOB|  name|salary|
+--------+----------+------+------+
|       M|1991-04-01|   Ram|  3000|
|       M|2000-05-19|  Mike|  4000|
|       M|1978-09-05|Rohini|  4000|
|       F|1967-12-01| Maria|  4000|
|       F|1980-02-17| Jenis|  1200|
+--------+----------+------+------+



In [0]:
from pyspark.sql.functions import col

# Rename only 'salary' (to 'Amount') through Column.alias(); the other
# columns are passed by name and keep their existing labels.
# `df` is the DataFrame built in an earlier cell.
data = df.select(
    "Name",
    "DOB",
    "Gender",
    col("salary").alias('Amount'),
)
data.show()

+------+----------+------+------+
|  Name|       DOB|Gender|Amount|
+------+----------+------+------+
|   Ram|1991-04-01|     M|  3000|
|  Mike|2000-05-19|     M|  4000|
|Rohini|1978-09-05|     M|  4000|
| Maria|1967-12-01|     F|  4000|
| Jenis|1980-02-17|     F|  1200|
+------+----------+------+------+

