In [0]:
# Sample customer records: one dict per row, keyed by column name.
# The rows are listed as compact (customer_id, first_name, last_name) tuples
# and expanded into dicts so the key names are spelled out only once.
data = [
    {"customer_id": customer_id, "first_name": first_name, "last_name": last_name}
    for customer_id, first_name, last_name in (
        (1, "Don", "Ollander"),
        (2, "Jolee", "Ziemen"),
        (3, "Maddie", "Sewley"),
        (4, "Lolita", "Meadway"),
        (5, "Kennedy", "Debell"),
    )
]

In [0]:
# Column names handed to spark.createDataFrame as the DataFrame schema.
schema = [
    "customer_id",
    "first_name",
    "last_name",
]

In [0]:
# Build a Spark DataFrame from the in-memory records, using `schema` for the
# column names (column types are left for Spark to infer).
df = spark.createDataFrame(data, schema=schema)

# Print the DataFrame's rows as a text table.
df.show()

+-----------+----------+---------+
|customer_id|first_name|last_name|
+-----------+----------+---------+
|          1|       Don| Ollander|
|          2|     Jolee|   Ziemen|
|          3|    Maddie|   Sewley|
|          4|    Lolita|  Meadway|
|          5|   Kennedy|   Debell|
+-----------+----------+---------+



In [0]:
# Print the column names, types and nullability of the DataFrame as a tree.
df.printSchema()

root
 |-- customer_id: long (nullable = true)
 |-- first_name: string (nullable = true)
 |-- last_name: string (nullable = true)



In [0]:
# List the DataFrame's column names (displayed as the cell's output value).
df.columns

Out[47]: ['customer_id', 'first_name', 'last_name']

In [0]:
from pyspark.sql.functions import concat_ws

# Column expression joining first and last name with a single space.
full_name_expr = concat_ws(" ", df["first_name"], df["last_name"])

# Rebind `df` to a DataFrame that carries the extra `full_name` column;
# the cells below operate on this extended DataFrame.
df = df.withColumn("full_name", full_name_expr)
df.show()

+-----------+----------+---------+--------------+
|customer_id|first_name|last_name|     full_name|
+-----------+----------+---------+--------------+
|          1|       Don| Ollander|  Don Ollander|
|          2|     Jolee|   Ziemen|  Jolee Ziemen|
|          3|    Maddie|   Sewley| Maddie Sewley|
|          4|    Lolita|  Meadway|Lolita Meadway|
|          5|   Kennedy|   Debell|Kennedy Debell|
+-----------+----------+---------+--------------+



In [0]:
# Expose the DataFrame to Spark SQL under the name `customer_view`
# (replacing any temp view already registered under that name).
df.createOrReplaceTempView("customer_view")

# Query the view with plain SQL and print the result.
customer_view_rows = spark.sql(
    """
    SELECT * FROM customer_view;
    """
)
customer_view_rows.show()

+-----------+----------+---------+--------------+
|customer_id|first_name|last_name|     full_name|
+-----------+----------+---------+--------------+
|          1|       Don| Ollander|  Don Ollander|
|          2|     Jolee|   Ziemen|  Jolee Ziemen|
|          3|    Maddie|   Sewley| Maddie Sewley|
|          4|    Lolita|  Meadway|Lolita Meadway|
|          5|   Kennedy|   Debell|Kennedy Debell|
+-----------+----------+---------+--------------+



In [0]:
from pyspark.sql.functions import col

# Keep only the rows whose customer_id is strictly greater than 2.
customer_id_above_two = col("customer_id") > 2
df.filter(customer_id_above_two).show()

+-----------+----------+---------+--------------+
|customer_id|first_name|last_name|     full_name|
+-----------+----------+---------+--------------+
|          3|    Maddie|   Sewley| Maddie Sewley|
|          4|    Lolita|  Meadway|Lolita Meadway|
|          5|   Kennedy|   Debell|Kennedy Debell|
+-----------+----------+---------+--------------+



In [0]:
from pyspark.sql.functions import lit

# Constant column: every row gets the literal value "bronze" under the name `rank`.
rank_column = lit("bronze").alias("rank")

# Show all existing columns plus the constant one; `df` itself is not reassigned.
df.select("*", rank_column).show()

+-----------+----------+---------+--------------+------+
|customer_id|first_name|last_name|     full_name|  rank|
+-----------+----------+---------+--------------+------+
|          1|       Don| Ollander|  Don Ollander|bronze|
|          2|     Jolee|   Ziemen|  Jolee Ziemen|bronze|
|          3|    Maddie|   Sewley| Maddie Sewley|bronze|
|          4|    Lolita|  Meadway|Lolita Meadway|bronze|
|          5|   Kennedy|   Debell|Kennedy Debell|bronze|
+-----------+----------+---------+--------------+------+



In [0]:
# Display a copy of the DataFrame with `customer_id` renamed to `id`;
# `df` itself is not reassigned.
customers_with_short_id = df.withColumnRenamed("customer_id", "id")
customers_with_short_id.show()

+---+----------+---------+--------------+
| id|first_name|last_name|     full_name|
+---+----------+---------+--------------+
|  1|       Don| Ollander|  Don Ollander|
|  2|     Jolee|   Ziemen|  Jolee Ziemen|
|  3|    Maddie|   Sewley| Maddie Sewley|
|  4|    Lolita|  Meadway|Lolita Meadway|
|  5|   Kennedy|   Debell|Kennedy Debell|
+---+----------+---------+--------------+



In [0]:
# Schema before the cast.
df.printSchema()

# Schema after replacing `customer_id` with its string-cast version;
# the result is only printed, `df` is not reassigned.
customer_id_as_string = col("customer_id").cast("string")
df.withColumn("customer_id", customer_id_as_string).printSchema()

root
 |-- customer_id: long (nullable = true)
 |-- first_name: string (nullable = true)
 |-- last_name: string (nullable = true)
 |-- full_name: string (nullable = false)

root
 |-- customer_id: string (nullable = true)
 |-- first_name: string (nullable = true)
 |-- last_name: string (nullable = true)
 |-- full_name: string (nullable = false)



In [0]:
# Rows with every column, for comparison.
df.show()

# Same rows without the `full_name` column; `df` is not reassigned.
customers_without_full_name = df.drop("full_name")
customers_without_full_name.show()

+-----------+----------+---------+--------------+
|customer_id|first_name|last_name|     full_name|
+-----------+----------+---------+--------------+
|          1|       Don| Ollander|  Don Ollander|
|          2|     Jolee|   Ziemen|  Jolee Ziemen|
|          3|    Maddie|   Sewley| Maddie Sewley|
|          4|    Lolita|  Meadway|Lolita Meadway|
|          5|   Kennedy|   Debell|Kennedy Debell|
+-----------+----------+---------+--------------+

+-----------+----------+---------+
|customer_id|first_name|last_name|
+-----------+----------+---------+
|          1|       Don| Ollander|
|          2|     Jolee|   Ziemen|
|          3|    Maddie|   Sewley|
|          4|    Lolita|  Meadway|
|          5|   Kennedy|   Debell|
+-----------+----------+---------+

