In [None]:
'''
Problem - 
Load and Transform Data
You need to process a customer dataset to identify high-value customers. Specifically, you will:

Read data from a CSV file with inferSchema option as true.
Filter customers with a purchase amount more than 100 USD.
Further filter to include only customers aged 30 or above.
Use display(df) to show the final DataFrame.
Input
File Path:
/datasets/customers.csv

Schema:

Column	Type
customer_id	integer
name	string
email	string
age	integer
purchase_amount	double
Example Data:

customer_id	name	email	age	purchase_amount
1	Alice Johnson	alice@email.com	25	150.50
2	Bob Smith	bob@email.com	32	200.00
3	Charlie Brown	charlie@email.com	29	75.00
4	Diana Prince	diana@email.com	40	120.00
5	Evan Davis	evan@email.com	35	90.00
Output
Call display function:
Use display(df) to show the final DataFrame. Make sure the schema is correct.

Schema:

Column	Type
customer_id	integer
name	string
purchase_amount	double
Example Data:

customer_id	name	purchase_amount
2	Bob Smith	200.00
4	Diana Prince	120.00
Explanation:

The output DataFrame df_result includes customers who have a purchase amount of more than 100 USD and are aged 30 or above.
In the example, Bob Smith and Diana Prince meet these criteria, so they are included in the result.
'''

# Initialize Spark session
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
spark = SparkSession.builder.appName('Spark Playground').getOrCreate()

# Build the high-value customer DataFrame:
#   1. read the CSV, taking column names from the header row and letting
#      Spark infer column types;
#   2. keep purchases strictly greater than 100 USD;
#   3. keep customers aged 30 or above -- the bound is inclusive, so a
#      customer who is exactly 30 must not be dropped;
#   4. project down to the three columns required in the output schema.
df_result = (
  spark.read
  .option("inferSchema", True)
  .option("header", True)
  .csv("/datasets/customers.csv")
  .filter(col("purchase_amount") > 100)
  .filter(col("age") >= 30)
  .select("customer_id", "name", "purchase_amount")
)

# Display the final DataFrame using the display() function.
display(df_result)

'''
Bonus Challenge: Can you solve this using Spark SQL and temporary views?
df = (
  spark.read
  .option("inferSchema", True)
  .option("header", True)
  .csv("/datasets/customers.csv")
)

df.createOrReplaceTempView("customers")
df_result = spark.sql("""
SELECT customer_id, name, purchase_amount
FROM customers
WHERE purchase_amount > 100
AND age >= 30
"""
)
'''