# PySpark DataFrames
- Filter operations
- Specific operators (e.g., `&`, `|`, `==`, `~`)

In [1]:
import pyspark
import pandas as pd
from pyspark.sql import SparkSession

In [2]:
# Path to the pizza-sales order-details CSV. It is a relative path, so it
# presumably resolves against the notebook's working directory — TODO confirm.
pizza_dataset_path = "../datasets/pizza_sales/order_details.csv"

In [3]:
# Obtain a SparkSession named 'Learning Spark'; getOrCreate() reuses an
# already-running session instead of starting a second one.
spark = (
    SparkSession.builder
    .appName('Learning Spark')
    .getOrCreate()
)
# Bare expression so the notebook renders the session summary.
spark

In [4]:
# Load the order-details CSV: the first line supplies the column names and
# column types are inferred from the values rather than left as strings.
data = spark.read.csv(
    pizza_dataset_path,
    header=True,
    inferSchema=True,
)

In [5]:
# Preview the loaded DataFrame; show() prints only the first 20 rows by default.
data.show()

+----------------+--------+--------------+--------+
|order_details_id|order_id|      pizza_id|quantity|
+----------------+--------+--------------+--------+
|               1|       1|    hawaiian_m|       1|
|               2|       2| classic_dlx_m|       1|
|               3|       2| five_cheese_l|       1|
|               4|       2|   ital_supr_l|       1|
|               5|       2|    mexicana_m|       1|
|               6|       2|    thai_ckn_l|       1|
|               7|       3|   ital_supr_m|       1|
|               8|       3|  prsc_argla_l|       1|
|               9|       4|   ital_supr_m|       1|
|              10|       5|   ital_supr_m|       1|
|              11|       6|     bbq_ckn_s|       1|
|              12|       6|   the_greek_s|       1|
|              13|       7|spinach_supr_s|       1|
|              14|       8|spinach_supr_s|       1|
|              15|       9| classic_dlx_s|       1|
|              16|       9|green_garden_s|       1|
|           

In [6]:
# Keep only rows whose quantity is at least 2 (i.e. more than 1).
# The condition is passed as a SQL-expression string.
data.filter('quantity>=2').show()

+----------------+--------+--------------+--------+
|order_details_id|order_id|      pizza_id|quantity|
+----------------+--------+--------------+--------+
|              49|      17|mediterraneo_m|       2|
|             184|      78|  spicy_ital_l|       3|
|             212|      91|     bbq_ckn_l|       2|
|             223|      92|   the_greek_m|       2|
|             246|     105|    hawaiian_s|       2|
|             342|     144|     bbq_ckn_m|       3|
|             445|     189|    big_meat_s|       2|
|             457|     193| ckn_alfredo_l|       2|
|             612|     271|    cali_ckn_m|       2|
|             613|     271| ckn_alfredo_m|       2|
|             614|     271|   ckn_pesto_l|       2|
|             661|     291| ckn_alfredo_m|       2|
|             755|     330| four_cheese_l|       2|
|             803|     350|   pepperoni_m|       2|
|             826|     361|    big_meat_s|       2|
|             891|     392|   pepperoni_s|       2|
|           

In [7]:
# Same row filter as before, then project only the two columns of interest.
(
    data
    .filter('quantity>=2')
    .select('pizza_id', 'quantity')
    .show()
)

+--------------+--------+
|      pizza_id|quantity|
+--------------+--------+
|mediterraneo_m|       2|
|  spicy_ital_l|       3|
|     bbq_ckn_l|       2|
|   the_greek_m|       2|
|    hawaiian_s|       2|
|     bbq_ckn_m|       3|
|    big_meat_s|       2|
| ckn_alfredo_l|       2|
|    cali_ckn_m|       2|
| ckn_alfredo_m|       2|
|   ckn_pesto_l|       2|
| ckn_alfredo_m|       2|
| four_cheese_l|       2|
|   pepperoni_m|       2|
|    big_meat_s|       2|
|   pepperoni_s|       2|
|   pepperoni_m|       3|
| five_cheese_l|       2|
|    big_meat_s|       2|
|    cali_ckn_m|       2|
+--------------+--------+
only showing top 20 rows



In [8]:
# Same "quantity >= 2" filter, written as a Column expression instead of a
# SQL string; the comparison on data['quantity'] builds the boolean condition.
data.filter(data['quantity'] >= 2).show()

+----------------+--------+--------------+--------+
|order_details_id|order_id|      pizza_id|quantity|
+----------------+--------+--------------+--------+
|              49|      17|mediterraneo_m|       2|
|             184|      78|  spicy_ital_l|       3|
|             212|      91|     bbq_ckn_l|       2|
|             223|      92|   the_greek_m|       2|
|             246|     105|    hawaiian_s|       2|
|             342|     144|     bbq_ckn_m|       3|
|             445|     189|    big_meat_s|       2|
|             457|     193| ckn_alfredo_l|       2|
|             612|     271|    cali_ckn_m|       2|
|             613|     271| ckn_alfredo_m|       2|
|             614|     271|   ckn_pesto_l|       2|
|             661|     291| ckn_alfredo_m|       2|
|             755|     330| four_cheese_l|       2|
|             803|     350|   pepperoni_m|       2|
|             826|     361|    big_meat_s|       2|
|             891|     392|   pepperoni_s|       2|
|           

In [10]:
# NOT operator: `~` negates a Column condition, so this keeps the rows for
# which "quantity >= 2" is not true. The inner parentheses are required
# because Python's unary `~` binds tighter than `>=`.
data.filter(~(data['quantity'] >= 2)).show()

+----------------+--------+--------------+--------+
|order_details_id|order_id|      pizza_id|quantity|
+----------------+--------+--------------+--------+
|               1|       1|    hawaiian_m|       1|
|               2|       2| classic_dlx_m|       1|
|               3|       2| five_cheese_l|       1|
|               4|       2|   ital_supr_l|       1|
|               5|       2|    mexicana_m|       1|
|               6|       2|    thai_ckn_l|       1|
|               7|       3|   ital_supr_m|       1|
|               8|       3|  prsc_argla_l|       1|
|               9|       4|   ital_supr_m|       1|
|              10|       5|   ital_supr_m|       1|
|              11|       6|     bbq_ckn_s|       1|
|              12|       6|   the_greek_s|       1|
|              13|       7|spinach_supr_s|       1|
|              14|       8|spinach_supr_s|       1|
|              15|       9| classic_dlx_s|       1|
|              16|       9|green_garden_s|       1|
|           

In [9]:
# AND operator: `&` combines two Column conditions. Each comparison is wrapped
# in its own parentheses because `&` binds tighter than `>=` and `>` in Python.
data.filter(
    (data['quantity'] >= 2) & (data['order_details_id'] > 1000)
).show()

+----------------+--------+--------------+--------+
|order_details_id|order_id|      pizza_id|quantity|
+----------------+--------+--------------+--------+
|            1021|     443|    cali_ckn_m|       2|
|            1101|     479| ital_cpcllo_l|       2|
|            1188|     522| four_cheese_l|       2|
|            1190|     522| ital_cpcllo_m|       2|
|            1205|     529|    cali_ckn_m|       2|
|            1209|     531|peppr_salami_m|       2|
|            1277|     567| five_cheese_l|       2|
|            1427|     635|    cali_ckn_s|       2|
|            1525|     678|  spicy_ital_m|       2|
|            1624|     724|  southw_ckn_l|       2|
|            1657|     740| five_cheese_l|       2|
|            1659|     740| ital_cpcllo_l|       2|
|            1745|     776|    thai_ckn_l|       2|
|            1784|     792|  spin_pesto_l|       2|
|            1800|     800|   calabrese_l|       2|
|            1804|     800|  southw_ckn_s|       2|
|           