# PySpark Spark SQL

##### Instalando PySpark

In [1]:
!pip install pyspark



In [2]:
import pyspark
from pyspark.sql import SparkSession
spark = SparkSession.builder.master('local[*]').getOrCreate()

## Spark SQL - Consultas e Seleções

In [3]:
df = spark.sql('''select 'OK' as Status''')
df.show()

+------+
|Status|
+------+
|    OK|
+------+



## Importing Data

In [4]:
df = spark.read.csv('/content/drive/MyDrive/6. Estudos/1. FIAP/Fase 3 - Big Data/bases/cereal.csv', sep=',', inferSchema = True, header = True)
df.show()

+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+
|                name|mfr|type|calories|protein|fat|sodium|fiber|carbo|sugars|potass|vitamins|shelf|weight|cups|   rating|
+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+
|           100% Bran|  N|   C|      70|      4|  1|   130| 10.0|  5.0|     6|   280|      25|    3|   1.0|0.33|68.402973|
|   100% Natural Bran|  Q|   C|     120|      3|  5|    15|  2.0|  8.0|     8|   135|       0|    3|   1.0| 1.0|33.983679|
|            All-Bran|  K|   C|      70|      4|  1|   260|  9.0|  7.0|     5|   320|      25|    3|   1.0|0.33|59.425505|
|All-Bran with Ext...|  K|   C|      50|      4|  0|   140| 14.0|  8.0|     0|   330|      25|    3|   1.0| 0.5|93.704912|
|      Almond Delight|  R|   C|     110|      2|  2|   200|  1.0| 14.0|     8|    -1|      25|    3|   1.0|0.75|34.384843|
|Apple Cinnamon 

## Manipulation Data With Spark SQL

In [5]:
# cria uma tabela temporária dentro da Sessão
df.createOrReplaceTempView('cereal')

In [6]:
# base criada no SQL
cereal = spark.sql('''SELECT * FROM cereal WHERE type = 'C' ''')
cereal.show()

+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+
|                name|mfr|type|calories|protein|fat|sodium|fiber|carbo|sugars|potass|vitamins|shelf|weight|cups|   rating|
+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+
|           100% Bran|  N|   C|      70|      4|  1|   130| 10.0|  5.0|     6|   280|      25|    3|   1.0|0.33|68.402973|
|   100% Natural Bran|  Q|   C|     120|      3|  5|    15|  2.0|  8.0|     8|   135|       0|    3|   1.0| 1.0|33.983679|
|            All-Bran|  K|   C|      70|      4|  1|   260|  9.0|  7.0|     5|   320|      25|    3|   1.0|0.33|59.425505|
|All-Bran with Ext...|  K|   C|      50|      4|  0|   140| 14.0|  8.0|     0|   330|      25|    3|   1.0| 0.5|93.704912|
|      Almond Delight|  R|   C|     110|      2|  2|   200|  1.0| 14.0|     8|    -1|      25|    3|   1.0|0.75|34.384843|
|Apple Cinnamon 

In [7]:
df = df.where(df['type'] == 'C')
# base criada no Python
df.show()

+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+
|                name|mfr|type|calories|protein|fat|sodium|fiber|carbo|sugars|potass|vitamins|shelf|weight|cups|   rating|
+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+
|           100% Bran|  N|   C|      70|      4|  1|   130| 10.0|  5.0|     6|   280|      25|    3|   1.0|0.33|68.402973|
|   100% Natural Bran|  Q|   C|     120|      3|  5|    15|  2.0|  8.0|     8|   135|       0|    3|   1.0| 1.0|33.983679|
|            All-Bran|  K|   C|      70|      4|  1|   260|  9.0|  7.0|     5|   320|      25|    3|   1.0|0.33|59.425505|
|All-Bran with Ext...|  K|   C|      50|      4|  0|   140| 14.0|  8.0|     0|   330|      25|    3|   1.0| 0.5|93.704912|
|      Almond Delight|  R|   C|     110|      2|  2|   200|  1.0| 14.0|     8|    -1|      25|    3|   1.0|0.75|34.384843|
|Apple Cinnamon 

## Select no SparkSQL

In [8]:
df.printSchema()

root
 |-- name: string (nullable = true)
 |-- mfr: string (nullable = true)
 |-- type: string (nullable = true)
 |-- calories: integer (nullable = true)
 |-- protein: integer (nullable = true)
 |-- fat: integer (nullable = true)
 |-- sodium: integer (nullable = true)
 |-- fiber: double (nullable = true)
 |-- carbo: double (nullable = true)
 |-- sugars: integer (nullable = true)
 |-- potass: integer (nullable = true)
 |-- vitamins: integer (nullable = true)
 |-- shelf: integer (nullable = true)
 |-- weight: double (nullable = true)
 |-- cups: double (nullable = true)
 |-- rating: double (nullable = true)



In [9]:
df.createOrReplaceTempView('cereal')

In [10]:
cereal = spark.sql(''' SELECT DISTINCT type, mfr FROM cereal ''')
cereal.count()

6

## WHERE no Spark SQL

In [11]:
cereal = spark.sql(''' SELECT * FROM cereal WHERE mfr = 'K' ''')
cereal.count()

23

In [12]:
cereal = spark.sql(''' SELECT * FROM cereal WHERE calories = 100 ''')
cereal.count()

14

In [13]:
cereal = spark.sql(''' SELECT * FROM cereal WHERE mfr = 'K' AND calories >= 100 ''')
cereal.show()

+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+
|                name|mfr|type|calories|protein|fat|sodium|fiber|carbo|sugars|potass|vitamins|shelf|weight|cups|   rating|
+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+
|         Apple Jacks|  K|   C|     110|      2|  0|   125|  1.0| 11.0|    14|    30|      25|    2|   1.0| 1.0|33.174094|
|         Corn Flakes|  K|   C|     100|      2|  0|   290|  1.0| 21.0|     2|    35|      25|    1|   1.0| 1.0|45.863324|
|           Corn Pops|  K|   C|     110|      1|  0|    90|  1.0| 13.0|    12|    20|      25|    2|   1.0| 1.0|35.782791|
|  Cracklin' Oat Bran|  K|   C|     110|      3|  3|   140|  4.0| 10.0|     7|   160|      25|    3|   1.0| 0.5|40.448772|
|             Crispix|  K|   C|     110|      2|  0|   220|  1.0| 21.0|     3|    30|      25|    3|   1.0| 1.0|46.895644|
|         Froot 

## Group By

In [14]:
cereal = spark.sql(''' SELECT mfr, type, COUNT(*) AS total, SUM(calories) AS total_calories FROM cereal GROUP BY mfr, type ''')
cereal.show()

+---+----+-----+--------------+
|mfr|type|total|total_calories|
+---+----+-----+--------------+
|  P|   C|    9|           980|
|  K|   C|   23|          2500|
|  G|   C|   22|          2450|
|  Q|   C|    7|           660|
|  R|   C|    8|           920|
|  N|   C|    5|           420|
+---+----+-----+--------------+



## CASE WHEN

In [15]:
cereal = spark.sql(''' SELECT DISTINCT type FROM cereal ''')
cereal.show()

+----+
|type|
+----+
|   C|
+----+



In [16]:
cereal = spark.sql(''' SELECT mfr, type,
                              (CASE
                                WHEN type = 'C' then 'A'
                                WHEN type = 'H' then 'B'
                                else 'C' end) AS type_new,
                                -- comentários devem ser precedidos de dois tracinhos
                              count(*) AS total,
                              sum(calories) as total_calories
                              FROM cereal
                              GROUP BY mfr, type ''')
cereal.show()

+---+----+--------+-----+--------------+
|mfr|type|type_new|total|total_calories|
+---+----+--------+-----+--------------+
|  P|   C|       A|    9|           980|
|  K|   C|       A|   23|          2500|
|  G|   C|       A|   22|          2450|
|  Q|   C|       A|    7|           660|
|  R|   C|       A|    8|           920|
|  N|   C|       A|    5|           420|
+---+----+--------+-----+--------------+



# Consultas Avançadas em SQL usando o PySpark

In [17]:
df.show(5)

+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+
|                name|mfr|type|calories|protein|fat|sodium|fiber|carbo|sugars|potass|vitamins|shelf|weight|cups|   rating|
+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+
|           100% Bran|  N|   C|      70|      4|  1|   130| 10.0|  5.0|     6|   280|      25|    3|   1.0|0.33|68.402973|
|   100% Natural Bran|  Q|   C|     120|      3|  5|    15|  2.0|  8.0|     8|   135|       0|    3|   1.0| 1.0|33.983679|
|            All-Bran|  K|   C|      70|      4|  1|   260|  9.0|  7.0|     5|   320|      25|    3|   1.0|0.33|59.425505|
|All-Bran with Ext...|  K|   C|      50|      4|  0|   140| 14.0|  8.0|     0|   330|      25|    3|   1.0| 0.5|93.704912|
|      Almond Delight|  R|   C|     110|      2|  2|   200|  1.0| 14.0|     8|    -1|      25|    3|   1.0|0.75|34.384843|
+---------------

In [18]:
cereal = spark.sql('''
                  SELECT mfr,
                        type,
                        SUM(calories) AS sum_calories,
                        MIN(calories) AS min_calories,
                        MAX(calories) AS max_calories,
                        AVG(calories) AS avg_calories,
                        COUNT(distinct name) AS count_distinct_names,
                        COUNT(name) AS count_names
                  FROM cereal
                  GROUP BY mfr, type
                  ORDER BY mfr, type
                  ''')
cereal.show()

+---+----+------------+------------+------------+------------------+--------------------+-----------+
|mfr|type|sum_calories|min_calories|max_calories|      avg_calories|count_distinct_names|count_names|
+---+----+------------+------------+------------+------------------+--------------------+-----------+
|  G|   C|        2450|         100|         140|111.36363636363636|                  22|         22|
|  K|   C|        2500|          50|         160|108.69565217391305|                  23|         23|
|  N|   C|         420|          70|          90|              84.0|                   5|          5|
|  P|   C|         980|          90|         120|108.88888888888889|                   9|          9|
|  Q|   C|         660|          50|         120| 94.28571428571429|                   7|          7|
|  R|   C|         920|          90|         150|             115.0|                   8|          8|
+---+----+------------+------------+------------+------------------+--------------

In [19]:
cereal = spark.sql('''
                  SELECT mfr,
                        type,

                        SUM(calories) AS sum_calories,
                        MIN(calories) AS min_calories,
                        MAX(calories) AS max_calories,
                        cast (AVG(calories) AS decimal (10,2)) AS avg_calories,

                        SUM(carbo) AS sum_carbo,
                        MIN(carbo) AS min_carbo,
                        MAX(carbo) AS max_carbo,
                        cast (AVG(carbo) AS decimal (10,2)) AS avg_carbo,

                        SUM(vitamins) AS sum_vitamins,
                        MIN(vitamins) AS min_vitamins,
                        MAX(vitamins) AS max_vitamins,
                        cast (AVG(vitamins) AS decimal(10,2)) AS avg_vitamins,

                        COUNT(distinct name) AS count_distinct_names,
                        COUNT(name) AS count_names

                  FROM cereal
                  GROUP BY mfr, type
                  ORDER BY mfr, type
                  ''')
cereal.show()

+---+----+------------+------------+------------+------------+---------+---------+---------+---------+------------+------------+------------+------------+--------------------+-----------+
|mfr|type|sum_calories|min_calories|max_calories|avg_calories|sum_carbo|min_carbo|max_carbo|avg_carbo|sum_vitamins|min_vitamins|max_vitamins|avg_vitamins|count_distinct_names|count_names|
+---+----+------------+------------+------------+------------+---------+---------+---------+---------+------------+------------+------------+------------+--------------------+-----------+
|  G|   C|        2450|         100|         140|      111.36|    324.0|     10.5|     21.0|    14.73|         775|          25|         100|       35.23|                  22|         22|
|  K|   C|        2500|          50|         160|      108.70|    348.0|      7.0|     22.0|    15.13|         800|          25|         100|       34.78|                  23|         23|
|  N|   C|         420|          70|          90|       84.0

In [20]:
cereal = spark.sql('''
                  SELECT mfr,
                        type,
                        (CASE
                          WHEN mfr = 'G' then 'Abacaxi'
                          WHEN mfr = 'K' then 'Goiaba'
                          WHEN mfr = 'N' then 'Banana'
                          WHEN mfr = 'P' then 'Tomate'
                          WHEN mfr = 'Q' then 'Uva'
                          WHEN mfr = 'R' then 'Melancia'
                          ELSE 'NA'
                        END) as fruit_type,

                        SUM(calories) AS sum_calories,
                        MIN(calories) AS min_calories,
                        MAX(calories) AS max_calories,
                        cast (AVG(calories) AS decimal (10,2)) AS avg_calories,

                        SUM(carbo) AS sum_carbo,
                        MIN(carbo) AS min_carbo,
                        MAX(carbo) AS max_carbo,
                        cast (AVG(carbo) AS decimal (10,2)) AS avg_carbo,

                        SUM(vitamins) AS sum_vitamins,
                        MIN(vitamins) AS min_vitamins,
                        MAX(vitamins) AS max_vitamins,
                        cast (AVG(vitamins) AS decimal(10,2)) AS avg_vitamins,

                        COUNT(distinct name) AS count_distinct_names,
                        COUNT(name) AS count_names

                  FROM cereal
                  GROUP BY mfr, type
                  ORDER BY mfr, type
                  ''')
cereal.show()

+---+----+----------+------------+------------+------------+------------+---------+---------+---------+---------+------------+------------+------------+------------+--------------------+-----------+
|mfr|type|fruit_type|sum_calories|min_calories|max_calories|avg_calories|sum_carbo|min_carbo|max_carbo|avg_carbo|sum_vitamins|min_vitamins|max_vitamins|avg_vitamins|count_distinct_names|count_names|
+---+----+----------+------------+------------+------------+------------+---------+---------+---------+---------+------------+------------+------------+------------+--------------------+-----------+
|  G|   C|   Abacaxi|        2450|         100|         140|      111.36|    324.0|     10.5|     21.0|    14.73|         775|          25|         100|       35.23|                  22|         22|
|  K|   C|    Goiaba|        2500|          50|         160|      108.70|    348.0|      7.0|     22.0|    15.13|         800|          25|         100|       34.78|                  23|         23|
|  N|

## Joins

### Inner JOIN

In [22]:

sales = spark.read.csv('/content/drive/MyDrive/6. Estudos/1. FIAP/Fase 3 - Big Data/bases/sales_data_sample.csv', sep=',', inferSchema = True, header = True)
sales.show()

+-----------+---------------+---------+---------------+-------+---------------+-------+------+--------+-------+-----------+----+-----------+--------------------+----------------+--------------------+------------+-------------+--------+----------+---------+---------+---------------+----------------+--------+
|ORDERNUMBER|QUANTITYORDERED|PRICEEACH|ORDERLINENUMBER|  SALES|      ORDERDATE| STATUS|QTR_ID|MONTH_ID|YEAR_ID|PRODUCTLINE|MSRP|PRODUCTCODE|        CUSTOMERNAME|           PHONE|        ADDRESSLINE1|ADDRESSLINE2|         CITY|   STATE|POSTALCODE|  COUNTRY|TERRITORY|CONTACTLASTNAME|CONTACTFIRSTNAME|DEALSIZE|
+-----------+---------------+---------+---------------+-------+---------------+-------+------+--------+-------+-----------+----+-----------+--------------------+----------------+--------------------+------------+-------------+--------+----------+---------+---------+---------------+----------------+--------+
|      10107|             30|     95.7|              2| 2871.0| 2/24/2003

In [24]:
sales.createOrReplaceTempView('sales')

In [26]:
sales.printSchema()

root
 |-- ORDERNUMBER: integer (nullable = true)
 |-- QUANTITYORDERED: integer (nullable = true)
 |-- PRICEEACH: double (nullable = true)
 |-- ORDERLINENUMBER: integer (nullable = true)
 |-- SALES: double (nullable = true)
 |-- ORDERDATE: string (nullable = true)
 |-- STATUS: string (nullable = true)
 |-- QTR_ID: integer (nullable = true)
 |-- MONTH_ID: integer (nullable = true)
 |-- YEAR_ID: integer (nullable = true)
 |-- PRODUCTLINE: string (nullable = true)
 |-- MSRP: integer (nullable = true)
 |-- PRODUCTCODE: string (nullable = true)
 |-- CUSTOMERNAME: string (nullable = true)
 |-- PHONE: string (nullable = true)
 |-- ADDRESSLINE1: string (nullable = true)
 |-- ADDRESSLINE2: string (nullable = true)
 |-- CITY: string (nullable = true)
 |-- STATE: string (nullable = true)
 |-- POSTALCODE: string (nullable = true)
 |-- COUNTRY: string (nullable = true)
 |-- TERRITORY: string (nullable = true)
 |-- CONTACTLASTNAME: string (nullable = true)
 |-- CONTACTFIRSTNAME: string (nullable = tr

In [40]:
calendar = spark.sql('''
                SELECT DISTINCT orderdate, qtr_id, month_id, year_id
                FROM sales
                ORDER BY orderdate
                ''')

sales_data = spark.sql('''
                  SELECT DISTINCT ORDERNUMBER,
                                  CUSTOMERNAME,
                                  ORDERDATE,
                                  SALES,
                                  QUANTITYORDERED,
                                  PRODUCTCODE,
                                  ORDERLINENUMBER,
                                  PRICEEACH
                  FROM sales
                  ORDER BY ORDERNUMBER
                  ''')

customers = spark.sql('''
                  SELECT DISTINCT CUSTOMERNAME,
                                  PHONE,
                                  ADDRESSLINE1,
                                  ADDRESSLINE2,
                                  CITY,
                                  STATE,
                                  POSTALCODE,
                                  COUNTRY,
                                  TERRITORY

                  FROM sales
                  ORDER BY CUSTOMERNAME
                  ''')

sales_data.createOrReplaceTempView('sales_data')
calendar.createOrReplaceTempView('calendar')
customers.createOrReplaceTempView('customers')

In [41]:
calendar.count()

252

In [42]:
sales_data.count()

2823

In [43]:
customers.count()

92

In [44]:
calendar.show()

+--------------+------+--------+-------+
|     orderdate|qtr_id|month_id|year_id|
+--------------+------+--------+-------+
|1/10/2003 0:00|     1|       1|   2003|
|1/10/2005 0:00|     1|       1|   2005|
|1/12/2004 0:00|     1|       1|   2004|
|1/12/2005 0:00|     1|       1|   2005|
|1/15/2004 0:00|     1|       1|   2004|
|1/16/2004 0:00|     1|       1|   2004|
|1/19/2005 0:00|     1|       1|   2005|
| 1/2/2004 0:00|     1|       1|   2004|
|1/20/2005 0:00|     1|       1|   2005|
|1/22/2004 0:00|     1|       1|   2004|
|1/23/2005 0:00|     1|       1|   2005|
|1/26/2004 0:00|     1|       1|   2004|
|1/26/2005 0:00|     1|       1|   2005|
|1/29/2003 0:00|     1|       1|   2003|
|1/29/2004 0:00|     1|       1|   2004|
|1/31/2003 0:00|     1|       1|   2003|
|1/31/2005 0:00|     1|       1|   2005|
| 1/5/2005 0:00|     1|       1|   2005|
| 1/6/2003 0:00|     1|       1|   2003|
| 1/6/2005 0:00|     1|       1|   2005|
+--------------+------+--------+-------+
only showing top

In [45]:
sales_data.show()

+-----------+--------------------+--------------+-------+---------------+-----------+---------------+---------+
|ORDERNUMBER|        CUSTOMERNAME|     ORDERDATE|  SALES|QUANTITYORDERED|PRODUCTCODE|ORDERLINENUMBER|PRICEEACH|
+-----------+--------------------+--------------+-------+---------------+-----------+---------------+---------+
|      10100|Online Diecast Cr...| 1/6/2003 0:00| 3390.0|             50|   S18_2248|              2|     67.8|
|      10100|Online Diecast Cr...| 1/6/2003 0:00| 5151.0|             30|   S18_1749|              3|    100.0|
|      10100|Online Diecast Cr...| 1/6/2003 0:00|1689.03|             49|   S24_3969|              1|    34.47|
|      10100|Online Diecast Cr...| 1/6/2003 0:00|1903.22|             22|   S18_4409|              4|    86.51|
|      10101|Blauer See Auto, Co.| 1/9/2003 0:00| 1404.0|             45|   S24_1937|              3|     31.2|
|      10101|Blauer See Auto, Co.| 1/9/2003 0:00| 3782.0|             25|   S18_2325|              4|   

In [46]:
customers.show()

+--------------------+-----------------+--------------------+------------+--------------+----------+----------+---------+---------+
|        CUSTOMERNAME|            PHONE|        ADDRESSLINE1|ADDRESSLINE2|          CITY|     STATE|POSTALCODE|  COUNTRY|TERRITORY|
+--------------------+-----------------+--------------------+------------+--------------+----------+----------+---------+---------+
|      AV Stores, Co.|   (171) 555-1555|   Fauntleroy Circus|        NULL|    Manchester|      NULL|   EC2 5NT|       UK|     EMEA|
|        Alpha Cognac|       61.77.6555|1 rue Alsace-Lorr...|        NULL|      Toulouse|      NULL|     31000|   France|     EMEA|
|  Amica Models & Co.|      011-4988555| Via Monte Bianco 34|        NULL|        Torino|      NULL|     10100|    Italy|     EMEA|
|Anna's Decoration...|     02 9936 8555|   201 Miller Street|    Level 15|  North Sydney|       NSW|      2060|Australia|     APAC|
|   Atelier graphique|       40.32.2555|      54, rue Royale|        NULL|  

In [50]:
master = spark.sql('''
                  SELECT *
                  FROM sales_data s
                  INNER JOIN customers c ON s.CUSTOMERNAME = c.CUSTOMERNAME
                  ''')

master.show()

+-----------+--------------------+---------------+-------+---------------+-----------+---------------+---------+--------------------+---------------+--------------------+------------+-----------+-----+----------+---------+---------+
|ORDERNUMBER|        CUSTOMERNAME|      ORDERDATE|  SALES|QUANTITYORDERED|PRODUCTCODE|ORDERLINENUMBER|PRICEEACH|        CUSTOMERNAME|          PHONE|        ADDRESSLINE1|ADDRESSLINE2|       CITY|STATE|POSTALCODE|  COUNTRY|TERRITORY|
+-----------+--------------------+---------------+-------+---------------+-----------+---------------+---------+--------------------+---------------+--------------------+------------+-----------+-----+----------+---------+---------+
|      10275|   La Rochelle Gifts| 7/23/2004 0:00|4177.35|             45|   S10_1678|              1|    92.83|   La Rochelle Gifts|     40.67.8555|67, rue des Cinqu...|        NULL|     Nantes| NULL|     44000|   France|     EMEA|
|      10291|Scandinavian Gift...|  9/8/2004 0:00|7136.19|          

In [52]:
master_city = spark.sql('''
                  SELECT DISTINCT s.ORDERNUMBER, c.CITY
                  FROM sales_data s
                  INNER JOIN customers c ON s.CUSTOMERNAME = c.CUSTOMERNAME
                  ''')

master_city.show()

+-----------+-------------+
|ORDERNUMBER|         CITY|
+-----------+-------------+
|      10300|    Frankfurt|
|      10385|   San Rafael|
|      10241|   Strasbourg|
|      10182|   San Rafael|
|      10140|   Burlingame|
|      10153|       Madrid|
|      10293|       Torino|
|      10161|      Aaarhus|
|      10406|    Kobenhavn|
|      10414|       Boston|
|      10311|       Madrid|
|      10357|   San Rafael|
|      10195| White Plains|
|      10189|     Pasadena|
|      10422|    Allentown|
|      10111|San Francisco|
|      10204|          NYC|
|      10151|         Oulu|
|      10304|   Versailles|
|      10369|   Brickhaven|
+-----------+-------------+
only showing top 20 rows

