In [1]:
from pyspark.sql import SparkSession
# Obtain the SparkSession for this notebook: getOrCreate() returns the
# already-active session if there is one, otherwise it builds a new one
# with the application name given below.
spark = (
    SparkSession.builder
    .appName("DataframSql")
    .getOrCreate()
)

In [2]:
from pyspark.sql.types import *

# Column names, in the same order as the fields of each tuple in `data`.
# NOTE(review): height/weight are presumably cm/kg — confirm with the author.
columns = ["name", "gender", "age", "height", "weight"]

# Small in-memory sample: one tuple per person.
data = [
    ("Giuseppe", "M", 23, 174, 70.5),
    ("Antonio", "M", 25, 179, 68.0),
    ("Lorenzo", "M", 33, 172, 88.5),
    ("Luisa", "F", 48, 155, 50.2),
    ("Margheria", "F", 35, 165, 54.3),
]

# Only column names are supplied, so Spark infers the column types
# from the Python values.
df = spark.createDataFrame(data, schema=columns)
df.show()

+---------+------+---+------+------+
|     name|gender|age|height|weight|
+---------+------+---+------+------+
| Giuseppe|     M| 23|   174|  70.5|
|  Antonio|     M| 25|   179|  68.0|
|  Lorenzo|     M| 33|   172|  88.5|
|    Luisa|     F| 48|   155|  50.2|
|Margheria|     F| 35|   165|  54.3|
+---------+------+---+------+------+



In [3]:
# Register `df` under the name "people" so it can be queried via spark.sql().
# createOrReplaceTempView is used rather than createTempView because the
# latter raises when a view called "people" already exists in the session,
# which makes this cell fail whenever it is re-run (or run after another
# cell has already registered the view).
df.createOrReplaceTempView("people")

In [4]:
# (Re-)register `df` as the temporary view "people"; if a view with that
# name already exists it is replaced, so this cell can be run repeatedly.
df.createOrReplaceTempView(name="people")

In [5]:
# spark.sql() returns a DataFrame; keep it in `df_sql` and print its rows.
# The query projects only the `name` column of the "people" view.
df_sql = spark.sql(
    "SELECT name FROM people"
)
df_sql.show()

+---------+
|     name|
+---------+
| Giuseppe|
|  Antonio|
|  Lorenzo|
|    Luisa|
|Margheria|
+---------+



In [6]:
# Name and height of the rows whose gender is 'M'.
(
    spark
    .sql("Select name, height FROM people where gender='M'")
    .show()
)

+--------+------+
|    name|height|
+--------+------+
|Giuseppe|   174|
| Antonio|   179|
| Lorenzo|   172|
+--------+------+



In [7]:
# All columns, sorted by weight (ORDER BY defaults to ascending).
(
    spark
    .sql("SELECT * FROM people ORDER BY weight")
    .show()
)

+---------+------+---+------+------+
|     name|gender|age|height|weight|
+---------+------+---+------+------+
|    Luisa|     F| 48|   155|  50.2|
|Margheria|     F| 35|   165|  54.3|
|  Antonio|     M| 25|   179|  68.0|
| Giuseppe|     M| 23|   174|  70.5|
|  Lorenzo|     M| 33|   172|  88.5|
+---------+------+---+------+------+



In [8]:
# All columns, sorted by weight from highest to lowest.
(
    spark
    .sql("SELECT * FROM people ORDER BY weight DESC")
    .show()
)

+---------+------+---+------+------+
|     name|gender|age|height|weight|
+---------+------+---+------+------+
|  Lorenzo|     M| 33|   172|  88.5|
| Giuseppe|     M| 23|   174|  70.5|
|  Antonio|     M| 25|   179|  68.0|
|Margheria|     F| 35|   165|  54.3|
|    Luisa|     F| 48|   155|  50.2|
+---------+------+---+------+------+



In [10]:
# The three rows with the lowest weight: sort ascending, keep the first 3.
(
    spark
    .sql("SELECT * FROM people ORDER BY weight LIMIT 3")
    .show()
)

+---------+------+---+------+------+
|     name|gender|age|height|weight|
+---------+------+---+------+------+
|    Luisa|     F| 48|   155|  50.2|
|Margheria|     F| 35|   165|  54.3|
|  Antonio|     M| 25|   179|  68.0|
+---------+------+---+------+------+



In [11]:
# Number of rows with gender 'M' and height above 175. No alias is given,
# so the result column gets Spark's generated name.
(
    spark
    .sql("SELECT COUNT(*) FROM people WHERE gender = 'M' AND height > 175")
    .show()
)

+--------+
|count(1)|
+--------+
|       1|
+--------+



In [12]:
# Same count as the un-aliased query, but the result column is named
# `counter` through an AS alias.
(
    spark
    .sql("SELECT COUNT(*) AS counter FROM people WHERE gender = 'M' AND height > 175")
    .show()
)

+-------+
|counter|
+-------+
|      1|
+-------+



In [15]:
# Mean weight over the rows with gender 'M', exposed as `avg_weight`.
(
    spark
    .sql("SELECT avg(weight) AS avg_weight FROM people WHERE gender = 'M'")
    .show()
)

+-----------------+
|       avg_weight|
+-----------------+
|75.66666666666667|
+-----------------+



In [16]:
# Highest and lowest weight among the rows with gender 'M', returned as a
# single row with columns `max_weight` and `min_weight`.
(
    spark
    .sql("SELECT max(weight) AS max_weight, min(weight) AS min_weight FROM people WHERE gender = 'M'")
    .show()
)

+----------+----------+
|max_weight|min_weight|
+----------+----------+
|      88.5|      68.0|
+----------+----------+



In [18]:
# Per-gender weight statistics: average rounded to 2 decimals, maximum and
# minimum. One output row per distinct gender value (GROUP BY gender).
(
    spark
    .sql("SELECT gender, round(avg(weight), 2), max(weight), min(weight)  FROM people GROUP BY gender")
    .show()
)

+------+---------------------+-----------+-----------+
|gender|round(avg(weight), 2)|max(weight)|min(weight)|
+------+---------------------+-----------+-----------+
|     F|                52.25|       54.3|       50.2|
|     M|                75.67|       88.5|       68.0|
+------+---------------------+-----------+-----------+



In [23]:
from pyspark.sql import DataFrameWriter

# Save the contents of `df` as the table "people_perm". The 'overwrite'
# save mode replaces any existing data for that table, so the cell can be
# re-run without failing on an already-existing table.
(
    df.write
    .mode('overwrite')
    .saveAsTable("people_perm")
)

In [24]:
# Read back every row of the saved "people_perm" table. There is no
# ORDER BY, so the row order is whatever Spark returns.
(
    spark
    .sql("SELECT * FROM people_perm")
    .show()
)

+---------+------+---+------+------+
|     name|gender|age|height|weight|
+---------+------+---+------+------+
|    Luisa|     F| 48|   155|  50.2|
|Margheria|     F| 35|   165|  54.3|
| Giuseppe|     M| 23|   174|  70.5|
|  Antonio|     M| 25|   179|  68.0|
|  Lorenzo|     M| 33|   172|  88.5|
+---------+------+---+------+------+

