In [1]:
from pyspark.sql import SparkSession

In [2]:
# Get (or create) a Spark session running on the 'local' master for this notebook.
spSession = (
    SparkSession.builder
    .master('local')
    .appName('appSparkSql')
    .getOrCreate()
)

In [3]:
# Read the employees JSON dataset into a DataFrame.
dfEmployees = spSession.read.json(
    path='aux/datasets/employees.json',
)

In [4]:
# Preview the first five rows of the DataFrame.
dfEmployees.show(n=5)

+---+------+------+----------------+------+
|age|gender|id_dep|            name|salary|
+---+------+------+----------------+------+
| 42|     M|     1|  Gilmar Rezende|  5100|
| 50|     M|     2|  Matias Tavares|  8500|
| 36|     M|     1|   Paulo Miranda|  9700|
| 41|     F|     1|Ana Paula Soares|  9500|
| 34|     F|     2|   Carolina Maia|  6500|
+---+------+------+----------------+------+



**Registering the dataframe as a temporary table**

In [5]:
# Make the DataFrame queryable from Spark SQL under the name 'tt_employees'.
dfEmployees.createOrReplaceTempView(name='tt_employees')

In [6]:
# Query the temporary view with plain SQL and print the matching rows.
(
    spSession
    .sql("SELECT * FROM tt_employees WHERE salary = 9700")
    .show()
)

+---+------+------+-------------+------+
|age|gender|id_dep|         name|salary|
+---+------+------+-------------+------+
| 36|     M|     1|Paulo Miranda|  9700|
+---+------+------+-------------+------+



**Creating a temporary table**

In [7]:
# Register the DataFrame under the name 'tt_employees' through the DataFrame API.
# This avoids relying on a `sqlContext` variable, which no cell in this notebook
# creates, and on the deprecated SQLContext.registerDataFrameAsTable().
dfEmployees.createOrReplaceTempView('tt_employees')

*type(tt_employees)* will raise an error, because `tt_employees` is not a Python object: it is only the name of a temporary table that Spark keeps in memory.

**Persisting the temporary table**

In [8]:
# Look the temporary table up by name and bind the result to a Python variable.
dfEmployeesTT = spSession.table(tableName='tt_employees')

In [9]:
# Check what kind of Python object spSession.table() handed back.
type(dfEmployeesTT)

pyspark.sql.dataframe.DataFrame

**Comparing the dataframe with the persisted temporary table**

In [10]:
# Sort both collected row lists so the comparison does not depend on row order.
(
    sorted(dfEmployees.collect())
    == sorted(dfEmployeesTT.collect())
)

True

**Filtering a dataframe created from a temporary table**

In [11]:
# (Re-)register the DataFrame as the temporary view 'tt_employees'.
# The DataFrame method is used instead of sqlContext.registerDataFrameAsTable()
# because no cell in this notebook defines `sqlContext`, and that API is deprecated.
dfEmployees.createOrReplaceTempView('tt_employees')

In [12]:
# Fetch the temporary table by name as a DataFrame.
dfEmployeesTT = spSession.table(tableName='tt_employees')

In [13]:
# Keep only the rows where age is 42 and return the first one.
(
    dfEmployeesTT
    .filter('age = 42')
    .first()
)

Row(age='42', gender='M', id_dep='1', name='Gilmar Rezende', salary='5100')

**Dropping the temporary table**

In [14]:
# Remove the temporary view through the session's catalog, so the cell does not
# depend on a `sqlContext` variable that no cell in this notebook creates.
# NOTE(review): on recent Spark versions dropTempView returns a bool, which the
# notebook will display as this cell's output - confirm that is acceptable.
spSession.catalog.dropTempView('tt_employees')