In [1]:
import os

from pyspark.sql import SparkSession

# MongoDB endpoint used for both the connector's input and output URIs.
# It can be overridden with the MONGO_URI environment variable; the default is
# the address that was previously hard-coded here, so behaviour is unchanged
# when the variable is not set.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://172.17.0.2:27017")

# Build (or reuse, via getOrCreate) a local SparkSession with Hive support
# enabled and the MongoDB Spark connector requested through spark.jars.packages.
spark = (
    SparkSession.builder
    .appName("myapp")
    .master("local")
    .config("spark.executor.memory", "1g")
    .config("spark.mongodb.input.uri", MONGO_URI)
    .config("spark.mongodb.output.uri", MONGO_URI)
    .config("spark.jars.packages", "org.mongodb.spark:mongo-spark-connector_2.12:3.0.0")
    .enableHiveSupport()
    .getOrCreate()
)

In [2]:
# Echo a handful of effective settings from the running SparkContext so the
# session configuration can be checked at a glance.
conf = spark.sparkContext.getConf()
for setting in (
    "spark.app.name",
    "spark.master",
    "spark.executor.memory",
    "spark.sql.warehouse.dir",
    "spark.sql.catalogImplementation",
):
    # Same "# <key> = " label as before; print() adds the separating space.
    print(f"# {setting} = ", conf.get(setting))

# spark.app.name =  myapp
# spark.master =  local
# spark.executor.memory =  1g
# spark.sql.warehouse.dir =  file:/home/jovyan/HiveMetastore/spark-warehouse
# spark.sql.catalogImplementation =  hive


In [3]:
# Load the "products" collection of the "test" database through the "mongo"
# data source into a DataFrame.
df = (
    spark.read
    .format("mongo")
    .option("database", "test")
    .option("collection", "products")
    .load()
)

In [4]:
# Persist the MongoDB-sourced DataFrame as Parquet files.
# The format is stated explicitly so it matches the `USING parquet` clause of the
# external table created in the next cell, and the path is the same absolute
# location that table points at, so the write no longer depends on the kernel's
# working directory.
df.write.format("parquet").mode("overwrite").save("/home/jovyan/HiveMetastore/products_new")

In [5]:
# Register the Parquet files under products_new as the table external_products.
# IF NOT EXISTS keeps this cell re-runnable when the table is already present in
# the metastore (for example after a partial run that never reached the DROP).
spark.sql(
    "CREATE EXTERNAL TABLE IF NOT EXISTS external_products "
    "USING parquet LOCATION '/home/jovyan/HiveMetastore/products_new'"
)

DataFrame[]

In [6]:
# Preview the rows exposed through the external table.
external_products_rows = spark.sql("SELECT * FROM external_products")
external_products_rows.show()

+---------+--------+--------------------+---------+--------------------+-------------+------------+-------------+--------------------+
|ListPrice|MakeFlag|           ModelName|ProductID|         ProductName|ProductNumber|StandardCost|SubCategoryID|                 _id|
+---------+--------+--------------------+---------+--------------------+-------------+------------+-------------+--------------------+
|   1431.5|       1|       HL Road Frame|      680|HL Road Frame - B...|   FR-R92B-58|     1059.31|           14|{6459758565a5eedc...|
|   1431.5|       1|       HL Road Frame|      706|HL Road Frame - R...|   FR-R92R-58|     1059.31|           14|{6459758565a5eedc...|
|    34.99|       0|           Sport-100|      707|Sport-100 Helmet,...|    HL-U509-R|     13.0863|           31|{6459758565a5eedc...|
|    34.99|       0|           Sport-100|      708|Sport-100 Helmet,...|      HL-U509|     13.0863|           31|{6459758565a5eedc...|
|      9.5|       0| Mountain Bike Socks|      709|Moun

In [7]:
# Create the ModelNames table that receives a two-column projection of the products.
# IF NOT EXISTS makes the cell safe under Restart & Run All: the table is never
# dropped in this notebook, so a plain CREATE TABLE fails on the second run.
# NOTE(review): ListPrice is declared INT, so fractional source prices end up
# truncated (the outputs show 1431.5 stored as 1431) — confirm this is intended.
spark.sql("CREATE TABLE IF NOT EXISTS ModelNames (ModelName string, ListPrice INT)")

DataFrame[]

In [8]:
# Replace the contents of ModelNames with the ModelName and ListPrice columns
# selected from external_products.
insert_stmt = (
    "INSERT OVERWRITE TABLE ModelNames "
    "SELECT ModelName, ListPrice FROM external_products"
)
spark.sql(insert_stmt)

DataFrame[]

In [9]:
# Preview the rows now stored in ModelNames.
model_names_rows = spark.sql("SELECT * FROM ModelNames")
model_names_rows.show()

+--------------------+---------+
|           ModelName|ListPrice|
+--------------------+---------+
|       HL Road Frame|     1431|
|       HL Road Frame|     1431|
|           Sport-100|       34|
|           Sport-100|       34|
| Mountain Bike Socks|        9|
| Mountain Bike Socks|        9|
|           Sport-100|       34|
|         Cycling Cap|        8|
|Long-Sleeve Logo ...|       49|
|Long-Sleeve Logo ...|       49|
|Long-Sleeve Logo ...|       49|
|       HL Road Frame|     1431|
|       HL Road Frame|     1431|
|       HL Road Frame|     1431|
|       HL Road Frame|     1431|
|       HL Road Frame|     1431|
|Long-Sleeve Logo ...|       49|
|       LL Road Frame|      337|
|       LL Road Frame|      337|
|       LL Road Frame|      337|
+--------------------+---------+
only showing top 20 rows



In [10]:
# Remove external_products from the catalog. The following cell queries
# ModelNames again to show it still returns its rows after this drop.
# NOTE(review): for an external table the files at its LOCATION are expected to
# be left in place — confirm.
drop_stmt = "DROP TABLE external_products"
spark.sql(drop_stmt)

DataFrame[]

In [11]:
# Query ModelNames once more, now that external_products has been dropped.
model_names_after_drop = spark.sql("SELECT * FROM ModelNames")
model_names_after_drop.show()

+--------------------+---------+
|           ModelName|ListPrice|
+--------------------+---------+
|       HL Road Frame|     1431|
|       HL Road Frame|     1431|
|           Sport-100|       34|
|           Sport-100|       34|
| Mountain Bike Socks|        9|
| Mountain Bike Socks|        9|
|           Sport-100|       34|
|         Cycling Cap|        8|
|Long-Sleeve Logo ...|       49|
|Long-Sleeve Logo ...|       49|
|Long-Sleeve Logo ...|       49|
|       HL Road Frame|     1431|
|       HL Road Frame|     1431|
|       HL Road Frame|     1431|
|       HL Road Frame|     1431|
|       HL Road Frame|     1431|
|Long-Sleeve Logo ...|       49|
|       LL Road Frame|      337|
|       LL Road Frame|      337|
|       LL Road Frame|      337|
+--------------------+---------+
only showing top 20 rows

