In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

# spark = SparkSession.builder.master('local[2]').getOrCreate()

from pyspark.sql import Row

# Root HDFS directory under which managed databases and tables are stored.
warehouse_location = 'hdfs://hdfs-nn:9000/warehouse'

# Build (or reuse, via getOrCreate) a Hive-enabled session that points at the
# warehouse directory above and at the Thrift metastore endpoint.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL Hive integration example")
    .config("spark.sql.warehouse.dir", warehouse_location)
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    .enableHiveSupport()
    .getOrCreate()
)

In [2]:
# List the tables registered in the tabd_db database.
show_tables_query = """
    SHOW TABLES FROM tabd_db
    """
spark.sql(show_tables_query).show()

+--------+--------------------+-----------+
|database|           tableName|isTemporary|
+--------+--------------------+-----------+
| tabd_db|dailyinternetusag...|      false|
| tabd_db|globalinternetusa...|      false|
| tabd_db|socialmediausebyp...|      false|
| tabd_db|socialmediausebyt...|      false|
| tabd_db|  socialnetworkusers|      false|
+--------+--------------------+-----------+



In [3]:
# Preview the rows of the "by type / internet advertising" table.
type_advertising_query = """
    SELECT *
    FROM tabd_db.SocialMediaUseByTypeInternetAdvertising
    
    """
spark.sql(type_advertising_query).show()

+----+--------------------+----------+--------------------+-------+
|year|              region|percentage|    advertising_type|purpose|
+----+--------------------+----------+--------------------+-------+
|2013|European Union fr...|         0|Pay to advertise ...|   null|
|2013|European Union fr...|         0|Enterprises using...|   null|
|2013|European Union fr...|         0|Have a website an...|   null|
|2013|European Union fr...|         0|Have web sales to...|   null|
|2013|European Union fr...|         0|Use social networ...|   null|
|2013|European Union fr...|         0|Use enterprise bl...|   null|
|2013|European Union fr...|         0|Use multimedia co...|   null|
|2013|European Union fr...|         0|Use wiki based kn...|   null|
|2013|European Union fr...|         0|Use any social media|   null|
|2013|European Union fr...|         0|Use only one type...|   null|
|2013|European Union fr...|         0|Use two or more s...|   null|
|2013|European Union fr...|         0|Have a Web

In [4]:
# Preview the rows of the "by purpose" table.
by_purpose_query = """
    SELECT *
    FROM tabd_db.SocialMediaUseByPurpose
    """
spark.sql(by_purpose_query).show()

+----+--------------------+----------+----------------+--------------------+
|year|              region|percentage|advertising_type|             purpose|
+----+--------------------+----------+----------------+--------------------+
|2013|European Union fr...|         5|            null|Website has onlin...|
|2015|European Union fr...|         0|            null|Website has onlin...|
|2017|European Union fr...|         0|            null|Website has onlin...|
|2019|European Union fr...|         0|            null|Website has onlin...|
|2013|European Union fr...|        21|            null|Develop the enter...|
|2015|European Union fr...|        29|            null|Develop the enter...|
|2017|European Union fr...|        38|            null|Develop the enter...|
|2019|European Union fr...|        43|            null|Develop the enter...|
|2013|European Union fr...|        14|            null|Obtain or respond...|
|2015|European Union fr...|        19|            null|Obtain or respond...|

In [9]:
# Combine the two social-media survey tables into a single de-duplicated
# DataFrame. Both SELECTs project the same five columns in the same order, so
# the result sets can be stacked directly.
# UNION (unlike UNION ALL) removes duplicate rows, so no wrapping subquery with
# a GROUP BY over every column is needed to de-duplicate the result.
EnterpriseSocialMediaUse = spark.sql(
    """
    SELECT year, region, percentage, advertising_type, purpose
    FROM tabd_db.SocialMediaUseByTypeInternetAdvertising

    UNION

    SELECT year, region, percentage, advertising_type, purpose
    FROM tabd_db.SocialMediaUseByPurpose
    """
)

In [10]:
# Inspect the combined result as a pandas DataFrame.
# Reuse the DataFrame built in the previous cell instead of repeating the same
# SQL text, so the displayed data and the data written out later cannot drift.
EnterpriseSocialMediaUse.toPandas()

Unnamed: 0,year,region,percentage,advertising_type,purpose
0,2017,European Union from 2020,0,"Website has online ordering, reservation or bo...",
1,2014,European Union from 2007 to 2013,25,Pay to advertise on the internet,
2,2016,European Union from 2007 to 2013,45,Use any social media,
3,2019,Bulgaria,0,Have web sales to private consumers (B2C) and ...,
4,2019,Bulgaria,9,Use multimedia content sharing websites (YouTu...,
...,...,...,...,...,...
6395,2019,Greece,41,,Develop the enterprise's image or market products
6396,2019,Italy,24,,"Obtain or respond to customer opinions, review..."
6397,2015,Italy,0,,Use only one type of social medium for more th...
6398,2019,Cyprus,0,,"Website has online ordering, reservation or bo..."


In [11]:
# Persist the combined DataFrame as Parquet, overwriting whatever already
# exists at the target directory. The path is derived from warehouse_location
# so the warehouse root is defined in exactly one place.
# NOTE(review): the directory is "tabd.db" while the database queried earlier
# is "tabd_db" — confirm this is the intended location.
enterprise_output_path = warehouse_location + "/tabd.db/EnterpriseSocialMediaUse/"

(
    EnterpriseSocialMediaUse.write
    .format("parquet")
    .mode("overwrite")
    .save(enterprise_output_path)
)