##### Create Delta Tables

In [2]:
# Country code used as the suffix of the source table name, the Delta path
# and the Hive table name built further down in this notebook.
countryCode = "ch"

In [3]:
from pyspark.sql import functions as f

def _blank_if_null(column_name):
    """Return a Column giving the named column's value, or '' when it is null."""
    source = f.col(column_name)
    return f.when(source.isNotNull(), source).otherwise(f.lit(''))


# Read the per-country operators table, derive a single address column and
# keep only the columns needed downstream (renamed with an "Ohub" suffix).
#
# addressOhub = street + ' ' + housenumber + houseNumberExtension, where each
# null part is replaced by an empty string so that one missing part does not
# turn the whole concatenation into null. The separator space is always
# emitted, even when street or housenumber is missing.
dfSSOL = (
    spark.table('data_user_tim.operators_ssol_' + countryCode)
    .withColumn(
        'addressOhub',
        f.concat(
            _blank_if_null('street'),
            f.lit(' '),
            _blank_if_null('housenumber'),
            _blank_if_null('houseNumberExtension'),
        ),
    )
    .select(
        'countryCode',
        'operatorOhubId',
        'operatorConcatID',
        f.col('name').alias('nameOhub'),
        'addressOhub',
        f.col('zipCode').alias('zipCodeOhub'),
        f.col('city').alias('cityOhub'),
        f.col('channel').alias('channelOhub'),
    )
)

# Print the first rows as a quick visual check of the result.
dfSSOL.show()

In [4]:
# DBFS path under which the Delta Table for this country is saved.
deltaTable = "".join(["/mnt/datamodel/dev/sources/ohub/cleaned/operators_ssol_", countryCode])

# Qualified table name under which the Delta Table is listed in the
# Databricks database menu.
hiveTable = "".join(["dev_sources_ohub.cleaned_operators_ssol_", countryCode])

In [5]:
# Save the DataFrame in Delta format at the DBFS location. "overwrite" mode
# replaces whatever is already stored there, and the overwriteSchema option
# is set so the stored schema may be replaced as well.
(
    dfSSOL.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .save(deltaTable)
)


In [6]:
# Expose the Delta Table in the Data interface within Databricks:
# first drop any table already registered under this name ...
sqlQuery1 = "".join(["drop table if exists ", hiveTable])
spark.sql(sqlQuery1)

# ... then create the table on top of the Delta files written above.
sqlQuery2 = "".join(["create table ", hiveTable, " using delta location ", "'", deltaTable, "'"])
spark.sql(sqlQuery2)

In [7]:
# A Delta Table may consist of a lot of small files, which slows down reads.
# OPTIMIZE collapses small files into larger ones to improve the speed of
# read queries.
sqlQuery3 = "".join(["optimize ", hiveTable])
spark.sql(sqlQuery3)
