In [1]:
# Extra Hadoop settings handed to dbutils.fs.mount(): authenticate with a
# custom access token whose provider class name is read from the cluster's
# Spark configuration.
configs = dict([
  ("fs.azure.account.auth.type", "CustomAccessToken"),
  (
    "fs.azure.account.custom.token.provider.class",
    spark.conf.get("spark.databricks.passthrough.adls.gen2.tokenProviderClassName"),
  ),
])

In [2]:
# Containers ("zones") of the lake that the mount helpers iterate over.
filesystems = ["bronze", "silver", "gold"]
# Storage account name used to build the abfss:// source URL.
lakeName = "prodanalytics1"
# Path inside the container to mount; an empty string mounts the container root.
subDirectory = ""

In [3]:
def Diff(li1, li2):
  """Return the distinct items of ``li1`` that do not occur in ``li2``.

  The result keeps the order in which the items first appear in ``li1``, so
  repeated calls with the same arguments always give the same list (a plain
  set difference yields an arbitrary order). Items must be hashable.
  """
  excluded = set(li2)
  # dict.fromkeys drops duplicates while preserving first-seen order.
  return list(dict.fromkeys(item for item in li1 if item not in excluded))

def makefilter(mntName):
  """Build a predicate that recognises the mount entry for ``mntName``.

  The returned function takes an object exposing a ``mountPoint`` attribute
  and returns True when that attribute equals ``'/mnt/<mntName>'``.
  """
  def matchesMount(mountEntry):
    actualPoint = mountEntry.mountPoint
    expectedPoint = '/mnt/%s' % (mntName)
    return actualPoint == expectedPoint
  return matchesMount

def getMountedZones():
  """Return the entries of ``filesystems`` that are currently mounted.

  A zone counts as mounted when exactly one entry of ``dbutils.fs.mounts()``
  has the mount point ``'/mnt/<zone>'`` (the check done by ``makefilter``).

  NOTE(review): ``mount()`` in this notebook creates ``'/mnt/Test_<zone>'``
  mount points, which this check does not recognise - confirm which naming
  scheme is intended.
  """
  # The mount table does not depend on the zone, so fetch it once instead of
  # calling dbutils for every iteration.
  currentMounts = list(dbutils.fs.mounts())
  mounted = []
  for mnt in filesystems:
    matches = list(filter(makefilter(mnt), currentMounts))
    if len(matches) == 1:
      mounted.append(mnt)
  return mounted

def mount(filesystem):
  """Mount one lake container under ``/mnt/Test_<filesystem>``.

  The source is ``abfss://<filesystem>@<lakeName>.dfs.core.windows.net/<subDirectory>``
  built from the notebook-level ``lakeName`` and ``subDirectory`` values, and
  ``configs`` is passed as the extra mount configuration. No check is made
  for an already existing mount point.
  """
  mountPoint = "/mnt/Test_%s" % (filesystem)
  dbutils.fs.mount(
    source = "abfss://%s@%s.dfs.core.windows.net/%s" % (filesystem, lakeName, subDirectory),
    mount_point = mountPoint,
    extra_configs = configs
  )
  # Report the mount point that was actually used (the old message omitted
  # the "Test_" prefix and pointed at a path that was never mounted).
  print('Mounted %s' % (mountPoint))

#Diff(filesystems, getMountedZones())


In [4]:
def MountFileSystem ( lakeName, fileSystem, mountPoint = None ) :
  """(Re)mount a lake container, replacing any existing mount at the target.

  Parameters:
    lakeName:   storage account name used in the abfss:// source URL.
    fileSystem: container name; also used in the source URL.
    mountPoint: DBFS path to mount at. Defaults to ``/mnt/<fileSystem>``,
                which is the behaviour callers got before this parameter
                existed.

  If ``mountPoint`` is already listed by ``dbutils.fs.mounts()`` it is
  unmounted first, then the container root is mounted with ``configs`` as
  the extra mount configuration.
  """
  if mountPoint is None:
    mountPoint = "/mnt/%s" % (fileSystem)

  if any(existing.mountPoint == mountPoint for existing in dbutils.fs.mounts()):
    print('unmount ' + fileSystem)
    dbutils.fs.unmount(mountPoint)

  dbutils.fs.mount(
    source = "abfss://%s@%s.dfs.core.windows.net/" % (fileSystem, lakeName),
    mount_point = mountPoint,
    extra_configs = configs
  )
  print('mounting is finished')


# Every read later in this notebook uses /mnt/Test_bronze, so mount the
# bronze container there; mounting only /mnt/bronze leaves those paths
# unresolved on a workspace that has no leftover Test_bronze mount.
MountFileSystem (
  lakeName = 'prodanalytics1',
  fileSystem = 'bronze',
  mountPoint = '/mnt/Test_bronze'
)
#mount("bronze")

In [5]:
# Load the trips_client parquet data of each instance from the Test_bronze mount.
attTrips = spark.read.format('parquet').load("/mnt/Test_bronze/cab/operational/att/trips_client")
encoreTrips = spark.read.format('parquet').load("/mnt/Test_bronze/cab/operational/encore/trips_client")
indiaTrips = spark.read.format('parquet').load("/mnt/Test_bronze/cab/operational/vfindia/trips_client")
maxisTrips = spark.read.format('parquet').load("/mnt/Test_bronze/cab/operational/maxis/trips_client")
ukTrips = spark.read.format('parquet').load("/mnt/Test_bronze/cab/operational/vfuk/trips_client")

# Expose each DataFrame to the %sql cells under its own name.
# createOrReplaceTempView replaces the deprecated registerTempTable and, like
# it, overwrites an existing view of the same name, so the cell can be re-run.
attTrips.createOrReplaceTempView("attTrips")
encoreTrips.createOrReplaceTempView("encoreTrips")
indiaTrips.createOrReplaceTempView("indiaTrips")
maxisTrips.createOrReplaceTempView("maxisTrips")
ukTrips.createOrReplaceTempView("ukTrips")

In [6]:
%sql

-- Trip counts per start country, summed over all five instances, with the
-- US left out.
-- NOTE: rows whose StartCountryCode is NULL are left out as well, because
-- NULL != 'US' does not evaluate to true.
WITH cteData AS (
  SELECT 'att' AS Instance, StartCountryCode, count(*) AS cnt
  FROM attTrips
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'encore' AS Instance, StartCountryCode, count(*) AS cnt
  FROM encoreTrips
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'india' AS Instance, StartCountryCode, count(*) AS cnt
  FROM indiaTrips
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'maxis' AS Instance, StartCountryCode, count(*) AS cnt
  FROM maxisTrips
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'uk' AS Instance, StartCountryCode, count(*) AS cnt
  FROM ukTrips
  GROUP BY StartCountryCode
)
SELECT StartCountryCode, sum(cnt)
FROM cteData
WHERE StartCountryCode != 'US'
GROUP BY StartCountryCode;

StartCountryCode,sum(cnt)
UA,10
NL,12
BS,62
PL,2
MX,23123
AT,1
RU,51
AU,14061
CA,1074670
GB,15973


In [7]:
%sql

-- Trip counts per instance and start country (no country filter).
WITH cteData AS (
  SELECT 'att' AS Instance, StartCountryCode, count(*) AS cnt
  FROM attTrips
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'encore' AS Instance, StartCountryCode, count(*) AS cnt
  FROM encoreTrips
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'india' AS Instance, StartCountryCode, count(*) AS cnt
  FROM indiaTrips
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'maxis' AS Instance, StartCountryCode, count(*) AS cnt
  FROM maxisTrips
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'uk' AS Instance, StartCountryCode, count(*) AS cnt
  FROM ukTrips
  GROUP BY StartCountryCode
)
SELECT Instance, StartCountryCode, sum(cnt)
FROM cteData
GROUP BY Instance, StartCountryCode;


Instance,StartCountryCode,sum(cnt)
att,MY,3
encore,GB,11624
uk,DE,29
india,ES,10
att,IT,64
encore,IE,16
att,GB,3319
encore,ZA,2
maxis,MX,24
att,RU,8


In [8]:
# Load the form_headers_client parquet data of each instance from the Test_bronze mount.
attForms = spark.read.format('parquet').load("/mnt/Test_bronze/cab/operational/att/form_headers_client")
encoreForms = spark.read.format('parquet').load("/mnt/Test_bronze/cab/operational/encore/form_headers_client")
indiaForms = spark.read.format('parquet').load("/mnt/Test_bronze/cab/operational/vfindia/form_headers_client")
maxisForms = spark.read.format('parquet').load("/mnt/Test_bronze/cab/operational/maxis/form_headers_client")
ukForms = spark.read.format('parquet').load("/mnt/Test_bronze/cab/operational/vfuk/form_headers_client")

# Expose each DataFrame to the %sql cells under its own name.
# createOrReplaceTempView replaces the deprecated registerTempTable and, like
# it, overwrites an existing view of the same name, so the cell can be re-run.
attForms.createOrReplaceTempView("attForms")
encoreForms.createOrReplaceTempView("encoreForms")
indiaForms.createOrReplaceTempView("indiaForms")
maxisForms.createOrReplaceTempView("maxisForms")
ukForms.createOrReplaceTempView("ukForms")

In [9]:
%sql

-- Form header counts per instance and start country.
WITH cteData AS (
  SELECT 'att' AS Instance, StartCountryCode, count(*) AS cnt
  FROM attForms
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'encore' AS Instance, StartCountryCode, count(*) AS cnt
  FROM encoreForms
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'india' AS Instance, StartCountryCode, count(*) AS cnt
  FROM indiaForms
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'maxis' AS Instance, StartCountryCode, count(*) AS cnt
  FROM maxisForms
  GROUP BY StartCountryCode

  UNION ALL
  SELECT 'uk' AS Instance, StartCountryCode, count(*) AS cnt
  FROM ukForms
  GROUP BY StartCountryCode
)
SELECT Instance, StartCountryCode, sum(cnt) AS FormsCount
FROM cteData
GROUP BY Instance, StartCountryCode;

Instance,StartCountryCode,FormsCount
att,MY,4
encore,GB,183
att,MD,1
att,GB,12
maxis,MX,6
india,GB,12
maxis,GB,1
uk,GB,3
india,,507463
att,ZA,1
