In [1]:
def CopyBillingData():
  """Merge billing company data into ``silver.Companies`` and advance the sync marker.

  Steps performed:
    1. Read the last sync date stored for ('companies', 'billing').
    2. Prepare the bronze inputs ``tmpCompanies`` and ``tmpAccounts`` (both
       limited by ``startDate``) plus ``tmpInstances`` and ``tmpTiers``.
    3. Build one denormalised row per company, substituting 'BAD DATA' where
       an account / instance / tier / status lookup finds no match.
    4. MERGE the rows into ``silver.Companies`` keyed on (CompanyId, InstanceId).
    5. Store the greatest ``ModificationDate`` seen as the new sync date.

  Relies on names defined outside this function: ``spark``, ``lakeName``,
  ``BronzeTable``, ``GetLastSyncDate``, ``UpdateLastSyncDate`` and the
  ``PrepareDataBronze*`` helpers.

  NOTE(review): the query also joins ``tmpCompanyStatuses``, which is not
  prepared here; it presumably has to be registered in the Spark session
  before this function runs - confirm against the caller.
  """
  print("LakeName:", lakeName)

  # get last date from the loaded dataset
  LastSyncDate = GetLastSyncDate('companies', 'billing')
  print("Reading data since", LastSyncDate)

  # NOTE(review): each helper presumably exposes the bronze entity as a temp
  # view named `entityName` (the query below reads from those names) - confirm.
  print('Read Companies data')
  PrepareDataBronzeWithoutInstance (
    lakeName = lakeName,
    entity = BronzeTable.Companies_Billing,
    entityName = 'tmpCompanies',
    startDate = LastSyncDate
  )

  print('Read Accounts data')
  PrepareDataBronzeWithoutInstance (
    lakeName = lakeName,
    entity = BronzeTable.Accounts_Billing,
    entityName = 'tmpAccounts',
    startDate = LastSyncDate
  )

  # Instances and Tiers are read without a start date.
  print('Read Instances data')
  PrepareDataBronzeSimpleWithoutInstance(
    lakeName = lakeName,
    entity = BronzeTable.Instances,
    entityName = 'tmpInstances')

  print('Read Tiers data')
  PrepareDataBronzeSimpleWithoutInstance(
    lakeName = lakeName,
    entity = BronzeTable.Tiers,
    entityName = 'tmpTiers')

  print("Writing data...")

  # Left joins keep every company row; unmatched lookups become 'BAD DATA'.
  # Status 4 or 201 marks the row as deleted, status 0 as active.
  # CompanyName is only carried over when NonCab = 1.
  # ModificationDate is projected exactly once: a duplicated column name would
  # make `s.ModificationDate` in the MERGE and the max() aggregation below
  # ambiguous.
  sourceData = spark.sql("""
    select 
          c.CompanyId
        , c.InstanceId
        , coalesce(i.DisplayName, 'BAD DATA') as InstanceName
        , c.AccountId
        , if(a.AccountId is null, 'BAD DATA', a.AccountName) as AccountName         
        , c.TierId
        , if(t.TierId is null, 'BAD DATA', t.TierName) as TierName
        , coalesce(cs.CompanyStatusName, 'BAD DATA') as Status
        , if (c.Status = 4 or c.Status = 201, 1, 0) as IsDeleted 
        , if (c.Status = 0, 1, 0) as IsActive
        , c.Created as BillingCreatedUtc 
        , c.OperationalPending as IsPending 
        , c.ModificationDate
        , if (c.NonCab = 1, c.CompanyName, null) as CompanyName
        , c.NonCab 
        , c.SetupCompletionTime
        , current_timestamp() as SilverModifiedUtc
    from tmpCompanies c
      left join tmpAccounts a on a.AccountId = c.AccountId
      left join tmpInstances i on i.InstanceId = c.InstanceId
      left join tmpTiers t on t.TierId = c.TierId and t.InstanceType = i.InstanceType
      left join tmpCompanyStatuses cs on cs.CompanyStatusId = c.Status
  """)

  # createOrReplaceTempView replaces the deprecated registerTempTable.
  sourceData.createOrReplaceTempView("SourceData")

  # Matched rows: refresh only the billing-owned columns (TierName is kept in
  # step with TierId). New rows: insert with every non-billing column set to null.
  spark.sql("""
    merge into silver.Companies as t
    using SourceData as s
      on t.CompanyId = s.CompanyId and t.InstanceId = s.InstanceId
    when matched then update set 
        InstanceName = s.InstanceName
      , AccountId = s.AccountId
      , AccountName = s.AccountName
      , TierId = s.TierId
      , TierName = s.TierName
      , Status = s.Status
      , IsDeleted = s.IsDeleted
      , IsActive = s.IsActive
      , BillingCreatedUtc = s.BillingCreatedUtc
      , IsPending = s.IsPending
      , CompanyName = s.CompanyName
      , NonCab = s.NonCab
      , SetupCompletionTime = s.SetupCompletionTime
      , Modified = s.ModificationDate
      , SilverModifiedUtc = s.SilverModifiedUtc
    when not matched then 
      insert ( CompanyId, InstanceId, InstanceName, AccountId, AccountName, TierId, TierName, Status, IsDeleted, IsActive, BillingCreatedUtc, IsPending, NonCab, 
               SetupCompletionTime, VerticalId, VerticalName, OperationalCreatedUtc, DataPurgeCompleted,  
               CompanyName, ContactName, Phone, ContactEmail, AddressLine2, AddressLine1, City, RegionName, PostalCode, CountryCode, SubRegionName, 
               RegionCode, SubRegionCode, UserIntegrationType, Market, EnableCustomerDW, Modified, SilverModifiedUtc )
      values ( CompanyId, InstanceId, InstanceName, AccountId, AccountName, TierId, TierName, Status, IsDeleted, IsActive, BillingCreatedUtc, IsPending, NonCab, 
               SetupCompletionTime, null, null, null, null,    
               CompanyName, null, null, null, null, null, null, null, null, null, null,
               null, null, null, null, null, ModificationDate, SilverModifiedUtc )
  """)

  print("Writing data completed.")

  # max() over an empty result is None; the stored sync date is then left as is.
  LastSyncDate = sourceData.agg({"ModificationDate": "max"}).collect()[0][0]
  print(LastSyncDate)

  if LastSyncDate is not None:
    UpdateLastSyncDate(LastSyncDate, 'companies', 'billing')