In [1]:
from pyspark.sql import functions as f
from pyspark.sql import types as t
import dateutil.parser 
from pyspark.sql import Window

#### Read relevant tables

In [3]:
# Lower-case country code; used below to pick the universe table suffix and to
# filter the other source tables on their lower-cased countryCode column.
countryCode = "za"

In [4]:
# Operator universe for the selected country (the table name carries the country suffix).
# Three columns are dropped, and d_buyer is set to 1 when operatorOhubId is present, else 0.
universe = (
    spark.table('dev_derived_ouniverse.output_total_universe_' + countryCode)
    .drop("CityGoogle46", "ohubId", "integer")
    .withColumn(
        'd_buyer',
        f.when(f.col("operatorOhubId").isNotNull(), f.lit(1)).otherwise(f.lit(0)),
    )
)

# Sales rows for the selected country that have a cuEanCode, a transactionDate
# and a strictly positive amount.
sale = (
    spark.table("dev_sources_ohub.cleaned_operator_sales")
    .filter(f.lower(f.col('countryCode')) == countryCode)
    .filter(f.col('cuEanCode').isNotNull())
    .filter(f.col('transactionDate').isNotNull())
    .filter(f.col('amount') > 0)
)

# Menu-to-product mapping and product details, both restricted to the selected country.
menu = (
    spark.table("dev_derived_omenu.output_business_product_mapping_long")
    .filter(f.lower(f.col('countryCode')) == countryCode)
)
productDetails = (
    spark.table('data_sifu.sifu_product_details')
    .filter(f.lower(f.col('countryCode')) == countryCode)
)


In [5]:
# Number of different operatorOhubId values present in the filtered sales rows.
sale.select('operatorOhubId').dropDuplicates().count()

#### Create table with relevant products per operator based on the menu data

In [7]:
# Attach cuEanCodes from the product details to every menu row (matched on productCode),
# keep only rows where a code was found, and reduce to distinct (id, cuEanCode, productName).
menuCuEanCode = (
    menu.alias('m')
    .join(
        productDetails.alias('p'),
        menu.productCode == productDetails.productCode,
        how='left_outer',
    )
    .select('m.*', 'p.cuEanCodes')
    .filter(f.col('cuEanCodes').isNotNull())
    .withColumnRenamed('cuEanCodes', 'cuEanCode')
    .select('id', 'cuEanCode', 'productName')
    .dropDuplicates()
)

# Link the menu products to universe operators: the menu id is matched against
# the universe's placeIdGoogle column.
relevantProductsMenu = (
    universe
    .join(menuCuEanCode, on=universe.placeIdGoogle == menuCuEanCode.id, how='inner')
    .select('operatorid', 'cuEanCode', 'productName')
    .dropDuplicates()
)

In [8]:
# Preview the menu-based products per operator.
# `display` is not defined in this file; presumably supplied by the notebook runtime.
display(relevantProductsMenu)

##### Validate uniqueness of the menu table

In [10]:
# Uniqueness check: for each table, print the row count next to the number of
# distinct key pairs (a NULL cuEanCode is counted as ''). If the two numbers
# differ, some key pair occurs on more than one row.
print(
    'Menu table row count: ',
    menuCuEanCode.dropDuplicates().count(),
    '\n',
    'Menu table id count: ',
    menuCuEanCode
        .select('id', f.when(f.col('cuEanCode').isNotNull(), f.col('cuEanCode')).otherwise(''))
        .dropDuplicates()
        .count(),
    '\n',
    'relevant products menu table  row count: ',
    relevantProductsMenu.count(),
    '\n',
    'relevant products menu table id count: ',
    relevantProductsMenu
        .select('operatorid', f.when(f.col('cuEanCode').isNotNull(), f.col('cuEanCode')).otherwise(''))
        .dropDuplicates()
        .count()
)

In [11]:
# Keep a single productName per key pair: inside each partition rows are ordered by
# productName descending, the first value is assigned to every row, and the now
# identical rows are collapsed with distinct().
#
# The partitions are defined on the key columns themselves rather than on a
# concatenated 'key/cuEanCode' string. f.concat yields NULL as soon as one input is
# NULL, which would put all such rows into one shared partition (and give them all
# the same productName), and a joined string key can collide for different pairs.
w1 = Window.partitionBy('id', 'cuEanCode').orderBy(f.desc('productName'))
menuCuEanCode = menuCuEanCode.select(
    'id',
    'cuEanCode',
    f.first('productName').over(w1).alias('productName'),
).distinct()

w2 = Window.partitionBy('operatorid', 'cuEanCode').orderBy(f.desc('productName'))
relevantProductsMenu = relevantProductsMenu.select(
    'operatorid',
    'cuEanCode',
    f.first('productName').over(w2).alias('productName'),
).distinct()

In [12]:
# Show the menu rows whose (id, cuEanCode) pair still has more than one non-null
# productName; an empty result means the pair is unique.
# Partition on the two key columns directly: a concatenated string key becomes NULL
# when either column is NULL, which would count unrelated rows together.
w = Window.partitionBy('id', 'cuEanCode')
display(
    menuCuEanCode
    .select('*', f.count('productName').over(w).alias('dupeCount'))
    .where('dupeCount > 1')
    .drop('dupeCount')
)


id,cuEanCode,productName


In [13]:
# Same uniqueness check as before the de-duplication: row counts next to the
# number of distinct key pairs (a NULL cuEanCode is counted as '').
print(
    'Menu table row count: ',
    menuCuEanCode.dropDuplicates().count(),
    '\n',
    'Menu table id count: ',
    menuCuEanCode
        .select('id', f.when(f.col('cuEanCode').isNotNull(), f.col('cuEanCode')).otherwise(''))
        .dropDuplicates()
        .count(),
    '\n',
    'relevant products menu table  row count: ',
    relevantProductsMenu.count(),
    '\n',
    'relevant products menu table id count: ',
    relevantProductsMenu
        .select('operatorid', f.when(f.col('cuEanCode').isNotNull(), f.col('cuEanCode')).otherwise(''))
        .dropDuplicates()
        .count()
)

#### Create table with relevant products per operator based on the sales data

In [15]:
# Optional restriction to transactions from 2019 onwards (currently disabled):
# sale = sale.withColumn('year',f.substring(f.col('transactiondate'),1,4).cast('int')).where(f.col('year') >= 2019)

# Keep only rows with a strictly positive amount, then preview the sales data.
sale = sale.filter(f.col('amount') > 0)
display(sale)

countryCode,orderType,transactionDate,ohubUpdated,operatorOhubId,operatorConcatID,sourceProductCode,productID,language,productCode,cuEanCode,duEanCode,productCodeLocal,productName,productType,quantityOfUnits,amount,pricePerUnit,cuListingPriceInCents,duListingPriceInCents,packagingCode,packagingName,brandCode,brandName,subBrandCode,subBrandName,categoryCode,categoryName,subcategoryCode,subcategoryName
ZA,Appshop,2019-07-18T10:12:51.000+0000,2019-07-30T11:09:06.801+0000,6b0215b0-8799-3d7c-8c09-3eb0bdd6ffc0,ZA~EMAKINA~6c491a9e-22c4-4aa2-a6c3-ad3ee53f7722,2 - CU EAN,d0766d0a60bc232e4c177b4d5df781aae45a631acbd94e83f967b54cfaf1e03f,en,1-EN-12603,6001087305006,16001087304112.0,11007,Knorr Professional Tomato Pronto,PRODUCT,3,366.0,122.0,13000,78000,1045,6 x 2kg,148,Knorr,1823,Knorr Professional,4147,Stocks,10985,Knorr Professional
ZA,Appshop,2019-08-27T07:20:21.000+0000,2019-08-28T04:25:47.263+0000,7e72bba4-47a7-33dc-a552-572a3cdf3506,ZA~EMAKINA~2bd88545-a8c4-4c24-86a8-98eeded3cb40,2 - CU EAN,64c9036972346383cb70284806e414c4d06b7bd83c578874374c1b971dc8aafc,en,1-EN-74780,6001087314848,16001087313039.0,12499,Robertsons Veggie Seasoning,PRODUCT,1,120.0,120.0,12500,75000,1020,6 x 1kg,171,Robertsons,409,not defined,701,Seasonings,10983,Robertsons Seasoning Blends
ZA,Appshop,2019-08-27T07:25:40.000+0000,2019-08-28T04:25:47.263+0000,7e72bba4-47a7-33dc-a552-572a3cdf3506,ZA~EMAKINA~2bd88545-a8c4-4c24-86a8-98eeded3cb40,1 - DU EAN,06900f335bdc6b57ff640c4b65f7443e5161d8bdca3a8bc7a68040e388de8d6c,en,1-EN-194331,6001087362405,6001087362405.0,21165447,Knorr Professional Spicy Chicken Breading,PRODUCT,1,203.0,203.0,21600,21600,12626,1 X 5kg,148,Knorr,419,Savoury,701,Seasonings,10982,Knorr Seasonings
ZA,Appshop,2019-08-27T07:25:40.000+0000,2019-08-28T04:25:47.263+0000,7e72bba4-47a7-33dc-a552-572a3cdf3506,ZA~EMAKINA~2bd88545-a8c4-4c24-86a8-98eeded3cb40,2 - CU EAN,4f9154f743b065e42b08ce617c66e0b1325109c18bdcaeff948d550f8d7b45de,en,1-EN-53149,6001087313766,16001087312230.0,12388,Knorr Professional Peri-Peri Marinade,PRODUCT,1,76.0,76.0,8100,48600,1016,6 x 1 kg,148,Knorr,373,not defined,4148,Marinades,10997,Dry Marinades
ZA,Appshop,2019-10-29T15:51:35.000+0000,2019-12-16T02:21:50.642+0000,1610e1db-7695-3c7e-888e-f68c2fe6c0e4,ZA~EMAKINA~8b971272-a17f-412a-9ecf-15b68845ddf6,2 - CU EAN,31521123885a306715e10463aa52c2566a857e97d666818298662f0e6c46fe02,en,1-EN-264095,6001087375801,26001087375805.0,67398417,Robertsons Barbecue Spice Pack,PRODUCT,1,318.84,318.84,6400,38400,1032,6 x 500g,171,Robertsons,409,not defined,701,Seasonings,10983,Robertsons Seasoning Blends
ZA,Appshop,2019-10-29T15:51:35.000+0000,2019-12-16T02:21:50.642+0000,1610e1db-7695-3c7e-888e-f68c2fe6c0e4,ZA~EMAKINA~8b971272-a17f-412a-9ecf-15b68845ddf6,2 - CU EAN,451d320473e43c466b650e749a4c9033472f62f650ff6fe231932f72b232e28e,en,1-EN-100255,6001087009454,16001087008287.0,20215593,"Knorr Professional Mushroom Sauce Powder, 800 g",PRODUCT,1,93.0,93.0,9900,29700,991,3 x 800g,148,Knorr,419,Savoury,2307,Sauces,10987,Knorr Base Sauces
ZA,Appshop,2019-10-29T15:51:35.000+0000,2019-12-16T02:21:50.642+0000,1610e1db-7695-3c7e-888e-f68c2fe6c0e4,ZA~EMAKINA~8b971272-a17f-412a-9ecf-15b68845ddf6,2 - CU EAN,8466c4c09b31613d4d1f597890be2fa2225e5a6c170967277fc4a24a78b6e423,en,1-EN-151849,6001087354981,26001087354985.0,21055636,Fine Foods Fruit Chutney 3kg,PRODUCT,1,472.0,472.0,10500,42000,1030,4 x 3kg,169,Fine Foods,399,Not Defined,2307,Sauces,10992,Fine Foods Condiments
ZA,Appshop,2019-10-29T15:51:35.000+0000,2019-12-16T02:21:50.642+0000,1610e1db-7695-3c7e-888e-f68c2fe6c0e4,ZA~EMAKINA~8b971272-a17f-412a-9ecf-15b68845ddf6,2 - CU EAN,9ae6705b1114d3a9160ba2acc8501e2127cb9dad4caea66a8a724f731b6a09a3,en,1-EN-18107,6001087306812,16001087305539.0,11264,Knorr Professional Brown Roux Granules,PRODUCT,1,92.0,92.0,9800,58800,906,6 x 750g,148,Knorr,419,Savoury,702,Thickeners,11001,Brown Roux
ZA,Appshop,2019-10-29T15:51:35.000+0000,2019-12-16T02:21:50.642+0000,1610e1db-7695-3c7e-888e-f68c2fe6c0e4,ZA~EMAKINA~8b971272-a17f-412a-9ecf-15b68845ddf6,2 - CU EAN,b7317fa4f34da387db701abcf127d4e0ebaf54bf793ec8e2f9f9b24ce4701fa0,en,1-EN-94789,6001087001045,16001087001066.0,20046254,Knorr Professional Spare Rib Marinade & Basting,PRODUCT,1,100.0,100.0,10600,63600,1028,6 x 2L,148,Knorr,419,Savoury,4148,Marinades,10996,Wet Marinades
ZA,Appshop,2019-10-29T15:51:35.000+0000,2019-12-16T02:21:50.642+0000,1610e1db-7695-3c7e-888e-f68c2fe6c0e4,ZA~EMAKINA~8b971272-a17f-412a-9ecf-15b68845ddf6,2 - CU EAN,eea8af95dad386d2f074ac18bb1282291c2d2ed91832cff3f8ee57539a8a455f,en,1-EN-15734,6001171020259,16001171020263.0,902025,Knorr Professional Aromat Original,PRODUCT,1,642.0,642.0,11100,66600,1020,6 x 1kg,148,Knorr,373,not defined,701,Seasonings,10982,Knorr Seasonings


##### Get Relevant products for operator from the sales table

In [17]:
# Products bought per universe operator: universe rows are matched to sales rows on
# the operator OHUB id, and only distinct (operatorid, cuEanCode, productName) are kept.
universeRelevantProductsSSD = (
    universe.alias('u')
    .join(
        sale.alias('s'),
        on=universe.operatorOhubID == sale.operatorOhubId,
        how='inner',
    )
    .select('u.operatorid', 's.cuEanCode', 's.productName')
    .dropDuplicates()
)


In [18]:
# Preview the sales-based products per operator.
display(universeRelevantProductsSSD)

operatorid,cuEanCode,productName
2162839119 / / 051c053f-dfde-314c-86d4-06f7d774457d,6001087305815,Robertsons Medium Rajah
/ / 44ee70fd-4671-324c-8a4d-a29d091e1b75,6001087009430,"Knorr Professional Cheese Sauce Powder, 800 g"
/ / 5971d338-11ee-3de4-8915-79f0fb9c4206,6001087309479,Robertsons Parsley
/ / ee681a76-42ae-3870-af9b-a179fe350e57,6001087009447,"Knorr Professional Roast Onion Gravy Powder, 800 g"
/ / f4b3c093-0276-4572-b257-4ad2bafe9b94,6001087009454,"Knorr Professional Mushroom Sauce Powder, 800 g"
/ / 01cd9a07-94e2-3844-ab01-5b657e82aab3,6001087309455,Robertsons Italian Herb Seasoning
/ / 0c03b346-aa59-3de7-b881-479baee10a63,6001087001090,Hellmann's Tangy Mayonnaise 2.5kg
/ / 4d80170f-648b-3e6d-b934-a17a82134b80,6001087309530,Robertsons Paprika
/ / 2ae66328-f979-3a4a-aa14-5604982b82ba,6001087354950,Knorr Professional Beef Stock Granules
/ / f6c5c126-537e-395b-80fe-83d4af5e4274,6001087354967,Knorr Professional Vegetable Stock Granules


In [19]:
# Uniqueness check for the sales-based table: row count versus the number of distinct
# (operatorid, cuEanCode) pairs, with a NULL cuEanCode counted as ''.
print(
    'relevant products SSD table row count: ',
    universeRelevantProductsSSD.count(),
    '\n',
    'relevant products SSD table id count: ',
    universeRelevantProductsSSD
        .select('operatorid', f.when(f.col('cuEanCode').isNotNull(), f.col('cuEanCode')).otherwise(''))
        .dropDuplicates()
        .count()
)

In [20]:
# Keep a single productName per (operatorid, cuEanCode): rows are ordered by productName
# descending within each pair, the first value is assigned to every row, and the
# resulting identical rows are collapsed with distinct().
# The window is partitioned on the two key columns directly; a concatenated string key
# would be NULL whenever either column is NULL (merging unrelated rows into one
# partition) and could collide for different pairs.
w3 = Window.partitionBy('operatorid', 'cuEanCode').orderBy(f.desc('productName'))
universeRelevantProductsSSD = universeRelevantProductsSSD.select(
    'operatorid',
    'cuEanCode',
    f.first('productName').over(w3).alias('productName'),
).distinct()



In [21]:
# Re-run the uniqueness check after the de-duplication: row count versus the number
# of distinct (operatorid, cuEanCode) pairs, with a NULL cuEanCode counted as ''.
print(
    'relevant products SSD table row count: ',
    universeRelevantProductsSSD.count(),
    '\n',
    'relevant products SSD table id count: ',
    universeRelevantProductsSSD
        .select('operatorid', f.when(f.col('cuEanCode').isNotNull(), f.col('cuEanCode')).otherwise(''))
        .dropDuplicates()
        .count()
)

In [22]:
# Number of different operators that have at least one sales-based product.
universeRelevantProductsSSD.select('operatorid').dropDuplicates().count()

#### Combine both relevant product tables using a union

In [24]:
# Tag each table with the source of its rows.
universeRelevantProductsSSD = universeRelevantProductsSSD.withColumn('rationale', f.lit('Sales Data'))

# Menu rows are kept only for operators that have no row at all in the sales-based
# table (left anti join on operatorid).
relevantProductsMenu = (
    relevantProductsMenu
    .withColumn('rationale', f.lit('Menu Data'))
    .join(universeRelevantProductsSSD, on='operatorid', how='left_anti')
)

In [25]:
# Preview the menu-based rows that remain after removing operators with sales data.
display(relevantProductsMenu)

operatorid,cuEanCode,productName,rationale


In [26]:
# Stack the menu-based and sales-based rows into one table.
# unionByName matches columns by name; plain union() matches them by position and
# would silently mix up values if the column order of either input ever changed.
# Both inputs carry operatorid, cuEanCode, productName and rationale.
relevantProducts = relevantProductsMenu.unionByName(universeRelevantProductsSSD)

In [27]:
# Within each (operatorid, cuEanCode) pair, rank the rows by rationale in ascending
# order and keep only the best-ranked ones. NULL keys / rationale are folded to ''
# so that they are grouped and ordered like an empty string.
# dense_rank keeps ties: rows sharing the lowest rationale within a pair all survive.
# Window comes from the import cell at the top of the notebook; it is not re-imported here.
w = Window.partitionBy(
    f.coalesce(f.col('operatorid'), f.lit('')),
    f.coalesce(f.col('cuEanCode'), f.lit('')),
).orderBy(f.coalesce(f.col('rationale'), f.lit('')).asc())

relevantProducts = (
    relevantProducts
    .withColumn("rank", f.dense_rank().over(w))
    .where(f.col('rank') == 1)
    .drop('rank')
)

In [28]:
# Normalise product names (lower-case, runs of spaces collapsed to a single space) and
# keep one name per (operatorid, cuEanCode, rationale): the alphabetically greatest one.
# The pattern ' {2,}' matches a whole run of two or more spaces; replacing only the
# literal two-space string left runs of three or more spaces partly uncollapsed.
relevantProducts = (
    relevantProducts
    .select(
        'operatorid',
        'rationale',
        'cuEanCode',
        f.lower(f.col('productName')).alias('productName'),
    )
    .withColumn('productName', f.regexp_replace(f.col('productName'), ' {2,}', ' '))
    .groupBy('operatorid', 'cuEanCode', 'rationale')
    .agg(f.max(f.col('productName')).alias('productName'))
)

In [29]:
# Show the column names of the combined relevant-products table.
print(relevantProducts.columns)

In [30]:
# 233.563  (NOTE(review): presumably a row count noted from an earlier run; confirm)

# Uniqueness check for the combined table: row count versus the number of distinct
# (operatorid, cuEanCode) pairs, with a NULL cuEanCode counted as ''.
print(
    'relevant products table row count: ',
    relevantProducts.count(),
    '\n',
    'relevant products table id count: ',
    relevantProducts
        .select('operatorid', f.when(f.col('cuEanCode').isNotNull(), f.col('cuEanCode')).otherwise(''))
        .dropDuplicates()
        .count()
)

#### Show duplicate records

In [32]:
# Key pairs (operatorid, cuEanCode) that occur on more than one row of the combined
# table; a NULL cuEanCode is grouped under ''.
duplicateRows = (
    relevantProducts
    .groupBy(
        'operatorid',
        f.when(f.col('cuEanCode').isNotNull(), f.col('cuEanCode')).otherwise('').alias('cuEanCode'),
    )
    .agg(f.count(f.lit(1)).alias('cntRecords'))
    .filter(f.col('cntRecords') > 1)
)

# Print the full rows behind those duplicated key pairs, sorted by key.
(
    relevantProducts
    .join(duplicateRows, on=['operatorid', 'cuEanCode'], how='inner')
    .orderBy('operatorid', 'cuEanCode')
    .show(truncate=False)
)

#### Create Universe table joined with the relevant products

In [34]:
# Attach the relevant products to every universe operator. The left outer join keeps
# operators without any product (their product columns stay NULL); exact duplicate
# rows are removed afterwards.
universeRelevantProducts = (
    universe
    .join(relevantProducts, on='operatorid', how='left_outer')
    .dropDuplicates()
)

In [35]:
# Show the column names of the combined relevant-products table.
# NOTE(review): this prints relevantProducts, not the freshly built
# universeRelevantProducts -- confirm which table was meant to be inspected here.
print(relevantProducts.columns)

In [36]:
# Uniqueness check for the joined table: row count versus the number of distinct
# (operatorid, cuEanCode) pairs, with a NULL cuEanCode counted as ''.
print(
    'universe with relevant products table row count: ',
    universeRelevantProducts.count(),
    '\n',
    'universe with relevant products table id count: ',
    universeRelevantProducts
        .select('operatorid', f.when(f.col('cuEanCode').isNotNull(), f.col('cuEanCode')).otherwise(''))
        .dropDuplicates()
        .count()
)

In [37]:
# Preview the universe joined with the relevant products.
display(universeRelevantProducts)

operatorId,osmId,placeIdGoogle,operatorOhubID,name,address,postalCode,city,latitude,longitude,businessType,website,phone,nameGoogle,addressGoogle,postalCodeGoogle,cityGoogle16,latitudeGoogle,longitudeGoogle,businessTypeGoogle,websiteGoogle,type,uid,user_sid,nameOSM,addressOSM,postalCodeOSM,cityOSM,latitudeOSM,longitudeOSM,businessTypeOSM,websiteOSM,cuisineTypeOSM,phoneOSM,nameOHUB,addressOhub,zipcodeOHUB,cityOHUB,operatorConcatID,channelOHUB,source,globalChannel,cuisineType,chain,globalListChannels,addressCombined,d_buyer,cuEanCode,rationale,productName
/ / 004f037a-3fd4-3b92-bd6e-57b19d398531,,,004f037a-3fd4-3b92-bd6e-57b19d398531,Kaygeeâ€™s kitchen,,2940.0,,,,catering,,,,,,,,,,,,,,,,,,,,,,,,Kaygeeâ€™s kitchen,,2940.0,,,catering,OHUB,Other,Unknown,Unknown,[Other],Kaygeeâ€™s kitchen,1,,,
/ / 00c57c31-a3fd-4aff-9251-40d7ef12dc1f,,,00c57c31-a3fd-4aff-9251-40d7ef12dc1f,Action in Autism,Keal Rd,4001.0,Sydenham,-29.831225,30.982534,Education,,,,"Haig Road 105, Durban North",4051.0,,-29.831225,30.982534,"['establishment', 'point_of_interest', 'school']",,,,,,,,,,,,,,,Action in Autism,Keal Rd,4001.0,Sydenham,ZA~CRM_OTHER~ECBA232FC76EAF63F2EEFBDE8130EB2B,Education,OHUB,Student / School dormitory,Unknown,Unknown,"[Other, Student / School dormitory]","Action in Autism,Keal Rd,Sydenham",1,,,
/ / 015c9f3e-510b-3fad-a3b4-7544ca705531,,,015c9f3e-510b-3fad-a3b4-7544ca705531,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,-29.804981,31.00548999999999,Conferencing,,+27 31 579 1463,,"Ebonyfield Avenue 48, Durban",4051.0,,-29.804981,31.00548999999999,"['establishment', 'food', 'point_of_interest']",,,,,,,,,,,,,,,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,ZA~ARMSTRONG~OPDA4606SA,Conferencing,OHUB,Other,Unknown,aw,[Other],"BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK",1,6001087305815.0,Sales Data,robertsons medium rajah
/ / 015c9f3e-510b-3fad-a3b4-7544ca705531,,,015c9f3e-510b-3fad-a3b4-7544ca705531,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,-29.804981,31.00548999999999,Conferencing,,+27 31 579 1463,,"Ebonyfield Avenue 48, Durban",4051.0,,-29.804981,31.00548999999999,"['establishment', 'food', 'point_of_interest']",,,,,,,,,,,,,,,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,ZA~ARMSTRONG~OPDA4606SA,Conferencing,OHUB,Other,Unknown,aw,[Other],"BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK",1,6001087354974.0,Sales Data,knorr professional chicken stock granules
/ / 015c9f3e-510b-3fad-a3b4-7544ca705531,,,015c9f3e-510b-3fad-a3b4-7544ca705531,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,-29.804981,31.00548999999999,Conferencing,,+27 31 579 1463,,"Ebonyfield Avenue 48, Durban",4051.0,,-29.804981,31.00548999999999,"['establishment', 'food', 'point_of_interest']",,,,,,,,,,,,,,,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,ZA~ARMSTRONG~OPDA4606SA,Conferencing,OHUB,Other,Unknown,aw,[Other],"BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK",1,6001087309530.0,Sales Data,robertsons paprika
/ / 015c9f3e-510b-3fad-a3b4-7544ca705531,,,015c9f3e-510b-3fad-a3b4-7544ca705531,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,-29.804981,31.00548999999999,Conferencing,,+27 31 579 1463,,"Ebonyfield Avenue 48, Durban",4051.0,,-29.804981,31.00548999999999,"['establishment', 'food', 'point_of_interest']",,,,,,,,,,,,,,,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,ZA~ARMSTRONG~OPDA4606SA,Conferencing,OHUB,Other,Unknown,aw,[Other],"BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK",1,6001087309509.0,Sales Data,robertsons turmeric
/ / 015c9f3e-510b-3fad-a3b4-7544ca705531,,,015c9f3e-510b-3fad-a3b4-7544ca705531,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,-29.804981,31.00548999999999,Conferencing,,+27 31 579 1463,,"Ebonyfield Avenue 48, Durban",4051.0,,-29.804981,31.00548999999999,"['establishment', 'food', 'point_of_interest']",,,,,,,,,,,,,,,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,ZA~ARMSTRONG~OPDA4606SA,Conferencing,OHUB,Other,Unknown,aw,[Other],"BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK",1,6001087314848.0,Sales Data,robertsons veggie seasoning
/ / 015c9f3e-510b-3fad-a3b4-7544ca705531,,,015c9f3e-510b-3fad-a3b4-7544ca705531,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,-29.804981,31.00548999999999,Conferencing,,+27 31 579 1463,,"Ebonyfield Avenue 48, Durban",4051.0,,-29.804981,31.00548999999999,"['establishment', 'food', 'point_of_interest']",,,,,,,,,,,,,,,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,ZA~ARMSTRONG~OPDA4606SA,Conferencing,OHUB,Other,Unknown,aw,[Other],"BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK",1,6001087309424.0,Sales Data,robertsons cayenne pepper
/ / 015c9f3e-510b-3fad-a3b4-7544ca705531,,,015c9f3e-510b-3fad-a3b4-7544ca705531,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,-29.804981,31.00548999999999,Conferencing,,+27 31 579 1463,,"Ebonyfield Avenue 48, Durban",4051.0,,-29.804981,31.00548999999999,"['establishment', 'food', 'point_of_interest']",,,,,,,,,,,,,,,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,ZA~ARMSTRONG~OPDA4606SA,Conferencing,OHUB,Other,Unknown,aw,[Other],"BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK",1,6001087305709.0,Sales Data,robertsons hot rajah
/ / 015c9f3e-510b-3fad-a3b4-7544ca705531,,,015c9f3e-510b-3fad-a3b4-7544ca705531,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,-29.804981,31.00548999999999,Conferencing,,+27 31 579 1463,,"Ebonyfield Avenue 48, Durban",4051.0,,-29.804981,31.00548999999999,"['establishment', 'food', 'point_of_interest']",,,,,,,,,,,,,,,BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK,4016.0,,ZA~ARMSTRONG~OPDA4606SA,Conferencing,OHUB,Other,Unknown,aw,[Other],"BLUE STRAWBERRY CATERERS,SPRINGFIELD INDUSTRIAL PARK",1,6001087305808.0,Sales Data,robertsons mild & spicy rajah


In [38]:
# Coverage: first the number of distinct operators that have at least one product
# (rationale is filled), then the total number of distinct operators.
print(
    universeRelevantProducts
        .filter(f.col('rationale').isNotNull())
        .select('operatorId')
        .dropDuplicates()
        .count(),
    '\n',
    universeRelevantProducts.select('operatorId').dropDuplicates().count()
)

In [39]:
# Persist the result as a table, replacing any existing table of the same name.
# The table name ends with the country code.
(
    universeRelevantProducts
    .write
    .mode('overwrite')
    .saveAsTable('data_user_hien.valueTier_opr_cuEanCode_' + countryCode)
)

In [40]:
# Read the table that was just written back and preview it.
display(spark.table('data_user_hien.valueTier_opr_cuEanCode_' + countryCode) )

operatorId,osmId,placeIdGoogle,operatorOhubID,name,address,postalCode,city,latitude,longitude,businessType,website,phone,nameGoogle,addressGoogle,postalCodeGoogle,cityGoogle16,latitudeGoogle,longitudeGoogle,businessTypeGoogle,websiteGoogle,type,uid,user_sid,nameOSM,addressOSM,postalCodeOSM,cityOSM,latitudeOSM,longitudeOSM,businessTypeOSM,websiteOSM,cuisineTypeOSM,phoneOSM,nameOHUB,addressOhub,zipcodeOHUB,cityOHUB,operatorConcatID,channelOHUB,source,globalChannel,cuisineType,chain,globalListChannels,addressCombined,d_buyer,cuEanCode,rationale,productName
/ / 00010dae-4243-411d-9139-c99134126c43,,,00010dae-4243-411d-9139-c99134126c43,Aliwal Dive Centre,,,,-30.207022,30.800042,Conferencing,,,,"Moodie Street 2, Umkomaas",4170.0,,-30.207022,30.800042,"['establishment', 'lodging', 'point_of_interest', 'travel_agency']",,,,,,,,,,,,,,,Aliwal Dive Centre,,,,,Conferencing,OHUB,Other,Unknown,Unknown,[Other],Aliwal Dive Centre,1,,,
/ / 01b398c5-52d8-3b3f-9ba1-9c2c41b5083a,,,01b398c5-52d8-3b3f-9ba1-9c2c41b5083a,Adelante Lodge B&B,,,Gauteng,-29.107016,26.1974069,Hotel,,+27 51 444 6400,,"Wannenburg Street 4, Bloemfontein",9301.0,,-29.107016,26.1974069,"['establishment', 'lodging', 'point_of_interest']",,,,,,,,,,,,,,,Adelante Lodge B&B,,,Gauteng,,Hotel,OHUB,Hotel,Unknown,Unknown,"[Other, Hotel, Motel]","Adelante Lodge B&B,Gauteng",1,,,
/ / 02c42df1-5090-3f6e-a16a-fba46f0c8d4c,,,02c42df1-5090-3f6e-a16a-fba46f0c8d4c,Fairways Drakensberg,Khotso Lodge & Horse Trails Drakensberg Gardens Road Underberg 3257 South Africa,3257.0,,-29.7459628,29.4264076,Hotel,,+27 82 495 1345,,,3257.0,,-29.7459628,29.4264076,"['campground', 'establishment', 'lodging', 'park', 'point_of_interest', 'travel_agency']",,,,,,,,,,,,,,,Fairways Drakensberg,Khotso Lodge & Horse Trails Drakensberg Gardens Road Underberg 3257 South Africa,3257.0,,,Hotel,OHUB,Other,Unknown,Unknown,[Other],"Fairways Drakensberg,Khotso Lodge & Horse Trails Drakensberg Gardens Road Underberg 3257 South Africa",1,,,
/ / 02d4be10-ef41-438c-b75c-8d5767861a4f,,,02d4be10-ef41-438c-b75c-8d5767861a4f,Helen's foos,,,,-33.93801320000001,18.861963,Restaurant,,,,"Church Street 33, Stellenbosch",7600.0,,-33.93801320000001,18.861963,"['establishment', 'food', 'point_of_interest', 'restaurant']",,,,,,,,,,,,,,,Helen's foos,,,,,Restaurant,OHUB,Restaurant,Unknown,Unknown,"[Other, Restaurant]",Helen's foos,1,,,
/ / 03939201-d9fb-358e-b5db-510abf924d3a,,,03939201-d9fb-358e-b5db-510abf924d3a,Ed's Edibles,,4133.0,,,,catering,,,,,,,,,,,,,,,,,,,,,,,,Ed's Edibles,,4133.0,,ZA~EMAKINA~4e67d65c-775c-493d-a06b-3067cd89b2e4,catering,OHUB,Other,Unknown,Unknown,[Other],Ed's Edibles,1,,,
/ / 07df02f0-4dd8-32dd-93ff-5f6904815a0a,,,07df02f0-4dd8-32dd-93ff-5f6904815a0a,Ext 6 Cosmo City,,6343.0,,-26.0227722,27.9261438,pubs-bars-coffee-tea-shops_pubs,,,,", Roodepoort",2188.0,,-26.0227722,27.9261438,"['political', 'sublocality', 'sublocality_level_2']",,,,,,,,,,,,,,,Ext 6 Cosmo City,,6343.0,,ZA~EMAKINA~88aaf323-9cfe-461f-a7cd-07cb42d3b417,pubs-bars-coffee-tea-shops_pubs,OHUB,Pub,Unknown,Unknown,"[Other, Pub]",Ext 6 Cosmo City,1,,,
/ / 07f5cba3-ca42-3c01-949f-7f282b9603cf,,,07f5cba3-ca42-3c01-949f-7f282b9603cf,Leaders Brand Marketing,,,,-26.05011,27.999540000000003,Education,,,,"Moray Drive 2, Sandton",2191.0,,-26.05011,27.999540000000003,"['establishment', 'point_of_interest']",,,,,,,,,,,,,,,Leaders Brand Marketing,,,,,Education,OHUB,Student / School dormitory,Unknown,Unknown,"[Other, Student / School dormitory]",Leaders Brand Marketing,1,,,
/ / 082dab04-6cf1-3ae0-9319-854e0d785f55,,,082dab04-6cf1-3ae0-9319-854e0d785f55,Kwikspar Bergvliet,Shop 151 Kirstenhof Shopping Centre 288 Main Rd,7800.0,,-34.0502422,18.4628697,Deli,,+27 21 713 1117,,"Austell Road , Cape Town",7945.0,,-34.0502422,18.4628697,"['establishment', 'food', 'grocery_or_supermarket', 'point_of_interest', 'store', 'supermarket']",,,,,,,,,,,,,,,Kwikspar Bergvliet,Shop 151 Kirstenhof Shopping Centre 288 Main Rd,7800.0,,,Deli,OHUB,Other,Unknown,Unknown,[Other],"Kwikspar Bergvliet,Shop 151 Kirstenhof Shopping Centre 288 Main Rd",1,,,
/ / 094f7521-29fa-43a8-91d1-a0a8121e7b10,,,094f7521-29fa-43a8-91d1-a0a8121e7b10,ME Paruk Agencies,,4091.0,Durban,-29.85868039999999,31.0218404,other,,,,", Durban",,,-29.85868039999999,31.0218404,"['locality', 'political']",,,,,,,,,,,,,,,ME Paruk Agencies,,4091.0,Durban,ZA~CRM_OTHER~EE1DBC515E97B6385706C8E61683ED63,other,OHUB,Other,Unknown,Unknown,[Other],"ME Paruk Agencies,Durban",1,,,
/ / 0aab31d3-5479-306b-99f6-252009e927b2,,,0aab31d3-5479-306b-99f6-252009e927b2,Sefapane Lodges & Safaris,"Koper Rd, Phalaborwa",1389.0,Phalaborwa,-23.9492237,31.1512604,Hotel,,,,"Koper Road , Phalaborwa",1389.0,,-23.9492237,31.1512604,"['establishment', 'lodging', 'point_of_interest']",,,,,,,,,,,,,,,Sefapane Lodges & Safaris,"Koper Rd, Phalaborwa",1389.0,Phalaborwa,,Hotel,OHUB,Hotel,Unknown,Unknown,"[Other, Hotel]","Sefapane Lodges & Safaris,Koper Rd, Phalaborwa,Phalaborwa",1,,,


In [41]:
# Output table name prefix used above: data_user_hien.valueTier_opr_cuEanCode_<countryCode>
# (Kept as a comment: as a bare expression this line raised NameError, because
# data_user_hien is a schema name inside a string elsewhere, not a Python variable.)