## Buy Now, Pay Later Project
### MAST30034: Applied Data Science 
#### Notebook 1: Preprocessing Data

In [1]:
# create modeling spark session
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder.appName('Project 2 test')
    .config('spark.sql.repl.eagerEval.enabled', True) 
    .config('spark.sql.parquet.cacheMetadata', 'true')
    .config("spark.sql.session.timeZone", "Etc/UTC")
    .config("spark.executor.memory", "8g")
    .config("spark.driver.memory", "8g")
    .getOrCreate()
)

#### Merchant Dataset:

In [2]:
# read merchants data
merchants = spark.read.parquet('../data/tables/tbl_merchants.parquet')
merchants.limit(5)

name,tags,merchant_abn
Felis Limited,"((furniture, home...",10023283211
Arcu Ac Orci Corp...,"([cable, satellit...",10142254217
Nunc Sed Company,"([jewelry, watch,...",10165489824
Ultricies Digniss...,"([wAtch, clock, a...",10187291046
Enim Condimentum PC,([music shops - m...,10192359162


In [3]:
# In the coloumn "tags", tags, renueve level and "take_rate" are seperated by either "), (" or "], [", replace these by ### for splitting
from pyspark.sql import functions as F
merchants = merchants.withColumn("tags",F.regexp_replace(F.regexp_replace(
    F.col("tags"),"\){1},{1}\s?\(","###"),"\]{1},{1}\s?\[","###"))


In [4]:
# check if correctly replaced
merchants.select("tags").show(10, False)

+---------------------------------------------------------------------------------------------------------------+
|tags                                                                                                           |
+---------------------------------------------------------------------------------------------------------------+
|((furniture, home furnishings and equipment shops, and manufacturers, except appliances###e###take rate: 0.18))|
|([cable, satellite, and otHer pay television and radio services###b###take rate: 4.22])                        |
|([jewelry, watch, clock, and silverware shops###b###take rate: 4.40])                                          |
|([wAtch, clock, and jewelry repair shops###b###take rate: 3.29])                                               |
|([music shops - musical instruments, pianos, and sheet music###a###take rate: 6.33])                           |
|[(gift, card, novelty, and souvenir shops###a###take rate: 6.34)]                      

In [5]:
# split tags data to three columns, denoted as business_area, revenue_level and take_rate
split_merchants = merchants.withColumn("business_area", F.split(F.col("tags"), "###").getItem(0))\
                           .withColumn("revenue_level", F.split(F.col("tags"), "###").getItem(1))\
                           .withColumn("take_rate", F.split(F.col("tags"), "###").getItem(2))

In [6]:
split_merchants.limit(5)

name,tags,merchant_abn,business_area,revenue_level,take_rate
Felis Limited,"((furniture, home...",10023283211,"((furniture, home...",e,take rate: 0.18))
Arcu Ac Orci Corp...,"([cable, satellit...",10142254217,"([cable, satellit...",b,take rate: 4.22])
Nunc Sed Company,"([jewelry, watch,...",10165489824,"([jewelry, watch,...",b,take rate: 4.40])
Ultricies Digniss...,"([wAtch, clock, a...",10187291046,"([wAtch, clock, a...",b,take rate: 3.29])
Enim Condimentum PC,([music shops - m...,10192359162,([music shops - m...,a,take rate: 6.33])


In [7]:
# remove remaining ()[] punctuation in column business_area and take_rate

# obtain take rate in numeric form from take_rate column, 
# in the form of take rate: d.dd, therefore item 1 is the numeric value

# convert string in business_area to lower form

# convert string in revenue_level to lower form, just in case if there's a typo

curated_merchant = split_merchants\
            .withColumn("business_area", F.regexp_replace("business_area", "[\[\]\(\)]", ""))\
            .withColumn("take_rate", F.regexp_replace("take_rate", "[\[\]\(\)]", ""))\
            .withColumn("take_rate", F.split(F.col("take_rate"), ":").getItem(1))\
            .withColumn("business_area", F.lower(F.col('business_area')))\
            .withColumn("revenue_level", F.lower(F.col('revenue_level')))


final_merchant = curated_merchant.drop('tags')

In [8]:
final_merchant

name,merchant_abn,business_area,revenue_level,take_rate
Felis Limited,10023283211,"furniture, home f...",e,0.18
Arcu Ac Orci Corp...,10142254217,"cable, satellite,...",b,4.22
Nunc Sed Company,10165489824,"jewelry, watch, c...",b,4.4
Ultricies Digniss...,10187291046,"watch, clock, and...",b,3.29
Enim Condimentum PC,10192359162,music shops - mus...,a,6.33
Fusce Company,10206519221,"gift, card, novel...",a,6.34
Aliquam Enim Inco...,10255988167,"computers, comput...",b,4.32
Ipsum Primis Ltd,10264435225,"watch, clock, and...",c,2.39
Pede Ultrices Ind...,10279061213,computer programm...,a,5.71
Nunc Inc.,10323485998,"furniture, home f...",a,6.61


In [9]:
# check if revenue_level match levels (5 types) and count for the number of merchants in each level
final_merchant.groupby('revenue_level').count()


revenue_level,count
e,53
d,98
c,922
b,1351
a,1602


In [58]:
final_merchant.write.mode('overwrite').parquet("../data/curated/merchant.parquet") 

In [11]:
# check for different business area count 
final_merchant.groupby('business_area').count().sort("business_area")


business_area,count
antique shops - ...,3
antique shops - s...,2
antique shops - s...,124
art dealers and ...,1
art dealers and g...,111
artist supply an...,3
artist supply and...,2
artist supply and...,1
artist supply and...,187
bicycle shops - ...,1


Noticed that there are several entries which have the same beginning of business area, to check if they are indeed the same, we convert the "business_area" as list and print them as follows

In [12]:
final_merchant.groupby('business_area').count().sort("business_area").select("business_area").collect()

[Row(business_area='antique shops -  sales, repairs, and restoration services'),
 Row(business_area='antique shops - sales,  repairs, and restoration services'),
 Row(business_area='antique shops - sales, repairs, and restoration services'),
 Row(business_area='art dealers and  galleries'),
 Row(business_area='art dealers and galleries'),
 Row(business_area='artist supply  and craft shops'),
 Row(business_area='artist supply and  craft shops'),
 Row(business_area='artist supply and craft  shops'),
 Row(business_area='artist supply and craft shops'),
 Row(business_area='bicycle  shops - sales and service'),
 Row(business_area='bicycle shops  - sales and service'),
 Row(business_area='bicycle shops -  sales and service'),
 Row(business_area='bicycle shops - sales and  service'),
 Row(business_area='bicycle shops - sales and service'),
 Row(business_area='books,  periodicals, and newspapers'),
 Row(business_area='books, periodicals,  and newspapers'),
 Row(business_area='books, periodical

In [13]:
# The concepts are indeed the same but with some extra spaces. so remove extra spaces in "business_area"
final_merchant = final_merchant.withColumn("business_area", F.regexp_replace("business_area", "\s+", " "))

In [14]:
# check for different business area count again
final_merchant.groupby('business_area').count().sort("business_area")

business_area,count
antique shops - s...,129
art dealers and g...,112
artist supply and...,193
bicycle shops - s...,170
"books, periodical...",164
"cable, satellite,...",175
computer programm...,191
"computers, comput...",181
digital goods: bo...,195
"equipment, tool, ...",134


In [15]:
final_merchant.groupby('business_area').count().select("business_area").collect()

[Row(business_area='opticians, optical goods, and eyeglasses'),
 Row(business_area='watch, clock, and jewelry repair shops'),
 Row(business_area='computer programming , data processing, and integrated systems design services'),
 Row(business_area='digital goods: books, movies, music'),
 Row(business_area='books, periodicals, and newspapers'),
 Row(business_area='florists supplies, nursery stock, and flowers'),
 Row(business_area='art dealers and galleries'),
 Row(business_area='antique shops - sales, repairs, and restoration services'),
 Row(business_area='gift, card, novelty, and souvenir shops'),
 Row(business_area='equipment, tool, furniture, and appliance rent al and leasing'),
 Row(business_area='cable, satellite, and other pay television and radio services'),
 Row(business_area='tent and awning shops'),
 Row(business_area='artist supply and craft shops'),
 Row(business_area='stationery, office supplies and printing and writing paper'),
 Row(business_area='furniture, home furnishi

Clothing and shoe
Electronics
Games & toys
Jewellery and accessories
Beauty & cosmetics 
Household Appliances 
Furniture and homewares

#### Consumer Dataset:

In [16]:
consumer = spark.read.parquet('../data/tables/consumer_user_details.parquet')
consumer

user_id,consumer_id
1,1195503
2,179208
3,1194530
4,154128
5,712975
6,407340
7,511685
8,448088
9,650435
10,1058499


In [17]:
# count the number of consumer
consumer.count()

499999

In [18]:
consumer_detail = spark.read.option('sep', "|").csv('../data/tables/tbl_consumer.csv', 
                                                    header = True)
consumer_detail

name,address,state,postcode,gender,consumer_id
Yolanda Williams,413 Haney Gardens...,WA,6935,Female,1195503
Mary Smith,3764 Amber Oval,NSW,2782,Female,179208
Jill Jones MD,40693 Henry Greens,NT,862,Female,1194530
Lindsay Jimenez,00653 Davenport C...,NSW,2780,Female,154128
Rebecca Blanchard,9271 Michael Mano...,WA,6355,Female,712975
Karen Chapman,2706 Stewart Oval...,NSW,2033,Female,407340
Andrea Jones,122 Brandon Cliff,QLD,4606,Female,511685
Stephen Williams,6804 Wright Crest...,WA,6056,Male,448088
Stephanie Reyes,5813 Denise Land ...,NSW,2482,Female,650435
Jillian Gonzales,461 Ryan Common S...,VIC,3220,Female,1058499


In [19]:
consumer_detail.count()

499999

In [20]:
# add the consumer details onto consumer dataframe
consumer = consumer.join(consumer_detail,on="consumer_id")

In [21]:
consumer.show(15)

+-----------+-------+-----------------+--------------------+-----+--------+-----------+
|consumer_id|user_id|             name|             address|state|postcode|     gender|
+-----------+-------+-----------------+--------------------+-----+--------+-----------+
|    1195503|      1| Yolanda Williams|413 Haney Gardens...|   WA|    6935|     Female|
|     179208|      2|       Mary Smith|     3764 Amber Oval|  NSW|    2782|     Female|
|    1194530|      3|    Jill Jones MD|  40693 Henry Greens|   NT|     862|     Female|
|     154128|      4|  Lindsay Jimenez|00653 Davenport C...|  NSW|    2780|     Female|
|     712975|      5|Rebecca Blanchard|9271 Michael Mano...|   WA|    6355|     Female|
|     407340|      6|    Karen Chapman|2706 Stewart Oval...|  NSW|    2033|     Female|
|     511685|      7|     Andrea Jones|   122 Brandon Cliff|  QLD|    4606|     Female|
|     448088|      8| Stephen Williams|6804 Wright Crest...|   WA|    6056|       Male|
|     650435|      9|  Stephanie

In [20]:
# check with "name"
consumer.select("name").collect()

[Row(name='Yolanda Williams'),
 Row(name='Mary Smith'),
 Row(name='Jill Jones MD'),
 Row(name='Lindsay Jimenez'),
 Row(name='Rebecca Blanchard'),
 Row(name='Karen Chapman'),
 Row(name='Andrea Jones'),
 Row(name='Stephen Williams'),
 Row(name='Stephanie Reyes'),
 Row(name='Jillian Gonzales'),
 Row(name='Eugene Lucas'),
 Row(name='Melissa Jones'),
 Row(name='Angela Brown PhD'),
 Row(name='Lance Butler'),
 Row(name='Paul Abbott'),
 Row(name='Tracy Hart'),
 Row(name='Alyssa Wilson'),
 Row(name='Michael Burnett'),
 Row(name='Victoria Gonzalez'),
 Row(name='James Norris'),
 Row(name='Wendy Singh'),
 Row(name='Christopher Whitehead'),
 Row(name='Cody Scott'),
 Row(name='Jeffrey Moreno'),
 Row(name='Christopher Jordan'),
 Row(name='Kristi Adkins'),
 Row(name='Jason Alvarado'),
 Row(name='Jamie Francis DVM'),
 Row(name='Gregory Harris'),
 Row(name='Jason Brown'),
 Row(name='Stephanie Johnston'),
 Row(name='Karen Stewart'),
 Row(name='Dawn Decker'),
 Row(name='Dakota Kelley'),
 Row(name='Laurie 

By observing the "name" column, some names has title used before names, including "Mrs." "Dr." "Mr." "Miss ", some has the academic abbreviation used after names, including "MD"(Doctor of Medicine), "PhD"(Doctor of Philosophy), "DVM"(Doctor of Veterinary Medicine) and "DDS"(Doctor of Dental Surgery), some has both title before and academic abbreviation after and some does not have any. Since the title and the academic abbreviation might be useful information in predicting, we extract them by the following steps.

In [21]:
consumer.withColumn("name_list", F.split(F.col("name"), " ")).show()

+-----------+-------+-----------------+--------------------+-----+--------+-----------+--------------------+
|consumer_id|user_id|             name|             address|state|postcode|     gender|           name_list|
+-----------+-------+-----------------+--------------------+-----+--------+-----------+--------------------+
|    1195503|      1| Yolanda Williams|413 Haney Gardens...|   WA|    6935|     Female| [Yolanda, Williams]|
|     179208|      2|       Mary Smith|     3764 Amber Oval|  NSW|    2782|     Female|       [Mary, Smith]|
|    1194530|      3|    Jill Jones MD|  40693 Henry Greens|   NT|     862|     Female|   [Jill, Jones, MD]|
|     154128|      4|  Lindsay Jimenez|00653 Davenport C...|  NSW|    2780|     Female|  [Lindsay, Jimenez]|
|     712975|      5|Rebecca Blanchard|9271 Michael Mano...|   WA|    6355|     Female|[Rebecca, Blanchard]|
|     407340|      6|    Karen Chapman|2706 Stewart Oval...|  NSW|    2033|     Female|    [Karen, Chapman]|
|     511685|      

In [22]:
# check with "address"
consumer.select("address").collect()

[Row(address='413 Haney Gardens Apt. 742'),
 Row(address='3764 Amber Oval'),
 Row(address='40693 Henry Greens'),
 Row(address='00653 Davenport Crossroad'),
 Row(address='9271 Michael Manors Suite 651'),
 Row(address='2706 Stewart Oval Suite 588'),
 Row(address='122 Brandon Cliff'),
 Row(address='6804 Wright Crest Suite 311'),
 Row(address='5813 Denise Land Suite 690'),
 Row(address='461 Ryan Common Suite 734'),
 Row(address='33983 Kevin Drive Suite 628'),
 Row(address='13706 Kimberly Port'),
 Row(address='0236 Mills Land Suite 203'),
 Row(address='8943 Kenneth Camp'),
 Row(address='60495 Ryan Hill'),
 Row(address='9671 Jacob Harbors Suite 431'),
 Row(address='44353 Nathan Ridge'),
 Row(address='89400 Torres Fort'),
 Row(address='68657 Johnson Glen Suite 266'),
 Row(address='790 Ramos Landing'),
 Row(address='903 Holder Freeway Apt. 374'),
 Row(address='04814 Erin Port Apt. 649'),
 Row(address='513 Davis Parks'),
 Row(address='339 Philip Pike'),
 Row(address='18420 Haas Crossroad Apt. 5

In [22]:
# read transaction data
transaction_210228 = spark.read.parquet("../data/tables/transactions_20210228_20210827_snapshot")
transaction_210828 = spark.read.parquet("../data/tables/transactions_20210828_20220227_snapshot")
transaction = transaction_210228.union(transaction_210828)

In [23]:
transaction.count()

8151372

In [24]:
transaction.limit(10)

user_id,merchant_abn,dollar_value,order_id,order_datetime
18478,62191208634,63.255848959735246,949a63c8-29f7-4ab...,2021-08-20
2,15549624934,130.3505283105634,6a84c3cf-612a-457...,2021-08-20
18479,64403598239,120.15860593212784,b10dcc33-e53f-425...,2021-08-20
3,60956456424,136.6785200286976,0f09c5a5-784e-447...,2021-08-20
18479,94493496784,72.96316578355305,f6c78c1a-4600-4c5...,2021-08-20
3,76819856970,448.529684285612,5ace6a24-cdf0-4aa...,2021-08-20
18479,67609108741,86.4040605836911,d0e180f0-cb06-42a...,2021-08-20
3,34096466752,301.5793450525113,6fb1ff48-24bb-4f9...,2021-08-20
18482,70501974849,68.75486276223054,8505fb33-b69a-412...,2021-08-20
4,49891706470,48.89796461900801,ed11e477-b09f-4ae...,2021-08-20


In [26]:
transaction.filter(F.col("order_datetime").isNull())

user_id,merchant_abn,dollar_value,order_id,order_datetime


In [53]:
# merge all three dataset

consumer_transaction = consumer.join(transaction,['user_id'],how='inner')
full_transaction_dataset = consumer_transaction.join(final_merchant,['merchant_abn'],how='inner')

In [54]:
full_transaction_dataset


merchant_abn,user_id,consumer_id,name,address,state,postcode,gender,dollar_value,order_id,order_datetime,name.1,business_area,revenue_level,take_rate
33064796871,7,511685,Andrea Jones,122 Brandon Cliff,QLD,4606,Female,373.0873675184212,fe188788-b89f-4dd...,2021-08-20,Curabitur Massa C...,computer programm...,b,3.75
68435002949,7,511685,Andrea Jones,122 Brandon Cliff,QLD,4606,Female,232.5364986739752,b4a89891-a113-45e...,2021-08-20,Aliquam Eu Inc.,artist supply and...,a,6.65
41944909975,7,511685,Andrea Jones,122 Brandon Cliff,QLD,4606,Female,30.91075523023432,302ae628-8eba-4a5...,2021-08-20,Et Nunc Consulting,"books, periodical...",e,0.16
21439773999,7,511685,Andrea Jones,122 Brandon Cliff,QLD,4606,Female,91.18655746114226,4524fdc9-73f0-477...,2021-08-21,Mauris Non Institute,"cable, satellite,...",a,6.1
86662713230,7,511685,Andrea Jones,122 Brandon Cliff,QLD,4606,Female,38.8137172956379,28f9e0f3-858d-445...,2021-08-19,Vestibulum Accums...,"watch, clock, and...",a,6.41
61447419161,7,511685,Andrea Jones,122 Brandon Cliff,QLD,4606,Female,17.881767779834615,7745378a-3135-401...,2021-08-22,Mollis Dui In LLP,"gift, card, novel...",c,2.06
60956456424,7,511685,Andrea Jones,122 Brandon Cliff,QLD,4606,Female,70.03366022795622,931a2090-8e4a-453...,2021-07-15,Ultricies Digniss...,"gift, card, novel...",b,4.69
21439773999,7,511685,Andrea Jones,122 Brandon Cliff,QLD,4606,Female,106.06535881800544,0f6c8ab1-263d-47e...,2021-08-27,Mauris Non Institute,"cable, satellite,...",a,6.1
80518954462,7,511685,Andrea Jones,122 Brandon Cliff,QLD,4606,Female,229.2696185902316,41b219a2-51e4-454...,2021-08-27,Neque Sed Dictum ...,"computers, comput...",b,3.49
68559320474,7,511685,Andrea Jones,122 Brandon Cliff,QLD,4606,Female,105.23501290036468,468265b3-4ceb-458...,2021-08-13,Aliquam Auctor As...,antique shops - s...,b,4.2


In [33]:
# count the total transactions for each merchant
merchant_count = transaction.groupBy(F.col('merchant_abn')).count()

In [34]:
merchant_count.orderBy(F.col('count').asc())

merchant_abn,count
27508358370,1
80426072728,1
24212300522,1
39896168660,1
53852972654,1
39150153670,1
49369565194,1
18250330194,1
20884516643,1
73439079670,1


In [30]:
# check with mechant 64143552561 as it has th highest average cost 
final_merchant[final_merchant.merchant_abn=='64143552561'].collect()

[Row(name='Fusce Feugiat Associates', merchant_abn=64143552561, business_area='jewelry, watch, clock, and silverware shops', revenue_level='a', take_rate=' 5.88')]

In [31]:
# check transaction counts for this business
merchant_count[merchant_count.merchant_abn=='64143552561'].collect()

[Row(merchant_abn=64143552561, count=1)]

In [32]:
transaction[transaction.merchant_abn=='64143552561'].collect()

[Row(user_id=18438, merchant_abn=64143552561, dollar_value=49956.04011941749, order_id='dcc299e7-e991-4944-801a-4858959207d2', order_datetime=datetime.date(2021, 5, 17))]

In [36]:
# read postcode dataset
postcode = spark.read.csv('../data/2021Census_G02_AUST_POA.csv', header = True)
postcode

POA_CODE_2021,Median_age_persons,Median_mortgage_repay_monthly,Median_tot_prsnl_inc_weekly,Median_rent_weekly,Median_tot_fam_inc_weekly,Average_num_psns_per_bedroom,Median_tot_hhd_inc_weekly,Average_household_size
POA2000,32,2800,941,625,2367,1.3,2225,2.1
POA2007,30,2500,772,500,2197,1.2,1805,2.1
POA2008,28,2600,860,525,2453,1.2,1746,1.9
POA2009,37,2800,1297,580,3035,1.1,2422,2.1
POA2010,36,2900,1479,550,3709,1.1,2297,1.7
POA2011,39,2500,1534,490,3565,1.0,2041,1.5
POA2015,34,2770,1582,558,3514,1.1,2607,2.1
POA2016,36,2811,1191,500,3263,1.0,2145,1.9
POA2017,32,2600,1216,575,2684,1.1,2104,1.9
POA2018,34,2500,973,520,2407,1.0,2017,2.3


In [48]:
postcode = postcode.withColumn("postcode", F.regexp_replace("POA_CODE_2021", "POA", ""))

In [56]:
# join postcode with the full dataset
full_dataset = full_transaction_dataset.join(postcode, on = "postcode")
full_dataset.limit(5)

postcode,merchant_abn,user_id,consumer_id,name,address,state,gender,dollar_value,order_id,order_datetime,name.1,business_area,revenue_level,take_rate,POA_CODE_2021,Median_age_persons,Median_mortgage_repay_monthly,Median_tot_prsnl_inc_weekly,Median_rent_weekly,Median_tot_fam_inc_weekly,Average_num_psns_per_bedroom,Median_tot_hhd_inc_weekly,Average_household_size
4606,33064796871,7,511685,Andrea Jones,122 Brandon Cliff,QLD,Female,373.0873675184212,fe188788-b89f-4dd...,2021-08-20,Curabitur Massa C...,computer programm...,b,3.75,POA4606,52,1073,536,220,1343,0.7,1003,2.2
4606,68435002949,7,511685,Andrea Jones,122 Brandon Cliff,QLD,Female,232.5364986739752,b4a89891-a113-45e...,2021-08-20,Aliquam Eu Inc.,artist supply and...,a,6.65,POA4606,52,1073,536,220,1343,0.7,1003,2.2
4606,41944909975,7,511685,Andrea Jones,122 Brandon Cliff,QLD,Female,30.91075523023432,302ae628-8eba-4a5...,2021-08-20,Et Nunc Consulting,"books, periodical...",e,0.16,POA4606,52,1073,536,220,1343,0.7,1003,2.2
4606,21439773999,7,511685,Andrea Jones,122 Brandon Cliff,QLD,Female,91.18655746114226,4524fdc9-73f0-477...,2021-08-21,Mauris Non Institute,"cable, satellite,...",a,6.1,POA4606,52,1073,536,220,1343,0.7,1003,2.2
4606,86662713230,7,511685,Andrea Jones,122 Brandon Cliff,QLD,Female,38.8137172956379,28f9e0f3-858d-445...,2021-08-19,Vestibulum Accums...,"watch, clock, and...",a,6.41,POA4606,52,1073,536,220,1343,0.7,1003,2.2


In [57]:
full_dataset.drop("POA_CODE_2021", "name", "address")

postcode,merchant_abn,user_id,consumer_id,state,gender,dollar_value,order_id,order_datetime,business_area,revenue_level,take_rate,Median_age_persons,Median_mortgage_repay_monthly,Median_tot_prsnl_inc_weekly,Median_rent_weekly,Median_tot_fam_inc_weekly,Average_num_psns_per_bedroom,Median_tot_hhd_inc_weekly,Average_household_size
4606,33064796871,7,511685,QLD,Female,373.0873675184212,fe188788-b89f-4dd...,2021-08-20,computer programm...,b,3.75,52,1073,536,220,1343,0.7,1003,2.2
4606,68435002949,7,511685,QLD,Female,232.5364986739752,b4a89891-a113-45e...,2021-08-20,artist supply and...,a,6.65,52,1073,536,220,1343,0.7,1003,2.2
4606,41944909975,7,511685,QLD,Female,30.91075523023432,302ae628-8eba-4a5...,2021-08-20,"books, periodical...",e,0.16,52,1073,536,220,1343,0.7,1003,2.2
4606,21439773999,7,511685,QLD,Female,91.18655746114226,4524fdc9-73f0-477...,2021-08-21,"cable, satellite,...",a,6.1,52,1073,536,220,1343,0.7,1003,2.2
4606,86662713230,7,511685,QLD,Female,38.8137172956379,28f9e0f3-858d-445...,2021-08-19,"watch, clock, and...",a,6.41,52,1073,536,220,1343,0.7,1003,2.2
4606,61447419161,7,511685,QLD,Female,17.881767779834615,7745378a-3135-401...,2021-08-22,"gift, card, novel...",c,2.06,52,1073,536,220,1343,0.7,1003,2.2
4606,60956456424,7,511685,QLD,Female,70.03366022795622,931a2090-8e4a-453...,2021-07-15,"gift, card, novel...",b,4.69,52,1073,536,220,1343,0.7,1003,2.2
4606,21439773999,7,511685,QLD,Female,106.06535881800544,0f6c8ab1-263d-47e...,2021-08-27,"cable, satellite,...",a,6.1,52,1073,536,220,1343,0.7,1003,2.2
4606,80518954462,7,511685,QLD,Female,229.2696185902316,41b219a2-51e4-454...,2021-08-27,"computers, comput...",b,3.49,52,1073,536,220,1343,0.7,1003,2.2
4606,68559320474,7,511685,QLD,Female,105.23501290036468,468265b3-4ceb-458...,2021-08-13,antique shops - s...,b,4.2,52,1073,536,220,1343,0.7,1003,2.2
