# Retail Data Wrangling and Analytics with PySpark
We will be implementing the data analytics process using PySpark and the retail data, which has been loaded already through the csv file 'retail.csv'. It now sits on a DBFS table.  
For plotting and data visualization we will be using Docker's display diagrams.

In [0]:
# Let's load the data into a dataframe

retail_df = spark.read.table('hive_metastore.default.retail')
display(retail_df.limit(20))

invoice_no,stock_code,description,quantity,invoice_date,unit_price,customer_id,country
558461,21932,SCANDINAVIAN PAISLEY PICNIC BAG,3,2011-06-29T15:03:00Z,1.65,13263.0,United Kingdom
558475,21530,DAIRY MAID TOASTRACK,1,2011-06-29T15:58:00Z,3.29,,United Kingdom
558461,21933,PINK VINTAGE PAISLEY PICNIC BAG,3,2011-06-29T15:03:00Z,1.65,13263.0,United Kingdom
558462,21533,RETROSPOT LARGE MILK JUG,2,2011-06-29T15:11:00Z,4.95,13982.0,United Kingdom
558462,20724,RED RETROSPOT CHARLOTTE BAG,20,2011-06-29T15:11:00Z,0.85,13982.0,United Kingdom
558462,20718,RED RETROSPOT SHOPPER BAG,10,2011-06-29T15:11:00Z,1.25,13982.0,United Kingdom
558462,85099B,JUMBO BAG RED RETROSPOT,10,2011-06-29T15:11:00Z,2.08,13982.0,United Kingdom
558462,82482,WOODEN PICTURE FRAME WHITE FINISH,6,2011-06-29T15:11:00Z,2.55,13982.0,United Kingdom
558462,21531,RED RETROSPOT SUGAR JAM BOWL,4,2011-06-29T15:11:00Z,2.55,13982.0,United Kingdom
558462,21535,RED RETROSPOT SMALL MILK JUG,4,2011-06-29T15:11:00Z,2.55,13982.0,United Kingdom


In [0]:
# Let's inspect the data
# Notice which cols have negative values or nulls
from pyspark.sql.functions import col

print("Null values for customer_id column: ")
print(retail_df.filter(
  col('customer_id').isNull()
).count())

display(retail_df.describe())

Null values for customer_id column: 
243007


summary,invoice_no,stock_code,description,quantity,unit_price,customer_id,country
count,1067371,1067371,1062989,1067371.0,1067371.0,824364.0,1067371
mean,537608.1499316233,28350.201592689715,21848.25,9.9388984711033,4.6493877274214,15324.63850435002,
stddev,26662.450446904888,17968.479697262945,922.9197780233488,172.7057940767504,123.55305872146296,1697.4644503793106,
min,489434,10002,DOORMAT UNION JACK GUNS AND ROSES,-80995.0,-53594.36,12346.0,Australia
max,C581569,m,wrongly sold sets,80995.0,38970.0,18287.0,West Indies


## Total Invoice amount Distributions
Let's find the distribution of total invoice amount for the entire dataset after and before outliers are removed.

### Before Outliers were removed

In [0]:
# Add transaction total column
retail_df = retail_df.withColumn('transaction_total', retail_df.quantity * retail_df.unit_price)
display(retail_df.limit(50))

invoice_no,stock_code,description,quantity,invoice_date,unit_price,customer_id,country,transaction_total
558461,21932,SCANDINAVIAN PAISLEY PICNIC BAG,3,2011-06-29T15:03:00Z,1.65,13263.0,United Kingdom,4.949999999999999
558475,21530,DAIRY MAID TOASTRACK,1,2011-06-29T15:58:00Z,3.29,,United Kingdom,3.29
558461,21933,PINK VINTAGE PAISLEY PICNIC BAG,3,2011-06-29T15:03:00Z,1.65,13263.0,United Kingdom,4.949999999999999
558462,21533,RETROSPOT LARGE MILK JUG,2,2011-06-29T15:11:00Z,4.95,13982.0,United Kingdom,9.9
558462,20724,RED RETROSPOT CHARLOTTE BAG,20,2011-06-29T15:11:00Z,0.85,13982.0,United Kingdom,17.0
558462,20718,RED RETROSPOT SHOPPER BAG,10,2011-06-29T15:11:00Z,1.25,13982.0,United Kingdom,12.5
558462,85099B,JUMBO BAG RED RETROSPOT,10,2011-06-29T15:11:00Z,2.08,13982.0,United Kingdom,20.8
558462,82482,WOODEN PICTURE FRAME WHITE FINISH,6,2011-06-29T15:11:00Z,2.55,13982.0,United Kingdom,15.3
558462,21531,RED RETROSPOT SUGAR JAM BOWL,4,2011-06-29T15:11:00Z,2.55,13982.0,United Kingdom,10.2
558462,21535,RED RETROSPOT SMALL MILK JUG,4,2011-06-29T15:11:00Z,2.55,13982.0,United Kingdom,10.2


In [0]:
# Remove rows where transaction total was negative
retail_df_invoice_amount = retail_df.filter(retail_df.transaction_total > 0)

# Remove special case invoices with letters - these are special cases like cancellations
retail_df_invoice_amount = retail_df_invoice_amount.filter(~retail_df_invoice_amount.invoice_no.rlike('[a-zA-Z]'))

display(retail_df_invoice_amount.limit(50))


invoice_no,stock_code,description,quantity,invoice_date,unit_price,customer_id,country,transaction_total
489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01T07:45:00Z,6.95,13085,United Kingdom,83.4
489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01T07:45:00Z,6.75,13085,United Kingdom,81.0
489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01T07:45:00Z,6.75,13085,United Kingdom,81.0
489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01T07:45:00Z,2.1,13085,United Kingdom,100.8
489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01T07:45:00Z,1.25,13085,United Kingdom,30.0
489434,22064,PINK DOUGHNUT TRINKET POT,24,2009-12-01T07:45:00Z,1.65,13085,United Kingdom,39.6
489434,21871,SAVE THE PLANET MUG,24,2009-12-01T07:45:00Z,1.25,13085,United Kingdom,30.0
489434,21523,FANCY FONT HOME SWEET HOME DOORMAT,10,2009-12-01T07:45:00Z,5.95,13085,United Kingdom,59.5
489435,22350,CAT BOWL,12,2009-12-01T07:46:00Z,2.55,13085,United Kingdom,30.6
489435,22349,"DOG BOWL , CHASING BALL DESIGN",12,2009-12-01T07:46:00Z,3.75,13085,United Kingdom,45.0


In [0]:
# Find the median, mean, min, and max of invoice amount as well as show a distribution of the data
# Note that invoice amount is the total of all transactions for that invoice

from pyspark.sql.functions import col

invoice_values = retail_df_invoice_amount.select('invoice_no', 'transaction_total').groupBy('invoice_no').sum('transaction_total')
invoice_values = invoice_values.select(
  col('sum(transaction_total)').alias('invoice_values')
)

display(invoice_values)

invoice_values
192.0
303.2
155.05999999999997
118.75
275.95
6711.0
1335.92
2507.06
48.96
199.3


Databricks data profile. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

We used Databrick's built-in visualization tools to show the distribution of invoice totals as well as the mean, median, min and max values. Important to note that the data in the boxplot, which also shows distribution of the values, was truncated to just 10K rows for performance reasons so it is a close estimation. The bar graph in the Data Profile is a more accurate representation.

### After outliers were removed

In [0]:
# Remove null values
invoice_values = invoice_values.dropna()
print(invoice_values.filter(invoice_values.invoice_values.isNull()).count())

# Find first 85 quantiles of the data
quantile = invoice_values.approxQuantile('invoice_values', [0.85], 0.0)[0]
invoice_values_85 = invoice_values.filter(col('invoice_values') <= quantile)

# Display data profile and graph as before
display(invoice_values_85)

0


invoice_values
192.0
303.2
155.05999999999997
118.75
275.95
48.96
199.3
188.83
291.14000000000004
312.5899999999999


Databricks data profile. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

## Monthly Place and Cancelled Orders

Important notes:
- Cancelled orders have a 'C' in their invoice number
- Assume cancelled orders and normal orders occured on same day
- Formula for number of placed orders is total_orders - 2*cancelled_orders

In [0]:
# Create yearmonth column
from pyspark.sql.functions import col, date_format

retail_df = retail_df.withColumn("year_month", date_format("invoice_date", "yyyyMM"))
display(retail_df.orderBy('year_month').limit(10))

invoice_no,stock_code,description,quantity,invoice_date,unit_price,customer_id,country,transaction_total,year_month
489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01T07:45:00Z,6.75,13085,United Kingdom,81.0,200912
489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01T07:45:00Z,1.25,13085,United Kingdom,30.0,200912
489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01T07:45:00Z,6.95,13085,United Kingdom,83.4,200912
489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01T07:45:00Z,2.1,13085,United Kingdom,100.8,200912
489434,22064,PINK DOUGHNUT TRINKET POT,24,2009-12-01T07:45:00Z,1.65,13085,United Kingdom,39.6,200912
489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01T07:45:00Z,6.75,13085,United Kingdom,81.0,200912
489435,22350,CAT BOWL,12,2009-12-01T07:46:00Z,2.55,13085,United Kingdom,30.6,200912
489434,21523,FANCY FONT HOME SWEET HOME DOORMAT,10,2009-12-01T07:45:00Z,5.95,13085,United Kingdom,59.5,200912
489434,21871,SAVE THE PLANET MUG,24,2009-12-01T07:45:00Z,1.25,13085,United Kingdom,30.0,200912
489435,22349,"DOG BOWL , CHASING BALL DESIGN",12,2009-12-01T07:46:00Z,3.75,13085,United Kingdom,45.0,200912


In [0]:
# Add column to tell us which rows are cancelled
from pyspark.sql.functions import col, when
from pyspark.sql.types import IntegerType

retail_df_monthly_orders = retail_df.dropDuplicates(['invoice_no']).withColumn(
  'cancelled_order',
  when(col('invoice_no').startswith('C'), 1).otherwise(0).cast(IntegerType())
)

display(retail_df_monthly_orders.limit(5))



invoice_no,stock_code,description,quantity,invoice_date,unit_price,customer_id,country,transaction_total,year_month,cancelled_order
489677,21341,KASBAH LANTERN WITH GLASS,24,2009-12-02T09:50:00Z,4.25,14000,United Kingdom,102.0,200912,0
C491017,85044,RED REINDEER STRING OF 20 LIGHTS,-1,2009-12-08T18:38:00Z,4.95,13643,United Kingdom,-4.95,200912,1
491045,71477,COLOUR GLASS. STAR T-LIGHT HOLDER,48,2009-12-09T10:02:00Z,2.75,12747,United Kingdom,132.0,200912,0
491658,85166A,WHITE METAL BAROQUE PHOTO FRAME,1,2009-12-13T11:05:00Z,3.75,16442,United Kingdom,3.75,200912,0
C491705,84482,BUTTONS DESIGN JEWELLERY BOX,-6,2009-12-13T14:09:00Z,3.75,17802,United Kingdom,-22.5,200912,1


In [0]:
# Find number of cancelled orders and number of placed orders per month
retail_df_all_orders = retail_df_monthly_orders.select('invoice_no', 'year_month').groupBy('year_month').count()
retail_df_cancelled_orders = retail_df_monthly_orders.filter(col('cancelled_order') == 1).groupBy('year_month').count().withColumnRenamed('count', 'cancelled_count')

retail_df_monthly_placed_orders = retail_df_all_orders.join(retail_df_cancelled_orders, on='year_month', how='inner').withColumn('placed_count', col('count') - 2*col('cancelled_count'))

display(retail_df_monthly_placed_orders)

year_month,count,cancelled_count,placed_count
201103,1983,318,1347
201001,1633,300,1033
201002,1969,240,1489
201010,2965,476,2013
201009,2375,371,1633
201104,1744,240,1264
201106,2012,329,1354
201102,1393,219,955
201110,2637,362,1913
201008,1877,273,1331


Databricks visualization. Run in Databricks to view.

## Monthly Sales

In [0]:
retail_df_monthly_sales = retail_df.groupBy('year_month').sum('transaction_total').orderBy('year_month').withColumnRenamed('sum(transaction_total)', 'monthly_sales')

display(retail_df_monthly_sales)

year_month,monthly_sales
200912,799847.1100000143
201001,624032.8919999955
201002,533091.4260000042
201003,765848.7609999765
201004,590580.4319999823
201005,615322.8300000005
201006,679786.6099999842
201007,575236.359999999
201008,656776.3399999854
201009,853650.4309999745


Databricks visualization. Run in Databricks to view.

## Monthly Sales Growth

In [0]:
# Use a window function to shift year_month back by 1
from pyspark.sql.functions import col, lag
from pyspark.sql.window import Window

window_spec = Window.orderBy(col('year_month'))

retail_df_sales_growth = retail_df_monthly_sales.withColumn(
  'previous_month_sales', lag(col('monthly_sales')).over(window_spec)
)

retail_df_sales_growth = retail_df_sales_growth.withColumn(
  'sales_growth', col('monthly_sales') / col('previous_month_sales')
)

display(retail_df_sales_growth)




year_month,monthly_sales,previous_month_sales,sales_growth
200912,799847.1100000143,,
201001,624032.8919999955,799847.1100000143,0.7801902191032287
201002,533091.4260000042,624032.8919999955,0.8542681529037224
201003,765848.7609999765,533091.4260000042,1.4366180426994348
201004,590580.4319999823,765848.7609999765,0.7711449858962434
201005,615322.8300000005,590580.4319999823,1.0418950521544184
201006,679786.6099999842,615322.8300000005,1.104764160952688
201007,575236.359999999,679786.6099999842,0.8462013689855004
201008,656776.3399999854,575236.359999999,1.1417503928298038
201009,853650.4309999745,656776.3399999854,1.299758196222467


Databricks visualization. Run in Databricks to view.

## Monthly Active Customers

In [0]:
from pyspark.sql.functions import countDistinct

retail_df_uniq_customers = retail_df.select('customer_id', 'year_month').groupBy("year_month").agg(
  countDistinct('customer_id').alias('uniq_customers')
)

display(retail_df_uniq_customers.orderBy('year_month'))

year_month,uniq_customers
200912,1045
201001,786
201002,807
201003,1111
201004,998
201005,1062
201006,1095
201007,988
201008,964
201009,1202


Databricks visualization. Run in Databricks to view.

## New and Existing Customer
Our Approach:
- An existing customer is one that already made a purchase in a previous month
- A new customer is one that makes their first purchase in that month

In [0]:
# Find first purchase month of each user
from pyspark.sql.functions import min, col

retail_df_first_purchase_month = retail_df.select(
  col('customer_id'),
  col('year_month')
).groupBy('customer_id').agg(
  min('year_month').alias('first_purchase_month')
)

# Ensure continuity with customer count
print(retail_df.select('customer_id').distinct().count())

display(retail_df_first_purchase_month)

5943


customer_id,first_purchase_month
,200912
12346.0,200912
12347.0,201010
12348.0,201009
12349.0,200912
12350.0,201102
12351.0,201011
12352.0,201011
12353.0,201010
12354.0,201104


In [0]:
# Join to have original and first purchase dates

retail_df_first_purchase_month =  retail_df.select(
  col('customer_id'),
  col('year_month')
).join(
  retail_df_first_purchase_month,
  on='customer_id',
  how='left'
)

display(retail_df_first_purchase_month.limit(50))


customer_id,year_month,first_purchase_month
17757.0,201106,201001.0
13982.0,201106,201001.0
17389.0,201106,200912.0
13263.0,201106,201010.0
13418.0,201106,200912.0
16801.0,201106,201103.0
12951.0,201106,201001.0
17757.0,201106,201001.0
13982.0,201106,201001.0
13263.0,201106,201010.0


In [0]:
# Agg function to add true/false col for whether customer is new or existing
from pyspark.sql.functions import col

def is_new_customer(first_purchase_month, year_month):
  return first_purchase_month == year_month

retail_df_first_purchase_month = retail_df_first_purchase_month.withColumn(
  "is_new_customer",
  is_new_customer(col('first_purchase_month'), col('year_month'))
).withColumn(
  "is_existing_customer",
  ~col('is_new_customer')
)

# Clean Nulls
retail_df_first_purchase_month = retail_df_first_purchase_month.dropna()

display(retail_df_first_purchase_month)

customer_id,year_month,first_purchase_month,is_new_customer,is_existing_customer
13085,200912,200912,True,False
13085,200912,200912,True,False
13085,200912,200912,True,False
13085,200912,200912,True,False
13085,200912,200912,True,False
13085,200912,200912,True,False
13085,200912,200912,True,False
13085,200912,200912,True,False
13085,200912,200912,True,False
13085,200912,200912,True,False


In [0]:
# Aggregate to find total new and total existing customers per month
from pyspark.sql.functions import count, when, col

retail_df_monthly_customers = retail_df_first_purchase_month.groupBy('year_month').agg(
  countDistinct(when(col('is_new_customer') == True, col('customer_id'))).alias('new_customers'),
  countDistinct(when(col('is_existing_customer') == True, col('customer_id'))).alias('existing_customers')
)

display(retail_df_monthly_customers.orderBy('year_month'))

year_month,new_customers,existing_customers
200912,1045,0
201001,394,392
201002,363,444
201003,436,675
201004,291,707
201005,254,808
201006,269,826
201007,183,805
201008,158,806
201009,242,960


Databricks visualization. Run in Databricks to view.

## RFM Tables
We want to use an RFM table to analyse and segment our customer demographics.
Recency, Frequency and Monetary value per customer.

In [0]:
from pyspark.sql.functions import max, datediff, current_date, col, lit, sum

recency_helper = retail_df.agg(max(col('invoice_date'))).collect()[0][0]

# RFM caclculations
rfm_scores = retail_df.groupBy('customer_id').agg(
  datediff(lit(recency_helper), max(col('invoice_date'))).alias('recency'),
  countDistinct("invoice_no").alias("frequency"),
  sum("transaction_total").alias("monetary")
).orderBy('customer_id').dropna()

display(rfm_scores.limit(10))

customer_id,recency,frequency,monetary
12346,325,17,-64.67999999999071
12347,2,8,5633.32
12348,75,5,2019.4
12349,18,5,4404.539999999999
12350,310,1,334.40000000000003
12351,375,1,300.93
12352,36,13,1889.21
12353,204,2,406.75999999999993
12354,232,1,1079.4
12355,214,2,947.61


## RFM Segmentation
RFM segmentation categorizes your customers into different segments, according to their interactions with your business, which will allow you to subsequently approach these groups in the most effective way. RFM Segmentation divides customer data into 11 different categories.

We will use our already established RFM table to further create these categories.

In [0]:
from pyspark.sql.functions import when, col, lit

# Create our 5 segments - 20% in each segment
segments = [0.2, 0.4, 0.6, 0.8]
quantiles = rfm_scores.approxQuantile(["recency", "frequency", "monetary"], segments, 0)

# Assign RFM scores to the five segments of customers
rfm_scores = rfm_scores.withColumn(
  "R_Score",
  when(col("recency") < quantiles[0][0], lit(5))
    .when(col("recency") < quantiles[0][1], lit(4))
     .when(col("recency") < quantiles[0][2], lit(3))
      .when(col("recency") < quantiles[0][3], lit(2))
        .otherwise(lit(1))

).withColumn(
  "F_Score",
  when(col("frequency") < quantiles[1][0], lit(1))
   .when(col("frequency") < quantiles[1][1], lit(2))
    .when(col("frequency") < quantiles[1][2], lit(3))
      .when(col("frequency") < quantiles[1][3], lit(4))
        .otherwise(lit(5))
).withColumn(
  "M_Score",
  when(col("monetary") < quantiles[2][0], lit(1))
   .when(col("monetary") < quantiles[2][1], lit(2))
    .when(col("monetary") < quantiles[2][2], lit(3))
      .when(col("monetary") < quantiles[2][3], lit(4))
        .otherwise(lit(5))
)

display(rfm_scores.limit(10))

customer_id,recency,frequency,monetary,R_Score,F_Score,M_Score
12346,325,17,-64.67999999999071,2,5,1
12347,2,8,5633.32,5,4,5
12348,75,5,2019.4,3,4,4
12349,18,5,4404.539999999999,4,4,5
12350,310,1,334.40000000000003,2,2,2
12351,375,1,300.93,2,2,2
12352,36,13,1889.21,4,5,4
12353,204,2,406.75999999999993,2,2,2
12354,232,1,1079.4,2,2,3
12355,214,2,947.61,2,2,3


In [0]:
from pyspark.sql.types import StringType
from pyspark.sql.functions import concat

# Calculate combined RFM score
rfm_scores = rfm_scores.withColumn(
  "RFM_ScoreGroup",
  concat(
    col("R_Score").cast(StringType()),
    col("F_Score").cast(StringType()),
    col("M_Score").cast(StringType())
  )
)

display(rfm_scores.limit(10))


customer_id,recency,frequency,monetary,R_Score,F_Score,M_Score,RFM_ScoreGroup
12346,325,17,-64.67999999999071,2,5,1,251
12347,2,8,5633.32,5,4,5,545
12348,75,5,2019.4,3,4,4,344
12349,18,5,4404.539999999999,4,4,5,445
12350,310,1,334.40000000000003,2,2,2,222
12351,375,1,300.93,2,2,2,222
12352,36,13,1889.21,4,5,4,454
12353,204,2,406.75999999999993,2,2,2,222
12354,232,1,1079.4,2,2,3,223
12355,214,2,947.61,2,2,3,223


In [0]:
from pyspark.sql.functions import regexp_replace

# Segmenting customers according to R and F scores
seg_map = {
  r'[1-2][1-2]': 'Hibernating',
  r'[1-2][3-4]': 'At Risk',
  r'[1-2]5': 'Can\'t Lose',
  r'3[1-2]': 'About to Sleep',
  r'33': 'Need Attention',
  r'[3-4][4-5]': 'Loyal Customers',
  r'41': 'Promising',
  r'51': 'New Customers',
  r'[4-5][2-3]': 'Potential Loyalists',
  r'5[4-5]': 'Champions'
}

rfm_scores = rfm_scores.withColumn(
  "RF_ScoreGroup",
  concat(
    col("R_Score").cast(StringType()),
    col("F_Score").cast(StringType())
  )
)

segment_col = col("RF_ScoreGroup")
for rf, seg in seg_map.items():
  segment_col = when(
    col("RF_ScoreGroup").rlike(rf),
    seg
  ).otherwise(segment_col)

rfm_scores = rfm_scores.withColumn("Segment", segment_col)

display(rfm_scores.limit(10))


customer_id,recency,frequency,monetary,R_Score,F_Score,M_Score,RFM_ScoreGroup,RF_ScoreGroup,Segment
12346,325,17,-64.67999999999071,2,5,1,251,25,Can't Lose
12347,2,8,5633.32,5,4,5,545,54,Champions
12348,75,5,2019.4,3,4,4,344,34,Loyal Customers
12349,18,5,4404.539999999999,4,4,5,445,44,Loyal Customers
12350,310,1,334.40000000000003,2,2,2,222,22,Hibernating
12351,375,1,300.93,2,2,2,222,22,Hibernating
12352,36,13,1889.21,4,5,4,454,45,Loyal Customers
12353,204,2,406.75999999999993,2,2,2,222,22,Hibernating
12354,232,1,1079.4,2,2,3,223,22,Hibernating
12355,214,2,947.61,2,2,3,223,22,Hibernating


In [0]:
from pyspark.sql.functions import avg, count

# Finally, find mean and count for each customer segment and score
rfm_scores_summary = rfm_scores.select(
  "Segment",
  "recency",
  "frequency",
  "monetary"
).groupBy("Segment").agg(
  avg("recency").alias("Mean_Recency"),
  avg("frequency").alias("Mean_Frequency"),
  avg("monetary").alias("Mean_Monetary"),
  count("Segment").alias("Count")
)

display(rfm_scores_summary.orderBy("Segment"))

Segment,Mean_Recency,Mean_Frequency,Mean_Monetary,Count
About to Sleep,106.92572944297082,1.427055702917772,474.3301114058353,377
At Risk,374.8102094240838,4.579842931937173,1147.302748691101,764
Can't Lose,321.0210526315789,16.936842105263157,5384.705915789474,95
Champions,7.16588785046729,23.19392523364486,10423.44488901868,856
Hibernating,466.1025641025641,1.3201840894148589,334.0509875082185,1521
Loyal Customers,65.70397404703974,11.69910786699108,3872.434489862127,1233
Need Attention,111.3359375,3.50390625,1033.03512109375,256
Potential Loyalists,25.292857142857144,2.435714285714285,728.160095238095,840
