## Retail Data Analytics Using Databricks
In the [Python Data Analytics Project](https://github.com/halmasieh/-jarvis_data_eng_HomaAlmasieh/tree/master/python_data_analytics), we have done Data Analytics
for London Gift Shop (LGS), a UK-based online store on the provided transaction dataset from 200912-201112 to answer all the business questions.
In this project, we perform the same analysis using Python/PySpark. This notebook was developed using the Microsoft Azure Databricks interface.

### Importing Data
We upload the online_retail_II.csv file, which contains the data from the Python Data Analytics project, and use it to construct a PySpark DataFrame for the analysis.

In [0]:
# Where the uploaded file lives and which reader format to use
file_location = "/FileStore/tables/online_retail_II.csv"
file_type = "csv"

# Reader settings; they only have an effect for CSV sources
infer_schema = "true"
first_row_is_header = "true"
delimiter = ","

# Build the DataFrame: infer column types, take column names from the
# first row and split fields on the delimiter.
retail_df = (
    spark.read.format(file_type)
    .options(inferSchema=infer_schema, header=first_row_is_header, sep=delimiter)
    .load(file_location)
)

display(retail_df.limit(5))

Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01T07:45:00.000+0000,6.95,13085.0,United Kingdom
489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01T07:45:00.000+0000,6.75,13085.0,United Kingdom
489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01T07:45:00.000+0000,6.75,13085.0,United Kingdom
489434,22041,"""RECORD FRAME 7"""" SINGLE SIZE """,48,2009-12-01T07:45:00.000+0000,2.1,13085.0,United Kingdom
489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01T07:45:00.000+0000,1.25,13085.0,United Kingdom


In [0]:
#list the column names of retail_df (shown as the cell's output)
retail_df.columns

In [0]:
#list (column name, data type) pairs of retail_df to check what the
#schema inference requested at load time produced
retail_df.dtypes

### Total Invoice Amount

In [0]:
#keep only the rows whose price and quantity are both strictly positive
#NOTE: the star import below brings Spark's min/max/sum/round/... into scope,
#replacing the Python built-ins of the same name for the rest of the notebook
from pyspark.sql.functions import *
has_positive_price = col("price") > 0
has_positive_quantity = col("quantity") > 0
retail_filtered_df = retail_df.where(has_positive_price & has_positive_quantity)
display(retail_filtered_df.limit(5))

Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01T07:45:00.000+0000,6.95,13085.0,United Kingdom
489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01T07:45:00.000+0000,6.75,13085.0,United Kingdom
489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01T07:45:00.000+0000,6.75,13085.0,United Kingdom
489434,22041,"""RECORD FRAME 7"""" SINGLE SIZE """,48,2009-12-01T07:45:00.000+0000,2.1,13085.0,United Kingdom
489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01T07:45:00.000+0000,1.25,13085.0,United Kingdom


In [0]:
#invoice_amount is the value of one invoice line: unit price times quantity
line_amount = col("price") * col("quantity")
retail_filtered_df = retail_filtered_df.withColumn("invoice_amount", line_amount)

In [0]:
#mark retail_filtered_df for caching so later cells can reuse it;
#cache() returns that same DataFrame, so invoice_df is a second name for it
#rather than an independent copy
invoice_df = retail_filtered_df.cache()
display(invoice_df.limit(5))

Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country,invoice_amount
489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01T07:45:00.000+0000,6.95,13085.0,United Kingdom,83.4
489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01T07:45:00.000+0000,6.75,13085.0,United Kingdom,81.0
489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01T07:45:00.000+0000,6.75,13085.0,United Kingdom,81.0
489434,22041,"""RECORD FRAME 7"""" SINGLE SIZE """,48,2009-12-01T07:45:00.000+0000,2.1,13085.0,United Kingdom,100.8
489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01T07:45:00.000+0000,1.25,13085.0,United Kingdom,30.0


In [0]:
#total amount of each invoice, smallest totals first
#(sum here is the Spark aggregate brought in by the star import)
invoice_totals = retail_filtered_df.groupBy("Invoice").agg(sum("invoice_amount"))
retail_summary_df = (
    invoice_totals
    .select("invoice", "sum(invoice_amount)")
    .orderBy("sum(invoice_amount)")
)
display(retail_summary_df.limit(5))

invoice,sum(invoice_amount)
528127,0.19
570554,0.38
567869,0.4
539441,0.42
529767,0.42


In [0]:
# # #Area graph of Invoice versus sum(invoice_amount)
# display(retail_summary_df)

invoice,sum(invoice_amount)
528127,0.19
570554,0.38
567869,0.4
507293,0.42
518991,0.42
529767,0.42
502731,0.42
539441,0.42
532608,0.5
542736,0.55


In [0]:
#double-check that invoice_df carries the derived invoice_amount column
invoice_df.columns

In [0]:
#minimum, mean and maximum of the per-invoice totals
#display(retail_summary_df.select("sum(invoice_amount)").describe())
total_column = "sum(invoice_amount)"
display(retail_summary_df.agg(min(total_column), avg(total_column), max(total_column)))

min(sum(invoice_amount)),avg(sum(invoice_amount)),max(sum(invoice_amount))
0.19,523.3037611158243,168469.6


In [0]:
#median (0.5 quantile) of the per-invoice totals; the relative error of 0
#asks approxQuantile for the exact quantile rather than an approximation
retail_summary_df.approxQuantile("sum(invoice_amount)", [0.5], 0)[0]

In [0]:
#mode: the per-invoice total that occurs most often, with its occurrence count
record_counts = retail_summary_df.groupBy("sum(invoice_amount)").count()
mode = record_counts.sort(col("count").desc()).limit(1)
display(mode)

sum(invoice_amount),count
15.0,118


### Monthly Placed and Canceled Orders

In [0]:
#cache the raw data and add an integer yyyymm (year + month) column
#derived from InvoiceDate
year_month = date_format("InvoiceDate", "yyyyMM").cast("integer")
new_retail_df = retail_df.cache().withColumn("yyyymm", year_month)
display(new_retail_df.limit(5))

Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country,yyyymm
489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01T07:45:00.000+0000,6.95,13085.0,United Kingdom,200912
489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01T07:45:00.000+0000,6.75,13085.0,United Kingdom,200912
489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01T07:45:00.000+0000,6.75,13085.0,United Kingdom,200912
489434,22041,"""RECORD FRAME 7"""" SINGLE SIZE """,48,2009-12-01T07:45:00.000+0000,2.1,13085.0,United Kingdom,200912
489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01T07:45:00.000+0000,1.25,13085.0,United Kingdom,200912


In [0]:
#number of distinct invoices per month, placed and cancelled alike
all_orders = (
    new_retail_df.groupBy("yyyymm")
    .agg(countDistinct("Invoice").alias("all_orders"))
    .orderBy("yyyymm")
)
display(all_orders.limit(5))

yyyymm,all_orders
200912,2330
201001,1633
201002,1969
201003,2367
201004,1892


In [0]:
#number of distinct cancelled invoices per month; an invoice is treated as
#cancelled when its number starts with "C"
is_cancellation = col("Invoice").startswith("C")
cancelled_orders = (
    new_retail_df.where(is_cancellation)
    .groupBy("yyyymm")
    .agg(countDistinct("Invoice").alias("cancelled_orders"))
    .orderBy("yyyymm")
)
display(cancelled_orders.limit(5))

yyyymm,cancelled_orders
200912,401
201001,300
201002,240
201003,407
201004,304


In [0]:
#count the number of orders that actually went through each month
#A left join keeps every month found in all_orders; with an inner join a month
#that has no cancelled invoice at all would silently vanish from the report.
monthly_orders = all_orders.join(cancelled_orders, (cancelled_orders.yyyymm == all_orders.yyyymm), "left")
#months without cancellations have no match on the right side: count them as 0
monthly_orders = monthly_orders.withColumn("cancelled_orders", coalesce(col("cancelled_orders"), lit(0)))
#NOTE(review): the factor 2 presumably reflects that a cancelled order shows up
#twice in all_orders (the original invoice and its "C" reversal) - confirm
#against the dataset description
monthly_orders = monthly_orders.withColumn("placed_orders", col("all_orders") - 2 * col("cancelled_orders"))
display(monthly_orders.select(all_orders.yyyymm,"all_orders","cancelled_orders","placed_orders").orderBy(all_orders.yyyymm).limit(5))

yyyymm,all_orders,cancelled_orders,placed_orders
200912,2330,401,1528
201001,1633,300,1033
201002,1969,240,1489
201003,2367,407,1553
201004,1892,304,1284


In [0]:
# #plot the # of placed orders versus canceled orders 
# display(monthly_orders.select(all_orders.yyyymm,"all_orders","cancelled_orders","placed_orders").orderBy(all_orders.yyyymm))

yyyymm,all_orders,cancelled_orders,placed_orders
200912,2330,401,1528
201001,1633,300,1033
201002,1969,240,1489
201003,2367,407,1553
201004,1892,304,1284
201005,2418,407,1604
201006,2216,357,1502
201007,2017,344,1329
201008,1877,273,1331
201009,2375,371,1633


### Monthly Sales

In [0]:
#monthly sales: Revenue is Quantity * Price per row, summed over each month
monthly_sales_df = new_retail_df.withColumn("Revenue", col("Quantity") * col("Price"))
revenue_df = (
    monthly_sales_df.groupBy("yyyymm")
    .agg(sum("Revenue"))
    .orderBy("yyyymm")
)
display(revenue_df.limit(5))

yyyymm,sum(Revenue)
200912,799847.1100000143
201001,624032.8919999956
201002,533091.4260000042
201003,765848.7609999765
201004,590580.4319999823


In [0]:
# #build the bar plot for monthly sales from 200912-201112.
# display(revenue_df)

yyyymm,sum(Revenue)
200912,799847.1100000143
201001,624032.8919999956
201002,533091.4260000042
201003,765848.7609999765
201004,590580.4319999823
201005,615322.8300000005
201006,679786.6099999842
201007,575236.360000009
201008,656776.3399999854
201009,853650.4309999745


### Monthly Active Users

In [0]:
#active users per month = number of distinct customer ids seen in that month
#the column is renamed first so that it no longer contains a space
new_retail_df = new_retail_df.withColumnRenamed("Customer ID", "CustomerID")
active_users_df = (
    new_retail_df.groupBy("yyyymm")
    .agg(countDistinct("CustomerID").alias("ActiveCustomers"))
)
display(active_users_df.orderBy("yyyymm").limit(5))

yyyymm,ActiveCustomers
200912,1045
201001,786
201002,807
201003,1111
201004,998


In [0]:
# #plot active users of each month
# display(active_users_df.orderBy("yyyymm"))

yyyymm,ActiveCustomers
200912,1045
201001,786
201002,807
201003,1111
201004,998
201005,1062
201006,1095
201007,988
201008,964
201009,1202


### New and Existing Users

In [0]:
#earliest yyyymm in which each customer appears (their first purchase month)
first_purchase_for_user = (
    new_retail_df.groupBy("CustomerID")
    .agg(min("yyyymm").alias("user_starting_date"))
    .withColumnRenamed("CustomerID", "FirstCustomerID")
)
display(first_purchase_for_user.orderBy("FirstCustomerID").limit(5))

FirstCustomerID,user_starting_date
,200912
12346.0,200912
12347.0,201010
12348.0,201009
12349.0,200912


In [0]:
#attach each customer's first purchase month to every one of their rows
#(inner join: rows whose CustomerID has no match are not carried over)
same_customer = new_retail_df.CustomerID == first_purchase_for_user.FirstCustomerID
new_retail_df = new_retail_df.join(first_purchase_for_user, same_customer, "inner")

In [0]:
#preview the joined data, earliest months first
display(new_retail_df.orderBy("yyyymm").limit(5))

Invoice,StockCode,Description,Quantity,InvoiceDate,Price,CustomerID,Country,yyyymm,FirstCustomerID,user_starting_date
491198,21108,FAIRY CAKE FLANNEL ASSORTED COLOUR,9,2009-12-10T13:09:00.000+0000,2.55,16596.0,United Kingdom,200912,16596.0,200912
491198,22139,RETRO SPOT TEA SET CERAMIC 11 PC,3,2009-12-10T13:09:00.000+0000,4.95,16596.0,United Kingdom,200912,16596.0,200912
491198,21485,RED SPOT HEART HOT WATER BOTTLE,3,2009-12-10T13:09:00.000+0000,4.95,16596.0,United Kingdom,200912,16596.0,200912
490944,22113,GREY HEART HOT WATER BOTTLE,36,2009-12-08T14:13:00.000+0000,2.95,14285.0,United Kingdom,200912,14285.0,200912
491198,22111,SCOTTIE DOG HOT WATER BOTTLE,3,2009-12-10T13:09:00.000+0000,4.95,16596.0,United Kingdom,200912,16596.0,200912


In [0]:
#per month, count customers buying for the first time (new) and customers
#who had already bought in an earlier month (existing), then combine both
is_first_month = col("yyyymm") == col("user_starting_date")

users_monthly_new = (
    new_retail_df.where(is_first_month)
    .groupBy("yyyymm")
    .agg(countDistinct("CustomerID").alias("NewUsers"))
)

#the month column is renamed so the join below has unambiguous key names
users_monthly_existing = (
    new_retail_df.where(~is_first_month)
    .groupBy("yyyymm")
    .agg(countDistinct("CustomerID").alias("ExistingUsers"))
    .withColumnRenamed("yyyymm", "yyyymm_extra")
)

#left join: a month without existing users keeps its row with an empty ExistingUsers
users_monthly = users_monthly_new.join(
    users_monthly_existing,
    users_monthly_new.yyyymm == users_monthly_existing.yyyymm_extra,
    "left",
).drop("yyyymm_extra")
display(users_monthly.select("yyyymm","NewUsers","ExistingUsers").orderBy("yyyymm").limit(5))

yyyymm,NewUsers,ExistingUsers
200912,1045,
201001,394,392.0
201002,363,444.0
201003,436,675.0
201004,291,707.0


In [0]:
# #plot the # of new users versus existing users
# display(users_monthly.select("yyyymm","NewUsers","ExistingUsers").orderBy("yyyymm").limit(5))

yyyymm,NewUsers,ExistingUsers
200912,1045,
201001,394,392.0
201002,363,444.0
201003,436,675.0
201004,291,707.0


### RFM Analysis
RFM segmentation is a great method to divide customers into equal groups depending on three criteria:
- Recency - How recently did the customer purchase?
- Frequency - How often do they purchase?
- Monetary - How much do they spend?

In [0]:
#data preparation for RFM
from pyspark.sql.types import *

#date against which recency (days since purchase) is measured
rfm_reference_date = "2012-01-01"

#make sure the numeric inputs have the expected types
new_retail_df = new_retail_df.withColumn("Quantity", col("Quantity").cast(IntegerType()))
new_retail_df = new_retail_df.withColumn("Price", col("Price").cast(DoubleType()))

#InvoiceDate is already a timestamp (date_format is applied to it when the
#yyyymm column is built), so its calendar date can be taken directly; the
#previous unix_timestamp(..., "MM/dd/yyyy") round-trip named a format that
#does not match the data and had no effect on a timestamp input
new_retail_df = new_retail_df.withColumn("Date", to_date(col("InvoiceDate")))

#calculate difference in days between the reference date and the purchase date
new_retail_df = new_retail_df.withColumn("Recency", datediff(lit(rfm_reference_date), col("Date")))

#define Total column: value of the row, rounded to 2 decimals
new_retail_df = new_retail_df.withColumn("Total", round(col("Price") * col("Quantity"), 2))
stat_summary_retail = new_retail_df.select("Quantity", "Price", "Total")
display(stat_summary_retail.summary())

summary,Quantity,Price,Total
count,824364.0,824364.0,824364.0
mean,12.414574144431343,3.676799578826833,20.195317080804063
stddev,188.9760990097584,70.24138768949332,308.68500104847647
min,-80995.0,0.0,-168469.6
25%,2.0,1.25,4.25
50%,5.0,1.95,11.25
75%,12.0,3.75,19.5
max,80995.0,38970.0,168469.6


In [0]:
#build the RFM table, one row per customer:
#  Recency   - smallest day distance, i.e. the most recent purchase
#  Frequency - number of rows carrying an Invoice value
#  Monetary  - sum of Total, rounded to 2 decimals
#NOTE(review): Frequency counts invoice lines, not distinct invoices - confirm
#that this is the intended definition
recency = min("Recency").alias("Recency")
frequency = count("Invoice").alias("Frequency")
monetary = round(sum("Total"), 2).alias("Monetary")
rfm_table = new_retail_df.groupBy("CustomerId").agg(recency, frequency, monetary)
display(rfm_table.limit(5))

CustomerId,Recency,Frequency,Monetary
12467.0,409,18,0.0
12493.0,188,23,416.79
12671.0,629,45,2622.48
12737.0,521,2,3710.5
13094.0,44,38,2214.66


### Customer Distribution Based on Recency, Frequency and Monetary

In [0]:
# #Recency histogram
# display(rfm_table.select("CustomerID", "Recency"))

CustomerID,Recency
12467.0,409
12493.0,188
12671.0,629
12737.0,521
13094.0,44
13533.0,205
13607.0,63
13918.0,72
13956.0,28
13973.0,310


In [0]:
# #Frequency histogram
# display(rfm_table.select("CustomerID", "Frequency"))

CustomerID,Frequency
12467.0,18
12493.0,23
12671.0,45
12737.0,2
13094.0,38
13533.0,76
13607.0,123
13918.0,30
13956.0,152
13973.0,11


In [0]:
# #Monetary histogram
# display(rfm_table.select("CustomerID", "Monetary"))

CustomerID,Monetary
12467.0,0.0
12493.0,416.79
12671.0,2622.48
12737.0,3710.5
13094.0,2214.66
13533.0,270.79
13607.0,1060.61
13918.0,1212.84
13956.0,1026.42
13973.0,264.7


### Computing Quantile of RFM values
We assign each customer a score from 1 to 5 for each indicator, where 1 is the lowest and 5 the highest, using the PySpark ML feature transformer QuantileDiscretizer. A customer who scores 5 on all three indicators is considered a best customer.

In [0]:
from pyspark.ml.feature import QuantileDiscretizer

#bin each RFM measure into 5 quantile buckets; relativeError 0.0 requests
#exact quantile boundaries
monetary_discretizer = QuantileDiscretizer(
    numBuckets=5, inputCol="Monetary", outputCol="MonetaryScore", relativeError=0.0
)
fitted_monetary_discretizer = monetary_discretizer.fit(rfm_table)
rfm_table = fitted_monetary_discretizer.transform(rfm_table)

frequency_discretizer = QuantileDiscretizer(
    numBuckets=5, inputCol="Frequency", outputCol="FrequencyScore", relativeError=0.0
)
fitted_frequency_discretizer = frequency_discretizer.fit(rfm_table)
rfm_table = fitted_frequency_discretizer.transform(rfm_table)

recency_discretizer = QuantileDiscretizer(
    numBuckets=5, inputCol="Recency", outputCol="RecencyScore", relativeError=0.0
)
fitted_recency_discretizer = recency_discretizer.fit(rfm_table)
rfm_table = fitted_recency_discretizer.transform(rfm_table)

display(rfm_table.limit(5))

CustomerId,Recency,Frequency,Monetary,MonetaryScore,FrequencyScore,RecencyScore
12467.0,409,18,0.0,0.0,1.0,3.0
12493.0,188,23,416.79,1.0,1.0,2.0
12671.0,629,45,2622.48,3.0,2.0,4.0
12737.0,521,2,3710.5,4.0,0.0,4.0
13094.0,44,38,2214.66,3.0,2.0,1.0


### Need to Massage Quantile Discretizer Outputs to Produce Proper RFM Scores
Although the customers are now binned by quantile, the label given to each quantile is not yet the RFM score we want. Firstly, the QuantileDiscretizer outputs range from 0-4, whereas we would like them to range from 1-5. Secondly, for recency there is an inverse relationship between the quantile rank and the RFM score (i.e. larger quantile rank = more days since the last purchase = lower recency score). 
This means we need to transform the scores by replacing the columns generated by QuantileDiscretizer with functions of their original values.

In [0]:
#turn the 0-4 bucket labels produced by QuantileDiscretizer into 1-5 RFM scores
#Native column expressions are used instead of a Python UDF, so the work stays
#inside Spark; the columns are replaced in place and keep their positions.

#monetary and frequency: a higher bucket is better, so only shift by one
rfm_table = rfm_table.withColumn("MonetaryScore", col("MonetaryScore") + 1)
rfm_table = rfm_table.withColumn("FrequencyScore", col("FrequencyScore") + 1)

#recency: a higher bucket means more days since the last purchase, so the
#scale is inverted (bucket 0 -> score 5, bucket 4 -> score 1); the cast keeps
#the column's float type
rfm_table = rfm_table.withColumn("RecencyScore", (lit(5) - col("RecencyScore")).cast(FloatType()))
display(rfm_table.limit(5))

CustomerId,Recency,Frequency,Monetary,MonetaryScore,FrequencyScore,RecencyScore
12467.0,409,18,0.0,1.0,2.0,2.0
12493.0,188,23,416.79,2.0,2.0,3.0
12671.0,629,45,2622.48,4.0,3.0,1.0
12737.0,521,2,3710.5,5.0,1.0,1.0
13094.0,44,38,2214.66,4.0,3.0,4.0


### Concatenating RFM scores

In [0]:
#RFMScore is the three scores written side by side as text, in the order
#RecencyScore, FrequencyScore, MonetaryScore (e.g. 4, 3, 4 -> "434")
score_columns = ["RecencyScore", "FrequencyScore", "MonetaryScore"]

#make the scores whole numbers so they print as "4" rather than "4.0"
for score_column in score_columns:
    rfm_table = rfm_table.withColumn(score_column, col(score_column).cast(IntegerType()))

rfm_score = concat(*[col(score_column).cast(StringType()) for score_column in score_columns])
rfm_table = rfm_table.withColumn("RFMScore", rfm_score)
display(rfm_table.limit(5))

CustomerId,Recency,Frequency,Monetary,MonetaryScore,FrequencyScore,RecencyScore,RFMScore
12467.0,409,18,0.0,1,2,2,221
12493.0,188,23,416.79,2,2,3,322
12671.0,629,45,2622.48,4,3,1,134
12737.0,521,2,3710.5,5,1,1,115
13094.0,44,38,2214.66,4,3,4,434


In [0]:
#best customers: top score (5) on recency, frequency and monetary alike
is_best_customer = col("RFMScore") == 555
display(rfm_table.where(is_best_customer).select("CustomerID").limit(5))

CustomerID
17884.0
15311.0
15750.0
15898.0
17659.0


In [0]:
#segmenting of customers according to RecencyScore and FrequencyScore values
import re

# (pattern, segment) pairs checked in order; each pattern looks at the first
# two characters of the score, i.e. the recency digit followed by the
# frequency digit.
_SEGMENT_PATTERNS = (
    ('^[1-2][1-2]', 'Hibernating'),
    ('^[1-2][3-4]', 'At Risk'),
    ('^[1-2]5', 'Can\'t Lose'),
    ('^3[1-2]', 'About to Sleep'),
    ('^33', 'Need Attention'),
    ('^[3-4][4-5]', 'Loyal Customers'),
    ('^41', 'Promising'),
    ('^51', 'New Customers'),
    ('^[4-5][2-3]', 'Potential Loyalists'),
    ('^5[4-5]', 'Champions'),
)


def lookup(s):
    """Map an RFM score string to its customer segment label.

    Args:
        s: score string whose first two characters are the recency and
            frequency digits (for example "434"), or None.

    Returns:
        The label of the first matching pattern, or None when ``s`` is None
        or matches no pattern.
    """
    # A missing score would otherwise make the regex call raise TypeError and
    # fail the whole Spark job; treat it as "no segment".
    if s is None:
        return None
    for pattern, value in _SEGMENT_PATTERNS:
        # every pattern is anchored with ^, so matching at the start suffices
        if re.match(pattern, s):
            return value
    return None
  
#wrap lookup as a Spark UDF that returns the segment label as a string
lookup_udf = udf(lookup, returnType=StringType())

In [0]:
#add the Segment column: the label that lookup assigns to each RFMScore
segment_label = lookup_udf(col("RFMScore"))
rfm_table = rfm_table.withColumn("Segment", segment_label)
display(rfm_table.limit(5))

CustomerId,Recency,Frequency,Monetary,MonetaryScore,FrequencyScore,RecencyScore,RFMScore,Segment
12467.0,409,18,0.0,1,2,2,221,Hibernating
12493.0,188,23,416.79,2,2,3,322,About to Sleep
12671.0,629,45,2622.48,4,3,1,134,At Risk
12737.0,521,2,3710.5,5,1,1,115,Hibernating
13094.0,44,38,2214.66,4,3,4,434,Potential Loyalists


In [0]:
# #Visualize the CustomerID versus Segment
# display(rfm_table.select("CustomerID", "Segment"))

CustomerID,Segment
12467.0,Hibernating
12493.0,About to Sleep
12671.0,At Risk
12737.0,Hibernating
13094.0,Potential Loyalists
13533.0,Need Attention
13607.0,Loyal Customers
13918.0,Potential Loyalists
13956.0,Champions
13973.0,Hibernating


In [0]:
#summary per segment: for each of Recency, Frequency and Monetary, the number
#of non-null values and their mean
#The monetary mean was previously labelled 'FrequenMonetaryMean' (a copy-paste
#slip); it is now named 'MonetaryMean' like its Recency/Frequency siblings.
final_rfm_table = (
    rfm_table.select("Segment", "Recency", "Frequency", "Monetary")
    .groupBy("Segment")
    .agg(
        count("Recency").alias("RecencyCount"),
        avg("Recency").alias("RecencyMean"),
        count("Frequency").alias("FrequencyCount"),
        avg("Frequency").alias("FrequencyMean"),
        count("Monetary").alias("MonetaryCount"),
        avg("Monetary").alias("MonetaryMean"),
    )
)
display(final_rfm_table.limit(5))

Segment,RecencyCount,RecencyMean,FrequencyCount,FrequencyMean,MonetaryCount,FrequenMonetaryMean
Champions,820,30.15,820,455.4207317073171,820,10659.325841463413
Promising,119,58.00840336134454,119,8.865546218487395,119,388.5754621848739
At Risk,812,415.2081280788177,812,74.35960591133005,812,1211.5383497536943
About to Sleep,427,131.95316159250586,427,18.416861826697893,427,542.1019672131147
Hibernating,1462,482.07113543091657,1462,15.61422708618331,1462,309.28761969904247
