# Merchant Rating and Ranking

In [61]:
import pandas as pd
import numpy as np
import math

In [11]:
import ray

ray.shutdown()

In [None]:
ray.init()

In [2]:
all_order_train = pd.read_pickle('../all_order_train.pkl')
print(all_order_train.shape)

all_order_train.head()

(33720820, 12)


Unnamed: 0,order_id,user_id,order_number,order_hour_of_day,product_id,purchase_date,merchant,product_name,price,aisle,department,week_number
0,2539329,1,1,8,196,2019-02-13,L&M Markets (Hometown Grocers Co-op),Soda,3.08,soft drinks,beverages,7
1,2231262,31,17,11,196,2019-03-10,L&M Markets (Hometown Grocers Co-op),Soda,3.08,soft drinks,beverages,10
2,3058369,195,34,10,196,2019-02-25,L&M Markets (Hometown Grocers Co-op),Soda,3.08,soft drinks,beverages,9
3,2257155,951,3,20,196,2019-03-09,L&M Markets (Hometown Grocers Co-op),Soda,3.08,soft drinks,beverages,10
4,1121647,992,7,10,196,2019-03-08,L&M Markets (Hometown Grocers Co-op),Soda,3.08,soft drinks,beverages,10


## Calculate Rating Score & Ranking

* `customer_loyalty = returned customers this period / total customers this period`, [0,1] range
* `sales_performance = # of orders above past average sales this period/ # of orders this period`, [0,1] range

In [45]:
@ray.remote
def get_customer_loyalty(current_df, current_week, merchant):
    past_clients = set(current_df.loc[current_df['week_number'] < current_week]['user_id'].values)
    current_clients = set(current_df.loc[current_df['week_number'] == current_week]['user_id'].values)
    returned_clients = past_clients.intersection(current_clients)
    customer_loyalty = len(returned_clients)/len(current_clients)
    
    return {'merchant': merchant, 'week_number': current_week, 'customer_loyalty': round(customer_loyalty, 4)}


@ray.remote
def get_sales_performance(current_df, current_week, merchant):
    past_avg_sales = np.mean(current_df.loc[current_df['week_number'] < current_week]['price'].values)
    current_sales = current_df.loc[current_df['week_number'] == current_week]['price'].values
    above_avg_sales = [v for v in current_sales if v >= past_avg_sales]
    sales_performance = len(above_avg_sales)/len(current_sales)
    
    return {'merchant': merchant, 'week_number': current_week, 'sales_performance': round(sales_performance, 4)}

In [40]:
merchant_lst = all_order_train['merchant'].unique()
week_lst = all_order_train['week_number'].unique()
week_lst.sort()

customer_loyalty_results = [get_customer_loyalty.remote(all_order_train.loc[(all_order_train['merchant']==merchant) 
                                         & (all_order_train['week_number'] <= current_week)][['user_id', 'week_number']],
                                                       current_week, merchant)
                           for merchant in merchant_lst
                           for current_week in week_lst[1:]]
customer_loyalty_lst = ray.get(customer_loyalty_results)

In [53]:
sales_performance_results = [get_sales_performance.remote(all_order_train.loc[(all_order_train['merchant']==merchant) 
                                         & (all_order_train['week_number'] <= current_week)][['price', 'week_number']],
                                                       current_week, merchant)
                           for merchant in merchant_lst
                           for current_week in week_lst[1:]]
sales_performance_lst = ray.get(sales_performance_results)

In [54]:
customer_loyalty_df = pd.DataFrame(customer_loyalty_lst)
sales_performance_df = pd.DataFrame(sales_performance_lst)

In [43]:
customer_loyalty_df.head()

Unnamed: 0,merchant,week_number,customer_loyalty
0,L&M Markets (Hometown Grocers Co-op),8,0.3653
1,L&M Markets (Hometown Grocers Co-op),9,0.7708
2,L&M Markets (Hometown Grocers Co-op),10,0.8876
3,L&M Markets (Hometown Grocers Co-op),11,0.9171
4,L&M Markets (Hometown Grocers Co-op),12,0.9551


In [55]:
sales_performance_df = pd.DataFrame(sales_performance_lst)
sales_performance_df.head()

Unnamed: 0,merchant,week_number,sales_performance
0,L&M Markets (Hometown Grocers Co-op),8,0.4921
1,L&M Markets (Hometown Grocers Co-op),9,0.3243
2,L&M Markets (Hometown Grocers Co-op),10,0.3107
3,L&M Markets (Hometown Grocers Co-op),11,0.3093
4,L&M Markets (Hometown Grocers Co-op),12,0.3392


In [57]:
merchant_weekly_df = customer_loyalty_df.merge(sales_performance_df, on=['merchant', 'week_number'])
print(merchant_weekly_df.shape)
print(merchant_weekly_df['merchant'].nunique(), merchant_weekly_df['week_number'].nunique())

merchant_weekly_df.head()

(686, 4)
49 14


Unnamed: 0,merchant,week_number,customer_loyalty,sales_performance
0,L&M Markets (Hometown Grocers Co-op),8,0.3653,0.4921
1,L&M Markets (Hometown Grocers Co-op),9,0.7708,0.3243
2,L&M Markets (Hometown Grocers Co-op),10,0.8876,0.3107
3,L&M Markets (Hometown Grocers Co-op),11,0.9171,0.3093
4,L&M Markets (Hometown Grocers Co-op),12,0.9551,0.3392


### Weekly Rating & Ranking without Prior

* `rating = 1 - sqrt((power(1-customer_loyalty, 2) + (power(1-sales_performance, 2)))/2)`
  * It uses the distance between perfect score 1 and current point

In [74]:
def calculate_rating(customer_loyalty, sales_performance):
    rating = 1 - math.sqrt((pow(1-customer_loyalty, 2) + (pow(1-sales_performance, 2)))/2)
    
    return round(rating, 4)

In [75]:
merchant_weekly_df['rating'] = merchant_weekly_df.apply(lambda r: calculate_rating(r['customer_loyalty'], r['sales_performance']), axis=1)

merchant_weekly_df.head()

Unnamed: 0,merchant,week_number,customer_loyalty,sales_performance,rating,weekly_rank
210,Askew's Foods,8,0.698,0.5411,0.6115,1.0
280,Goodness me,8,0.6743,0.4776,0.5647,2.0
154,Red River Co-op,8,0.7011,0.4157,0.5359,3.0
28,Coleman's,8,0.8026,0.3275,0.5044,4.0
126,Calgary Co-op,8,0.6458,0.434,0.5279,5.0


In [79]:
print(min(merchant_weekly_df['rating']), max(merchant_weekly_df['rating']))

0.0 0.6187


In [76]:
# "dense" decides how to rank groups of records that have the same value
# "ascending=False" means highest rating gets the smallest rank (best rank) 
merchant_weekly_df['weekly_rank'] = merchant_weekly_df.groupby('week_number')['rating'].rank('dense', ascending=False)

merchant_weekly_df.head()

Unnamed: 0,merchant,week_number,customer_loyalty,sales_performance,rating,weekly_rank
210,Askew's Foods,8,0.698,0.5411,0.6115,1.0
280,Goodness me,8,0.6743,0.4776,0.5647,2.0
154,Red River Co-op,8,0.7011,0.4157,0.5359,3.0
28,Coleman's,8,0.8026,0.3275,0.5044,10.0
126,Calgary Co-op,8,0.6458,0.434,0.5279,7.0


In [77]:
merchant_weekly_df = merchant_weekly_df.sort_values(by=['week_number', 'weekly_rank'])

merchant_weekly_df.head(n=20)

Unnamed: 0,merchant,week_number,customer_loyalty,sales_performance,rating,weekly_rank
210,Askew's Foods,8,0.698,0.5411,0.6115,1.0
280,Goodness me,8,0.6743,0.4776,0.5647,2.0
154,Red River Co-op,8,0.7011,0.4157,0.5359,3.0
616,Subzi Mandi Cash & Carry,8,0.3614,0.8437,0.5351,4.0
14,Fairway Markets,8,0.4971,0.5753,0.5346,5.0
238,Le Marché Esposito,8,0.5287,0.5295,0.5291,6.0
126,Calgary Co-op,8,0.6458,0.434,0.5279,7.0
112,Nations Fresh Food,8,0.416,0.6616,0.5227,8.0
294,Galleria Supermarkets,8,0.5592,0.474,0.5147,9.0
28,Coleman's,8,0.8026,0.3275,0.5044,10.0


### Bayesian Rating

* `order_sales = sales in this period / # orders in this period`
* `bayesian_adjusted_rating_i = ((order_sales_i * rating_i) + sum(order_sales_1..i * rating_1..i)) / (order_sales_i + sum(order_sales_1..i))`

* Reference: https://www.analyticsvidhya.com/blog/2019/07/introduction-online-rating-systems-bayesian-adjusted-rating/

In [82]:
weekly_merchant_df = all_order_train[['merchant', 'week_number', 'price', 'order_id']]\
                                    .groupby(['merchant', 'week_number'], as_index=False)\
                                    .agg({'price': 'sum', 'order_id': 'count'})
weekly_merchant_df.head()

Unnamed: 0,merchant,week_number,price,order_id
0,49th Parallel Grocery,7,1356823.0,90203
1,49th Parallel Grocery,8,3218114.0,245333
2,49th Parallel Grocery,9,2706288.0,233924
3,49th Parallel Grocery,10,1944867.0,174958
4,49th Parallel Grocery,11,2107830.0,177944


In [83]:
weekly_merchant_df['order_sales'] = weekly_merchant_df['price']/weekly_merchant_df['order_id']

weekly_merchant_df.head()

Unnamed: 0,merchant,week_number,price,order_id,order_sales
0,49th Parallel Grocery,7,1356823.0,90203,15.04188
1,49th Parallel Grocery,8,3218114.0,245333,13.11733
2,49th Parallel Grocery,9,2706288.0,233924,11.569092
3,49th Parallel Grocery,10,1944867.0,174958,11.116191
4,49th Parallel Grocery,11,2107830.0,177944,11.845468


In [84]:
merchant_weekly_df = merchant_weekly_df.merge(weekly_merchant_df[['merchant', 'week_number', 'order_sales']],
                                              on=['merchant', 'week_number'])
print(merchant_weekly_df.shape)

merchant_weekly_df.head()

(686, 7)


Unnamed: 0,merchant,week_number,customer_loyalty,sales_performance,rating,weekly_rank,order_sales
0,Askew's Foods,8,0.698,0.5411,0.6115,1.0,14.9245
1,Goodness me,8,0.6743,0.4776,0.5647,2.0,14.747786
2,Red River Co-op,8,0.7011,0.4157,0.5359,3.0,13.58007
3,Subzi Mandi Cash & Carry,8,0.3614,0.8437,0.5351,4.0,20.386051
4,Fairway Markets,8,0.4971,0.5753,0.5346,5.0,14.587754


In [85]:
merchant_weekly_df = merchant_weekly_df.sort_values(by=['merchant', 'week_number'])
merchant_weekly_df.head()

Unnamed: 0,merchant,week_number,customer_loyalty,sales_performance,rating,weekly_rank,order_sales
21,49th Parallel Grocery,8,0.5793,0.3633,0.4604,22.0,13.11733
79,49th Parallel Grocery,9,0.8799,0.3058,0.5018,31.0,11.569092
125,49th Parallel Grocery,10,0.9463,0.3194,0.5172,27.0,11.116191
155,49th Parallel Grocery,11,0.9659,0.342,0.5341,9.0,11.845468
230,49th Parallel Grocery,12,0.98,0.3019,0.5062,35.0,10.975239


In [87]:
merchant_weekly_df['cum_rating'] = merchant_weekly_df.groupby(['merchant'])['rating'].cumsum()

merchant_weekly_df.head()

Unnamed: 0,merchant,week_number,customer_loyalty,sales_performance,rating,weekly_rank,order_sales,cum_rating
21,49th Parallel Grocery,8,0.5793,0.3633,0.4604,22.0,13.11733,0.4604
79,49th Parallel Grocery,9,0.8799,0.3058,0.5018,31.0,11.569092,0.9622
125,49th Parallel Grocery,10,0.9463,0.3194,0.5172,27.0,11.116191,1.4794
155,49th Parallel Grocery,11,0.9659,0.342,0.5341,9.0,11.845468,2.0135
230,49th Parallel Grocery,12,0.98,0.3019,0.5062,35.0,10.975239,2.5197


In [90]:
merchant_weekly_df['cum_order_sales'] = merchant_weekly_df.groupby(['merchant'])['order_sales'].cumsum()

merchant_weekly_df.head()

Unnamed: 0,merchant,week_number,customer_loyalty,sales_performance,rating,weekly_rank,order_sales,cum_rating,cum_order_sales
21,49th Parallel Grocery,8,0.5793,0.3633,0.4604,22.0,13.11733,0.4604,13.11733
79,49th Parallel Grocery,9,0.8799,0.3058,0.5018,31.0,11.569092,0.9622,24.686421
125,49th Parallel Grocery,10,0.9463,0.3194,0.5172,27.0,11.116191,1.4794,35.802613
155,49th Parallel Grocery,11,0.9659,0.342,0.5341,9.0,11.845468,2.0135,47.648081
230,49th Parallel Grocery,12,0.98,0.3019,0.5062,35.0,10.975239,2.5197,58.62332


In [91]:
merchant_weekly_df['bayesian_adjusted_rating'] = (merchant_weekly_df['order_sales']*merchant_weekly_df['rating'] + \
                                                merchant_weekly_df['cum_order_sales']*merchant_weekly_df['cum_rating']) / \
                                                (merchant_weekly_df['order_sales'] + merchant_weekly_df['cum_order_sales'])
merchant_weekly_df.head()

Unnamed: 0,merchant,week_number,customer_loyalty,sales_performance,rating,weekly_rank,order_sales,cum_rating,cum_order_sales,bayesian_adjusted_rating
21,49th Parallel Grocery,8,0.5793,0.3633,0.4604,22.0,13.11733,0.4604,13.11733,0.4604
79,49th Parallel Grocery,9,0.8799,0.3058,0.5018,31.0,11.569092,0.9622,24.686421,0.815287
125,49th Parallel Grocery,10,0.9463,0.3194,0.5172,27.0,11.116191,1.4794,35.802613,1.251432
155,49th Parallel Grocery,11,0.9659,0.342,0.5341,9.0,11.845468,2.0135,47.648081,1.718944
230,49th Parallel Grocery,12,0.98,0.3019,0.5062,35.0,10.975239,2.5197,58.62332,2.202184


In [92]:
merchant_weekly_df['bayesian_weekly_rank'] = merchant_weekly_df.groupby('week_number')['bayesian_adjusted_rating'].rank('dense', ascending=False)
merchant_weekly_df = merchant_weekly_df.sort_values(by=['week_number', 'bayesian_weekly_rank'])

merchant_weekly_df.head()

Unnamed: 0,merchant,week_number,customer_loyalty,sales_performance,rating,weekly_rank,order_sales,cum_rating,cum_order_sales,bayesian_adjusted_rating,bayesian_weekly_rank
0,Askew's Foods,8,0.698,0.5411,0.6115,1.0,14.9245,0.6115,14.9245,0.6115,1.0
1,Goodness me,8,0.6743,0.4776,0.5647,2.0,14.747786,0.5647,14.747786,0.5647,2.0
2,Red River Co-op,8,0.7011,0.4157,0.5359,3.0,13.58007,0.5359,13.58007,0.5359,3.0
3,Subzi Mandi Cash & Carry,8,0.3614,0.8437,0.5351,4.0,20.386051,0.5351,20.386051,0.5351,4.0
4,Fairway Markets,8,0.4971,0.5753,0.5346,5.0,14.587754,0.5346,14.587754,0.5346,5.0


In [94]:
merchant_weekly_df.iloc[100:108]

Unnamed: 0,merchant,week_number,customer_loyalty,sales_performance,rating,weekly_rank,order_sales,cum_rating,cum_order_sales,bayesian_adjusted_rating,bayesian_weekly_rank
124,Galleria Supermarkets,10,0.9227,0.3235,0.5185,26.0,11.166629,1.6021,38.690917,1.359405,3.0
132,Askew's Foods,10,0.9476,0.2939,0.4993,34.0,11.272908,1.5886,37.403704,1.336331,4.0
109,Longo's,10,0.915,0.3648,0.5468,12.0,12.403802,1.5768,39.635455,1.331295,5.0
121,Goodness me,10,0.9182,0.3323,0.5243,23.0,12.108332,1.5809,38.3751,1.327477,6.0
135,Red River Co-op,10,0.9218,0.2914,0.4959,37.0,10.917444,1.5704,37.118026,1.326189,7.0
119,Coleman's,10,0.9457,0.3351,0.5283,21.0,11.025328,1.5643,35.313379,1.317805,8.0
139,FoodFare,10,0.9216,0.2862,0.4922,41.0,11.558034,1.5647,37.87475,1.313935,9.0
120,Fairway Markets,10,0.9466,0.3332,0.527,22.0,11.571869,1.5535,37.573592,1.311799,10.0


In [95]:
merchant_weekly_df.to_pickle('merchant_rating_ranking.pkl')