In [1]:
# General library imports
#!pip install apyori 
from apyori import apriori 
import pandas as pd
import numpy as np
from functools import reduce
from datetime import date, datetime
from dateutil.relativedelta import relativedelta
from dateutil import relativedelta
import multiprocessing as mp
from itertools import combinations, groupby
from collections import Counter
from operator import itemgetter
import pickle
from sklearn.metrics.pairwise import cosine_similarity,euclidean_distances, pairwise_distances
from sklearn.decomposition import NMF

import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

# Graphing imports 
import matplotlib.pyplot as plt
import seaborn as sns

# Pandas options
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

In [2]:
def recall_fn(x, recc_col, actual_col):
    """Share of actually-purchased rows that were also recommended.

    A row counts as "actual" when ``actual_col`` is non-null and as
    "recommended" when ``recc_col`` is non-null.

    Parameters
    ----------
    x : pandas.DataFrame
        One row per (customer, item) pair.
    recc_col : str
        Column that is non-null for recommended rows.
    actual_col : str
        Column that is non-null for rows present in the next order.

    Returns
    -------
    float
        hits / number of actual rows.

    Raises
    ------
    ZeroDivisionError
        If no row has a non-null ``actual_col``.
    """
    was_bought = x[actual_col].notna()
    was_recommended = x[recc_col].notna()
    return len(x[was_bought & was_recommended]) / len(x[was_bought])
def precision_fn(x, recc_col, actual_col):
    """Share of recommended rows that were actually purchased.

    A row counts as "recommended" when ``recc_col`` is non-null and as
    "actual" when ``actual_col`` is non-null.

    Parameters
    ----------
    x : pandas.DataFrame
        One row per (customer, item) pair.
    recc_col : str
        Column that is non-null for recommended rows.
    actual_col : str
        Column that is non-null for rows present in the next order.

    Returns
    -------
    float
        hits / number of recommended rows.

    Raises
    ------
    ZeroDivisionError
        If no row has a non-null ``recc_col``.
    """
    was_bought = x[actual_col].notna()
    was_recommended = x[recc_col].notna()
    return len(x[was_bought & was_recommended]) / len(x[was_recommended])


In [3]:
data = pd.read_csv('DATA/CA at Big Basket Data.csv')

In [4]:
data['Description'].nunique()

216

In [5]:
unique_prods = data[['SKU','Description']].drop_duplicates()
unique_prods.head()

Unnamed: 0,SKU,Description
0,34993740,Other Sauces
1,15669800,Cashews
2,34989501,Other Dals
3,7572303,Namkeen
4,15669856,Sugar


In [6]:
# Parse the order timestamp column into datetimes so it can be compared and aggregated.
data['Created On'] = pd.to_datetime(data['Created On'])
# Broadcast each member's latest order timestamp onto every one of that member's rows.
data['max_date'] = data.groupby('Member')['Created On'].transform('max')

In [7]:
# Hold out each member's latest transaction (rows whose timestamp equals that member's max timestamp)
# as the test case, and keep every earlier row as the purchase history used to build recommendations.
# NOTE: this cell overwrites `data` and does not recompute `max_date`, so re-running it on its own
# leaves `latest_transactions` empty. Re-run from the data-loading cell instead.
latest_transactions = data[data['max_date']==data['Created On']]
data = data[data['max_date']!=data['Created On']]
latest_transactions.shape, data.shape

((880, 6), (61261, 6))

In [8]:
print(f'Unique customers in data : {data["Member"].nunique()}')

Unique customers in data : 106


In [9]:
# From the data, extract the mean number of items per basket over each member's most recent baskets
data = data.sort_values(by = ['Member','Created On'], ascending = False)

# One row per (Member, basket timestamp) holding the number of items in that basket
recent_months_baskets = data.groupby(['Member','Created On'])['SKU'].count().reset_index(drop=False)

# groupby() returns its keys in ascending order regardless of how `data` was sorted, so a plain
# cumcount() would give rn == 0 to the OLDEST basket. Counting from the end of each member's
# history makes rn == 0 the most recent basket, which is what the `rn < 5` filter below expects.
recent_months_baskets['rn'] = recent_months_baskets.groupby('Member').cumcount(ascending=False)

# Find avg. number of products sold per basket in the last n (here 5) baskets
mean_recent_basket_size = (
    recent_months_baskets[recent_months_baskets['rn']<5]
    .groupby('Member')['SKU']
    .mean()
    .reset_index(drop=False)
    .rename(columns = {'SKU':'mean_recent_basket'})
)
# Every member in the history must have received a basket-size estimate
assert mean_recent_basket_size.shape[0] == data['Member'].nunique()

### Baseline model: P-TopFreq

In [10]:
# Purchase counts: one row per (Member, SKU) with the number of non-null `Order` entries in the history
baseline_freq = (
    data.groupby(['Member','SKU'])['Order']
    .count()
    .reset_index(drop=False)
    .rename(columns={'Order': 'freq'})
)

# Rank each member's SKUs from most to least frequently bought (ranking 1 = most frequent)
baseline_freq = baseline_freq.sort_values(by=['Member','freq'], ascending=False)
baseline_freq['ranking'] = baseline_freq.groupby('Member').cumcount() + 1

# Attach each member's average recent basket size
baseline_freq = baseline_freq.merge(mean_recent_basket_size, how='left', on='Member')

# Inflate the basket size by 1.5x (rounded up) to relax the cut-off and make the problem slightly easier.
# The full rationale is given with the CF model, which applies the same procedure.
baseline_freq['mean_recent_basket'] = np.ceil(baseline_freq['mean_recent_basket'] * 1.5)

########################################################
# NOTE: TOP 10 recs per customer only -> case study requirement.
# This fixed value replaces the inflated basket size computed just above.

baseline_freq['mean_recent_basket'] = 10
#######################################################

# Keep only the SKUs whose rank is within the per-member cut-off
baseline_freq = baseline_freq.loc[lambda ranked: ranked['ranking'] <= ranked['mean_recent_basket']]

# Results validation
# Build the ground truth: every (Member, SKU) in the held-out latest transaction gets next_order = 1.
# `.assign` returns a new frame, so no column is written onto a slice of `latest_transactions`
# (the original chained assignment only passed silently because warnings are filtered globally).
latest_transactions_items = latest_transactions[['Member','SKU']].assign(next_order=1)

# Outer merge keeps both recommended-but-not-bought rows (next_order is NaN)
# and bought-but-not-recommended rows (freq is NaN), which the metrics rely on.
baseline_freq = baseline_freq.merge(latest_transactions_items, on = ['Member','SKU'],how='outer')

# Metrics
print(f'Baseline Result ==> Precision : {precision_fn(baseline_freq,"freq","next_order"):0.4%} ; recall : {recall_fn(baseline_freq,"freq","next_order"):.4%}')

Baseline Result ==> Precision : 25.0000% ; recall : 30.1136%


In [11]:
def months_diff(x,ref_date):
    """Return the number of whole calendar months from ``ref_date`` to ``x``.

    Parameters
    ----------
    x, ref_date : datetime.date or datetime.datetime
        The two points in time; the result is negative when ``x`` precedes
        ``ref_date``.

    Returns
    -------
    int
        ``years * 12 + months`` of the relative delta between the two dates
        (leftover days are ignored).
    """
    # Import the class locally under a private alias: the notebook imports both the
    # `relativedelta` class and the `relativedelta` module under the same name, so the
    # global name depends on import order.
    from dateutil.relativedelta import relativedelta as _relativedelta

    delta = _relativedelta(x, ref_date)
    return delta.months + delta.years*12

def days_diff(x,ref_date):
    """Return the ``.days`` component of ``x - ref_date`` (negative when ``x`` is earlier)."""
    elapsed = x - ref_date
    return elapsed.days

def create_weights(df, date_col = 'Date', max_date_col='max_date', decay_days=90):
    """Compute an exponential recency weight for every row of ``df``.

    The weight is ``exp(days / decay_days)`` where ``days`` is the (usually
    negative) number of days from the reference date to the row's date, so a
    row dated on the reference date gets weight 1.0 and older rows decay
    towards 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to weight.
    date_col : str
        Column holding each row's date.
    max_date_col : str
        Column holding the reference date; only the value in the first row is
        used, so it is expected to be constant within ``df``.
    decay_days : float
        Time scale of the decay in days (default 90, the value that was
        previously hard-coded).

    Returns
    -------
    pandas.Series
        Float weights aligned with ``df.index``; empty if ``df`` is empty.
    """
    if df.empty:
        # Nothing to weight; avoid indexing into an empty column below.
        return pd.Series(index=df.index, dtype=float)

    reference_date = pd.to_datetime(df[max_date_col].iloc[0])
    # Vectorised day difference instead of a row-wise apply.
    days_from_reference = (pd.to_datetime(df[date_col]) - reference_date).dt.days
    return np.exp(days_from_reference/decay_days)


In [12]:
# Create weights
# Work at day granularity: keep only the calendar date of each order ...
data['Date'] = data['Created On'].dt.date
# ... and overwrite `max_date` with each member's latest remaining (training) order date,
# which create_weights uses as the reference point for the decay.
data['max_date'] = data.groupby('Member')['Date'].transform('max')

# Compute the weights member by member. `assign` returns a new frame per group, so no column is
# written onto a groupby slice, and the result is built in one concat.
data = pd.concat(
    [member_rows.assign(weights=create_weights(member_rows)) for _, member_rows in data.groupby('Member')],
    axis=0,
)

data.head()

Unnamed: 0,Member,Order,SKU,Created On,Description,max_date,Date,weights
7723,M04158,6755145,15668520,2014-06-30 08:44:00,Bread,2014-06-30,2014-06-30,1.0
7729,M04158,6807524,34986117,2014-06-26 14:26:00,Whole Spices,2014-06-30,2014-06-26,0.956529
7730,M04158,6807524,15668520,2014-06-26 14:26:00,Bread,2014-06-30,2014-06-26,0.956529
7726,M04158,6785498,34993343,2014-06-19 16:31:00,Aluminium Foil & Cling Wrap,2014-06-30,2014-06-19,0.884952
7727,M04158,6785498,15668520,2014-06-19 16:31:00,Bread,2014-06-30,2014-06-19,0.884952


In [13]:
# Each member is represented as a vector over SKUs (one column of the pivot below).
# An entry is the sum of the recency weights of that member's purchases of that SKU,
# so it grows both with how often and with how recently the item was bought.
# SKUs a member never bought get 0.

data['item_weight'] = data['weights']

# Total weight per (Member, SKU)
item_freq = data.groupby(['Member','SKU'])['item_weight'].sum().reset_index(drop=False)

# SKU x Member matrix of summed weights
item_freq_pivot = item_freq.pivot_table(
    index='SKU',
    columns='Member',
    values='item_weight',
    aggfunc='sum',
).fillna(0)

In [14]:

item_freq_pivot.head()

Member,M04158,M08075,M09303,M09736,M12050,M12127,M14746,M16218,M16611,M18732,M22037,M25900,M27458,M27871,M31101,M31908,M31966,M32039,M32409,M32449,M32480,M32655,M33064,M33422,M33491,M33558,M33745,M33767,M34566,M35070,M35464,M35538,M35649,M36366,M36432,M36702,M36876,M37253,M37600,M38622,M39021,M40184,M41700,M41747,M41781,M42182,M42513,M42827,M43189,M43831,M43977,M44156,M45375,M45470,M46325,M46328,M46575,M46687,M47229,M48101,M48154,M48938,M50038,M50094,M50420,M50767,M51043,M51278,M52629,M54100,M54345,M54382,M54619,M54796,M55932,M56255,M56309,M56368,M56489,M56516,M56897,M57093,M57327,M57354,M58761,M58939,M59012,M59232,M62656,M62833,M63404,M64055,M64379,M76390,M77779,M78365,M78720,M82651,M84827,M86304,M86572,M90375,M91098,M96365,M99030,M99206
SKU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1
6884195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058167,0.046062,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.381562,0.0,0.0,0.0,0.0,0.047623,0.0,0.651648,1.046036,0.0,0.0,0.0,0.0,0.0,0.0,0.016758,0.020018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.347636,0.0,0.0,0.0,0.0,0.016945,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028885,0.047623,0.0,0.114559,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7536640,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003201,0.0,0.00823,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7537167,0.114559,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005963,0.003364,0.0,0.318401,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7537178,0.085816,0.0,0.0,0.0,0.0,0.0,0.0,0.020018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015163,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035674,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7538018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.295802,0.0,0.33659,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.956529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.651428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
item_freq_pivot.shape

(1723, 106)

In [16]:
# Sparsity = share of entries that are exactly zero; used to judge how dense the CF input matrix is.
def sparsity_pcent(df):
    """Return the fraction of cells in ``df`` that are equal to 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Matrix to inspect.

    Returns
    -------
    float
        Number of zero cells divided by the total number of cells.

    Raises
    ------
    ZeroDivisionError
        If ``df`` has no cells.
    """
    values = df.values
    # Count in NumPy rather than with the builtin sum(), which iterates element by element.
    return np.count_nonzero(values == 0) / values.size

print(f'Sparsity : {sparsity_pcent(item_freq_pivot):.2%}')

Sparsity : 91.60%


In [17]:
# Sparsity is too high. Use NMF (non-negative matrix factorization) to factorize into a lower-dimensional matrix
# The dimension is chosen arbitrarily in this case, but it could also be tuned as part of a hyperparameter loop

In [18]:
# W = item_freq_pivot.copy(deep=True)
# n = 500

In [19]:

# Fit NMF at several numbers of components and report how sparse the transformed matrix W is for each.
# `factorize` and `W` are overwritten on every iteration; only the printed sparsity is kept.
for dim in [50,40,30,20,10]:
    factorize = NMF(n_components=dim, init='random', random_state=42)
    # Rows of item_freq_pivot are SKUs, so W has one row per SKU and `dim` columns.
    W = factorize.fit_transform(item_freq_pivot.values)
    print(f'Sparsity with dim {dim} : {sparsity_pcent(pd.DataFrame(W)):.2%}')

Sparsity with dim 50 : 80.02%
Sparsity with dim 40 : 77.49%
Sparsity with dim 30 : 75.80%
Sparsity with dim 20 : 71.60%
Sparsity with dim 10 : 60.88%


In [20]:
# We choose 40 components.
# Note that this is a fairly arbitrary choice. Ideally, we'd tune the number of components as a hyperparameter.
factorize = NMF(n_components=40, init='random', random_state=42)
# W is the low-dimensional item representation used for the similarity computation below.
W = factorize.fit_transform(item_freq_pivot.values)

In [94]:
W[0]

array([8.95559559e-01, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       7.21060697e-04, 4.36921702e-01, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       2.25458809e-03, 0.00000000e+00, 2.26327720e-01, 0.00000000e+00,
       3.21145746e-02, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 1.17133775e-01, 0.00000000e+00, 0.00000000e+00,
       1.28559308e-02, 1.18789818e-01, 7.49893283e-03, 0.00000000e+00,
       4.95736558e-02, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.35745146e-02])

### Similarity functions 

In [40]:
1 - pairwise_distances([[5,0,10],[50,0,100]],metric='cosine')

array([[1., 1.],
       [1., 1.]])

In [41]:
pairwise_distances([[5,0,10],[50,0,100]],metric='l2') #euclidean

array([[  0.        , 100.62305899],
       [100.62305899,   0.        ]])

In [42]:
1 - pairwise_distances([[5,0,10],[50,0,100]],metric='correlation')

array([[1., 1.],
       [1., 1.]])

In [78]:
# Now that we have a factorized matrix, let's compute the pairwise similarity between items (rows of W).
# If the customer set and item set are large, this can be an extremely compute- and memory-intensive operation.
# pairwise_distances returns cosine *distance*; subtracting it from 1 turns it into cosine similarity.
sim_matrix = 1 - pairwise_distances(W,metric='cosine')

In [79]:
sim_matrix.shape

(1723, 1723)

In [80]:
# For each item (one row of the similarity matrix), keep the k largest similarities and set the rest to 0.
def extract_top_k(sim_array, k = 11):
    """Return a copy of ``sim_array`` with everything but its ``k`` largest values zeroed.

    Parameters
    ----------
    sim_array : array-like, 1-D
        Similarities of one item to every item.
    k : int
        Number of largest entries to keep. If ``k`` is at least the length of
        ``sim_array``, nothing is zeroed.

    Returns
    -------
    numpy.ndarray
        New array of the same shape; the input is left untouched.
    """
    # Work on a copy: np.apply_along_axis passes views of the input matrix, so writing
    # into `sim_array` directly would silently overwrite the caller's similarity matrix.
    top_k = np.array(sim_array, copy=True)

    # Clamp at 0 so that k > len(sim_array) does not turn into a negative slice bound,
    # which would zero out entries that should be kept.
    n_drop = max(top_k.shape[0] - k, 0)
    if n_drop:
        smallest_first = np.argsort(top_k)
        top_k[smallest_first[:n_drop]] = 0
    return top_k
    
# We choose 100 similar items here. Again, this should ideally be found after a hyperparameter search or some analysis. 
n = 100
# Apply extract_top_k to every row (axis=1) of the similarity matrix.
# k = n + 1: presumably the extra slot is for the item itself, whose self-similarity sits on the diagonal.
sim_matrix_filt = np.apply_along_axis(extract_top_k, 1, sim_matrix, k = n + 1)

In [95]:
sim_matrix_filt.shape

(1723, 1723)

In [82]:
# While there are many ways in which to consolidate results for each customer-item pair, we will keep it simple and 
# let the weights be the weighted sum across all similar items
# Note: we can set the diagonal weight to a number >1 to give self-history more importance, or <1 to give it less (for new item recommendations)
# The item DF has dimensions (1723, 106) and the similarity array has dimensions (1723, 1723)
# We do sim_matrix_filt @ item_freq_pivot to get an output of dim (1723, 106)

def reweight_matrix(matrix):
    """Rescale each row so the item itself and its similar items carry equal total weight.

    Every row is divided by the sum of its off-diagonal entries, and the
    diagonal is then set to 1. The off-diagonal sum is computed as
    ``row_sum - 1``, i.e. the diagonal of ``matrix`` is assumed to be 1
    (self-similarity).

    Parameters
    ----------
    matrix : array-like, 2-D
        Similarity matrix with a unit diagonal.

    Returns
    -------
    numpy.ndarray
        New float array; ``matrix`` itself is not modified. Rows whose
        off-diagonal sum is 0 keep zeros off the diagonal instead of
        becoming NaN/inf.
    """
    weights = np.asarray(matrix, dtype=float)
    other_user_weights = weights.sum(axis=1) - 1
    row_denominator = other_user_weights[:, None]

    # Rows with no similar items would divide by zero; leave their off-diagonal entries at 0
    # so they do not inject NaN into the later matrix products.
    reweighted = np.divide(
        weights,
        row_denominator,
        out=np.zeros_like(weights),
        where=row_denominator != 0,
    )
    np.fill_diagonal(reweighted, val = 1)
    return reweighted

# Reweight matrix - provides more importance to self-product vs others.
sim_matrix_filt_reweighted = reweight_matrix(sim_matrix_filt)

# Creates an indicator of which items were purchased (True where the member has any weight on the SKU)
indicator_array = item_freq_pivot.values > 0

# Finds the weighted sum of products:
#   numerator   = similarity-weighted purchase weights of the item and its similar items
#   denominator = total similarity weight of those items the member actually purchased
weighted_purchases = sim_matrix_filt_reweighted @ item_freq_pivot.values
purchased_weight_totals = sim_matrix_filt_reweighted @ indicator_array

# Where the member bought neither the item nor any similar item the denominator is 0;
# score those cells 0 instead of producing 0/0 = NaN (and a runtime warning).
weighted_sums_prods = np.divide(
    weighted_purchases,
    purchased_weight_totals,
    out=np.zeros_like(weighted_purchases),
    where=purchased_weight_totals != 0,
)


In [96]:
weighted_sums_prods.shape

(1723, 106)

In [83]:
sim_matrix_filt_reweighted[0]

array([1.        , 0.        , 0.        , ..., 0.        , 0.00987302,
       0.        ])

In [84]:
# Reshape the SKU x Member history matrix and the recommendation matrix into long
# (SKU, Member, value) tables and join them into a single dataframe.

value_vars = item_freq_pivot.columns

# Historical data: one row per (SKU, Member) with the summed purchase weight
cust_item_df = item_freq_pivot.reset_index(drop=False).melt(
    id_vars='SKU',
    value_vars=value_vars,
    var_name='Member',
    value_name='prev_purchases',
)

# Recommendations: same layout, holding the CF score
recc_pivot_df = pd.DataFrame(weighted_sums_prods, index=item_freq_pivot.index, columns=value_vars)
recc_df = recc_pivot_df.reset_index(drop=False).melt(
    id_vars='SKU',
    value_vars=value_vars,
    var_name='Member',
    value_name='recc_purchases',
)

# The join must neither drop nor duplicate rows relative to either input
results = cust_item_df.merge(recc_df, how='left', on=['Member','SKU'])
assert results.shape[0] == cust_item_df.shape[0] == recc_df.shape[0]

In [85]:
# Label each (SKU, Member) pair: a positive score on an item the member never bought is a new-item
# recommendation; everything else is treated as a repeat-item recommendation.
# Vectorised with np.where instead of a row-wise apply over the whole table.
is_new_item_recc = (results['recc_purchases'] > 0) & (results['prev_purchases'] == 0)
results['recc_type'] = np.where(is_new_item_recc, 'New_Item_Recc', 'Repeat_Item_Recc')

# Subsetting to those with a positive value (either bought before or recommended)
results = results[(results['prev_purchases']>0)|(results['recc_purchases']>0)]

In [86]:
# Checking what % of the data lies in each group
results.groupby(results['prev_purchases']==0)['SKU'].count()*100/results.shape[0]

prev_purchases
False     8.518597
True     91.481403
Name: SKU, dtype: float64

In [87]:
# Merge with avg. products purchased in recent months 
results = results.merge(mean_recent_basket_size, on = 'Member', how = 'left')

In [88]:
# ML is probabilistic
# We'll recommend slightly more products than were purchased on avg. 
# For 2 reasons:
# Firstly, since it's an avg. purchase, at least 1 of the 5 baskets used to compute the avg. had a larger itemset 
# Secondly, since we will likely get a few products wrong, we want a bit of latitude in how many items should be displayed 
results['mean_recent_basket'] = results['mean_recent_basket'].apply(lambda x: np.ceil(x*1.5))

########################################################
# NOTE: TOP 10 recs per customer only
# This fixed cut-off overwrites the inflated basket size computed just above.

results['mean_recent_basket'] = 10
#######################################################


In [89]:
# Split the problem into new-item and repeat-item recommendations -> only repeat purchases are handled for now.
results_new = results.loc[results['recc_type'].eq('New_Item_Recc')]
results_repeat = results.loc[results['recc_type'].ne('New_Item_Recc')]

In [90]:
# Repeat purchases: order each member's items by CF score (highest first), number them from 1,
# and keep only those whose rank is within the member's `mean_recent_basket` cut-off.
results_repeat = (
    results_repeat
    .sort_values(by=['Member','recc_purchases'], ascending=False)
    .assign(ranking=lambda ranked: ranked.groupby('Member').cumcount() + 1)
    .loc[lambda ranked: ranked['ranking'] <= ranked['mean_recent_basket']]
)

In [91]:
#results_repeat.head(100)

In [92]:
# Let's validate which of these products are actually purchased in the next order 

# Build the ground truth: every (Member, SKU) in the held-out latest transaction gets next_order = 1.
# `.assign` returns a new frame, so no column is written onto a slice of `latest_transactions`
# (the original chained assignment only passed silently because warnings are filtered globally).
latest_transactions_items = latest_transactions[['Member','SKU']].assign(next_order=1)

# Merge the datasets. The outer join keeps recommended-but-not-bought rows (next_order is NaN)
# as well as bought-but-not-recommended rows (recc_purchases is NaN), which the metrics rely on.
results_repeat = results_repeat.merge(latest_transactions_items, on = ['Member','SKU'],how='outer')

In [93]:
print(f'CF Result ==> Precision : {precision_fn(results_repeat,"recc_purchases","next_order"):0.4%} ; recall : {recall_fn(results_repeat,"recc_purchases","next_order"):.4%}')

CF Result ==> Precision : 27.3585% ; recall : 32.9545%
