# Cross Selling

## Market Basket Analysis

In [None]:
import math
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [5]:
#import libraries for mba


#import data for mba (data should contain old sub-categories)
purchases_subcat = pd.read_csv('data/bonus_amazonpurchasessubcat.csv')
survey = pd.read_csv('data/survey.csv')

#data transformation
df_mba = pd.merge(purchases_subcat, survey, on = 'Survey ResponseID', how = 'left')
df_mba = df_mba.dropna(subset = ['Category'])
##convert Order Date column into datetime
df_mba['Order Date'] = pd.to_datetime(df_mba['Order Date'], errors='coerce')

In [6]:
#filter out sub-categories that appear in less than 1000 rows/transactions
thres_idx = df_mba['Category'].value_counts()[df_mba['Category'].value_counts()>=1000].index
df_mba = df_mba[df_mba['Category'].isin(thres_idx)]

In [7]:
#data transformation for mba (pivot)
df_mba['single_transaction'] = df_mba['Survey ResponseID'].astype(str)+"_"+df_mba['Order Date'].astype(str)
df_mba_piv = pd.crosstab(df_mba['single_transaction'], df_mba['Category'])

In [8]:
def mba(data):
  nrows = data.shape[0]
  niter = math.ceil(nrows/10000)
  all_rules = pd.DataFrame()
  for i in range(niter):
    sample_data = data.iloc[i*10000:(i+1)*10000]
    def encode(item_freq):
      return item_freq > 0
    basket_input = sample_data.applymap(encode)
    frequent_itemsets = apriori(basket_input, min_support=0.001, use_colnames=True)
    rules = association_rules(frequent_itemsets, metric="lift", num_itemsets = None)
    all_rules = pd.concat([all_rules, rules], ignore_index = True)
  return all_rules

rules_df = mba(df_mba_piv.sample(frac = 1, random_state = 2).reset_index(drop = True))

  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.applymap(encode)
  basket_input = sample_data.apply

In [9]:
rules_df.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(ABIS_BOOK),(ART_CRAFT_KIT),0.0826,0.0062,0.0012,0.014528,2.343201,1.0,0.000688,1.008451,0.624846,0.013699,0.00838,0.104038
1,(ART_CRAFT_KIT),(ABIS_BOOK),0.0062,0.0826,0.0012,0.193548,2.343201,1.0,0.000688,1.137576,0.57681,0.013699,0.120938,0.104038
2,(ABIS_BOOK),(BLANK_BOOK),0.0826,0.0048,0.0011,0.013317,2.774415,1.0,0.000704,1.008632,0.697148,0.012746,0.008558,0.121242
3,(BLANK_BOOK),(ABIS_BOOK),0.0048,0.0826,0.0011,0.229167,2.774415,1.0,0.000704,1.190141,0.642648,0.012746,0.159763,0.121242
4,(ABIS_BOOK),(DRINKING_CUP),0.0826,0.0092,0.0013,0.015738,1.710706,1.0,0.00054,1.006643,0.452852,0.014365,0.006599,0.078521


In [10]:
##copy of rules
rules_copy = rules_df.copy()

In [11]:
##group rules by averaging out all rows with same set of (antecedents and consequents)
aggregated_rules = rules_copy.groupby(['antecedents', 'consequents'], as_index=False).agg({
    'antecedent support':'mean',
    'consequent support':'mean',
    'support':'mean',
    'confidence':'mean',
    'lift':'mean',
    'leverage':'mean',
    'conviction':'mean',
    'zhangs_metric':'mean',
    'jaccard':'mean',
    'certainty':'mean',
    'kulczynski':'mean'})
aggregated_rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(ABIS_BOOK),(ART_CRAFT_KIT),0.082933,0.00635,0.001108,0.013377,2.116829,0.000581,1.007124,0.567611,0.012583,0.007072,0.094332
1,(ABIS_BOOK),(BLANKET),0.083,0.0083,0.001229,0.014812,1.789964,0.000539,1.006615,0.468098,0.01364,0.006567,0.081601
2,(ABIS_BOOK),(OFFICE_PRODUCTS),0.0857,0.007,0.0011,0.012835,1.833639,0.0005,1.005911,0.497251,0.012009,0.005877,0.084989
3,(ABIS_BOOK),(LAMP),0.08055,0.007175,0.0011,0.0136,1.894131,0.000522,1.006516,0.507598,0.01265,0.006471,0.083373
4,(ABIS_BOOK),(BATTERY),0.0816,0.0146,0.0011,0.01348,0.923315,-9.1e-05,0.998865,-0.082934,0.011567,-0.001136,0.044411


In [12]:
#filter aggregated rules
#remove lift < 1
aggregated_rules = aggregated_rules[aggregated_rules['lift'] >= 1]

#arrange aggregated rules such that rules are in descending order from best rules to worse rules
#sort by descending lift
#sort by descending confi
#sort by descending support
aggregated_rules = aggregated_rules.sort_values(['lift', 'confidence', 'support'], ascending = [False, False, False])

In [13]:
aggregated_rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
2892,"(DAIRY_BASED_DRINK, DAIRY_BASED_CHEESE)","(FOOD, BREAD, VEGETABLE)",0.0014,0.0017,0.001,0.714286,420.168067,0.000998,3.49405,0.999019,0.47619,0.713799,0.651261
1124,"(FOOD, BREAD, VEGETABLE)","(DAIRY_BASED_DRINK, DAIRY_BASED_CHEESE)",0.0017,0.0014,0.001,0.588235,420.168067,0.000998,2.425171,0.999319,0.47619,0.587658,0.651261
2571,"(FRUIT, DAIRY_BASED_YOGURT)","(MEAT, VEGETABLE, GROCERY)",0.001636,0.001636,0.001091,0.666667,407.555556,0.001088,2.995093,0.999181,0.5,0.666121,0.666667
7275,"(MEAT, VEGETABLE, GROCERY)","(FRUIT, DAIRY_BASED_YOGURT)",0.001636,0.001636,0.001091,0.666667,407.555556,0.001088,2.995093,0.999181,0.5,0.666121,0.666667
1198,"(BREAD, VEGETABLE, DAIRY_BASED_CHEESE)","(FOOD, DAIRY_BASED_DRINK)",0.0016,0.0016,0.001,0.625,390.625,0.000997,2.6624,0.999038,0.454545,0.624399,0.625


## Frequently Purchased Category for Each Customer

In [14]:
#take input about customer's id
cust_id = input('What is the customer\'s ID?\n') #can test code with: R_01vNIayewjIIKMF

In [15]:
#for each customer, count the number of transactions for each category
cust_count = purchases_subcat.groupby(['Survey ResponseID', 'Category']).size().reset_index(name='Count')
#return the most frequently purchased category for the customer id given as input
cust_freq_cat = cust_count[cust_count['Survey ResponseID'] == cust_id].sort_values(by = ['Count'], ascending = False).iloc[0,1]
print(cust_freq_cat)

IndexError: index 0 is out of bounds for axis 0 with size 0

## Recommend Products

In [None]:
def recomend_product(cust_freq_cat):
  recc_cat = aggregated_rules[aggregated_rules['antecedents'].apply(lambda x: cust_freq_cat in x)].iloc[0, 1]
  recc_cat = list(recc_cat)
  if len(recc_cat) == 2:
    items = []
    for i in range(2):
      if i == 0:
        items.append(purchases_subcat[purchases_subcat['Category'] == recc_cat[i]].groupby('Title').size().reset_index(name='Count').sort_values(by = 'Count', ascending = False).head(1)['Title'].tolist()[0])
      else:
        items += purchases_subcat[purchases_subcat['Category'] == recc_cat[i]].groupby('Title').size().reset_index(name='Count').sort_values(by = 'Count', ascending = False).head(2)['Title'].tolist()
  elif len(recc_cat) == 3:
    items = []
    for i in range(3):
      items.append(purchases_subcat[purchases_subcat['Category'] == recc_cat[i]].groupby('Title').size().reset_index(name='Count').sort_values(by = 'Count', ascending = False).head(1)['Title'].tolist()[0])
  else:
    items = purchases_subcat[purchases_subcat['Category'] == recc_cat[0]].groupby('Title').size().reset_index(name='Count').sort_values(by = 'Count', ascending = False).head(3)['Title']
    items = items.tolist()
  print("You might also like: ")
  for i in range(len(items)):
    print(items[i])
  return(items)

In [None]:
recommend = recomend_product(cust_freq_cat)

You might also like: 
Amazon Basics High-Speed HDMI Cable (18 Gbps, 4K/60Hz) - 6 Feet, Black
JSAUX USB-C to USB A Cable 3.1A Fast Charging [2-Pack 6.6ft], USB Type C Charger Cord Compatible with Samsung Galaxy S20 S10 S9 S8 A73 A51 A13, Note 20 10, LG G8 G7, PS5 Controller USB C Charger-Red
Amazon Basics Nylon Braided Lightning to USB A Cable, MFi Certified Apple iPhone Charger, Dark Gray, 6-Foot


# Upselling

## Cosine similarity

### Loading files and data

In [None]:
purchases = pd.read_csv('data/dataprep_purchases.csv')
purchases['Order Date'] = pd.to_datetime(purchases['Order Date'], format='%Y-%m-%d')
purchases = purchases.drop(purchases.columns[0], axis=1)

Downloading...
From (original): https://drive.google.com/uc?id=17_TZzFAkjQcMVfvOhXt9biAUYcJsSLxi
From (redirected): https://drive.google.com/uc?id=17_TZzFAkjQcMVfvOhXt9biAUYcJsSLxi&confirm=t&uuid=a7c0b349-be24-4821-81d0-1b80d80ca229
To: /content/AmazonPurchases.csv
100%|██████████| 260M/260M [00:05<00:00, 45.3MB/s]


### Recommendation algorithm

In [None]:
df = {
    'Product_id': purchases['ASIN/ISBN (Product Code)'],
    'Product_description': purchases['Title'],
    'Price': purchases['Purchase Price Per Unit'],
    'Category': purchases['Category']
}

df = pd.DataFrame(df)
df = df.dropna()
df['Price'] = df.groupby(['Product_id'])['Price'].transform('max')
df['Purchase count'] = df.groupby(['Product_id'])['Product_id'].transform('count')
df['Rating'] =  np.random.uniform(1, 5, size=df.shape[0])
df['Rating'] = df.groupby(['Product_id'])['Rating'].transform('mean')
df = df.drop_duplicates()
#Select category here
df = df[df['Category'] == 'Baby']
df = df.reset_index(drop=True)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['Product_description'])
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
def get_recommendations(product_id, cosine_sim=cosine_sim, df=df, metric = "similarity"):
    if metric == "similarity":
      idx = df.index[df['Product_id'] == product_id].tolist()[0]
      sim_scores = list(enumerate(cosine_sim[idx]))
      sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:]
      recommended_indices = [i[0] for i in sim_scores[:3]]
      return df.iloc[recommended_indices][['Product_description']]
    if metric == "price":
      idx = df.index[df['Product_id'] == product_id].tolist()[0]
      sim_scores = list(enumerate(cosine_sim[idx]))
      sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:]
      recommended_indices = [i[0] for i in sim_scores[:20]]
      price_list = df.iloc[recommended_indices][['Price','Product_description']]
      price_list = price_list.sort_values(by=['Price'])
      return price_list[:3]
    if metric == "rating":
      idx = df.index[df['Product_id'] == product_id].tolist()[0]
      sim_scores = list(enumerate(cosine_sim[idx]))
      sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:]
      recommended_indices = [i[0] for i in sim_scores[:20]]
      rating_list = df.iloc[recommended_indices][['Rating','Product_description']]
      rating_list = rating_list.sort_values(by=['Rating'], ascending = False)
      return rating_list[:3]
    if metric == "purchased":
      idx = df.index[df['Product_id'] == product_id].tolist()[0]
      sim_scores = list(enumerate(cosine_sim[idx]))
      sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:]
      recommended_indices = [i[0] for i in sim_scores[:20]]
      purchased_list = df.iloc[recommended_indices][['Purchase count','Product_description']]
      purchased_list = purchased_list.sort_values(by=['Purchase count'],ascending = False)
      return purchased_list[:3]


### Recommend products

In [None]:
#Highest similarity output
product_id = 'B011F7JK52'  # Replace with the ID of the product being purchased
recommended_products = get_recommendations(product_id,cosine_sim,df,"similarity")
pd.set_option("display.max_colwidth", None)
print("Recommended products for the customer:")
print(recommended_products)
#Baby Bowls and Matching Lids - Suction Cup Bowls for Babies, Toddlers & Infants - Set of 3 Sizes - 6 Pieces (item name)

Recommended products for the customer:
                                                                                                                                                                                     Product_description
1503                                                                                                                    SUPABOWL Baby Suction Bowls for Toddlers, Baby Bowls First Stage w Lids & Spoons
2585  Baby Bowls Feeding Set with Baby Fork and Baby Spoons BPA Free | Toddler Bowls with Baby Food Masher | Suction Baby Bowl for Toddlers with Toddler Utensils |Baby Bowls with Suction | Baby Shower
982                                                                                                                                                        2 Pack Silicone Baby Bowls with Super Suction


In [None]:
#Lowest price output
product_id = 'B011F7JK52'  # Replace with the ID of the product being purchased
recommended_products = get_recommendations(product_id,cosine_sim,df,"price")
pd.set_option("display.max_colwidth", None)
print("Recommended products for the customer:")
print(recommended_products)
#Baby Bowls and Matching Lids - Suction Cup Bowls for Babies, Toddlers & Infants - Set of 3 Sizes - 6 Pieces (item name)

Recommended products for the customer:
      Price  \
2489   3.97   
5118   5.99   
4427   6.95   

                                                                                                                          Product_description  
2489                 Lofca Food Grade Silicone Suction Bowls Set with Spoons - 2 Piece Baby Feeding Set for Babies Kids Toddlers Self Feeding  
5118                                                                                                  NUK Mash and Serve Bowl, Bowls, 1 Count  
4427  PandaEar Stay Put Spill Proof Stackable Baby Suction Bowls 3 Sizes for Toddlers with Silicone Feeding Utensils and Secure Lids BPA Free  


In [None]:
#Highest rating output
product_id = 'B011F7JK52'  # Replace with the ID of the product being purchased
recommended_products = get_recommendations(product_id,cosine_sim,df,"rating")
pd.set_option("display.max_colwidth", None)
print("Recommended products for the customer:")
print(recommended_products)
#Baby Bowls and Matching Lids - Suction Cup Bowls for Babies, Toddlers & Infants - Set of 3 Sizes - 6 Pieces (item name)

Recommended products for the customer:
        Rating  \
3775  4.567887   
5118  4.433689   
4427  4.366013   

                                                                                                                          Product_description  
3775                              Medela Breastmilk Collection Storage Feeding Bottle with Lids-8 Pack (8 Bottles and 8 Lids)w/lid 8oz /250ml  
5118                                                                                                  NUK Mash and Serve Bowl, Bowls, 1 Count  
4427  PandaEar Stay Put Spill Proof Stackable Baby Suction Bowls 3 Sizes for Toddlers with Silicone Feeding Utensils and Secure Lids BPA Free  


In [None]:
#Most purchased products output
product_id = 'B011F7JK52'  # Replace with the ID of the product being purchased
recommended_products = get_recommendations(product_id,cosine_sim,df,"purchased")
pd.set_option("display.max_colwidth", None)
print("Recommended products for the customer:")
print(recommended_products)
#Baby Bowls and Matching Lids - Suction Cup Bowls for Babies, Toddlers & Infants - Set of 3 Sizes - 6 Pieces (item name)

Recommended products for the customer:
      Purchase count  \
676                9   
2007               4   
2866               4   

                                                                                                                                                       Product_description  
676                                                                                     Munchkin Stay Put Suction Bowls for Babies and Toddlers, 3 Pack, Blue/Green/Purple  
2007                                                                                                                           NUK First Essentials Bunch-a-Bowls, 4 Count  
2866  UpwardBaby Bowls with Suction - 4 Piece Silicone Set with Spoon for Babies Kids Toddlers - BPA Free Baby Led Weaning Food Plates - First Stage Self Feeding Utensils  
