## Apriori Algorithm

Question: Can we predict which products a customer will most likely purchase together within various product segments?

Goal: Help Amazon identify products frequently bought together by customers to increase sales and revenues (cross sell) by analyzing Amazon Marketplace segment data.

In [1]:
# Import dependencies
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

# Importing apriori dependencies
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
# Connect to postgres/RDS database.
# Every setting is read from an environment variable of the same name so that
# real credentials never have to be saved inside the notebook. When a variable
# is not set, the value that used to be hardcoded here is used as a fallback.
POSTGRES_ADDRESS = os.environ.get('POSTGRES_ADDRESS', 'mypostgresdb.cwuhtytzosg8.us-east-2.rds.amazonaws.com')
POSTGRES_PORT = os.environ.get('POSTGRES_PORT', '5432')
POSTGRES_USERNAME = os.environ.get('POSTGRES_USERNAME', 'root')
POSTGRES_PASSWORD = os.environ.get('POSTGRES_PASSWORD', 'XXXX')  # placeholder: export POSTGRES_PASSWORD instead of editing this line
POSTGRES_DBNAME = os.environ.get('POSTGRES_DBNAME', 'my_data_class_db')

# A long string that contains the necessary Postgres login information.
# Username and password are percent-encoded so that characters such as
# '@', ':' or '/' inside them cannot be mistaken for URL delimiters.
postgres_str = 'postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'.format(
    username=quote(POSTGRES_USERNAME, safe=''),
    password=quote(POSTGRES_PASSWORD, safe=''),
    ipaddress=POSTGRES_ADDRESS,
    port=POSTGRES_PORT,
    dbname=POSTGRES_DBNAME,
)
# Create the connection (a SQLAlchemy engine, passed to pandas as `con=`)
cnx = create_engine(postgres_str)

### Video Analysis 

In [3]:
# Pull the video purchase records used for the apriori (market-basket) analysis
df_videos = pd.read_sql_query(
    '''SELECT * FROM videos_apriori_analysis''',
    con=cnx,
)
df_videos.head()

Unnamed: 0,customer_id,product_id,quantity
0,25551507,0788812807,1
1,25551507,6302320402,1
2,31354506,6301442733,1
3,42622115,B00003CX7L,1
4,27446106,0788806270,1


In [5]:
# Build the customer x product basket matrix that apriori consumes:
# total quantity per (customer_id, product_id), one column per product,
# with combinations that never occurred filled with 0.
apriori_table = (
    df_videos
    .groupby(["customer_id", "product_id"])["quantity"]
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index("customer_id")
)
apriori_table

product_id,0738920525,0767800958,0767815963,076781598X,076783822X,0773386777,0780614097,078062128X,0780625900,0780626028,...,B000E4C2TY,B000FP5PPA,B000H61X62,B000QX1SUC,B000TOSN7Y,B0019KBJOQ,B004J0SG6C,B00AVPYKFU,B00B9LNBWS,B00BBND56G
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15160,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19893,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20767,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
26626,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53094662,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53094728,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53095639,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53096090,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Cleaning Apriori table to remove unnecessary numbers
def encode_units(x):
    """Collapse a purchased quantity into a 0/1 "bought at all" flag.

    Parameters
    ----------
    x : number
        A single cell of the basket matrix (summed quantity for one
        customer/product pair).

    Returns
    -------
    int
        1 when ``x`` is strictly positive, otherwise 0.

    Notes
    -----
    Always returns 0 or 1. Values strictly between 0 and 1 map to 1, and
    NaN maps to 0 (every comparison with NaN is False), so no ``None``
    can leak into the matrix handed to apriori.
    """
    return 1 if x > 0 else 0
# Binarize the basket matrix: 1 where the customer bought the product at least
# once (summed quantity > 0), otherwise 0. A vectorized comparison is used
# instead of a per-cell `applymap` call, which is deprecated in recent pandas
# and slow on a wide matrix.
apriori_cleaned_videos = (apriori_table > 0).astype("int64")
apriori_cleaned_videos

product_id,0738920525,0767800958,0767815963,076781598X,076783822X,0773386777,0780614097,078062128X,0780625900,0780626028,...,B000E4C2TY,B000FP5PPA,B000H61X62,B000QX1SUC,B000TOSN7Y,B0019KBJOQ,B004J0SG6C,B00AVPYKFU,B00B9LNBWS,B00BBND56G
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15160,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19893,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
20767,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
26626,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41909,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53094662,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
53094728,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
53095639,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
53096090,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# Mine frequent itemsets from the 0/1 basket matrix; itemsets are reported with
# product ids (the column names) rather than column positions.
# NOTE(review): min_support=0.0001 is very permissive - confirm that it
# corresponds to a meaningful number of customers for this table.
item_association_videos = apriori(
    apriori_cleaned_videos,
    min_support=0.0001,
    use_colnames=True,
)
item_association_videos

Unnamed: 0,support,itemsets
0,0.002396,(0738920525)
1,0.001322,(0767800958)
2,0.001652,(0767815963)
3,0.001652,(076781598X)
4,0.002479,(076783822X)
...,...,...
1615,0.000165,"(0788812408, 0788805533, 155890641X, 630027419..."
1616,0.000165,"(0788812408, 0788805533, 155890641X, 630027419..."
1617,0.000165,"(0788812408, 155890641X, 6300274195, 078880219..."
1618,0.000165,"(0788812408, 0788805533, 155890641X, 630027419..."


In [8]:
# Turn the frequent itemsets into association rules, keeping rules with lift >= 1
apriori_rules_videos = association_rules(
    item_association_videos,
    metric="lift",
    min_threshold=1,
)

In [9]:
# Rank the rules from highest to lowest confidence
apriori_rules_videos = apriori_rules_videos.sort_values(by="confidence", ascending=False)
apriori_rules_videos

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
9175,"(0788802194, 0788812408, 0788812807)","(6304401132, 0788805533, 0788806270)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf
11817,"(0788812408, 155890641X, 0788802194, 630440113...",(6300274195),0.000165,0.001157,0.000165,1.000000,864.571429,0.000165,inf
11819,"(155890641X, 6300274195, 0788802194, 630440113...",(0788812408),0.000165,0.003139,0.000165,1.000000,318.526316,0.000165,inf
11820,"(155890641X, 0788812408, 0788802194, 6300274195)","(6304401132, 0788812807)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf
11821,"(155890641X, 0788812408, 6304401132, 6300274195)","(0788802194, 0788812807)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf
...,...,...,...,...,...,...,...,...,...
466,(B00004U8H5),(B000083C59),0.004957,0.002809,0.000165,0.033333,11.866667,0.000151,1.031577
322,(6302208661),(6302967945),0.004957,0.002726,0.000165,0.033333,12.226263,0.000152,1.031662
430,(B00004U8H5),(6305107807),0.004957,0.001404,0.000165,0.033333,23.733333,0.000158,1.033030
174,(094567189X),(6303243606),0.005040,0.002892,0.000165,0.032787,11.338642,0.000151,1.030909


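Interpretation of the 1st row of results (VHS titles bought together):

Caveat: this rule's support is only 0.000165, the smallest support value shown in the itemset table above, so its 100% confidence rests on very few customers and should be read as anecdotal rather than as a robust pattern.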

0788802194: Cinderella

0788812408: The Little Mermaid

0788812807: Lady and the Tramp

6304401132: Sleeping Beauty

0788805533: The Lion King II

0788806270: Bambi

In [12]:
# Tag every rule with its product segment (used by the visualization)
apriori_rules_videos = apriori_rules_videos.assign(Category='Videos')
apriori_rules_videos

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,Category
9175,"(0788802194, 0788812408, 0788812807)","(6304401132, 0788805533, 0788806270)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf,Videos
11817,"(0788812408, 155890641X, 0788802194, 630440113...",(6300274195),0.000165,0.001157,0.000165,1.000000,864.571429,0.000165,inf,Videos
11819,"(155890641X, 6300274195, 0788802194, 630440113...",(0788812408),0.000165,0.003139,0.000165,1.000000,318.526316,0.000165,inf,Videos
11820,"(155890641X, 0788812408, 0788802194, 6300274195)","(6304401132, 0788812807)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf,Videos
11821,"(155890641X, 0788812408, 6304401132, 6300274195)","(0788802194, 0788812807)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf,Videos
...,...,...,...,...,...,...,...,...,...,...
466,(B00004U8H5),(B000083C59),0.004957,0.002809,0.000165,0.033333,11.866667,0.000151,1.031577,Videos
322,(6302208661),(6302967945),0.004957,0.002726,0.000165,0.033333,12.226263,0.000152,1.031662,Videos
430,(B00004U8H5),(6305107807),0.004957,0.001404,0.000165,0.033333,23.733333,0.000158,1.033030,Videos
174,(094567189X),(6303243606),0.005040,0.002892,0.000165,0.032787,11.338642,0.000151,1.030909,Videos


In [13]:
# Save the video rules to CSV (the row index is not written)
apriori_rules_videos.to_csv(
    r'C:\Users\li_mi\Class\final_project\dev\apriori_rules_videos.csv',
    index=False,
)

### Personal Care Analysis

In [14]:
# Pull the personal-care appliance purchase records for the apriori analysis
df_personal_care = pd.read_sql_query(
    '''SELECT * FROM personal_care_appliances_apriori_analysis''',
    con=cnx,
)
df_personal_care.head()

Unnamed: 0,customer_id,product_id,quantity
0,32114233,B00OYRW4UE,1
1,18125776,B0000537JQ,1
2,19917519,B00HXXO332,1
3,18277171,B00EOB0JA2,1
4,2592955,B00HES9CMS,1


In [15]:
# Build the customer x product basket matrix for personal care:
# total quantity per (customer_id, product_id), one column per product,
# with combinations that never occurred filled with 0.
apriori_table_personal_care = (
    df_personal_care
    .groupby(["customer_id", "product_id"])["quantity"]
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index("customer_id")
)
apriori_table_personal_care

product_id,B0000532OT,B0000532OV,B0000537JP,B0000537JQ,B000068PBJ,B00008J1ZZ,B00009RB1I,B0000YS1BG,B0000YXUW6,B00012FJ9O,...,B00P6TUO5G,B00QH8QJ2C,B00QH96JQC,B00QH9M6QY,B00QR4JRHU,B00R3PFF4Q,B00RWIWFFQ,B00S02EJZW,B00TOYNBA4,B00XZJ2G46
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10470,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11344,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12674,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13044,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14147,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53092127,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53094082,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53094709,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53095826,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Binarize the basket matrix: 1 where the customer bought the product at least
# once (summed quantity > 0), otherwise 0. A vectorized comparison is used
# instead of a per-cell `applymap` call, which is deprecated in recent pandas
# and slow on a wide matrix.
apriori_cleaned_pcare = (apriori_table_personal_care > 0).astype("int64")

In [17]:
# Mine frequent itemsets from the 0/1 basket matrix; itemsets are reported with
# product ids (the column names) rather than column positions.
# NOTE(review): min_support=0.0001 is very permissive - confirm that it
# corresponds to a meaningful number of customers for this table.
item_association_pcare = apriori(
    apriori_cleaned_pcare,
    min_support=0.0001,
    use_colnames=True,
)
item_association_pcare

Unnamed: 0,support,itemsets
0,0.000834,(B0000532OT)
1,0.001252,(B0000532OV)
2,0.001371,(B0000537JP)
3,0.005275,(B0000537JQ)
4,0.002176,(B000068PBJ)
...,...,...
405,0.000119,"(B004O23YE4, B00H8ZTLLY)"
406,0.000149,"(B004O25RM6, B004O25RJ4)"
407,0.000119,"(B00H8ZTLLY, B004O25RJ4)"
408,0.000119,"(B004O275X0, B004O2762U)"


In [22]:
# Turn the frequent itemsets into association rules, keeping rules with lift >= 1
apriori_rules_pcare = association_rules(
    item_association_pcare,
    metric="lift",
    min_threshold=1,
)

In [23]:
# Rank the rules from highest to lowest confidence
apriori_rules_pcare = apriori_rules_pcare.sort_values(by="confidence", ascending=False)
apriori_rules_pcare

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
12,(B004O25RM6),(B004O25RJ4),0.000924,0.001699,0.000149,0.16129,94.949066,0.000147,1.190282
8,(B004O23YE4),(B004O276PW),0.004053,0.009715,0.000358,0.088235,9.08201,0.000318,1.086119
13,(B004O25RJ4),(B004O25RM6),0.001699,0.000924,0.000149,0.087719,94.949066,0.000147,1.095141
17,(B004O2762U),(B004O275X0),0.001609,0.001997,0.000119,0.074074,37.097844,0.000116,1.077844
0,(B000FFYC6S),(B000Y9PERQ),0.002474,0.005037,0.000179,0.072289,14.353033,0.000166,1.072493
15,(B004O25RJ4),(B00H8ZTLLY),0.001699,0.003993,0.000119,0.070175,17.572663,0.000112,1.071177
16,(B004O275X0),(B004O2762U),0.001997,0.001609,0.000119,0.059701,37.097844,0.000116,1.061781
3,(B004O23Y9E),(B004O276PW),0.003576,0.009715,0.000149,0.041667,4.288727,0.000114,1.03334
4,(B004PV7PIK),(B004O23Y9E),0.002891,0.003576,0.000119,0.041237,11.530928,0.000109,1.039281
9,(B004O276PW),(B004O23YE4),0.009715,0.004053,0.000358,0.03681,9.08201,0.000318,1.034009


In [24]:
# Tag every rule with its product segment (used by the visualization)
apriori_rules_pcare = apriori_rules_pcare.assign(Category='Personal Care')
apriori_rules_pcare

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,Category
12,(B004O25RM6),(B004O25RJ4),0.000924,0.001699,0.000149,0.16129,94.949066,0.000147,1.190282,Personal Care
8,(B004O23YE4),(B004O276PW),0.004053,0.009715,0.000358,0.088235,9.08201,0.000318,1.086119,Personal Care
13,(B004O25RJ4),(B004O25RM6),0.001699,0.000924,0.000149,0.087719,94.949066,0.000147,1.095141,Personal Care
17,(B004O2762U),(B004O275X0),0.001609,0.001997,0.000119,0.074074,37.097844,0.000116,1.077844,Personal Care
0,(B000FFYC6S),(B000Y9PERQ),0.002474,0.005037,0.000179,0.072289,14.353033,0.000166,1.072493,Personal Care
15,(B004O25RJ4),(B00H8ZTLLY),0.001699,0.003993,0.000119,0.070175,17.572663,0.000112,1.071177,Personal Care
16,(B004O275X0),(B004O2762U),0.001997,0.001609,0.000119,0.059701,37.097844,0.000116,1.061781,Personal Care
3,(B004O23Y9E),(B004O276PW),0.003576,0.009715,0.000149,0.041667,4.288727,0.000114,1.03334,Personal Care
4,(B004PV7PIK),(B004O23Y9E),0.002891,0.003576,0.000119,0.041237,11.530928,0.000109,1.039281,Personal Care
9,(B004O276PW),(B004O23YE4),0.009715,0.004053,0.000358,0.03681,9.08201,0.000318,1.034009,Personal Care


In [25]:
# Save the personal-care rules to CSV (the row index is not written)
apriori_rules_pcare.to_csv(
    r'C:\Users\li_mi\Class\final_project\dev\apriori_rules_personal_care.csv',
    index=False,
)

Interpretation of the 1st row of results:

B004O25RM6: doTERRA - White Fir Essential Oil - Helps to Balance Emotions and Soothe Anxious Feelings, Provides Soothing Effect in Massage, Relaxing Aroma; For Diffusion, Internal, or Topical Use - 15 ml

B004O25RJ4: doTERRA - Rosemary Essential Oil - 15 mL


### Video games Analysis 

In [26]:
# Pull the video-game purchase records for the apriori analysis
df_video_games = pd.read_sql_query(
    '''SELECT * FROM video_games_apriori_analysis''',
    con=cnx,
)
df_video_games.head()

Unnamed: 0,customer_id,product_id,quantity
0,12039526,B001CXYMFS,1
1,48880662,B0053OLY9O,1
2,45205407,B00KVP78FE,1
3,10548951,B00273Z9WM,1
4,50165446,B003O6E800,1


In [27]:
# Build the customer x product basket matrix for video games:
# total quantity per (customer_id, product_id), one column per product,
# with combinations that never occurred filled with 0.
apriori_video_games = (
    df_video_games
    .groupby(["customer_id", "product_id"])["quantity"]
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index("customer_id")
)
apriori_video_games

product_id,B0009VXBAQ,B000ERVMI8,B000FQ9R4E,B000M4KIME,B000NUBY0C,B000OYMYZQ,B000XJNTNS,B000ZKA0J6,B0013OL0BK,B0015AARJI,...,B00JK00S0S,B00KSQHX1K,B00KTNSKZU,B00KVOVBGM,B00KVP78FE,B00KVSQ848,B00MU1YENG,B00NFXON1Q,B00O9JLAX4,B00RSXRLUE
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10481,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10670,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10866,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
11026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53092633,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53092767,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53093124,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53093730,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
# Binarize the basket matrix: 1 where the customer bought the product at least
# once (summed quantity > 0), otherwise 0. A vectorized comparison is used
# instead of a per-cell `applymap` call, which is deprecated in recent pandas
# and slow on a wide matrix.
apriori_video_games = (apriori_video_games > 0).astype("int64")

In [29]:
# Mine frequent itemsets from the 0/1 basket matrix; itemsets are reported with
# product ids (the column names) rather than column positions.
# NOTE(review): min_support=0.0001 is very permissive - confirm that it
# corresponds to a meaningful number of customers for this table.
item_association_video_games = apriori(
    apriori_video_games,
    min_support=0.0001,
    use_colnames=True,
)

In [30]:
# Turn the frequent itemsets into association rules, keeping rules with lift >= 1
apriori_rules_video_games = association_rules(
    item_association_video_games,
    metric="lift",
    min_threshold=1,
)

In [31]:
# Rank the rules from highest to lowest confidence
apriori_rules_video_games = apriori_rules_video_games.sort_values(by="confidence", ascending=False)
apriori_rules_video_games

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
692,"(B00BGA9X9W, B00JK00S0S)",(B00BGA9WK2),0.000213,0.058590,0.000117,0.551724,9.416636,0.000105,2.100068
674,"(B00BGA9X9W, B00BI83EVU)",(B00BGA9WK2),0.000308,0.058590,0.000161,0.523810,8.940199,0.000143,1.976960
668,"(B00BGA9X9W, B00BGAA3S2)",(B00BGA9WK2),0.000896,0.058590,0.000382,0.426230,7.274737,0.000329,1.640743
661,"(B003O6FV8S, B0086V5UF0)",(B0053BG26C),0.000257,0.009579,0.000103,0.400000,41.757548,0.000100,1.650701
679,"(B00BGA9WK2, B00ENFVJJO)",(B00BGA9X9W),0.000749,0.013763,0.000279,0.372549,27.068716,0.000269,1.571815
...,...,...,...,...,...,...,...,...,...
657,(B0050SXKU4),"(B003O6CBIG, B007XVTR5S)",0.020259,0.000448,0.000110,0.005435,12.137741,0.000101,1.005014
681,(B00BGA9WK2),"(B00BGA9X9W, B00ENFVJJO)",0.058590,0.000844,0.000279,0.004761,5.639746,0.000229,1.003935
675,(B00BGA9WK2),"(B00BGA9X9W, B00BI83EVU)",0.058590,0.000308,0.000161,0.002756,8.940199,0.000143,1.002455
693,(B00BGA9WK2),"(B00BGA9X9W, B00JK00S0S)",0.058590,0.000213,0.000117,0.002005,9.416636,0.000105,1.001795


In [32]:
# Tag every rule with its product segment (used by the visualization)
apriori_rules_video_games = apriori_rules_video_games.assign(Category='Video Games')
apriori_rules_video_games

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,Category
692,"(B00BGA9X9W, B00JK00S0S)",(B00BGA9WK2),0.000213,0.058590,0.000117,0.551724,9.416636,0.000105,2.100068,Video Games
674,"(B00BGA9X9W, B00BI83EVU)",(B00BGA9WK2),0.000308,0.058590,0.000161,0.523810,8.940199,0.000143,1.976960,Video Games
668,"(B00BGA9X9W, B00BGAA3S2)",(B00BGA9WK2),0.000896,0.058590,0.000382,0.426230,7.274737,0.000329,1.640743,Video Games
661,"(B003O6FV8S, B0086V5UF0)",(B0053BG26C),0.000257,0.009579,0.000103,0.400000,41.757548,0.000100,1.650701,Video Games
679,"(B00BGA9WK2, B00ENFVJJO)",(B00BGA9X9W),0.000749,0.013763,0.000279,0.372549,27.068716,0.000269,1.571815,Video Games
...,...,...,...,...,...,...,...,...,...,...
657,(B0050SXKU4),"(B003O6CBIG, B007XVTR5S)",0.020259,0.000448,0.000110,0.005435,12.137741,0.000101,1.005014,Video Games
681,(B00BGA9WK2),"(B00BGA9X9W, B00ENFVJJO)",0.058590,0.000844,0.000279,0.004761,5.639746,0.000229,1.003935,Video Games
675,(B00BGA9WK2),"(B00BGA9X9W, B00BI83EVU)",0.058590,0.000308,0.000161,0.002756,8.940199,0.000143,1.002455,Video Games
693,(B00BGA9WK2),"(B00BGA9X9W, B00JK00S0S)",0.058590,0.000213,0.000117,0.002005,9.416636,0.000105,1.001795,Video Games


In [33]:
# Save the video-game rules to CSV (the row index is not written)
apriori_rules_video_games.to_csv(
    r'C:\Users\li_mi\Class\final_project\dev\apriori_rules_video_games.csv',
    index=False,
)

Interpretation of the 1st row of results:
    
B00JK00S0S: The Last of Us Remastered - PlayStation 4 

B00BGA9X9W: DualShock 4 Wireless Controller for PlayStation 4 - Jet Black 

B00BGA9WK2: PlayStation 4 500GB Console

### Watches Analysis 

In [34]:
# Pull the watch purchase records for the apriori analysis
df_watches = pd.read_sql_query(
    '''SELECT * FROM watches_apriori_analysis''',
    con=cnx,
)
df_watches.head()

Unnamed: 0,customer_id,product_id,quantity
0,27324930,B00DKYC7TK,1
1,7211452,B000EQS1JW,1
2,912779,B005JVP0FU,1
3,805483,B000JQJS6M,1
4,32555369,B00NC8PMUK,1


In [35]:
# Build the customer x product basket matrix for watches:
# total quantity per (customer_id, product_id), one column per product,
# with combinations that never occurred filled with 0.
apriori_watches = (
    df_watches
    .groupby(["customer_id", "product_id"])["quantity"]
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index("customer_id")
)

In [36]:
# Binarize the basket matrix: 1 where the customer bought the product at least
# once (summed quantity > 0), otherwise 0. A vectorized comparison is used
# instead of a per-cell `applymap` call, which is deprecated in recent pandas
# and slow on a wide matrix.
apriori_watches = (apriori_watches > 0).astype("int64")

In [37]:
# Mine frequent itemsets from the 0/1 basket matrix; itemsets are reported with
# product ids (the column names) rather than column positions.
# NOTE(review): min_support=0.0001 is very permissive - confirm that it
# corresponds to a meaningful number of customers for this table.
item_association_watches = apriori(
    apriori_watches,
    min_support=0.0001,
    use_colnames=True,
)

In [38]:
# Turn the frequent itemsets into association rules, keeping rules with lift >= 1
apriori_rules_watches = association_rules(
    item_association_watches,
    metric="lift",
    min_threshold=1,
)

In [39]:
# Rank the rules from highest to lowest confidence
apriori_rules_watches = apriori_rules_watches.sort_values(by="confidence", ascending=False)
apriori_rules_watches

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
206,"(B004P0UUE2, B005KKLEWS)",(B004P0UUBK),0.000275,0.004040,0.000122,0.444444,110.016835,0.000121,1.792728
194,"(B003EKIS4S, B003EKNMAI)",(B008D902Q2),0.000286,0.017822,0.000122,0.428571,24.047346,0.000117,1.718812
205,"(B004P0UUBK, B005KKLEWS)",(B004P0UUE2),0.000296,0.004458,0.000122,0.413793,92.819380,0.000121,1.698277
200,"(B003EKIU3W, B003EKNMAI)",(B008D902Q2),0.000337,0.017822,0.000122,0.363636,20.403809,0.000116,1.543423
198,"(B008D902Q2, B003EKIU3W)",(B003EKNMAI),0.000418,0.007753,0.000122,0.292683,37.750321,0.000119,1.402832
...,...,...,...,...,...,...,...,...,...
201,(B008D902Q2),"(B003EKIU3W, B003EKNMAI)",0.017822,0.000337,0.000122,0.006869,20.403809,0.000116,1.006577
123,(B005JVP0LE),(B004D35W8A),0.019740,0.004142,0.000122,0.006202,1.497308,0.000041,1.002073
109,(B004YM2FV2),(B003S7T8NM),0.028115,0.004254,0.000153,0.005443,1.279419,0.000033,1.001195
44,(B000T9VK56),(B000HFRO8O),0.042540,0.003387,0.000224,0.005276,1.557706,0.000080,1.001899


In [40]:
# Tag every rule with its product segment (used by the visualization)
apriori_rules_watches = apriori_rules_watches.assign(Category='Watches')
apriori_rules_watches

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,Category
206,"(B004P0UUE2, B005KKLEWS)",(B004P0UUBK),0.000275,0.004040,0.000122,0.444444,110.016835,0.000121,1.792728,Watches
194,"(B003EKIS4S, B003EKNMAI)",(B008D902Q2),0.000286,0.017822,0.000122,0.428571,24.047346,0.000117,1.718812,Watches
205,"(B004P0UUBK, B005KKLEWS)",(B004P0UUE2),0.000296,0.004458,0.000122,0.413793,92.819380,0.000121,1.698277,Watches
200,"(B003EKIU3W, B003EKNMAI)",(B008D902Q2),0.000337,0.017822,0.000122,0.363636,20.403809,0.000116,1.543423,Watches
198,"(B008D902Q2, B003EKIU3W)",(B003EKNMAI),0.000418,0.007753,0.000122,0.292683,37.750321,0.000119,1.402832,Watches
...,...,...,...,...,...,...,...,...,...,...
201,(B008D902Q2),"(B003EKIU3W, B003EKNMAI)",0.017822,0.000337,0.000122,0.006869,20.403809,0.000116,1.006577,Watches
123,(B005JVP0LE),(B004D35W8A),0.019740,0.004142,0.000122,0.006202,1.497308,0.000041,1.002073,Watches
109,(B004YM2FV2),(B003S7T8NM),0.028115,0.004254,0.000153,0.005443,1.279419,0.000033,1.001195,Watches
44,(B000T9VK56),(B000HFRO8O),0.042540,0.003387,0.000224,0.005276,1.557706,0.000080,1.001899,Watches


In [41]:
# Save the watch rules to CSV (the row index is not written)
apriori_rules_watches.to_csv(
    r'C:\Users\li_mi\Class\final_project\dev\apriori_rules_watches.csv',
    index=False,
)

Interpretation of the highest-confidence rule (1st row of results):

XOXO Women's XO5429 Rhinestone-Accented Two-Tone Bracelet Watch (B005KKLEWS)

XOXO Women's XO5302A Rhinestone-Accented Gold-Tone Bracelet Watch (B004P0UUE2)

XOXO Women's XO5301A Rhinestone-Accented Silver-Tone Bracelet Watch (B004P0UUBK)

In [42]:
# Stack the four per-segment rule tables row-wise into one frame for the visualization
df_viz_2 = pd.concat(
    [
        apriori_rules_videos,
        apriori_rules_pcare,
        apriori_rules_video_games,
        apriori_rules_watches,
    ],
    axis=0,
)
df_viz_2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,Category
9175,"(0788802194, 0788812408, 0788812807)","(6304401132, 0788805533, 0788806270)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf,Videos
11817,"(0788812408, 155890641X, 0788802194, 630440113...",(6300274195),0.000165,0.001157,0.000165,1.000000,864.571429,0.000165,inf,Videos
11819,"(155890641X, 6300274195, 0788802194, 630440113...",(0788812408),0.000165,0.003139,0.000165,1.000000,318.526316,0.000165,inf,Videos
11820,"(155890641X, 0788812408, 0788802194, 6300274195)","(6304401132, 0788812807)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf,Videos
11821,"(155890641X, 0788812408, 6304401132, 6300274195)","(0788802194, 0788812807)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf,Videos
...,...,...,...,...,...,...,...,...,...,...
201,(B008D902Q2),"(B003EKIU3W, B003EKNMAI)",0.017822,0.000337,0.000122,0.006869,20.403809,0.000116,1.006577,Watches
123,(B005JVP0LE),(B004D35W8A),0.019740,0.004142,0.000122,0.006202,1.497308,0.000041,1.002073,Watches
109,(B004YM2FV2),(B003S7T8NM),0.028115,0.004254,0.000153,0.005443,1.279419,0.000033,1.001195,Watches
44,(B000T9VK56),(B000HFRO8O),0.042540,0.003387,0.000224,0.005276,1.557706,0.000080,1.001899,Watches


In [44]:
# Keep only the rules whose confidence is at least 60% (the bound is inclusive)
high_confidence_mask = df_viz_2['confidence'] >= 0.6
df_viz_2 = df_viz_2[high_confidence_mask]
df_viz_2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,Category
9175,"(0788802194, 0788812408, 0788812807)","(6304401132, 0788805533, 0788806270)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf,Videos
11817,"(0788812408, 155890641X, 0788802194, 630440113...",(6300274195),0.000165,0.001157,0.000165,1.000000,864.571429,0.000165,inf,Videos
11819,"(155890641X, 6300274195, 0788802194, 630440113...",(0788812408),0.000165,0.003139,0.000165,1.000000,318.526316,0.000165,inf,Videos
11820,"(155890641X, 0788812408, 0788802194, 6300274195)","(6304401132, 0788812807)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf,Videos
11821,"(155890641X, 0788812408, 6304401132, 6300274195)","(0788802194, 0788812807)",0.000165,0.000165,0.000165,1.000000,6052.000000,0.000165,inf,Videos
...,...,...,...,...,...,...,...,...,...,...
7767,"(6304401132, 0788806270)","(0788812408, 0788812807, 6300274195)",0.000248,0.000165,0.000165,0.666667,4034.666667,0.000165,2.999504,Videos
3883,"(6304500831, 0788812807)","(6302794331, 6303314015)",0.000248,0.000165,0.000165,0.666667,4034.666667,0.000165,2.999504,Videos
3872,"(6304500831, 0788812807)","(155890641X, 6302158095)",0.000248,0.000248,0.000165,0.666667,2689.777778,0.000165,2.999256,Videos
644,"(155890641X, 0788802194)",(6300274195),0.000413,0.001157,0.000248,0.600000,518.742857,0.000247,2.497108,Videos


In [46]:
# Drop the metric columns the visualization does not use, leaving
# antecedents, consequents, confidence and Category.
df_viz_2 = df_viz_2.drop(
    columns=[
        'antecedent support',
        'consequent support',
        'support',
        'lift',
        'leverage',
        'conviction',
    ]
)
df_viz_2

Unnamed: 0,antecedents,consequents,confidence,Category
9175,"(0788802194, 0788812408, 0788812807)","(6304401132, 0788805533, 0788806270)",1.000000,Videos
11817,"(0788812408, 155890641X, 0788802194, 630440113...",(6300274195),1.000000,Videos
11819,"(155890641X, 6300274195, 0788802194, 630440113...",(0788812408),1.000000,Videos
11820,"(155890641X, 0788812408, 0788802194, 6300274195)","(6304401132, 0788812807)",1.000000,Videos
11821,"(155890641X, 0788812408, 6304401132, 6300274195)","(0788802194, 0788812807)",1.000000,Videos
...,...,...,...,...
7767,"(6304401132, 0788806270)","(0788812408, 0788812807, 6300274195)",0.666667,Videos
3883,"(6304500831, 0788812807)","(6302794331, 6303314015)",0.666667,Videos
3872,"(6304500831, 0788812807)","(155890641X, 6302158095)",0.666667,Videos
644,"(155890641X, 0788802194)",(6300274195),0.600000,Videos


In [47]:
# Save the combined high-confidence rules to CSV (the row index is not written)
df_viz_2.to_csv(
    r'C:\Users\li_mi\Class\final_project\dev\df_viz_2.csv',
    index=False,
)