# Business Understanding
Tujuan utama dari final project ini adalah menganalisis pola pembelian di tiga outlet coffee shop di Amerika berdasarkan data transaksi yang dikumpulkan selama bulan April 2019. Sementara itu, tujuan bisnisnya adalah merancang produk bundling berdasarkan pola pembelian pelanggan yang dihasilkan oleh pemodelan tersebut. Produk bundling ini diharapkan dapat mengoptimalkan penjualan dan membantu coffee shop mencapai keuntungan maksimal.

Pola pembelian pelanggan dapat dianalisis dengan metode asosiasi (association rule mining); algoritma yang paling umum digunakan untuk metode ini adalah algoritma Apriori.

# Data Understanding

In [None]:
# Library

import pandas as pd
import numpy as np

In [None]:
# Mount Google Drive so the notebook can read files stored there
from google.colab import drive

drive.mount('/content/drive')

In [None]:
# Location of the transaction workbook on the mounted Drive
path = '/content/drive/MyDrive/coffeeshop_fix2.xlsx'

# Read the workbook into a DataFrame and preview its first rows
df = pd.read_excel(path)
df.head()

  and should_run_async(code)


Unnamed: 0,id_transaksi,tanggal_transaksi,id_outlet,kota_outlet,id_customer,id_produk,kategori_produk,nama_produk,quantity,harga_jual,total_harga
0,1745,2019-04-01,8,New York,8089,43,Tea,Lemon Grass Lg,2,3.0,6.0
1,73,2019-04-01,8,New York,8446,78,Bakery,Scottish Cream Scone,1,4.5,4.5
2,820,2019-04-01,3,Long Island City,466,74,Bakery,Ginger Biscotti,1,3.5,3.5
3,1619,2019-04-01,8,New York,8200,58,Drinking Chocolate,Dark chocolate Rg,1,3.5,3.5
4,1291,2019-04-01,3,Long Island City,428,44,Tea,Peppermint Rg,2,2.5,5.0


In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24852 entries, 0 to 24851
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   id_transaksi       24852 non-null  int64         
 1   tanggal_transaksi  24852 non-null  datetime64[ns]
 2   id_outlet          24852 non-null  int64         
 3   kota_outlet        24852 non-null  object        
 4   id_customer        24852 non-null  int64         
 5   id_produk          24852 non-null  int64         
 6   kategori_produk    24852 non-null  object        
 7   nama_produk        24852 non-null  object        
 8   quantity           24852 non-null  int64         
 9   harga_jual         24852 non-null  float64       
 10  total_harga        24852 non-null  float64       
dtypes: datetime64[ns](1), float64(2), int64(5), object(3)
memory usage: 2.1+ MB


  and should_run_async(code)


# Data Preparation

In [None]:
# Count missing values in each column
df.isnull().sum()

  and should_run_async(code)


id_transaksi         0
tanggal_transaksi    0
id_outlet            0
kota_outlet          0
id_customer          0
id_produk            0
kategori_produk      0
nama_produk          0
quantity             0
harga_jual           0
total_harga          0
dtype: int64

In [None]:
# Count missing values in each column.
# NOTE(review): isna and isnull are the same check in pandas, so this
# repeats the isnull().sum() cell above.
df.isna().sum()

  and should_run_async(code)


id_transaksi         0
tanggal_transaksi    0
id_outlet            0
kota_outlet          0
id_customer          0
id_produk            0
kategori_produk      0
nama_produk          0
quantity             0
harga_jual           0
total_harga          0
dtype: int64

In [None]:
# List the distinct outlet ids present in the data
df.id_outlet.unique()

  and should_run_async(code)


array([8, 3, 5])

Id Outlet 3 = Outlet Broadway - Long Island City <br>
Id Outlet 5 = Outlet Church Street - New York <br>
Id Outlet 8 = Outlet Avenue - New York

## Transformasi Data Outlet Broadway - Long Island City

In [None]:
# Build the basket matrix for outlet 3: one row per id_transaksi, one column
# per kategori_produk, each cell holding the number of line items of that
# category in the transaction (0.0 when the category is absent).
# NOTE(review): baskets are keyed on id_transaksi alone. If transaction ids
# restart every day, receipts from different dates are merged into a single
# basket - confirm, and add tanggal_transaksi to the group keys if so.
coffeeshop3 = (
    df[df['id_outlet'] == 3]
    .groupby(['id_transaksi', 'kategori_produk'])['quantity']
    .count()
    .unstack()   # id_transaksi stays as the index, categories become columns
    .fillna(0)   # category not bought in this transaction
)

coffeeshop3.head()

  and should_run_async(code)


kategori_produk,Bakery,Branded,Coffee,Coffee beans,Drinking Chocolate,Flavours,Loose Tea,Packaged Chocolate,Tea
id_transaksi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1.0,1.0,8.0,1.0,1.0,1.0,0.0,0.0,4.0
2,3.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,6.0
3,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,5.0
4,1.0,0.0,4.0,0.0,1.0,1.0,0.0,1.0,4.0
5,3.0,0.0,5.0,0.0,1.0,1.0,0.0,0.0,5.0


In [None]:
# Encode a basket count as a presence flag
def encode_units(x):
  """Return 1 when the count ``x`` is positive, otherwise 0.

  A single comparison is used so that every input maps to 0 or 1; a value
  that does not compare as positive (for example NaN) is encoded as 0
  instead of falling through and returning None.
  """
  return 1 if x > 0 else 0

# Flag every category that appears in a transaction: 1 if the count is
# positive, otherwise 0. This is the same mapping encode_units applies per
# cell, done as one vectorized comparison instead of the deprecated applymap.
coffeeshop3_sets = (coffeeshop3 > 0).astype('int64')
coffeeshop3_sets.head(5)

  and should_run_async(code)


kategori_produk,Bakery,Branded,Coffee,Coffee beans,Drinking Chocolate,Flavours,Loose Tea,Packaged Chocolate,Tea
id_transaksi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1,1,1,1,1,1,0,0,1
2,1,0,1,0,0,0,0,0,1
3,0,0,1,1,0,0,0,0,1
4,1,0,1,0,1,1,0,1,1
5,1,0,1,0,1,1,0,0,1


## Transformasi Data Outlet Church Street - New York

In [None]:
# Build the basket matrix for outlet 5: one row per id_transaksi, one column
# per kategori_produk, each cell holding the number of line items of that
# category in the transaction (0.0 when the category is absent).
# NOTE(review): baskets are keyed on id_transaksi alone. If transaction ids
# restart every day, receipts from different dates are merged into a single
# basket - confirm, and add tanggal_transaksi to the group keys if so.
coffeeshop5 = (
    df[df['id_outlet'] == 5]
    .groupby(['id_transaksi', 'kategori_produk'])['quantity']
    .count()
    .unstack()   # id_transaksi stays as the index, categories become columns
    .fillna(0)   # category not bought in this transaction
)

coffeeshop5.head()

  and should_run_async(code)


kategori_produk,Bakery,Branded,Coffee,Coffee beans,Drinking Chocolate,Flavours,Loose Tea,Packaged Chocolate,Tea
id_transaksi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1.0,0.0,4.0,0.0,2.0,0.0,0.0,0.0,2.0
2,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,5.0
3,1.0,0.0,3.0,0.0,1.0,2.0,0.0,0.0,3.0
4,3.0,2.0,5.0,0.0,1.0,0.0,0.0,0.0,1.0
5,4.0,0.0,3.0,0.0,1.0,0.0,0.0,0.0,3.0


In [None]:
# Encode a basket count as a presence flag
def encode_units(x):
  """Return 1 when the count ``x`` is positive, otherwise 0.

  A single comparison is used so that every input maps to 0 or 1; a value
  that does not compare as positive (for example NaN) is encoded as 0
  instead of falling through and returning None.
  """
  return 1 if x > 0 else 0

# Flag every category that appears in a transaction: 1 if the count is
# positive, otherwise 0. This is the same mapping encode_units applies per
# cell, done as one vectorized comparison instead of the deprecated applymap.
coffeeshop5_sets = (coffeeshop5 > 0).astype('int64')
coffeeshop5_sets.head(5)

  and should_run_async(code)


kategori_produk,Bakery,Branded,Coffee,Coffee beans,Drinking Chocolate,Flavours,Loose Tea,Packaged Chocolate,Tea
id_transaksi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1,0,1,0,1,0,0,0,1
2,1,0,1,0,0,0,0,0,1
3,1,0,1,0,1,1,0,0,1
4,1,1,1,0,1,0,0,0,1
5,1,0,1,0,1,0,0,0,1


## Transformasi Data Outlet Avenue - New York

In [None]:
# Build the basket matrix for outlet 8: one row per id_transaksi, one column
# per kategori_produk, each cell holding the number of line items of that
# category in the transaction (0.0 when the category is absent).
# NOTE(review): baskets are keyed on id_transaksi alone. If transaction ids
# restart every day, receipts from different dates are merged into a single
# basket - confirm, and add tanggal_transaksi to the group keys if so.
coffeeshop8 = (
    df[df['id_outlet'] == 8]
    .groupby(['id_transaksi', 'kategori_produk'])['quantity']
    .count()
    .unstack()   # id_transaksi stays as the index, categories become columns
    .fillna(0)   # category not bought in this transaction
)

coffeeshop8.head()

  and should_run_async(code)


kategori_produk,Bakery,Branded,Coffee,Coffee beans,Drinking Chocolate,Flavours,Loose Tea,Packaged Chocolate,Tea
id_transaksi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,3.0
2,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,3.0
3,0.0,0.0,4.0,2.0,0.0,2.0,0.0,0.0,3.0
4,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,4.0
5,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0


In [None]:
# Encode a basket count as a presence flag
def encode_units(x):
  """Return 1 when the count ``x`` is positive, otherwise 0.

  A single comparison is used so that every input maps to 0 or 1; a value
  that does not compare as positive (for example NaN) is encoded as 0
  instead of falling through and returning None.
  """
  return 1 if x > 0 else 0

# Flag every category that appears in a transaction: 1 if the count is
# positive, otherwise 0. This is the same mapping encode_units applies per
# cell, done as one vectorized comparison instead of the deprecated applymap.
coffeeshop8_sets = (coffeeshop8 > 0).astype('int64')
coffeeshop8_sets.head(5)

  and should_run_async(code)


kategori_produk,Bakery,Branded,Coffee,Coffee beans,Drinking Chocolate,Flavours,Loose Tea,Packaged Chocolate,Tea
id_transaksi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1,0,1,1,0,0,0,0,1
2,0,0,1,0,1,0,0,0,1
3,0,0,1,1,0,1,0,0,1
4,0,0,1,0,0,1,0,0,1
5,1,1,1,0,0,0,0,0,1


# Modeling

In [None]:
pip install mlxtend

In [None]:
pip install apyori

In [None]:
from apyori import apriori
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import datetime

## Outlet Broadway - Long Island City

In [None]:
# Mine frequent category itemsets with a minimum support of 0.2.
# apriori is the mlxtend function here: its import follows, and therefore
# shadows, the apyori import of the same name.
frequent3_itemsets = apriori(coffeeshop3_sets, min_support=0.2, use_colnames=True)

# Derive rules with a lift threshold of 1, ranked by confidence and then
# lift, both in descending order
rules3 = (
    association_rules(frequent3_itemsets, metric='lift', min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
rules3

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
1,(Bakery),(Coffee),0.292595,0.641547,0.219856,0.7514,1.171232,0.032143,1.441887,0.206668
3,(Bakery),(Tea),0.292595,0.550459,0.201507,0.68869,1.25112,0.040446,1.44403,0.283736
2,(Tea),(Bakery),0.550459,0.292595,0.201507,0.366071,1.25112,0.040446,1.115906,0.446491
0,(Coffee),(Bakery),0.641547,0.292595,0.219856,0.342697,1.171232,0.032143,1.076223,0.407858


## Outlet Church Street - New York

In [None]:
# Mine frequent category itemsets with a minimum support of 0.2.
# apriori is the mlxtend function here: its import follows, and therefore
# shadows, the apyori import of the same name.
frequent5_itemsets = apriori(coffeeshop5_sets, min_support=0.2, use_colnames=True)

# Derive rules with a lift threshold of 1, ranked by confidence and then
# lift, both in descending order
rules5 = (
    association_rules(frequent5_itemsets, metric='lift', min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
rules5

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
1,(Bakery),(Coffee),0.39897,0.704834,0.318542,0.798411,1.132765,0.037335,1.4642,0.195006
5,"(Tea, Bakery)",(Coffee),0.287242,0.704834,0.215531,0.750345,1.06457,0.013073,1.182296,0.085097
3,(Bakery),(Tea),0.39897,0.607765,0.287242,0.71996,1.184602,0.044762,1.400639,0.259279
6,"(Coffee, Bakery)",(Tea),0.318542,0.607765,0.215531,0.676617,1.113286,0.021932,1.21291,0.149325
4,"(Tea, Coffee)",(Bakery),0.351823,0.39897,0.215531,0.612613,1.535486,0.075164,1.551496,0.538032
9,(Bakery),"(Tea, Coffee)",0.39897,0.351823,0.215531,0.540218,1.535486,0.075164,1.409751,0.580238
2,(Tea),(Bakery),0.607765,0.39897,0.287242,0.472621,1.184602,0.044762,1.139654,0.3973
0,(Coffee),(Bakery),0.704834,0.39897,0.318542,0.451939,1.132765,0.037335,1.096649,0.39708
7,(Tea),"(Coffee, Bakery)",0.607765,0.318542,0.215531,0.354628,1.113286,0.021932,1.055916,0.259433
8,(Coffee),"(Tea, Bakery)",0.704834,0.287242,0.215531,0.30579,1.06457,0.013073,1.026717,0.20549


## Outlet Avenue - New York

In [None]:
# Mine frequent category itemsets with a minimum support of 0.2.
# apriori is the mlxtend function here: its import follows, and therefore
# shadows, the apyori import of the same name.
frequent8_itemsets = apriori(coffeeshop8_sets, min_support=0.2, use_colnames=True)

# Derive rules with a lift threshold of 1, ranked by confidence and then
# lift, both in descending order
rules8 = (
    association_rules(frequent8_itemsets, metric='lift', min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
rules8

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
1,(Bakery),(Coffee),0.281917,0.683106,0.229801,0.815136,1.19328,0.037222,1.714205,0.225564
0,(Coffee),(Bakery),0.683106,0.281917,0.229801,0.336406,1.19328,0.037222,1.082112,0.511128


# Evaluation

Berdasarkan pemodelan yang sudah dibuat, pada ketiga outlet coffee shop aturan asosiasi antara kategori produk bakery dan coffee (Bakery → Coffee) memiliki nilai support dan confidence tertinggi, dengan confidence sekitar 0,75–0,82. Oleh karena itu, dapat dirancang strategi produk bundling yang menggabungkan produk dari kategori bakery dan coffee.
