# Implementasi Aturan Asosiasi dengan Algoritma Apriori untuk menentukan Pola Kunjungan Wisatawan Mancanegara ke Indonesia

|Nama|NIM|
|-|-|
|Ilhan Mahardika Pratama|H1D020058|
|Abdalhaqq Muhammad Saih|H1D020068|
|Ahmad Karomi Alhamidy|H1D020069|

Note: Buka [notebook](https://colab.research.google.com/drive/1OmGIFFGK4x7SoaBVIg9m6eooMgGkGdkH#scrollTo=rtdDOZUTOqQ3) untuk menjalankan program secara interaktif


## Mengimpor library yang dibutuhkan

In [None]:
import io
import pandas as pd
from google.colab import files
from mlxtend.frequent_patterns import apriori, association_rules

## Memuat dataset

In [None]:
# Prompt for a file upload; `uploaded` is indexed by file name and each entry
# holds the raw bytes that are wrapped in a BytesIO buffer for pandas.
uploaded = files.upload()
# Only the first uploaded file is used.
filename, payload = list(uploaded.items())[0]
# The first CSV column becomes the row index; rows containing any missing
# value are discarded.
df = pd.read_csv(io.BytesIO(payload), index_col=0).dropna()
df

Saving Jumlah Kunjungan Wisatawan Mancanegara ke Indonesia Menurut Kebangsaan 2012-2020.csv to Jumlah Kunjungan Wisatawan Mancanegara ke Indonesia Menurut Kebangsaan 2012-2020.csv


Unnamed: 0,2012,2013,2014,2015,2016,2017,2018,2019,2020
Brunei Darussalam,38679.0,16932.0,19078.0,18262.0,23695.0,23455.0,17279.0,19278.0,2701.0
Malaysia,1173351.0,1380686.0,1418256.0,1431728.0,1541197.0,2121888.0,2503344.0,2980753.0,980118.0
Filipina,210029.0,247573.0,248182.0,267700.0,298910.0,308977.0,217874.0,260980.0,50413.0
Singapura,1324839.0,1432060.0,1559044.0,1594102.0,1515699.0,1554119.0,1768744.0,1934445.0,280492.0
Thailand,115036.0,125059.0,114272.0,118579.0,124569.0,138235.0,124153.0,136699.0,21303.0
Vietnam,31106.0,43249.0,48018.0,49845.0,60986.0,77466.0,75816.0,96024.0,19608.0
Myanmar,22304.0,31001.0,33559.0,39923.0,44720.0,48133.0,28612.0,46381.0,12669.0
Hong Kong,84985.0,95258.0,94560.0,93529.0,101369.0,98272.0,91182.0,50324.0,2625.0
India,181791.0,231266.0,267082.0,306960.0,422045.0,536902.0,595636.0,657300.0,111724.0
Jepang,423113.0,497399.0,505175.0,528606.0,545392.0,573310.0,530573.0,519623.0,92228.0


## Menghitung persentase perubahan jumlah wisatawan dari tahun ke tahun

In [None]:
# Iterating a DataFrame yields its column labels, so `it` walks the columns
# from left to right.
it = iter(df)
# Consume the first column label as the initial baseline; the loop below
# compares every remaining column against the one before it.
a = next(it)
def fp(a, b):
    """Return the change from ``a`` to ``b`` as a percentage of ``a``.

    A positive result means ``b`` is larger than ``a``; a negative result
    means it is smaller.
    """
    delta = float(b - a)
    return delta / a * 100
# comparations[column][row] holds the percentage change of that row between
# the previous column (`a`) and the current column (`b`).
comparations = {}
for b in it:
    for country in df.index:
        per_country = comparations.setdefault(b, {})
        per_country[country] = fp(df[a][country], df[b][country])
    # The column just processed becomes the baseline for the next one.
    a = b
pd.DataFrame(comparations)

Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020
Brunei Darussalam,-56.224308,12.674226,-4.277178,29.750301,-1.012872,-26.331273,11.568957,-85.98921
Malaysia,17.670331,2.721111,0.949899,7.645936,37.677922,17.977198,19.070851,-67.118443
Filipina,17.875627,0.245988,7.86439,11.658573,3.367903,-29.485366,19.78483,-80.683194
Singapura,8.093134,8.867226,2.248686,-4.918318,2.534804,13.810075,9.368286,-85.500131
Thailand,8.712925,-8.625529,3.769077,5.051485,10.970627,-10.187,10.105273,-84.416126
Vietnam,39.037485,11.026845,3.804823,22.351289,27.022595,-2.129967,26.654004,-79.580105
Myanmar,38.993006,8.251347,18.963616,12.01563,7.631932,-40.556375,62.103313,-72.684936
Hong Kong,12.088016,-0.732747,-1.090313,8.382427,-3.055175,-7.214669,-44.809283,-94.783801
India,27.215319,15.486928,14.930995,37.491856,27.214397,10.939427,10.352631,-83.002586
Jepang,17.557012,1.563332,4.638195,3.175522,5.118887,-7.454431,-2.063806,-82.250978


## Mengubah persentase perubahan menjadi data kategorikal menggunakan fungsi keanggotaan

In [None]:
memberships = {}
def fk(per):
    """Classify a percentage change into one of six membership labels.

    The labels combine a direction ('Naik' = up, 'Turun' = down) with a
    magnitude ('Rendah' = low, 'Sedang' = medium, 'Tinggi' = high). The
    magnitude bands are split at one third and two thirds of 100 percent.

    Boundary behaviour:
    - exactly +/- two thirds of 100 counts as 'Tinggi';
    - exactly +/- one third of 100 counts as 'Rendah';
    - a change of exactly 0 is labelled 'Turun Rendah'.
    """
    one_third = 100/3.0
    two_thirds = one_third*2
    if per >= two_thirds:
        return 'Naik Tinggi'
    if per > one_third:
        return 'Naik Sedang'
    if per > 0:
        return 'Naik Rendah'
    # From here on the change is zero or negative.
    if per <= -two_thirds:
        return 'Turun Tinggi'
    if per < -one_third:
        return 'Turun Sedang'
    return 'Turun Rendah'
# Translate every percentage change into its membership label, keeping the
# same year -> country nesting as `comparations`.
for year, changes in comparations.items():
    if not memberships.get(year):
        memberships[year] = {}
    memberships[year].update(
        {country: fk(change) for country, change in changes.items()}
    )
pd.DataFrame(memberships)

Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020
Brunei Darussalam,Turun Sedang,Naik Rendah,Turun Rendah,Naik Rendah,Turun Rendah,Turun Rendah,Naik Rendah,Turun Tinggi
Malaysia,Naik Rendah,Naik Rendah,Naik Rendah,Naik Rendah,Naik Sedang,Naik Rendah,Naik Rendah,Turun Tinggi
Filipina,Naik Rendah,Naik Rendah,Naik Rendah,Naik Rendah,Naik Rendah,Turun Rendah,Naik Rendah,Turun Tinggi
Singapura,Naik Rendah,Naik Rendah,Naik Rendah,Turun Rendah,Naik Rendah,Naik Rendah,Naik Rendah,Turun Tinggi
Thailand,Naik Rendah,Turun Rendah,Naik Rendah,Naik Rendah,Naik Rendah,Turun Rendah,Naik Rendah,Turun Tinggi
Vietnam,Naik Sedang,Naik Rendah,Naik Rendah,Naik Rendah,Naik Rendah,Turun Rendah,Naik Rendah,Turun Tinggi
Myanmar,Naik Sedang,Naik Rendah,Naik Rendah,Naik Rendah,Naik Rendah,Turun Sedang,Naik Sedang,Turun Tinggi
Hong Kong,Naik Rendah,Turun Rendah,Turun Rendah,Naik Rendah,Turun Rendah,Turun Rendah,Turun Sedang,Turun Tinggi
India,Naik Rendah,Naik Rendah,Naik Rendah,Naik Sedang,Naik Rendah,Naik Rendah,Naik Rendah,Turun Tinggi
Jepang,Naik Rendah,Naik Rendah,Naik Rendah,Naik Rendah,Naik Rendah,Turun Rendah,Turun Rendah,Turun Tinggi


## Transformasi data ke dalam bentuk transaksi

In [None]:
# Short code -> membership label, used to build the item names below.
categories = {'NT':'Naik Tinggi', 'NS':'Naik Sedang', 'NR':'Naik Rendah', 'TT':'Turun Tinggi', 'TS':'Turun Sedang', 'TR':'Turun Rendah'}

# One item (column) per country/category pair, e.g. 'NR_Malaysia'.
columns = [
    f'{category}_{country}'
    for country in df.index
    for category in categories
]

# Each year becomes one transaction: an item is present when the country's
# membership label for that year matches the item's category. Cells are
# booleans rather than 0/1 integers because a boolean DataFrame is the input
# mlxtend's apriori recommends (non-bool frames trigger its deprecation
# warning and a slower code path).
transactions = [
    {
        f'{category}_{country}': label == name
        for country, label in memberships[year].items()
        for category, name in categories.items()
    }
    for year in memberships
]

# Passing `columns` pins the column order to the one defined above.
dataset = pd.DataFrame(transactions, columns=columns)
dataset

Unnamed: 0,NT_Brunei Darussalam,NS_Brunei Darussalam,NR_Brunei Darussalam,TT_Brunei Darussalam,TS_Brunei Darussalam,TR_Brunei Darussalam,NT_Malaysia,NS_Malaysia,NR_Malaysia,TT_Malaysia,...,NR_Selandia Baru,TT_Selandia Baru,TS_Selandia Baru,TR_Selandia Baru,NT_Afrika Selatan,NS_Afrika Selatan,NR_Afrika Selatan,TT_Afrika Selatan,TS_Afrika Selatan,TR_Afrika Selatan
0,0,0,0,0,1,0,0,0,1,0,...,1,0,0,0,0,0,1,0,0,0
1,0,0,1,0,0,0,0,0,1,0,...,1,0,0,0,0,0,1,0,0,0
2,0,0,0,0,0,1,0,0,1,0,...,1,0,0,0,0,0,1,0,0,0
3,0,0,1,0,0,0,0,0,1,0,...,1,0,0,0,0,0,1,0,0,0
4,0,0,0,0,0,1,0,1,0,0,...,1,0,0,0,0,0,1,0,0,0
5,0,0,0,0,0,1,0,0,1,0,...,1,0,0,0,0,0,1,0,0,0
6,0,0,1,0,0,0,0,0,1,0,...,1,0,0,0,0,0,1,0,0,0
7,0,0,0,1,0,0,0,0,0,1,...,0,1,0,0,0,0,0,1,0,0


## Men-*generate* kombinasi itemset sesuai minimum support

In [None]:
# Minimum support threshold, read interactively and parsed as a float.
min_support = float(input("Masukan nilai min. support  "))

# Mine every itemset whose support reaches the threshold; itemsets are
# reported with column names instead of column indices.
frequent_itemsets = apriori(dataset, use_colnames=True, min_support=min_support)
# Number of items in each frequent itemset.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

frequent_itemsets

Masukan nilai min. support  0.6


Unnamed: 0,support,itemsets,length
0,0.750,(NR_Malaysia),1
1,0.750,(NR_Filipina),1
2,0.750,(NR_Singapura),1
3,0.625,(NR_Thailand),1
4,0.625,(NR_Vietnam),1
...,...,...,...
32122,0.625,"(NR_Jerman, NR_Kanada, NR_Finlandia, NR_Swiss,...",13
32123,0.625,"(NR_Jerman, NR_Kanada, NR_Finlandia, NR_Swiss,...",13
32124,0.625,"(NR_Jerman, NR_Kanada, NR_Finlandia, NR_Swiss,...",13
32125,0.625,"(NR_Jerman, NR_Kanada, NR_Finlandia, NR_Amerik...",13


## Membuat kandidat aturan asosiasi

In [None]:
# Minimum confidence threshold, read interactively and parsed as a float.
min_conf = float(input("Masukan nilai min. confidence  "))

# Derive rules from the frequent itemsets, keeping those whose confidence
# reaches the threshold.
candidate_rules = association_rules(frequent_itemsets, min_threshold=min_conf, metric="confidence")
# Record how many items sit on each side of every rule.
for side in ("antecedents", "consequents"):
    candidate_rules[f"{side}_length"] = candidate_rules[side].apply(len)
candidate_rules

Masukan nilai min. confidence  0.8


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedents_length,consequents_length
0,(NR_Filipina),(NR_Malaysia),0.750,0.750,0.625,0.833333,1.111111,0.062500,1.50,1,1
1,(NR_Malaysia),(NR_Filipina),0.750,0.750,0.625,0.833333,1.111111,0.062500,1.50,1,1
2,(NR_Singapura),(NR_Malaysia),0.750,0.750,0.625,0.833333,1.111111,0.062500,1.50,1,1
3,(NR_Malaysia),(NR_Singapura),0.750,0.750,0.625,0.833333,1.111111,0.062500,1.50,1,1
4,(NR_Malaysia),(NR_India),0.750,0.750,0.625,0.833333,1.111111,0.062500,1.50,1,1
...,...,...,...,...,...,...,...,...,...,...,...
6743436,(NR_Filipina),"(NR_Jerman, NR_Kanada, NR_Finlandia, NR_Swiss,...",0.750,0.625,0.625,0.833333,1.333333,0.156250,2.25,1,13
6743437,(NR_Korea Selatan),"(NR_Jerman, NR_Kanada, NR_Finlandia, NR_Swiss,...",0.750,0.625,0.625,0.833333,1.333333,0.156250,2.25,1,13
6743438,(NR_Jepang),"(NR_Jerman, NR_Kanada, NR_Finlandia, NR_Swiss,...",0.625,0.625,0.625,1.000000,1.600000,0.234375,inf,1,13
6743439,(NR_Spanyol),"(NR_Jerman, NR_Kanada, NR_Finlandia, NR_Swiss,...",0.750,0.625,0.625,0.833333,1.333333,0.156250,2.25,1,13


## Menyeleksi kandidat aturan asosiasi dengan jumlah item consequent = 1

In [None]:
# Boolean mask: True for rules whose consequent holds exactly one item.
select_rules = candidate_rules["consequents"].apply(len) == 1
# Work on a copy so the extra column does not touch `candidate_rules`.
rules = candidate_rules[select_rules].copy()
# Ranking score used to pick the final rules.
rules["support * confidence"] = rules["support"].mul(rules["confidence"])
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedents_length,consequents_length,support * confidence
0,(NR_Filipina),(NR_Malaysia),0.750,0.750,0.625,0.833333,1.111111,0.062500,1.5,1,1,0.520833
1,(NR_Malaysia),(NR_Filipina),0.750,0.750,0.625,0.833333,1.111111,0.062500,1.5,1,1,0.520833
2,(NR_Singapura),(NR_Malaysia),0.750,0.750,0.625,0.833333,1.111111,0.062500,1.5,1,1,0.520833
3,(NR_Malaysia),(NR_Singapura),0.750,0.750,0.625,0.833333,1.111111,0.062500,1.5,1,1,0.520833
4,(NR_Malaysia),(NR_India),0.750,0.750,0.625,0.833333,1.111111,0.062500,1.5,1,1,0.520833
...,...,...,...,...,...,...,...,...,...,...,...,...
6727131,"(NR_Jerman, NR_Kanada, NR_Finlandia, NR_Swiss,...",(NR_Amerika Serikat),0.625,0.875,0.625,1.000000,1.142857,0.078125,inf,13,1,0.625000
6727132,"(NR_Jerman, NR_Kanada, NR_Finlandia, NR_Amerik...",(NR_Swiss),0.625,0.625,0.625,1.000000,1.600000,0.234375,inf,13,1,0.625000
6727133,"(NR_Jerman, NR_Kanada, NR_Swiss, NR_Amerika Se...",(NR_Finlandia),0.625,0.750,0.625,1.000000,1.333333,0.156250,inf,13,1,0.625000
6727134,"(NR_Jerman, NR_Finlandia, NR_Swiss, NR_Amerika...",(NR_Kanada),0.625,0.875,0.625,1.000000,1.142857,0.078125,inf,13,1,0.625000


## Menentukan aturan asosiasi dari nilai support * confidence tertinggi

In [None]:
# Reduce only the score column. Calling max() on the whole frame would also
# reduce the frozenset-valued antecedents/consequents columns, which is
# wasted work and depends on set-comparison semantics, just to read one value.
score = rules["support * confidence"]
max_sup_x_conf = score.max()
# Keep every rule that ties for the highest score.
select_final = score == max_sup_x_conf
final_rules = rules.loc[select_final].copy()
final_rules


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedents_length,consequents_length,support * confidence
268,(NR_Jerman),(NR_Inggris),0.875,0.875,0.875,1.0,1.142857,0.109375,inf,1,1,0.875
269,(NR_Inggris),(NR_Jerman),0.875,0.875,0.875,1.0,1.142857,0.109375,inf,1,1,0.875
274,(NR_Jerman),(NR_Amerika Serikat),0.875,0.875,0.875,1.0,1.142857,0.109375,inf,1,1,0.875
275,(NR_Amerika Serikat),(NR_Jerman),0.875,0.875,0.875,1.0,1.142857,0.109375,inf,1,1,0.875
276,(NR_Jerman),(NR_Kanada),0.875,0.875,0.875,1.0,1.142857,0.109375,inf,1,1,0.875
...,...,...,...,...,...,...,...,...,...,...,...,...
520573,"(NR_Jerman, NR_Kanada, NR_Amerika Serikat, NR_...",(NR_Inggris),0.875,0.875,0.875,1.0,1.142857,0.109375,inf,5,1,0.875
520574,"(NR_Jerman, NR_Kanada, NR_Amerika Serikat, NR_...",(NR_Afrika Selatan),0.875,0.875,0.875,1.0,1.142857,0.109375,inf,5,1,0.875
520575,"(NR_Jerman, NR_Kanada, NR_Afrika Selatan, NR_I...",(NR_Amerika Serikat),0.875,0.875,0.875,1.0,1.142857,0.109375,inf,5,1,0.875
520576,"(NR_Jerman, NR_Amerika Serikat, NR_Afrika Sela...",(NR_Kanada),0.875,0.875,0.875,1.0,1.142857,0.109375,inf,5,1,0.875
