# [추천시스템 - Apriori, FP-Growth](https://www.kaggle.com/code/chocozzz/00-apriori-fp-growth/notebook)
> 연관 규칙 분석의 한계:  
> 함께 구매되는 경향만 보여 줄 뿐, 품목 간의 인과관계는 알기 어렵다.  
> 아이템 수가 많아지면 탐색해야 할 아이템 조합이 급격히 늘어나 성능이 크게 떨어진다.

In [2]:
import mlxtend
import numpy as np
import pandas as pd

In [3]:
# Toy market-basket data: one list of purchased items per transaction.
# The transactions have different lengths (3, 3, 4 and 2 items), so the array
# must be created with dtype=object. Without it, NumPy >= 1.24 raises
# ValueError for ragged nested sequences (older versions only emitted a
# VisibleDeprecationWarning). The result is a 1-D object array of lists.
data = np.array([
    ['우유', '기저귀', '쥬스'],  # transaction 0
    ['양상추', '기저귀', '맥주'],  # transaction 1
    ['우유', '양상추', '기저귀', '맥주'],  # transaction 2
    ['양상추', '맥주'],  # transaction 3
], dtype=object)

data

  data = np.array([


array([list(['우유', '기저귀', '쥬스']), list(['양상추', '기저귀', '맥주']),
       list(['우유', '양상추', '기저귀', '맥주']), list(['양상추', '맥주'])],
      dtype=object)

### Apriori 알고리즘

In [4]:
# One-hot encode the transactions: each distinct item becomes a boolean
# column and each transaction becomes a row (True = item was purchased).
from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
# fit() learns the item vocabulary; transform() builds the boolean matrix.
te_ary = te.fit(data).transform(data)
# te.columns_ holds the item names and is used for the column labels.
df = pd.DataFrame(te_ary, columns=te.columns_)
df

Unnamed: 0,기저귀,맥주,양상추,우유,쥬스
0,True,False,False,True,True
1,True,True,True,False,False
2,True,True,True,True,False
3,False,True,True,False,False


In [5]:
%%time
# Mine frequent itemsets from the one-hot encoded frame `df` with Apriori.
from mlxtend.frequent_patterns import apriori

# min_support=0.5 keeps itemsets that occur in at least half of the
# transactions; use_colnames=True reports item names rather than column
# indices in the `itemsets` column.
apriori(df, min_support=0.5, use_colnames=True)

CPU times: user 4.43 ms, sys: 1.66 ms, total: 6.09 ms
Wall time: 6.29 ms


Unnamed: 0,support,itemsets
0,0.75,(기저귀)
1,0.75,(맥주)
2,0.75,(양상추)
3,0.5,(우유)
4,0.5,"(맥주, 기저귀)"
5,0.5,"(양상추, 기저귀)"
6,0.5,"(우유, 기저귀)"
7,0.75,"(양상추, 맥주)"
8,0.5,"(양상추, 맥주, 기저귀)"


### FP-Growth 알고리즘

In [6]:
# Repeats the one-hot encoding used in the Apriori section so that this
# section can be run on its own; it rebuilds `df` from the same `data`.
from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
# fit() learns the item vocabulary; transform() builds the boolean matrix.
te_ary = te.fit(data).transform(data)
# te.columns_ holds the item names and is used for the column labels.
df = pd.DataFrame(te_ary, columns=te.columns_)
df

Unnamed: 0,기저귀,맥주,양상추,우유,쥬스
0,True,False,False,True,True
1,True,True,True,False,False
2,True,True,True,True,False
3,False,True,True,False,False


In [7]:
%%time
# Mine frequent itemsets from the one-hot encoded frame `df` with FP-Growth.
from mlxtend.frequent_patterns import fpgrowth

# Same arguments as the Apriori call, so the two outputs can be compared;
# the itemsets found are the same, only the row order differs.
fpgrowth(df, min_support=0.5, use_colnames=True)

CPU times: user 1.18 ms, sys: 54 µs, total: 1.23 ms
Wall time: 1.2 ms


Unnamed: 0,support,itemsets
0,0.75,(기저귀)
1,0.5,(우유)
2,0.75,(양상추)
3,0.75,(맥주)
4,0.5,"(맥주, 기저귀)"
5,0.5,"(양상추, 기저귀)"
6,0.5,"(양상추, 맥주, 기저귀)"
7,0.5,"(우유, 기저귀)"
8,0.75,"(양상추, 맥주)"


### 신뢰도 확인

In [9]:
# Derive association rules from the frequent itemsets and inspect their
# confidence.
from mlxtend.frequent_patterns import association_rules

# Frequent itemsets (support >= 0.5) found by FP-Growth on `df`.
association = fpgrowth(df, min_support=0.5, use_colnames=True)
# Keep rules whose confidence is at least 0.5. With support_only=False the
# other metrics (lift, leverage, conviction, ...) are computed as well.
association_rules(association, metric="confidence", min_threshold=0.5, support_only=False) 

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(맥주),(기저귀),0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
1,(기저귀),(맥주),0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
2,(양상추),(기저귀),0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
3,(기저귀),(양상추),0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
4,"(양상추, 맥주)",(기저귀),0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
5,"(양상추, 기저귀)",(맥주),0.5,0.75,0.5,1.0,1.333333,0.125,inf
6,"(맥주, 기저귀)",(양상추),0.5,0.75,0.5,1.0,1.333333,0.125,inf
7,(양상추),"(맥주, 기저귀)",0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
8,(맥주),"(양상추, 기저귀)",0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
9,(기저귀),"(양상추, 맥주)",0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
