# Ch11.연관규칙

## 1.기본 package 설정
- pip install mlxtend
- http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/

In [1]:
## 기본
import numpy as np  # numpy 패키지 가져오기
import pandas as pd # pandas 패키지 가져오기

## Unsupervised 모델 (책에 없음)
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

## 2.데이터 가져오기

### 2.1 원본데이터(csv)를 dataframe 형태로 가져오기(pandas)

In [2]:
# Read the source CSV into a DataFrame and preview the first rows
books_df = pd.read_csv('CharlesBookClub.csv')
books_df.head()

Unnamed: 0,Seq#,ID#,Gender,M,R,F,FirstPurch,ChildBks,YouthBks,CookBks,...,ItalCook,ItalAtlas,ItalArt,Florence,Related Purchase,Mcode,Rcode,Fcode,Yes_Florence,No_Florence
0,1,25,1,297,14,2,22,0,1,1,...,0,0,0,0,0,5,4,2,0,1
1,2,29,0,128,8,2,10,0,0,0,...,0,0,0,0,0,4,3,2,0,1
2,3,46,1,138,22,7,56,2,1,2,...,1,0,0,0,2,4,4,3,0,1
3,4,47,1,228,2,1,2,0,0,0,...,0,0,0,0,0,5,1,1,0,1
4,5,51,1,257,10,1,10,0,0,0,...,0,0,0,0,0,5,3,1,0,1


In [3]:
# Check the (rows, columns) dimensions of the DataFrame
books_df.shape

(4000, 24)

In [4]:
# Inspect the structure of the data: list the column labels
books_df.columns

Index(['Seq#', 'ID#', 'Gender', 'M', 'R', 'F', 'FirstPurch', 'ChildBks',
       'YouthBks', 'CookBks', 'DoItYBks', 'RefBks', 'ArtBks', 'GeogBks',
       'ItalCook', 'ItalAtlas', 'ItalArt', 'Florence', 'Related Purchase',
       'Mcode', 'Rcode', 'Fcode', 'Yes_Florence', 'No_Florence'],
      dtype='object')

## 3.데이터 전처리

### 3.1 필요한 데이터만 추출

In [5]:
# Remove the columns that are not used in the analysis.
# NOTE(review): Mcode, Rcode, Fcode, Yes_Florence and No_Florence are not in
# this list, so they stay in the frame and are passed on as items — confirm
# this is intended.
books_df = books_df.drop(
    columns=['Seq#', 'ID#', 'Gender', 'M', 'R', 'F', 'FirstPurch'],
)

In [6]:
# Display the DataFrame after the column drop
books_df

Unnamed: 0,ChildBks,YouthBks,CookBks,DoItYBks,RefBks,ArtBks,GeogBks,ItalCook,ItalAtlas,ItalArt,Florence,Related Purchase,Mcode,Rcode,Fcode,Yes_Florence,No_Florence
0,0,1,1,0,0,0,0,0,0,0,0,0,5,4,2,0,1
1,0,0,0,0,0,0,0,0,0,0,0,0,4,3,2,0,1
2,2,1,2,0,1,0,1,1,0,0,0,2,4,4,3,0,1
3,0,0,0,0,0,0,0,0,0,0,0,0,5,1,1,0,1
4,0,0,0,0,0,0,0,0,0,0,0,0,5,3,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,0,0,0,0,0,0,0,0,0,0,0,0,4,1,2,0,1
3996,1,1,2,2,2,0,1,0,0,0,0,1,5,4,3,0,1
3997,0,0,0,0,0,0,0,0,0,0,0,0,2,3,1,0,1
3998,1,1,3,1,0,0,0,0,0,0,0,0,5,4,3,0,1


### 3.2 구매 1 이상인 것은 1로 수정

In [7]:
# Binarize the frame: any value greater than 0 becomes 1, everything else 0.
# NOTE(review): this is applied to every remaining column, not only to the
# book-count columns — confirm that is intended.
books_df = books_df.gt(0).astype(int)
books_df

Unnamed: 0,ChildBks,YouthBks,CookBks,DoItYBks,RefBks,ArtBks,GeogBks,ItalCook,ItalAtlas,ItalArt,Florence,Related Purchase,Mcode,Rcode,Fcode,Yes_Florence,No_Florence
0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,1
1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1
2,1,1,1,0,1,0,1,1,0,0,0,1,1,1,1,0,1
3,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1
4,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1
3996,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,1
3997,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1
3998,1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,0,1


## 4.연관규칙

### 4.1 빈발 항목집합(frequent itemsets) 생성 (apriori)

In [8]:
# Mine frequent itemsets with apriori. min_support is 400/4000 = 0.1, and
# use_colnames=True makes the itemsets show column names.
freq_itemsets = apriori(books_df, min_support=400/4000, use_colnames=True)
freq_itemsets

Unnamed: 0,support,itemsets
0,0.39400,(ChildBks)
1,0.23825,(YouthBks)
2,0.41550,(CookBks)
3,0.25475,(DoItYBks)
4,0.20475,(RefBks)
...,...,...
642,0.10200,"(Mcode, Related Purchase, DoItYBks, Rcode, Fco..."
643,0.12650,"(Mcode, Related Purchase, Rcode, GeogBks, Fcod..."
644,0.10400,"(Mcode, Related Purchase, Rcode, CookBks, Fcod..."
645,0.10900,"(Mcode, Related Purchase, DoItYBks, Rcode, Coo..."


### 4.2 연관규칙 실행

In [9]:
# Generate association rules from the frequent itemsets, using lift as the
# selection metric with a threshold of 1.
rules = association_rules(freq_itemsets, metric="lift", min_threshold=1)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(ChildBks),(YouthBks),0.39400,0.23825,0.1475,0.374365,1.571314,0.053629,1.217564
1,(YouthBks),(ChildBks),0.23825,0.39400,0.1475,0.619098,1.571314,0.053629,1.590959
2,(ChildBks),(CookBks),0.39400,0.41550,0.2420,0.614213,1.478251,0.078293,1.515086
3,(CookBks),(ChildBks),0.41550,0.39400,0.2420,0.582431,1.478251,0.078293,1.451256
4,(ChildBks),(DoItYBks),0.39400,0.25475,0.1615,0.409898,1.609022,0.061129,1.262918
...,...,...,...,...,...,...,...,...,...
11417,(Related Purchase),"(Mcode, Rcode, CookBks, GeogBks, Fcode, No_Flo...",0.47625,0.13650,0.1365,0.286614,2.099738,0.071492,1.210425
11418,(Rcode),"(Mcode, Related Purchase, CookBks, GeogBks, Fc...",1.00000,0.13650,0.1365,0.136500,1.000000,0.000000,1.000000
11419,(CookBks),"(Mcode, Related Purchase, Rcode, GeogBks, Fcod...",0.41550,0.23600,0.1365,0.328520,1.392033,0.038442,1.137785
11420,(GeogBks),"(Mcode, Related Purchase, Rcode, CookBks, Fcod...",0.26675,0.22925,0.1365,0.511715,2.232127,0.075348,1.578484


## 5.연관규칙 확인

### 5.1 lift가 높은 순서로 sorting

In [10]:
# Keep only the columns of interest and order the rules from highest to
# lowest lift.
rules = (
    rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]
    .sort_values(by='lift', ascending=False)
)
rules

Unnamed: 0,antecedents,consequents,support,confidence,lift
7973,"(CookBks, GeogBks, Mcode)","(ChildBks, Related Purchase, Fcode)",0.1095,0.700800,2.969492
10717,"(ChildBks, Related Purchase, Rcode)","(CookBks, GeogBks, Mcode, Fcode)",0.1095,0.463983,2.969492
4051,"(CookBks, GeogBks)","(ChildBks, Related Purchase, Fcode)",0.1095,0.700800,2.969492
7997,"(ChildBks, Related Purchase)","(CookBks, GeogBks, Mcode, Fcode)",0.1095,0.463983,2.969492
7998,"(CookBks, GeogBks)","(ChildBks, Related Purchase, Mcode, Fcode)",0.1095,0.700800,2.969492
...,...,...,...,...,...
5333,"(YouthBks, CookBks, Related Purchase, No_Flore...",(Mcode),0.1040,1.000000,1.000000
5328,(Fcode),"(YouthBks, CookBks, Related Purchase, Mcode)",0.1165,0.116500,1.000000
5325,(Mcode),"(YouthBks, CookBks, Related Purchase, Fcode)",0.1165,0.116500,1.000000
5317,"(Mcode, Fcode)","(YouthBks, CookBks, Related Purchase)",0.1165,0.116500,1.000000


### 5.2 antecedents의 item 개수 확인하기

In [11]:
# Record how many items each rule's antecedent set contains.
rules["antecedent_len"] = rules["antecedents"].apply(len)
rules

Unnamed: 0,antecedents,consequents,support,confidence,lift,antecedent_len
7973,"(CookBks, GeogBks, Mcode)","(ChildBks, Related Purchase, Fcode)",0.1095,0.700800,2.969492,3
10717,"(ChildBks, Related Purchase, Rcode)","(CookBks, GeogBks, Mcode, Fcode)",0.1095,0.463983,2.969492,3
4051,"(CookBks, GeogBks)","(ChildBks, Related Purchase, Fcode)",0.1095,0.700800,2.969492,2
7997,"(ChildBks, Related Purchase)","(CookBks, GeogBks, Mcode, Fcode)",0.1095,0.463983,2.969492,2
7998,"(CookBks, GeogBks)","(ChildBks, Related Purchase, Mcode, Fcode)",0.1095,0.700800,2.969492,2
...,...,...,...,...,...,...
5333,"(YouthBks, CookBks, Related Purchase, No_Flore...",(Mcode),0.1040,1.000000,1.000000,4
5328,(Fcode),"(YouthBks, CookBks, Related Purchase, Mcode)",0.1165,0.116500,1.000000,1
5325,(Mcode),"(YouthBks, CookBks, Related Purchase, Fcode)",0.1165,0.116500,1.000000,1
5317,"(Mcode, Fcode)","(YouthBks, CookBks, Related Purchase)",0.1165,0.116500,1.000000,2


### 5.3 Multi 규칙확인

In [12]:
# Show rules with at least two antecedent items that also exceed the
# support (0.2), confidence (0.9) and lift (2) cut-offs.
rules.loc[
    rules['antecedent_len'].ge(2)
    & rules['support'].gt(0.2)
    & rules['confidence'].gt(0.9)
    & rules['lift'].gt(2)
]

Unnamed: 0,antecedents,consequents,support,confidence,lift,antecedent_len
10398,"(GeogBks, No_Florence, Rcode)","(Related Purchase, Mcode, Fcode)",0.23600,1.0,2.099738,3
3090,"(ArtBks, Fcode, Rcode)",(Related Purchase),0.22300,1.0,2.099738,3
10380,"(GeogBks, Mcode, No_Florence, Rcode)","(Related Purchase, Fcode)",0.23600,1.0,2.099738,4
3093,"(ArtBks, Fcode)","(Related Purchase, Rcode)",0.22300,1.0,2.099738,2
3094,"(ArtBks, Rcode)","(Related Purchase, Fcode)",0.22300,1.0,2.099738,2
...,...,...,...,...,...,...
733,"(ArtBks, Mcode)",(Related Purchase),0.22300,1.0,2.099738,2
739,"(ArtBks, Rcode)",(Related Purchase),0.22300,1.0,2.099738,2
745,"(ArtBks, Fcode)",(Related Purchase),0.22300,1.0,2.099738,2
792,"(GeogBks, Fcode)",(Related Purchase),0.26675,1.0,2.099738,2


### 5.4 특정 규칙 확인

In [13]:
# Select the rules whose antecedent is exactly the item set {No_Florence, GeogBks}
rules[rules['antecedents'] == {'No_Florence', 'GeogBks'}]

Unnamed: 0,antecedents,consequents,support,confidence,lift,antecedent_len
6436,"(GeogBks, No_Florence)","(CookBks, Related Purchase, Fcode)",0.1365,0.57839,2.281617,2
6411,"(GeogBks, No_Florence)","(CookBks, Related Purchase, Rcode)",0.1365,0.57839,2.281617,2
2554,"(GeogBks, No_Florence)","(CookBks, Related Purchase)",0.1365,0.57839,2.281617,2
11415,"(GeogBks, No_Florence)","(Mcode, Related Purchase, Rcode, CookBks, Fcode)",0.1365,0.57839,2.281617,2
6355,"(GeogBks, No_Florence)","(CookBks, Related Purchase, Mcode)",0.1365,0.57839,2.281617,2
10072,"(GeogBks, No_Florence)","(CookBks, Related Purchase, Fcode, Rcode)",0.1365,0.57839,2.281617,2
10018,"(GeogBks, No_Florence)","(CookBks, Related Purchase, Mcode, Fcode)",0.1365,0.57839,2.281617,2
9964,"(GeogBks, No_Florence)","(CookBks, Related Purchase, Mcode, Rcode)",0.1365,0.57839,2.281617,2
1686,"(GeogBks, No_Florence)","(ChildBks, Related Purchase)",0.1265,0.536017,2.271258,2
11082,"(GeogBks, No_Florence)","(Mcode, Related Purchase, Rcode, Fcode, ChildBks)",0.1265,0.536017,2.271258,2
