# 연관규칙 활용 모듈 프로젝트  
- groceries.csv , market.csv 두 파일을 활용하여 연관규칙(지지도, 신뢰도, 향상도)을 조사  
- 의미있는 번들상품 아이템을 검색  
- 특정 고객에게 홍보하기  
__ex__  

```  
 
{돼지고기} -> {상추}   향상도:2
    
구매했던 상품을 입력하세요 : 
돼지고기
추천하고 싶은 상품은 상추입니다. 
```  
 

In [1]:
import pandas as pd
import numpy as np

In [4]:
filepath='D:/downloads/'

__groceries.csv 파일 읽어오기 (array 형태)__

In [22]:
with open(filepath+'groceries.csv') as f:
    raw_groceries = f.readlines()
    
groceries = np.array([*map(lambda x:x.strip().split(','),raw_groceries)])
print('First transaction')
print(groceries[0])

First transaction
['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups']


__Market_Basket_Optimisation.csv 파일 읽어오기 (array 형태)__

In [23]:
with open(filepath+'Market_Basket_Optimisation.csv') as f:
    raw_market_basket = f.readlines()
    
market_basket = np.array([*map(lambda x:x.strip().split(','),raw_market_basket)])
print('First transaction')
print(market_basket[0])

First transaction
['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes', 'whole weat flour', 'yams', 'cottage cheese', 'energy drink', 'tomato juice', 'low fat yogurt', 'green tea', 'honey', 'salad', 'mineral water', 'salmon', 'antioxydant juice', 'frozen smoothie', 'spinach', 'olive oil']


__두 배열을 통합하여 인코딩(TransactionEncoder)__

In [24]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [49]:
groceries_market_basket = np.hstack([groceries,market_basket])
te = TransactionEncoder()
df = pd.DataFrame.sparse.from_spmatrix(te.fit_transform(groceries_market_basket,sparse=True),columns=te.columns_)
df.head(2)

Unnamed: 0,asparagus,Instant food products,UHT-milk,abrasive cleaner,almonds,antioxydant juice,artif. sweetener,asparagus.1,avocado,babies food,...,white wine,whole milk,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt,yogurt cake,zucchini,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False


In [50]:
df.shape # 전체 건수 17336 X 품목 종류 272

(17336, 272)

__최소 지지도 설정 후 Apriori 알고리즘을 통해 빈발집합 추출__

In [66]:
# 17336 건 중 100건 정도는 되어야하지 않을까?
ms = 100/df.shape[0]
freq_itemsets = apriori(df,min_support=ms,use_colnames=True)
freq_itemsets

Unnamed: 0,support,itemsets
0,0.018978,(UHT-milk)
1,0.008826,(almonds)
2,0.014421,(avocado)
3,0.010037,(baking powder)
4,0.029765,(beef)
...,...,...
502,0.006807,"(tropical fruit, root vegetables, whole milk)"
503,0.008249,"(root vegetables, whole milk, yogurt)"
504,0.005941,"(soda, whole milk, yogurt)"
505,0.008595,"(tropical fruit, whole milk, yogurt)"


__추출된 번들집합 후보들의 신뢰도,향상도 평가(association rules) 후 번들집합 추출__

In [83]:
# 최소 신뢰도 = 0.3 으로 필터링 하여 향상도 순으로 내림차순 정렬
bundle_itemsets = association_rules(freq_itemsets,metric='confidence',min_threshold=0.3).sort_values(by='lift',ascending=False)
bundle_itemsets

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
35,(herb & pepper),(ground beef),0.021401,0.042513,0.006922,0.323450,7.608320,0.006012,1.415250
97,"(citrus fruit, other vegetables)",(root vegetables),0.016382,0.061837,0.005884,0.359155,5.808125,0.004871,1.463947
122,"(tropical fruit, other vegetables)",(root vegetables),0.020362,0.061837,0.006980,0.342776,5.543254,0.005721,1.427464
107,"(ground beef, mineral water)",(spaghetti),0.017709,0.075335,0.007383,0.416938,5.534486,0.006049,1.585879
49,(soup),(milk),0.021862,0.056068,0.006576,0.300792,5.364735,0.005350,1.350000
...,...,...,...,...,...,...,...,...,...
84,(waffles),(whole milk),0.021804,0.144958,0.007210,0.330688,2.281259,0.004050,1.277493
15,(coffee),(whole milk),0.032937,0.144958,0.010614,0.322242,2.222993,0.005839,1.261573
79,(sausage),(whole milk),0.053299,0.144958,0.016959,0.318182,2.194986,0.009233,1.254061
6,(bottled water),(whole milk),0.062702,0.144958,0.019497,0.310948,2.145080,0.010408,1.240895


### 추천 메소드 작성

In [149]:
def get_bundle(pch_in): # 구매 목록이 선행부에 존재하는 연관 규칙 리턴
    return bundle_itemsets[bundle_itemsets['antecedents']==frozenset(pch_in)]

def run(): # 메인 
    while True:
        user_in = input('구매했던 상품을 쉼표 구분으로 입력하세요.(종료하려면 Enter) : ').strip().split(',')
        if not user_in[0]:break
        return_bundle = get_bundle(user_in)
        if not return_bundle.shape[0]:
            print('연관 상품이 존재하지 않습니다. 다른 품목으로 입력해보세요,')
        else:
            bundle_out = [*map(lambda x:','.join(list(x)),return_bundle.consequents.values)]
            if len(bundle_out)>3:
                print('추천하고 싶은 상품 목록 TOP3 입니다. :')
                for item in bundle_out[:3]:
                    print(item)
            else:
                print(f'추천하고 싶은 상품 목록 TOP{len(bundle_out)} 입니다. :')
                for item in bundle_out:
                    print(item)
        print('='*50)
        print()
    print('종료합니다.')
    return

In [154]:
run()

구매했던 상품을 쉼표 구분으로 입력하세요.(종료하려면 Enter) : beef
추천하고 싶은 상품 목록 TOP3 입니다. :
root vegetables
other vegetables
whole milk

구매했던 상품을 쉼표 구분으로 입력하세요.(종료하려면 Enter) : berries
추천하고 싶은 상품 목록 TOP3 입니다. :
yogurt
other vegetables
whole milk

구매했던 상품을 쉼표 구분으로 입력하세요.(종료하려면 Enter) : root vegetables,yogurt
추천하고 싶은 상품 목록 TOP2 입니다. :
other vegetables
whole milk

구매했던 상품을 쉼표 구분으로 입력하세요.(종료하려면 Enter) : 
종료합니다.
