## 读取交易数据

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv('Data\Market_Basket.csv', header = None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [3]:
df.count()

0     7501
1     5747
2     4389
3     3345
4     2529
5     1864
6     1369
7      981
8      654
9      395
10     256
11     154
12      87
13      47
14      25
15       8
16       4
17       4
18       3
19       1
dtype: int64

## 增添交易记录

In [2]:
transactions = []
for i in range(0, 7501):
    transactions.append([str(df.values[i,j]) for j in range(0, 20)])

In [5]:
# transactions[1]

## 使用Apriori产生关联预测

In [6]:
from apyori import apriori
rules = apriori(transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2)

# Visualising the results
results = list(rules)

In [8]:
results[0]

RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)])

## 产生关联规则

In [10]:
for rec in results:
    left_hands = rec.ordered_statistics[0].items_base
    right_hands = rec.ordered_statistics[0].items_add
    l = ';'.join([item for item in left_hands])
    r = ';'.join([item for item in right_hands])
    print('{} => {}'.format(l,r))

light cream => chicken
mushroom cream sauce => escalope
pasta => escalope
fromage blanc => honey
herb & pepper => ground beef
tomato sauce => ground beef
light cream => olive oil
whole wheat pasta => olive oil
pasta => shrimp
spaghetti;avocado => milk
cake;milk => burgers
turkey;chocolate => burgers
turkey;milk => burgers
frozen vegetables;cake => tomatoes
cereals;ground beef => spaghetti
ground beef;chicken => milk
light cream;nan => chicken
milk;chicken => olive oil
spaghetti;chicken => olive oil
frozen vegetables;chocolate => shrimp
herb & pepper;chocolate => ground beef
soup;chocolate => milk
cooking oil;ground beef => spaghetti
ground beef;eggs => herb & pepper
eggs;red wine => spaghetti
nan;mushroom cream sauce => escalope
pasta;nan => escalope
french fries;ground beef => herb & pepper
fromage blanc;nan => honey
frozen vegetables;green tea => tomatoes
frozen vegetables;spaghetti => ground beef
frozen vegetables;milk => olive oil
frozen vegetables;soup => milk
milk;tomatoes => fro

## 产生频繁交易集

In [11]:
import itertools
for ele in itertools.combinations(['milk', 'spaghetti', 'avocado'], 2):
    print(ele)

('milk', 'spaghetti')
('milk', 'avocado')
('spaghetti', 'avocado')


In [12]:
itemsets = []
for rec in results:    
    #print(rec.items)
    for ele in itertools.combinations(rec.items, 2):
        itemsets.append(ele)

In [13]:
itemsets

[('light cream', 'chicken'),
 ('escalope', 'mushroom cream sauce'),
 ('escalope', 'pasta'),
 ('fromage blanc', 'honey'),
 ('herb & pepper', 'ground beef'),
 ('tomato sauce', 'ground beef'),
 ('olive oil', 'light cream'),
 ('whole wheat pasta', 'olive oil'),
 ('shrimp', 'pasta'),
 ('spaghetti', 'milk'),
 ('spaghetti', 'avocado'),
 ('milk', 'avocado'),
 ('burgers', 'cake'),
 ('burgers', 'milk'),
 ('cake', 'milk'),
 ('burgers', 'turkey'),
 ('burgers', 'chocolate'),
 ('turkey', 'chocolate'),
 ('burgers', 'turkey'),
 ('burgers', 'milk'),
 ('turkey', 'milk'),
 ('frozen vegetables', 'cake'),
 ('frozen vegetables', 'tomatoes'),
 ('cake', 'tomatoes'),
 ('cereals', 'ground beef'),
 ('cereals', 'spaghetti'),
 ('ground beef', 'spaghetti'),
 ('ground beef', 'milk'),
 ('ground beef', 'chicken'),
 ('milk', 'chicken'),
 ('light cream', 'nan'),
 ('light cream', 'chicken'),
 ('nan', 'chicken'),
 ('olive oil', 'milk'),
 ('olive oil', 'chicken'),
 ('milk', 'chicken'),
 ('olive oil', 'spaghetti'),
 ('olive

In [None]:
import pandas
df2 = pandas.DataFrame(itemsets)
df2.columns = ['Source','Target']
df2['Type'] = 'undirected'
df2.to_csv('transactions.csv')