In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Toy market-basket dataset: a list of transactions, where each inner list
# holds the items of one transaction.
data = [
    ['牛奶', '面包', '尿布'],
    ['可乐', '面包', '尿布'],
    ['牛奶', '啤酒'],
    ['牛奶', '尿布'],
    ['面包', '可乐'],
    ['牛奶', '面包', '尿布', '啤酒']
]

# One-hot encode the transactions with TransactionEncoder: fit the encoder
# on the data first, then transform the same data.
te = TransactionEncoder()
te.fit(data)
te_ary = te.transform(data)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Mine frequent itemsets with apriori (minimum support 0.5), reporting
# item names instead of column indices.
frequent_itemsets = apriori(
    df,
    use_colnames=True,
    min_support=0.5,
)

# Show the frequent itemsets.
print(frequent_itemsets)

# Derive association rules from the frequent itemsets, keeping those whose
# confidence reaches the 0.7 threshold.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.7,
    metric="confidence",
)

# Show the association rules.
print(rules)

# The rules can now be used for prediction: for a new transaction, check
# whether its items match a rule's antecedent; if they do, predict that the
# transaction may also contain that rule's consequent.

    support  itemsets
0  0.666667      (尿布)
1  0.666667      (牛奶)
2  0.666667      (面包)
3  0.500000  (牛奶, 尿布)
4  0.500000  (面包, 尿布)
  antecedents consequents  antecedent support  consequent support  support  \
0        (牛奶)        (尿布)            0.666667            0.666667      0.5   
1        (尿布)        (牛奶)            0.666667            0.666667      0.5   
2        (面包)        (尿布)            0.666667            0.666667      0.5   
3        (尿布)        (面包)            0.666667            0.666667      0.5   

   confidence   lift  leverage  conviction  zhangs_metric  
0        0.75  1.125  0.055556    1.333333       0.333333  
1        0.75  1.125  0.055556    1.333333       0.333333  
2        0.75  1.125  0.055556    1.333333       0.333333  
3        0.75  1.125  0.055556    1.333333       0.333333  


In [34]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Example dataset: each transaction is a list whose last element is the
# class label.
# NOTE(review): the label 'diss面' in the last row looks like a typo
# (possibly meant to be '负面') — confirm against the intended data.
data = [
    ['牛奶', '面包', '尿布', '正面'],
    ['牛奶', '面包', '尿布', '负面'],
    ['牛奶', '面包', '正面'],
    ['牛奶', '尿布', '正面'],
    ['面包', '可乐', '负面'],
    ['牛奶', '面包', '尿布', '啤酒', '正面'],
    ['尿布', 'diss面']
]

# Split every row into its items (all but the last element) and its class
# label (the last element).
transactions = [row[:-1] for row in data]
labels = [row[-1] for row in data]

# One-hot encode the item lists.
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Mine frequent itemsets with apriori (minimum support 0.5).
frequent_itemsets = apriori(
    df,
    use_colnames=True,
    min_support=0.5,
)

# Generate association rules with confidence of at least 0.7.
# NOTE(review): the labels were removed from the transactions before mining,
# so these rules relate items to items, not items to class labels.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.7,
    metric="confidence",
)

print(rules)
# Classify a new transaction using the mined association rules.
def classify_transaction(new_transaction, rules, labels):
    """Predict an item for ``new_transaction`` from the best matching rule.

    A rule matches when every item of its antecedent is present in
    ``new_transaction``. Among the matching rules the one with the highest
    confidence wins (the earliest rule wins ties) and one item of its
    consequent is returned.

    Args:
        new_transaction: Iterable of hashable items making up the transaction
            to classify. ``None`` is treated as an empty transaction.
        rules: Object whose ``itertuples()`` yields rows exposing
            ``antecedents``, ``consequents`` and ``confidence`` attributes
            (e.g. the DataFrame returned by ``association_rules``).
        labels: Currently unused; kept so existing callers keep working.

    Returns:
        One item taken from the winning rule's consequent, or ``'Unknown'``
        when no rule matches. For a multi-item consequent the item whose
        string form sorts first is returned, so the result does not depend
        on set iteration order and is reproducible across runs.
    """
    # Build the lookup set once instead of scanning the list for every rule.
    transaction_items = set(new_transaction) if new_transaction is not None else set()

    max_confidence = 0
    classified_label = None

    for rule in rules.itertuples():
        # A rule without a consequent has nothing to predict.
        if not rule.consequents:
            continue
        # Strict '>' keeps the first rule seen among equally confident ones.
        if rule.confidence > max_confidence and transaction_items.issuperset(rule.antecedents):
            max_confidence = rule.confidence
            # Deterministic pick: iteration order of a frozenset of strings
            # can change between interpreter runs (hash randomisation).
            classified_label = min(rule.consequents, key=str)

    return classified_label if classified_label is not None else 'Unknown'

# Example new transaction to run through the rule-based classifier.
new_data = ['牛奶']

print(classify_transaction(new_transaction=new_data, rules=rules, labels=labels))


  antecedents consequents  antecedent support  consequent support   support  \
0        (牛奶)        (面包)            0.714286            0.714286  0.571429   
1        (面包)        (牛奶)            0.714286            0.714286  0.571429   

   confidence  lift  leverage  conviction  zhangs_metric  
0         0.8  1.12  0.061224    1.428571          0.375  
1         0.8  1.12  0.061224    1.428571          0.375  
面包


In [35]:
import time
import os
from pathlib import Path

# Directory that holds car.csv. Defaults to the original location; set the
# CAR_DATA_DIR environment variable to run the notebook on another machine
# without editing this cell.
DATA_DIR = Path(os.environ.get('CAR_DATA_DIR', r'C:\Users\86131\Desktop\毕设\data'))

# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which
# suggests the CSV carries a saved index — consider index_col=0 if nothing
# downstream relies on that column.
data = pd.read_csv(DATA_DIR / 'car.csv')
data

Unnamed: 0,buing,maint,doors,persons,lug_boot,safty,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good
