In [1]:
# 数据读取
import numpy as np
import pandas as pd
import os

pd.set_option('display.max_columns', None)
data1 = pd.read_csv("data/data1.csv", encoding="gbk")
data2 = pd.read_csv("data/data2.csv", encoding="gbk")
data3 = pd.read_csv("data/data3.csv", encoding="gbk")

data1.columns = ['序号', '校园卡号', '性别', '专业名称', '门禁卡号']
data2.columns = ['流水号', '校园卡号', '校园卡编号', '消费时间', '消费金额', '存储金额', '余额', '消费次数', '消费类型', '消费项目编码', '消费项目序列号', '消费操作编码', '操作编码', '消费地点']
data3.columns = ['序号', '门禁卡号', '进出时间', '进出地点', '是否通过', '描述']


In [2]:
data2 = data2.merge(data1[['校园卡号','性别']],on='校园卡号')

data2['校园卡号'] = data2['校园卡号'].apply(lambda x: str(x))+'-'+data2['性别']

# 时间格式调整,转换成比较标准的格式，方便后面的处理
import datetime
def st_pt(x):
    #'2019/4/20 20:17'=>'2019-04-20 20:17:00'
     return str(datetime.datetime.strptime(x, "%Y/%m/%d %H:%M"))

# 时间离散化，每个五分钟一个类型
def time_5(s):
    #'2022-02-22 17:46:07'=>'2022-02-22 17_9'
    a = str(round(int(s.split(':')[1])/5))
    return s.split(':')[0]+'_'+a

# 数据处理，处理成标准的格式
df = data2
df = df.sort_values(by='消费时间',ascending=True) 
df['消费时间_F'] = df['消费时间'].apply(st_pt)
df['消费时间_5'] = df['消费时间_F'].apply(time_5)
all_list = []
for v in df['消费时间_5'].unique():
    one = df[df['消费时间_5']==v]['校园卡号'].unique().tolist()
    all_list.append(one)


In [3]:
df.to_csv('data/df.csv',header=True,index=False)

In [4]:
# 关联算法
from efficient_apriori import apriori

itemsets, rules = apriori(all_list, min_support=0.005,  min_confidence=1)


In [11]:
# 一元数据？
itemsets[1]

{('180641-女',): 83,
 ('181021-女',): 60,
 ('181036-女',): 63,
 ('181049-女',): 36,
 ('180691-女',): 77,
 ('180283-女',): 51,
 ('182881-女',): 65,
 ('182403-女',): 74,
 ('181016-女',): 83,
 ('181026-女',): 93,
 ('184264-女',): 36,
 ('181034-女',): 87,
 ('180411-女',): 84,
 ('180417-女',): 55,
 ('180393-女',): 69,
 ('183363-女',): 40,
 ('183110-男',): 41,
 ('182384-女',): 39,
 ('184287-男',): 66,
 ('182352-女',): 68,
 ('182029-男',): 78,
 ('181931-女',): 40,
 ('181481-男',): 86,
 ('184190-女',): 71,
 ('182379-女',): 95,
 ('182356-女',): 75,
 ('182679-女',): 44,
 ('181204-女',): 55,
 ('182376-女',): 73,
 ('184336-女',): 41,
 ('182387-女',): 83,
 ('184191-女',): 92,
 ('181854-女',): 51,
 ('183359-女',): 48,
 ('181866-女',): 67,
 ('181623-男',): 67,
 ('180926-男',): 70,
 ('180920-男',): 115,
 ('180921-男',): 100,
 ('181504-男',): 96,
 ('183200-女',): 68,
 ('181339-男',): 81,
 ('180665-女',): 91,
 ('181181-女',): 34,
 ('180367-女',): 98,
 ('182196-女',): 86,
 ('180466-女',): 82,
 ('181473-女',): 73,
 ('183999-女',): 82,
 ('180294-女',): 34

In [10]:
# 二元数据
itemsets[2]

{('180011-女', '180012-女'): 34,
 ('180011-女', '180042-女'): 33,
 ('180026-女', '180048-女'): 35,
 ('180026-女', '180055-女'): 38,
 ('180045-女', '180071-女'): 35,
 ('180048-女', '180055-女'): 37,
 ('180074-男', '180077-男'): 38,
 ('180087-女', '180098-女'): 41,
 ('180087-女', '180144-女'): 37,
 ('180091-女', '180203-女'): 48,
 ('180095-女', '180144-女'): 31,
 ('180098-女', '180144-女'): 47,
 ('180105-女', '180164-女'): 42,
 ('180109-女', '180139-女'): 38,
 ('180111-女', '180167-女'): 31,
 ('180118-女', '180164-女'): 36,
 ('180122-女', '180133-女'): 52,
 ('180137-女', '180159-女'): 43,
 ('180142-女', '181889-男'): 33,
 ('180169-女', '180173-女'): 43,
 ('180171-女', '180189-女'): 34,
 ('180203-女', '180208-女'): 33,
 ('180240-女', '180295-女'): 36,
 ('180252-女', '180316-女'): 32,
 ('180252-女', '183952-女'): 32,
 ('180262-女', '180820-女'): 39,
 ('180274-女', '180305-女'): 34,
 ('180274-女', '180339-女'): 32,
 ('180287-女', '180296-女'): 35,
 ('180293-女', '180309-女'): 37,
 ('180309-女', '180328-女'): 35,
 ('180316-女', '183952-女'): 39,
 ('18036

In [12]:
# 三元数据
itemsets[3]

{('180363-女', '181876-女', '183979-女'): 40,
 ('180711-女', '180732-女', '180738-女'): 35,
 ('180792-女', '180822-女', '180849-女'): 35,
 ('181338-男', '181343-男', '181344-男'): 40,
 ('181503-男', '181507-男', '181508-男'): 33,
 ('181552-男', '181571-男', '181582-男'): 39,
 ('181556-男', '181559-男', '181568-男'): 35,
 ('181848-女', '181865-女', '181871-女'): 35,
 ('182304-女', '182329-女', '182340-女'): 36,
 ('182304-女', '182329-女', '182403-女'): 32,
 ('183305-女', '183308-女', '183317-女'): 32,
 ('183419-女', '183420-女', '183422-女'): 49,
 ('183419-女', '183420-女', '183424-女'): 45,
 ('183419-女', '183422-女', '183424-女'): 48,
 ('183420-女', '183422-女', '183424-女'): 51,
 ('183641-女', '183688-女', '183690-女'): 32,
 ('183671-女', '183701-女', '183742-女'): 35,
 ('183713-女', '183726-女', '183737-女'): 36}

In [15]:
itemsets[4]

{('183419-女', '183420-女', '183422-女', '183424-女'): 42}