/
Prefixspan.cpp
executable file
·105 lines (89 loc) · 3.19 KB
/
Prefixspan.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#include "Prefixspan.h"

#include <sstream>
/********************************************************************
* Read file
********************************************************************/
void Prefixspan::read(const string &_filename, Pairdata &pairdata) {
string line;
int item;
unsigned int id = 0;
istream *is;
if (_filename == "-") {
is = &cin;
} else {
is = new ifstream(_filename.c_str());
}
Transaction transaction;
while (getline (*is, line)) {
transaction.second.clear();
vector<unsigned int> &itemsets = transaction.second;
istrstream istrs ((char *)line.c_str());
while (istrs >> item)
itemsets.push_back(item);
transaction.first = id++;
pairdata.database.push_back(transaction);
pairdata.indeces.push_back(0);
}
if (_filename != "-") {
delete is;
}
}
/********************************************************************
* Run prefixspan
********************************************************************/
void Prefixspan::run(const string &_filename)
{
Pairdata pairdata;
read(_filename, pairdata);
project(pairdata);
}
/********************************************************************
* Print frequent sequential patterns
********************************************************************/
// Writes the current pattern and the transactions of `projected` to standard
// output as two lines:
//   <item> <item> ...
//   ( <transaction id> <transaction id> ... ) : <number of transactions>
// Every item and id is followed by a single space.
void Prefixspan::print_pattern(Pairdata &projected) {
  const vector<Transaction> &rows = projected.database;

  // First line: the items of the pattern being reported.
  for (vector<unsigned int>::size_type k = 0; k < pattern.size(); ++k) {
    cout << pattern[k] << " ";
  }
  cout << endl << "( ";

  // Second line: ids of the transactions in the projected database.
  for (vector<Transaction>::size_type r = 0; r < rows.size(); ++r) {
    cout << rows[r].first << " ";
  }
  cout << ") : " << rows.size() << endl;
}
/********************************************************************
* Project database
********************************************************************/
// Recursive mining step.
//
// projected : the transactions still under consideration, together with, for
//             each transaction i, the offset projected.indeces[i] from which
//             its remaining items start.
//
// Steps:
//   1. Return if `projected` holds fewer than min_sup transactions.
//   2. Print the current `pattern` when the transaction count is within
//      max_sup (0 disables that bound) and the pattern has at least min_pat
//      items.
//   3. Return if max_pat is non-zero and the pattern has reached that length.
//   4. For every item found in the remaining suffixes, in ascending item
//      order, build the database projected on that item (each transaction
//      containing it, with the offset moved just past its first occurrence),
//      append the item to `pattern`, recurse, and undo the append.
void Prefixspan::project(Pairdata &projected) {
  const vector<Transaction> &database = projected.database;
  const vector<unsigned int> &indeces = projected.indeces;

  if (database.size() < min_sup)
    return;
  if ((max_sup == 0 || database.size() <= max_sup) && pattern.size() >= min_pat)
    print_pattern(projected);
  if (max_pat != 0 && pattern.size() == max_pat)
    return;

  // Count how often each item occurs in the remaining suffixes. An item may
  // be counted several times per transaction, so this is an upper bound on
  // the number of transactions that contain it.
  map<unsigned int, unsigned int> occurrences;
  for (unsigned int i = 0; i < database.size(); ++i) {
    const vector<unsigned int> &itemset = database[i].second;
    for (unsigned int pos = indeces[i]; pos < itemset.size(); ++pos)
      ++occurrences[itemset[pos]];
  }

  Pairdata pairdata;
  vector<Transaction> &new_database = pairdata.database;
  vector<unsigned int> &new_indeces = pairdata.indeces;
  for (map<unsigned int, unsigned int>::const_iterator it = occurrences.begin();
       it != occurrences.end(); ++it) {
    // Fewer than min_sup occurrences means fewer than min_sup supporting
    // transactions, so the recursive call would return immediately; skip the
    // item before paying for the transaction copies.
    if (it->second < min_sup)
      continue;

    const unsigned int item = it->first;
    for (unsigned int i = 0; i < database.size(); ++i) {
      const Transaction &transaction = database[i];
      const vector<unsigned int> &itemset = transaction.second;
      for (unsigned int pos = indeces[i]; pos < itemset.size(); ++pos) {
        if (itemset[pos] == item) {
          // Keep the transaction once, resuming after the first match.
          new_database.push_back(transaction);
          new_indeces.push_back(pos + 1);
          break;
        }
      }
    }

    pattern.push_back(item);
    project(pairdata);
    pattern.pop_back();
    pairdata.clear();  // reuse the same Pairdata for the next item
  }
}