-
Notifications
You must be signed in to change notification settings - Fork 4
suffix array
Changi Cho edited this page Oct 16, 2022
·
1 revision
접미사 배열 : 문자열의 접미사들을 사전순으로 정렬한 배열. 부분 문자열 자체를 저장하는 대신 간단하게 접미사가 시작하는 인덱스를 저장.
// use Manber Myers algorithm by STL sort
// time : O(N * log_2^2(N))
// space : O(N)
vector<int> getSuffixArray(string &s) {
int size = s.size();
vector<int> suffixArray(size), rank(size + 1);
for (int i = 0; i < size; ++i) {
suffixArray[i] = i, rank[i] = s[i];
}
for (int d = 1; d < size; d *= 2) {
auto compare = [&](const int &a, const int &b) {
return rank[a] < rank[b] ||
(rank[a] == rank[b] && rank[a + d] < rank[b + d]);
};
sort(suffixArray.begin(), suffixArray.end(), compare);
vector<int> newRank(size + 1, 0);
newRank[suffixArray[0]] = 1;
for (int i = 1; i < size; i++) {
newRank[suffixArray[i]] = newRank[suffixArray[i - 1]];
if (compare(suffixArray[i - 1], suffixArray[i])) {
newRank[suffixArray[i]]++;
}
}
rank = newRank;
}
return suffixArray;
}
// use Manber Myers algorithm by counting sort
// time : O(N * log_2(N))
// space : O(N)
vector<int> getSuffixArrayByCount(string &s) {
int size = s.size();
int maximum = max(256, size) + 1;
vector<int> suffixArray(size), rank(size + size);
vector<int> count(maximum), idx(size);
// initialize
for (int i = 0; i < size; ++i) {
suffixArray[i] = i, rank[i] = s[i];
}
for (int d = 1; d < size; d *= 2) {
vector<int> newRank(size + size);
auto compare = [&](const int &a, const int &b) {
return rank[a] < rank[b] ||
(rank[a] == rank[b] && rank[a + d] < rank[b + d]);
};
for (int i = 0; i < maximum; ++i) count[i] = 0;
for (int i = 0; i < size; ++i) count[rank[i + d]]++;
for (int i = 1; i < maximum; ++i) count[i] += count[i - 1];
for (int i = size - 1; i >= 0; --i) idx[--count[rank[i + d]]] = i;
for (int i = 0; i < maximum; ++i) count[i] = 0;
for (int i = 0; i < size; ++i) count[rank[i]]++;
for (int i = 1; i < maximum; ++i) count[i] += count[i - 1];
for (int i = size - 1; i >= 0; --i)
suffixArray[--count[rank[idx[i]]]] = idx[i];
newRank[suffixArray[0]] = 1;
for (int i = 1; i < size; i++) {
newRank[suffixArray[i]] = newRank[suffixArray[i - 1]];
if (compare(suffixArray[i - 1], suffixArray[i])) {
newRank[suffixArray[i]]++;
}
}
rank = newRank;
// reduce running time
if (rank[suffixArray[size - 1]] == size) break;
}
return suffixArray;
}
// use Kasai algorithm
// time : O(N)
// space : O(N)
vector<int> getLCP(string &s, vector<int> &suffixArray) {
int size = s.size();
vector<int> LCPArray(size), idxOfSuffixArray(size);
for (int i = 0; i < size; i++) {
idxOfSuffixArray[suffixArray[i]] = i;
}
for (int k = 0, i = 0; i < size; ++i) {
if (idxOfSuffixArray[i] > 0) {
int j = suffixArray[idxOfSuffixArray[i] - 1];
while (s[i + k] == s[j + k]) {
k++;
}
LCPArray[idxOfSuffixArray[i]] = (k ? k-- : 0);
}
}
LCPArray[0] = -1;
return LCPArray;
}