Skip to content

suffix array

Changi Cho edited this page Oct 16, 2022 · 1 revision

Suffix Array

접미사 배열 : 문자열의 접미사들을 사전순으로 정렬한 배열. 부분 문자열 자체를 저장하는 대신 간단하게 접미사가 시작하는 인덱스를 저장.


구현

Suffix Array and LCP Array

// use Manber Myers algorithm by STL sort
// time : O(N * log_2^2(N))
// space : O(N)
vector<int> getSuffixArray(string &s) {
  int size = s.size();
  vector<int> suffixArray(size), rank(size + 1);

  for (int i = 0; i < size; ++i) {
    suffixArray[i] = i, rank[i] = s[i];
  }

  for (int d = 1; d < size; d *= 2) {
    auto compare = [&](const int &a, const int &b) {
      return rank[a] < rank[b] ||
             (rank[a] == rank[b] && rank[a + d] < rank[b + d]);
    };
    sort(suffixArray.begin(), suffixArray.end(), compare);

    vector<int> newRank(size + 1, 0);
    newRank[suffixArray[0]] = 1;
    for (int i = 1; i < size; i++) {
      newRank[suffixArray[i]] = newRank[suffixArray[i - 1]];

      if (compare(suffixArray[i - 1], suffixArray[i])) {
        newRank[suffixArray[i]]++;
      }
    }
    rank = newRank;
  }
  return suffixArray;
}

// use Manber Myers algorithm by counting sort
// time : O(N * log_2(N))
// space : O(N)
vector<int> getSuffixArrayByCount(string &s) {
  int size = s.size();
  int maximum = max(256, size) + 1;

  vector<int> suffixArray(size), rank(size + size);
  vector<int> count(maximum), idx(size);
  // initialize
  for (int i = 0; i < size; ++i) {
    suffixArray[i] = i, rank[i] = s[i];
  }
  for (int d = 1; d < size; d *= 2) {
    vector<int> newRank(size + size);
    auto compare = [&](const int &a, const int &b) {
      return rank[a] < rank[b] ||
             (rank[a] == rank[b] && rank[a + d] < rank[b + d]);
    };

    for (int i = 0; i < maximum; ++i) count[i] = 0;
    for (int i = 0; i < size; ++i) count[rank[i + d]]++;
    for (int i = 1; i < maximum; ++i) count[i] += count[i - 1];
    for (int i = size - 1; i >= 0; --i) idx[--count[rank[i + d]]] = i;

    for (int i = 0; i < maximum; ++i) count[i] = 0;
    for (int i = 0; i < size; ++i) count[rank[i]]++;
    for (int i = 1; i < maximum; ++i) count[i] += count[i - 1];
    for (int i = size - 1; i >= 0; --i)
      suffixArray[--count[rank[idx[i]]]] = idx[i];

    newRank[suffixArray[0]] = 1;
    for (int i = 1; i < size; i++) {
      newRank[suffixArray[i]] = newRank[suffixArray[i - 1]];

      if (compare(suffixArray[i - 1], suffixArray[i])) {
        newRank[suffixArray[i]]++;
      }
    }
    rank = newRank;

    // reduce running time
    if (rank[suffixArray[size - 1]] == size) break;
  }
  return suffixArray;
}

// use Kasai algorithm
// time : O(N)
// space : O(N)
vector<int> getLCP(string &s, vector<int> &suffixArray) {
  int size = s.size();
  vector<int> LCPArray(size), idxOfSuffixArray(size);
  for (int i = 0; i < size; i++) {
    idxOfSuffixArray[suffixArray[i]] = i;
  }
  for (int k = 0, i = 0; i < size; ++i) {
    if (idxOfSuffixArray[i] > 0) {
      int j = suffixArray[idxOfSuffixArray[i] - 1];
      while (s[i + k] == s[j + k]) {
        k++;
      }

      LCPArray[idxOfSuffixArray[i]] = (k ? k-- : 0);
    }
  }
  LCPArray[0] = -1;

  return LCPArray;
}

문제

Clone this wiki locally