-
-
Notifications
You must be signed in to change notification settings - Fork 53
/
DynamicProgLcs.scala
130 lines (117 loc) · 4.3 KB
/
DynamicProgLcs.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
* Copyright 2024 Diffson Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package diffson.lcs
import cats.Eq
import cats.implicits._
import scala.annotation.tailrec
/** Implementation of the LCS using dynamic programming.
*
* @author Lucas Satabin
*/
class DynamicProgLcs[T: Eq] extends Lcs[T] {
def lcs(s1: List[T], s2: List[T], low1: Int, high1: Int, low2: Int, high2: Int): List[(Int, Int)] = {
val seq1 = s1.slice(low1, high1)
val seq2 = s2.slice(low2, high2)
if (seq1.isEmpty || seq2.isEmpty) {
// shortcut if at least on sequence is empty, the lcs, is empty as well
Nil
} else if (seq1 === seq2) {
// both sequences are equal, the lcs is either of them
seq1.indices.map(i => (i + low1, i + low2)).toList
} else if (seq1.startsWith(seq2)) {
// the second sequence is a prefix of the first one
// the lcs is the second sequence
seq2.indices.map(i => (i + low1, i + low2)).toList
} else if (seq2.startsWith(seq1)) {
// the first sequence is a prefix of the second one
// the lcs is the first sequence
seq1.indices.map(i => (i + low1, i + low2)).toList
} else {
// we try to reduce the problem by stripping common suffix and prefix
val (prefix, middle1, middle2, suffix) = splitPrefixSuffix(seq1, seq2, low1, low2)
val indexedMiddle1: Vector[T] = middle1.toVector
val indexedMiddle2: Vector[T] = middle2.toVector
val offset = prefix.size
val lengths: Array[Array[Int]] = Array.ofDim[Int](middle1.size + 1, middle2.size + 1)
// fill up the length matrix
// impl chosen based on microbenchmarks
val cols = indexedMiddle1.length
val rows = indexedMiddle2.length
@tailrec
def fillJs(i: Int, j: Int): Unit = {
if (j < rows) {
if (indexedMiddle1(i) == indexedMiddle2(j))
lengths(i + 1)(j + 1) = lengths(i)(j) + 1
else
lengths(i + 1)(j + 1) = math.max(lengths(i + 1)(j), lengths(i)(j + 1))
fillJs(i, j + 1)
}
}
@tailrec
def fillIs(i: Int): Unit = {
if (i < cols) {
fillJs(i, 0)
fillIs(i + 1)
}
}
fillIs(0)
// and compute the lcs out of the matrix
@tailrec
def loop(idx1: Int, idx2: Int, acc: List[(Int, Int)]): List[(Int, Int)] =
if (idx1 == 0 || idx2 == 0) {
acc
} else if (lengths(idx1)(idx2) == lengths(idx1 - 1)(idx2)) {
loop(idx1 - 1, idx2, acc)
} else if (lengths(idx1)(idx2) == lengths(idx1)(idx2 - 1)) {
loop(idx1, idx2 - 1, acc)
} else {
assert(seq1(offset + idx1 - 1) == seq2(offset + idx2 - 1))
loop(idx1 - 1, idx2 - 1, (low1 + offset + idx1 - 1, low2 + offset + idx2 - 1) :: acc)
}
prefix ++ loop(indexedMiddle1.size, indexedMiddle2.size, Nil) ++ suffix
}
}
/* Extract common prefix and suffix from both sequences */
private def splitPrefixSuffix(seq1: List[T],
seq2: List[T],
low1: Int,
low2: Int): (List[(Int, Int)], List[T], List[T], List[(Int, Int)]) = {
val size1 = seq1.size
val size2 = seq2.size
val prefix =
seq1
.zip(seq2)
.takeWhile { case (e1, e2) =>
e1 == e2
}
.indices
.map(i => (i + low1, i + low2))
.toList
val suffix =
seq1.reverse
.zip(seq2.reverse)
.takeWhile { case (e1, e2) =>
e1 == e2
}
.indices
.map(i => (size1 - i - 1 + low1, size2 - i - 1 + low2))
.toList
.reverse
(prefix, seq1.drop(prefix.size).dropRight(suffix.size), seq2.drop(prefix.size).dropRight(suffix.size), suffix)
}
def savedHashes: Lcs[T] =
new HashedLcs(new DynamicProgLcs[Hashed[T]])
}