forked from abdonkov/DSA
/
StringSearch.RabinKarp.cs
348 lines (293 loc) · 16.2 KB
/
StringSearch.RabinKarp.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
using System;
using System.Collections.Generic;
namespace DSA.Algorithms.Strings
{
/// <summary>
/// A static class containing methods for string pattern searching.
/// </summary>
public static partial class StringSearch
{
/// <summary>
/// Searches for the first occurrence of a pattern in a target <see cref="string"/> using Rabin-Karp's algorithm.
/// </summary>
/// <param name="target">The <see cref="string"/> to search in.</param>
/// <param name="pattern">The <see cref="string"/> to search for.</param>
/// <returns>Returns the position of the first occurrence of the pattern. If not found returns -1.</returns>
public static int RabinKarpSearchFirst(string target, string pattern)
{
if (target == null) throw new ArgumentNullException(nameof(target));
if (pattern == null) throw new ArgumentNullException(nameof(pattern));
// Save for faster access
int patternLength = pattern.Length;
if (target.Length < patternLength) return -1;
ulong targetHash = 0;
ulong patternHash = 0;
ulong alphabetSize = 256; // max char value
ulong moduloValue = 65537; // custom selected prime number for the hashing
// Calculating hash of pattern and the beggining of target
for (int i = 0; i < patternLength; i++)
{
patternHash = (patternHash * alphabetSize + pattern[i]) % moduloValue;
targetHash = (targetHash * alphabetSize + target[i]) % moduloValue;
}
// Check if pattern is in the beginning
if (patternHash == targetHash)
if (string.Equals(target.Substring(0, patternLength), pattern))
return 0;
// Calculate pow value (used in the hashing proccess)
ulong pow = 1;
for (int i = 0; i < patternLength - 1; i++)
{
pow = (pow * alphabetSize) % moduloValue;
}
// Hashing the rest of the target and searching for the pattern
int endOfSearch = target.Length - patternLength;
for (int i = 0; i < endOfSearch; i++)
{
// Some Rabin-Karp magic
targetHash = (targetHash + moduloValue - pow * target[i] % moduloValue) % moduloValue;
targetHash = (targetHash * alphabetSize + target[i + patternLength]) % moduloValue;
// If the hashes are equal check the string( because collisions are possible) and return if found
if (targetHash == patternHash)
if (string.Equals(target.Substring(i + 1, patternLength), pattern))
return i + 1;
}
// The pattern was not found
return -1;
}
/// <summary>
/// Searches for all occurences of a pattern in a target <see cref="string"/> using Rabin-Karp's algorithm.
/// </summary>
/// <param name="target">The <see cref="string"/> to search in.</param>
/// <param name="pattern">The <see cref="string"/> to search for.</param>
/// <returns>Returns <see cref="IList{T}"/> of <see cref="int"/> values of the positions at which the pattern occurs. <see cref="IList{T}"/> is empty if none found.</returns>
public static IList<int> RabinKarpSearchAll(string target, string pattern)
{
if (target == null) throw new ArgumentNullException(nameof(target));
if (pattern == null) throw new ArgumentNullException(nameof(pattern));
// Save for faster access
int patternLength = pattern.Length;
// List with the positions where the pattern was found
var matches = new List<int>();
if (target.Length < patternLength) return matches;
ulong targetHash = 0;
ulong patternHash = 0;
ulong alphabetSize = 256; // max char value
ulong moduloValue = 65537; // custom selected prime number for the hashing
// Calculating hash of pattern and the beggining of target
for (int i = 0; i < patternLength; i++)
{
patternHash = (patternHash * alphabetSize + pattern[i]) % moduloValue;
targetHash = (targetHash * alphabetSize + target[i]) % moduloValue;
}
// Check if pattern is in the beginning
if (patternHash == targetHash)
if (string.Equals(target.Substring(0, patternLength), pattern))
matches.Add(0);
// Calculate pow value (used in the hashing proccess)
ulong pow = 1;
for (int i = 0; i < patternLength - 1; i++)
{
pow = (pow * alphabetSize) % moduloValue;
}
// Hashing the rest of the target and searching for the pattern
int endOfSearch = target.Length - patternLength;
for (int i = 0; i < endOfSearch; i++)
{
// Some Rabin-Karp magic
targetHash = (targetHash + moduloValue - pow * target[i] % moduloValue) % moduloValue;
targetHash = (targetHash * alphabetSize + target[i + patternLength]) % moduloValue;
// If the hashes are equal check the string( because collisions are possible) and return if found
if (targetHash == patternHash)
if (string.Equals(target.Substring(i + 1, patternLength), pattern))
matches.Add(i + 1);
}
// Retrun the list with all starting positions of the pattern
return matches;
}
/// <summary>
/// Searches for the first occurrence of multiple patterns in a target <see cref="string"/> using Rabin-Karp's algorithm.
/// </summary>
/// <param name="target">The <see cref="string"/> to search in.</param>
/// <param name="patterns">A <see cref="IList{T}"/> of <see cref="string"/> patterns.</param>
/// <returns>Retruns <see cref="Dictionary{TKey, TValue}"/> with <see cref="string"/> keys of the patterns and <see cref="int"/> values of the position of first occurence.
/// If a pattern is not found there is no entry in the dictionary.</returns>
public static Dictionary<string, int> RabinKarpMultipleSearchFirst(string target, IList<string> patterns)
{
if (target == null) throw new ArgumentNullException(nameof(target));
if (patterns == null) throw new ArgumentNullException(nameof(patterns));
// Dictionary with pattern hashes for all strings
var patternHashes = new Dictionary<string, ulong>();
// Dictionary with target hashes for all different string lengths
var targetHashes = new Dictionary<int, ulong>();
// Dictionary with pow values for all different string lengths
var pows = new Dictionary<int, ulong>();
// Dictionary with all strings with a specific length
var patternLengths = new Dictionary<int, List<string>>();
// Dictionary with found positions for every string
var matches = new Dictionary<string, int>();
ulong alphabetSize = 256; // max char value
ulong moduloValue = 65537; // custom selected prime number for the hashing
// Calculating hash of patterns and all target hashes and pow values
for (int i = 0; i < patterns.Count; i++)
{
// Chech if target hash for current string length has to be computed
bool hasToComputeTargetHashAndPow = !targetHashes.ContainsKey(patterns[i].Length);
// Populate pattern lengths dictionary
if (hasToComputeTargetHashAndPow) patternLengths.Add(patterns[i].Length, new List<string>() { patterns[i] });
else patternLengths[patterns[i].Length].Add(patterns[i]);
ulong patternHash = 0;
ulong targetHash = 0;
ulong pow = 1;
for (int j = 0; j < patterns[i].Length; j++)
{
patternHash = (patternHash * alphabetSize + patterns[i][j]) % moduloValue;
if (hasToComputeTargetHashAndPow)
{
targetHash = (targetHash * alphabetSize + target[j]) % moduloValue;
if (j != 0) // used to skip one iteration. Pow is calculated with one less iteration
pow = (pow * alphabetSize) % moduloValue;
}
}
// Add hashes in collections
patternHashes.Add(patterns[i], patternHash);
if (hasToComputeTargetHashAndPow)
{
targetHashes.Add(patterns[i].Length, targetHash);
pows.Add(patterns[i].Length, pow);
}
}
// Check if pattern is in the beginning
foreach (var patKVP in patternHashes)
{
if (patKVP.Value == targetHashes[patKVP.Key.Length])
if (string.Equals(target.Substring(0, patKVP.Key.Length), patKVP.Key))
matches.Add(patKVP.Key, 0);
}
// Hashing the rest of the target and searching for the pattern
// Patters are grouped by their length
foreach (var patternsWithSpecificLength in patternLengths)
{
int patternLength = patternsWithSpecificLength.Key;
int endOfSearch = target.Length - patternLength;
for (int i = 0; i < endOfSearch; i++)
{
ulong targetHash = targetHashes[patternLength];
// Some Rabin-Karp magic
targetHash = (targetHash + moduloValue - pows[patternLength] * target[i] % moduloValue) % moduloValue;
targetHash = (targetHash * alphabetSize + target[i + patternLength]) % moduloValue;
targetHashes[patternLength] = targetHash;
// Search all patterns for a match
foreach (var pat in patternsWithSpecificLength.Value)
{
if (!matches.ContainsKey(pat))
{
// If the hashes are equal check the string( because collisions are possible) and return if found
if (targetHash == patternHashes[pat])
if (string.Equals(target.Substring(i + 1, patternLength), pat))
matches.Add(pat, i + 1);
}
if (matches.Count == patterns.Count) return matches;
}
if (matches.Count == patterns.Count) return matches;
}
}
// Return matches
return matches;
}
/// <summary>
/// Searches for all occurrences of multiple patterns in a target <see cref="string"/> using Rabin-Karp's algorithm.
/// </summary>
/// <param name="target">The <see cref="string"/> to search in.</param>
/// <param name="patterns">A <see cref="IList{T}"/> of <see cref="string"/> patterns.</param>
/// <returns>Retruns <see cref="Dictionary{TKey, TValue}"/> with <see cref="string"/> keys of the patterns and <see cref="List{T}"/> of <see cref="int"/> values of the positions at which the pattern occurs.
/// If a pattern is not found there is no entry in the dictionary.</returns>
public static Dictionary<string, List<int>> RabinKarpMultipleSearchAll(string target, IList<string> patterns)
{
if (target == null) throw new ArgumentNullException(nameof(target));
if (patterns == null) throw new ArgumentNullException(nameof(patterns));
// Dictionary with pattern hashes for all strings
var patternHashes = new Dictionary<string, ulong>();
// Dictionary with target hashes for all different string lengths
var targetHashes = new Dictionary<int, ulong>();
// Dictionary with pow values for all different string lengths
var pows = new Dictionary<int, ulong>();
// Dictionary with all strings with a specific length
var patternLengths = new Dictionary<int, List<string>>();
// Dictionary with found positions for every string
var matches = new Dictionary<string, List<int>>();
ulong alphabetSize = 256; // max char value
ulong moduloValue = 65537; // custom selected prime number for the hashing
// Calculating hash of patterns and all target hashes and pow values
for (int i = 0; i < patterns.Count; i++)
{
// Chech if target hash for current string length has to be computed
bool hasToComputeTargetHashAndPow = !targetHashes.ContainsKey(patterns[i].Length);
// Populate matches dictionary and pattern lengths dictionary
matches.Add(patterns[i], new List<int>());
if (hasToComputeTargetHashAndPow) patternLengths.Add(patterns[i].Length, new List<string>() { patterns[i] });
else patternLengths[patterns[i].Length].Add(patterns[i]);
ulong patternHash = 0;
ulong targetHash = 0;
ulong pow = 1;
for (int j = 0; j < patterns[i].Length; j++)
{
patternHash = (patternHash * alphabetSize + patterns[i][j]) % moduloValue;
if (hasToComputeTargetHashAndPow)
{
targetHash = (targetHash * alphabetSize + target[j]) % moduloValue;
if (j != 0) // used to skip one iteration. Pow is calculated with one less iteration
pow = (pow * alphabetSize) % moduloValue;
}
}
// Add hashes in collections
patternHashes.Add(patterns[i], patternHash);
if (hasToComputeTargetHashAndPow)
{
targetHashes.Add(patterns[i].Length, targetHash);
pows.Add(patterns[i].Length, pow);
}
}
// Check if pattern is in the beginning
foreach (var patKVP in patternHashes)
{
if (patKVP.Value == targetHashes[patKVP.Key.Length])
if (string.Equals(target.Substring(0, patKVP.Key.Length), patKVP.Key))
matches[patKVP.Key].Add(0);
}
// Hashing the rest of the target and searching for the pattern
// Patters are grouped by their length
foreach (var patternsWithSpecificLength in patternLengths)
{
int patternLength = patternsWithSpecificLength.Key;
int endOfSearch = target.Length - patternLength;
for (int i = 0; i < endOfSearch; i++)
{
ulong targetHash = targetHashes[patternLength];
// Some Rabin-Karp magic
targetHash = (targetHash + moduloValue - pows[patternLength] * target[i] % moduloValue) % moduloValue;
targetHash = (targetHash * alphabetSize + target[i + patternLength]) % moduloValue;
targetHashes[patternLength] = targetHash;
// Search all patterns for a match
foreach (var pat in patternsWithSpecificLength.Value)
{
// If the hashes are equal check the string( because collisions are possible) and return if found
if (targetHash == patternHashes[pat])
if (string.Equals(target.Substring(i + 1, patternLength), pat))
matches[pat].Add(i + 1);
}
}
}
// Remove all patterns that are not found
for (int i = 0; i < patterns.Count; i++)
{
if (matches[patterns[i]].Count == 0)
{
matches.Remove(patterns[i]);
}
}
// Return matches
return matches;
}
}
}