-
Notifications
You must be signed in to change notification settings - Fork 29
/
Utilities.fs
357 lines (281 loc) · 11.5 KB
/
Utilities.fs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
namespace CWTools.Utilities
open System
open System.Collections.Concurrent
open System.Collections.Generic
open CWTools.Utilities.Position
open System.Globalization
open System.IO
open VDS.Common.Tries
module Utils =
let inline (==) (x: string) (y: string) =
x.Equals(y, StringComparison.OrdinalIgnoreCase)
type InsensitiveStringComparer() =
interface IComparer<string> with
member __.Compare(a, b) =
String.Compare(a, b, StringComparison.OrdinalIgnoreCase)
type LocKeySet = Microsoft.FSharp.Collections.Tagged.Set<string, InsensitiveStringComparer>
let memoize keyFunction memFunction =
let dict = new System.Collections.Generic.Dictionary<_, _>()
fun n ->
match dict.TryGetValue(keyFunction (n)) with
| true, v -> v
| _ ->
let temp = memFunction (n)
dict.Add(keyFunction (n), temp)
temp
type LogLevel =
| Silent
| Normal
| Verbose
/// For the default logger only
let mutable loglevel = Silent
let logInner level message =
match loglevel, level with
| Silent, _ -> ()
| Normal, Normal -> Printf.eprintfn "%s: %s" (System.DateTime.Now.ToString("HH:mm:ss")) message
| Verbose, _ -> Printf.eprintfn "%s: %s" (System.DateTime.Now.ToString("HH:mm:ss")) message
| _, _ -> ()
// |Verbose -> logWith logger format
let private defaultLogVerbose message = logInner Verbose message
let private defaultLogNormal message = logInner Normal message
let private defaultLogAll message =
Printf.eprintfn "%s: %s" (System.DateTime.Now.ToString("HH:mm:ss")) message
let mutable logDiag = defaultLogVerbose
let mutable logInfo = defaultLogNormal
let mutable logWarning = defaultLogNormal
let mutable logError = defaultLogAll
let log m = logInfo m
let duration f s =
let timer = new System.Diagnostics.Stopwatch()
timer.Start()
let returnValue = f ()
//log (sprintf "Elapsed Time: %i %s" timer.ElapsedMilliseconds s)
returnValue
let mkZeroFile file =
mkRange file (mkPos 0 0) (mkPos 10000 0)
let repeatN f n x =
let mutable x = x
for i = 1 to n do
x <- f x
x
let getAllFoldersUnion dirs =
let rec getAllFolders depth dirs =
if Seq.isEmpty dirs || depth > 20 then
Seq.empty
else
seq {
yield! dirs |> Seq.collect Directory.EnumerateDirectories
yield! dirs |> Seq.collect Directory.EnumerateDirectories |> getAllFolders (depth + 1)
}
seq {
yield! dirs
yield! getAllFolders 0 dirs
}
let structSnd struct (_, x) = x
let structFst struct (x, _) = x
[<Literal>]
let magicChar = '\u1E00'
[<Literal>]
let magicCharString = "\u1E00"
let quoteCharArray = [| '"' |]
module TryParser =
// convenient, functional TryParse wrappers returning option<'a>
let tryParseWith tryParseFunc =
tryParseFunc
>> function
| true, v -> Some v
| false, _ -> None
let parseDate: string -> _ = tryParseWith System.DateTime.TryParse
let parseInt: string -> _ = tryParseWith System.Int32.TryParse
let parseIntWithDecimal: string -> _ =
tryParseWith (fun s ->
System.Int32.TryParse(
s,
Globalization.NumberStyles.AllowDecimalPoint
||| Globalization.NumberStyles.Integer,
CultureInfo.InvariantCulture
))
let parseSingle: string -> _ = tryParseWith System.Single.TryParse
let parseDouble: string -> _ =
tryParseWith (fun s ->
System.Double.TryParse(
s,
(Globalization.NumberStyles.Float ||| Globalization.NumberStyles.AllowThousands),
CultureInfo.InvariantCulture
))
let parseDecimal: string -> _ =
tryParseWith (fun s ->
System.Decimal.TryParse(
s,
(NumberStyles.Float ||| NumberStyles.AllowThousands),
CultureInfo.InvariantCulture
))
// etc.
// active patterns for try-parsing strings
let (|Date|_|) = parseDate
let (|Int|_|) = parseInt
let (|Single|_|) = parseSingle
let (|Double|_|) = parseDouble
type StringToken = int
type StringLowerToken = int
type StringTokens =
struct
val lower: StringLowerToken
val normal: StringToken
/// We throw away the quotes when we intern, but we do need to keep that info, but don't want to have multiple tokens with/without quotes
val quoted: bool
new(lower, normal, quoted) =
{ lower = lower
normal = normal
quoted = quoted }
end
type StringMetadata =
struct
val startsWithAmp: bool
val containsDoubleDollar: bool
val containsQuestionMark: bool
val containsHat: bool
val startsWithSquareBracket: bool
val containsPipe: bool
new(startsWithAmp,
containsDoubleDollar,
containsQuestionMark,
containsHat,
startsWithSquareBracket,
containsPipe) =
{ startsWithAmp = startsWithAmp
containsDoubleDollar = containsDoubleDollar
containsQuestionMark = containsQuestionMark
containsHat = containsHat
startsWithSquareBracket = startsWithSquareBracket
containsPipe = containsPipe }
end
[<Sealed>]
type StringResourceManager() =
// TODO: Replace with arrays?
let strings = new ConcurrentDictionary<string, StringTokens>()
let ints = new ConcurrentDictionary<StringToken, string>()
let metadata = new ConcurrentDictionary<StringToken, StringMetadata>()
let mutable i = 0
// let mutable j = 0
let monitor = Object()
member x.InternIdentifierToken(s) =
// j <- j + 1
// eprintfn "%A" j
let mutable res = Unchecked.defaultof<_>
let ok = strings.TryGetValue(s, &res)
if ok then
res
else
lock monitor (fun () ->
let retry = strings.TryGetValue(s, &res)
if retry then
res
else
let ls = s.ToLower().Trim('"')
let quoted = s.StartsWith "\"" && s.EndsWith "\""
let lok = strings.TryGetValue(ls, &res)
if lok then
let stringID = i
i <- i + 1
let resn = StringTokens(res.lower, stringID, quoted)
ints.[stringID] <- s
metadata.[stringID] <- metadata.[res.lower]
strings.[s] <- resn
resn
else
let stringID = i
let lowID = i + 1
i <- i + 2
// eprintfn "%A" i
let res = StringTokens(lowID, stringID, quoted)
let resl = StringTokens(lowID, lowID, false)
let (startsWithAmp,
containsQuestionMark,
containsHat,
containsDoubleDollar,
startsWithSquareBracket,
containsPipe) =
if ls.Length > 0 then
let startsWithAmp = ls.[0] = '@'
let containsQuestionMark = ls.IndexOf('?') >= 0
let containsHat = ls.IndexOf('^') >= 0
let first = ls.IndexOf('$')
let last = ls.LastIndexOf('$')
let containsDoubleDollar = first >= 0 && first <> last
let startsWithSquareBracket = ls.[0] = '[' || ls.[0] = ']'
let containsPipe = ls.IndexOf('|') >= 0
// let quoted =
startsWithAmp,
containsQuestionMark,
containsHat,
containsDoubleDollar,
startsWithSquareBracket,
containsPipe
else
false, false, false, false, false, false
metadata.[lowID] <-
StringMetadata(
startsWithAmp,
containsDoubleDollar,
containsQuestionMark,
containsHat,
startsWithSquareBracket,
containsPipe
)
metadata.[stringID] <-
StringMetadata(
startsWithAmp,
containsDoubleDollar,
containsQuestionMark,
containsHat,
startsWithSquareBracket,
containsPipe
)
ints.[lowID] <- ls
ints.[stringID] <- s
strings.[ls] <- resl
strings.[s] <- res
res)
member x.GetStringForIDs(id: StringTokens) = ints.[id.normal]
member x.GetLowerStringForIDs(id: StringTokens) = ints.[id.lower]
member x.GetStringForID(id: StringToken) = ints.[id]
member x.GetMetadataForID(id: StringToken) = metadata.[id]
module StringResource =
let mutable stringManager = StringResourceManager()
type StringTokens with
member this.GetString() =
StringResource.stringManager.GetStringForIDs this
member this.GetMetadata() =
StringResource.stringManager.GetMetadataForID this.normal
module Utils2 =
type LowerStringSparseTrie() =
inherit
AbstractTrie<string, char, string>(
(fun s -> s.ToLower(CultureInfo.InvariantCulture)),
new SparseCharacterTrieNode<string>(null, Unchecked.defaultof<char>)
)
let idValueList = ResizeArray<StringTokens>()
override this.CreateRoot(key) =
new SparseCharacterTrieNode<string>(null, key)
member this.AddWithIDs(key, value) =
base.Add(key, value)
idValueList.Add(StringResource.stringManager.InternIdentifierToken value)
member _.IdValues = idValueList
member _.StringValues =
idValueList |> Seq.map (fun i -> StringResource.stringManager.GetStringForIDs i)
member _.Count = idValueList.Count
// type StringSet = Microsoft.FSharp.Collections.Tagged.Set<string, InsensitiveStringComparer>
type PrefixOptimisedStringSet = LowerStringSparseTrie
type LowerCaseStringSet(strings: string seq) =
let dictionary = HashSet<string>()
do
for e in strings do
dictionary.Add(e.ToLowerInvariant()) |> ignore
new() = LowerCaseStringSet(Seq.empty)
member this.Contains(x: string) =
dictionary.Contains(x.ToLowerInvariant())
let createStringSet items =
let newSet = PrefixOptimisedStringSet()
items |> Seq.iter (fun x -> newSet.AddWithIDs(x, x))
newSet