-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
93 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
namespace Propulsion.Tool | ||
|
||
open FSharp.Control | ||
open Propulsion.Feed | ||
open Propulsion.Internal | ||
|
||
/// <summary>Parses a file holding one JSON document per line, with items dumped from a Cosmos Container containing Equinox Items</summary>
/// <remarks>The recommended process is via the Equinox tool's eqx query mechanism <br/>
/// But any alternate way that yields the full JSON will also work <br/>
/// e.g. the cosmic tool at https://github.com/creyke/Cosmic <br/>
///   dotnet tool install -g cosmic <br/>
///   # then connect/select db per https://github.com/creyke/Cosmic#basic-usage <br/>
///   cosmic query 'select * from c order by c._ts' > file.out <br/>
/// </remarks>
type [<Sealed; AbstractClass>] CosmosDumpSource private () =

    /// <summary>Starts a single-pass feed source that walks <c>filePath</c> line by line, emitting one batch per accepted line.</summary>
    /// <param name="log">Logger handed to the checkpoint store and the feed source.</param>
    /// <param name="statsInterval">Stats interval handed to the feed source.</param>
    /// <param name="filePath">File to read (via <c>File.ReadLines</c>); also used as the source id.</param>
    /// <param name="skip">Lines whose 1-based index is below this value are not emitted.</param>
    /// <param name="parseFeedDoc">Maps the <c>JsonDocument</c> parsed from a line to the sequence of items for that line's batch.</param>
    /// <param name="sink">Sink handed to the feed source.</param>
    /// <param name="truncateTo">When supplied, at most this many lines are read from the file.</param>
    /// <remarks>Lines matching <c>^\s*#</c> are treated as comments and skipped.
    /// A failure while parsing a line is re-raised as an exception whose message carries the line's position and up to the first 200 characters of the line,
    /// with the original exception attached as the inner exception.</remarks>
    static member Start(log, statsInterval, filePath, skip, parseFeedDoc, sink, ?truncateTo) =
        let isNonCommentLine (line: string) = System.Text.RegularExpressions.Regex.IsMatch(line, "^\s*#") |> not
        let truncate = match truncateTo with Some count -> Seq.truncate count | None -> id
        // Add a trailing EOF sentinel (null) so checkpoint positions can be line numbers even when finished reading
        let lines = Seq.append (System.IO.File.ReadLines filePath |> truncate) (Seq.singleton null)
        let crawl _ _ _ = taskSeq {
            let mutable i = 0
            for line in lines do
                i <- i + 1 // i is the 1-based index of the current line
                let isEof = line = null
                // NOTE(review): with a 1-based index, `i >= skip` skips `skip - 1` lines - confirm that is the intended meaning of `skip`
                if isEof || (i >= skip && isNonCommentLine line) then
                    // NOTE(review): position is one past the current 1-based index (presumably "next line to read"); it is also what the error message reports - confirm
                    let lineNo = int64 i + 1L
                    try let items = if isEof then Array.empty
                                    else System.Text.Json.JsonDocument.Parse line |> parseFeedDoc |> Seq.toArray
                        struct (System.TimeSpan.Zero, ({ items = items; isTail = isEof; checkpoint = Position.parse lineNo }: Core.Batch<_>))
                    with e ->
                        // Bound the excerpt safely: Substring(0, 200) throws on lines shorter than 200 chars (and on the null EOF sentinel),
                        // which would replace the real parse failure with an unrelated exception
                        let excerpt =
                            if isEof then ""
                            elif line.Length > 200 then line.Substring(0, 200)
                            else line
                        raise <| exn($"File Parse error on L{lineNo}: '{excerpt}'", e) }
        let source =
            // Checkpoints are held in a volatile in-memory store, i.e. they are not persisted by this code
            let checkpointStore = Equinox.MemoryStore.VolatileStore()
            let checkpoints = ReaderCheckpoint.MemoryStore.create log ("consumerGroup", TimeSpan.minutes 1) checkpointStore
            Propulsion.Feed.Core.SinglePassFeedSource(log, statsInterval, SourceId.parse filePath, crawl, checkpoints, sink, string)
        // The file is exposed as a single tranche, "0"
        source.Start(fun _ct -> task { return [| TrancheId.parse "0" |] })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters