Skip to content

Commit 8d43020

Browse files
committed
feat: pull over HTTP, storage cache
1 parent 1a3549a commit 8d43020

337 files changed

Lines changed: 915 additions & 542254 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

Flowthru.slnx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
<Project Path="src/extensions/Flowthru.Extensions.Excel/Flowthru.Extensions.Excel.csproj" />
1717
<Project Path="src/extensions/Flowthru.Extensions.EFCore/Flowthru.Extensions.EFCore.csproj" />
1818
<Project Path="src/extensions/Flowthru.Extensions.GQL/Flowthru.Extensions.GQL.csproj" />
19+
<Project Path="src/extensions/Flowthru.Extensions.Http/Flowthru.Extensions.Http.csproj" />
1920
<Project Path="src/extensions/Flowthru.Extensions.MLNet/Flowthru.Extensions.MLNet.csproj" />
2021
<Project Path="src/extensions/Flowthru.Extensions.Parquet/Flowthru.Extensions.Parquet.csproj" />
2122
<Project Path="src/extensions/Flowthru.Extensions.Python/Flowthru.Extensions.Python.csproj" />
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
SOURCE_TYPE=repo
2+
SOURCE_ADDRESS=https://github.com/databricks/appkit.git
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
SOURCE_TYPE=repo
2+
SOURCE_ADDRESS=https://github.com/databricks/cli.git

examples/advanced/RetailDataSplitFlow/.gitignore

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Generated metadata
22
Metadata/
33

4+
# HTTP response cache (populated at runtime by CachedHttpStorageMedium)
5+
.http-cache/
6+
47
# Ignore all generated data artifacts
58
Data/**
69

@@ -9,8 +12,9 @@ Data/**
912
!Data/*/Schemas/
1013
!Data/_01_Raw/Datasets/
1114

12-
# Allow raw input dataset files
13-
!Data/_01_Raw/Datasets/*
15+
# Allow fixture files in the raw Datasets directory (JSON lookups, etc.)
16+
# Raw CSV data is fetched at runtime via HttpStorageMedium — do not commit large CSVs.
17+
!Data/_01_Raw/Datasets/*.json
1418

1519
# Allow all schema and catalog .cs files
1620
!Data/**/*.cs
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
using Flowthru.Core.Data;
2+
using Flowthru.Core.Data.Storage;
23

34
namespace RetailDataMultipipeline.Data;
45

56
public partial class CoreCatalog : CatalogAbstract
67
{
78
private readonly string _basePath;
9+
private readonly IStorageMediumResolver? _resolver;
810

9-
public CoreCatalog(string basePath)
11+
public CoreCatalog(string basePath, IStorageMediumResolver? resolver = null)
1012
{
1113
_basePath = basePath;
14+
_resolver = resolver;
1215
InitializeCatalogProperties();
1316
}
1417
}

examples/advanced/RetailDataSplitFlow/Data/_01_Raw/CoreCatalog.Raw.cs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,18 @@ namespace RetailDataMultipipeline.Data;
66
public partial class CoreCatalog
77
{
88
/// <summary>
9-
/// All daily retail transaction CSV files from the by-day directory, read as a
10-
/// single concatenated sequence. Read-only — immutable raw source data.
9+
/// Full online-retail dataset downloaded from the Spark: The Definitive Guide
10+
/// GitHub repository. The resolver routes this https:// URI through
11+
/// HttpStorageMedium at runtime; local file paths fall back to FileStorageMedium.
12+
/// Read-only — immutable raw source data.
1113
/// </summary>
1214
public IItem<IEnumerable<RetailTransactionSchema>> RetailTransactionsRaw =>
1315
CreateItem(
1416
() =>
15-
ItemFactory.Enumerable.CsvDirectory<RetailTransactionSchema>(
17+
ItemFactory.Enumerable.Csv<RetailTransactionSchema>(
1618
label: "RetailTransactionsRaw",
17-
directoryPath: $"{_basePath}/_01_Raw/Datasets"
19+
filePath: "https://raw.githubusercontent.com/databricks/Spark-The-Definitive-Guide/refs/heads/master/data/retail-data/all/online-retail-dataset.csv",
20+
resolver: _resolver
1821
)
1922
);
2023

examples/advanced/RetailDataSplitFlow/Data/_01_Raw/Datasets/2010-12-01.csv

Lines changed: 0 additions & 3109 deletions
This file was deleted.

examples/advanced/RetailDataSplitFlow/Data/_01_Raw/Datasets/2010-12-02.csv

Lines changed: 0 additions & 2110 deletions
This file was deleted.

examples/advanced/RetailDataSplitFlow/Data/_01_Raw/Datasets/2010-12-03.csv

Lines changed: 0 additions & 2203 deletions
This file was deleted.

examples/advanced/RetailDataSplitFlow/Data/_01_Raw/Datasets/2010-12-05.csv

Lines changed: 0 additions & 2726 deletions
This file was deleted.

0 commit comments

Comments
 (0)