diff --git a/.claude/ralph-loop.local.md b/.claude/ralph-loop.local.md
new file mode 100644
index 000000000..b05ed19e2
--- /dev/null
+++ b/.claude/ralph-loop.local.md
@@ -0,0 +1,9 @@
+---
+active: true
+iteration: 71
+max_iterations: 100
+completion_promise: null
+started_at: "2026-01-03T20:32:07Z"
+---
+
+Read docs/prompts/backend/data_context.md and implement it 100%
diff --git a/.secrets.baseline b/.secrets.baseline
index 37ccb5318..ed04a59ba 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -90,6 +90,10 @@
     {
       "path": "detect_secrets.filters.allowlist.is_line_allowlisted"
     },
+    {
+      "path": "detect_secrets.filters.common.is_baseline_file",
+      "filename": ".secrets.baseline"
+    },
     {
       "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
       "min_level": 2
@@ -161,6 +165,15 @@
         "line_number": 8
       }
     ],
+    "backend/src/dataing/adapters/datasource/document/mongodb.py": [
+      {
+        "type": "Basic Auth Credentials",
+        "filename": "backend/src/dataing/adapters/datasource/document/mongodb.py",
+        "hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684",
+        "is_verified": false,
+        "line_number": 47
+      }
+    ],
     "backend/src/dataing/demo/seed.py": [
       {
         "type": "Secret Keyword",
@@ -179,9 +192,9 @@
       {
         "type": "Basic Auth Credentials",
         "filename": "backend/src/dataing/demo/seed.py",
-        "hashed_secret": "2e7112b932e45681d3d8a00c0ab49fb5eba3245e",
+        "hashed_secret": "c4e4e4239f4120bfc6964d9bb2e7cf117ee98a29",
         "is_verified": false,
-        "line_number": 160
+        "line_number": 161
       }
     ],
     "backend/src/dataing/entrypoints/api/deps.py": [
@@ -190,57 +203,5392 @@
         "filename": "backend/src/dataing/entrypoints/api/deps.py",
         "hashed_secret": "bd76a4bb28ee841a8bb26bc5a893184a1d9bbcc7",
         "is_verified": false,
-        "line_number": 131
+        "line_number": 150
       },
       {
         "type": "Secret Keyword",
         "filename": "backend/src/dataing/entrypoints/api/deps.py",
         "hashed_secret": "3db759e75c1e2f49b885646f393d3d7fcbca434d",
         "is_verified": false,
-        "line_number": 132
+        "line_number": 151
       }
     ],
-    "demo/docker-compose.demo.yml": [
+    "dashboard/e2e/fixtures/api-responses.har": [
       {
-        "type": "Secret Keyword",
-        "filename": "demo/docker-compose.demo.yml",
-        "hashed_secret": "2e7112b932e45681d3d8a00c0ab49fb5eba3245e",
+        "type": "Base64 High Entropy String",
+        "filename": "dashboard/e2e/fixtures/api-responses.har",
+        "hashed_secret": "750e0dbba1f9f3e789ccfe21fd237f642676f8f8",
         "is_verified": false,
-        "line_number": 9
+        "line_number": 136
       },
       {
-        "type": "Basic Auth Credentials",
-        "filename": "demo/docker-compose.demo.yml",
-        "hashed_secret": "2e7112b932e45681d3d8a00c0ab49fb5eba3245e",
+        "type": "Base64 High Entropy String",
+        "filename": "dashboard/e2e/fixtures/api-responses.har",
+        "hashed_secret": "ba9680a38564ed3b58220597e394cd8a16e8a208",
         "is_verified": false,
-        "line_number": 27
-      }
-    ],
-    "docs/prompts/demo_prompt_2.md": [
+        "line_number": 199
+      },
       {
-        "type": "Basic Auth Credentials",
-        "filename": "docs/prompts/demo_prompt_2.md",
-        "hashed_secret": "2e7112b932e45681d3d8a00c0ab49fb5eba3245e",
+        "type": "Base64 High Entropy String",
+        "filename": "dashboard/e2e/fixtures/api-responses.har",
+        "hashed_secret": "493d61a1c08d2ec667a9092fad0d1cd25ca9f172",
         "is_verified": false,
-        "line_number": 426
+        "line_number": 262
+      },
+      {
+        "type": "Base64 High Entropy String",
+        "filename": "dashboard/e2e/fixtures/api-responses.har",
+        "hashed_secret": "d6b165a1dbe9dc56d19cb010eb33fe2de5843c02",
+        "is_verified": false,
+        "line_number": 325
+      },
+      {
+        "type": "Base64 High Entropy String",
+        "filename": "dashboard/e2e/fixtures/api-responses.har",
+
"hashed_secret": "7932a58b21e23eae5a963f306f505f5c405ae1aa", + "is_verified": false, + "line_number": 388 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/e2e/fixtures/api-responses.har", + "hashed_secret": "2cd18530eaae72e88d0b794e47b31662acf1ced3", + "is_verified": false, + "line_number": 1773 } ], - "justfile": [ + "dashboard/src/lib/api/audit.ts": [ { - "type": "Basic Auth Credentials", - "filename": "justfile", - "hashed_secret": "2e7112b932e45681d3d8a00c0ab49fb5eba3245e", + "type": "Secret Keyword", + "filename": "dashboard/src/lib/api/audit.ts", + "hashed_secret": "f18626679d250b75841bbc5c0a1c3f83dc0e8856", "is_verified": false, - "line_number": 163 + "line_number": 106 + }, + { + "type": "Secret Keyword", + "filename": "dashboard/src/lib/api/audit.ts", + "hashed_secret": "ab5f5a2ae5fc9b74a0a925ab4f74080921a29b43", + "is_verified": false, + "line_number": 107 + }, + { + "type": "Secret Keyword", + "filename": "dashboard/src/lib/api/audit.ts", + "hashed_secret": "665b1e3851eefefa3fb878654292f16597d25155", + "is_verified": false, + "line_number": 119 } ], - "tests/fixtures/data_sources.py": [ + "dashboard/tsconfig.tsbuildinfo": [ { - "type": "Secret Keyword", - "filename": "tests/fixtures/data_sources.py", - "hashed_secret": "a5aa8c108715d08777130833538183a80e6aad92", + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "008786d51269e6770a3b084951a98a8704d22181", "is_verified": false, - "line_number": 89 + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "00e0d2df705fe352b10583833e4b3616c9ad337a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "01111510342c9990efad901b1219acfd7799f39a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "012dec6d159115ce215c83693b156491cf372ddc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0130bc2eacd9ab85eebdfef0a188e399ca29cb88", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "01a5dd1cdf7578d466e679ccb2552336d5362a4a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "02989ac76a74e47b5cc6f084719fb31715b98243", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "031175551baee74ab00018c4cddc6208ab252537", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0334190c7f27ec188d2af2085033db84d2dd4c9e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "035616abefa81985df899f11d5da64e1241d4195", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "03ec2ace19f80bf0cbe8bb9a5534a6fdcdb7c27e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High 
Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "05eb0c09413f37519527954deda49f2b5541ed11", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "05fec4d3248fdc835778c6327df38df37f9e035d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "062677dc861d9bac26173a2dbb638ddc85b4e6b6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "068905abbf11bc9c6c5bdf7765b863a3eb7df7dd", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0801593818393058af2f41ec09bc878ef0dd8e53", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "081092d963a8429c6ff388b812c93d90c008164e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "082cf8ab4d1ef92ce62881be42da8e9663874d00", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "08330fce65f16fb9269c3ea85cad91c471cef539", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0888ee9b5ffeb347b8d8343a5f61133184e191b3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "089ffb25b39b924475333d0af21a0004d914fd3b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0a4e65b6873215ad1c18c466725433bc7053e0ae", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0bdd4f1cfc267a0d3ce89944de66156b657bf592", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0be16c83f218758c0d8ce801b9fbd42bf79fb660", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0be1cf562133984f177b120b1b97454549d7166a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0c0967230d409b43917829f23d0bcde8bff3e001", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0c30508069468ae9349c187eb766fad5d18875ac", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0c34a65d26e8e47c173b466fb470e2b01985c5cd", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0c6ecf445fe47f1516da2decd3d1aa4e0b6b63c0", + "is_verified": false, + 
"line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0c732b91d70f72300279a6cef6a96bf6cd44f64c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0d161b7e852eec738eaf368a721ef1a6efaa5575", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0d1f882ab50ceb114fcd2c945ab0fa549d1066f3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0e0ee676ba803f3f61dc24c961e8c940db44fd07", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0e400236d3d6749cafe7fa794df6931b59fe2d17", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0e8e8d61db732d59b5b0dafbb600943f4eec0c71", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0ebfe684b6b6eed17d99da1cd60f909974ac5b59", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0ed96c492f03f8df3698f60978f19b31fbe979f7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0f10c535e99824c201622e5b1c77035f0951fde3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "0f537dd2d1efa62ac03d98c7ccefecc524d0cae4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1046fa55ffcb203be9ffed674bd226a196f70c19", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "106ec9de6d3f7984ffca38a7cdbcf009e9830bf3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1073f645c57ebc20e6bf5cc62db8bc3513a05a60", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "10b835a350129d3cc7ddf2db8512b582eb45916d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "10e9a79456cbf89cfe2dab69a6c59d9f74fe4b4e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "10ecb5307658d54646acec79fcee4700810f044f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "10fb7bcf30d8a9fe2f6eddda271a4815a520f32a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": 
"11b642549357c693576c29a2fc721654c7bbb4e7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "11b94f3ac698c55074fe4f514eb89ec5c411447f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1207eca342e7e1dac5ed04e1b54b7e4f1b2cdbb4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "12ce61fcf1039dc87d12027f87467e1bebc32d29", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "130f2e4a54d4e6f28d88cc2aa9f3ff75ec291aa0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "13d1910d2bbdf36e79f76b36dc44bf1746f9c59f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "14b6993b8203a7959fec3e02533226e167363bfc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "14cd2421400282c7de82e26433cb75d271e84997", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "14e25856fdb75bf38c4a247ecf504e24ba2e70ad", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1503f932fcf7e0fbdb4ace84d79623cad3760b40", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1515d38f62112d0e6a7a1a2c0667d8093ccc31b7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "15236d809d050cfeb8154ba575c71b8870de759d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "154db74aa59bcdfc5d7a0ca9b9178140d33e9863", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "15626028f7d4124eeea4dd04a8ab8aa97b77ce41", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "15850de7bb2c952c00c3f92050a2c227cd504127", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "164d540dddeb5e0bad7bc775f14937b5d4987a8a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1677ae7077a2701ad4623d76b93ac5ac837d61ff", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "16b17681dc1388f744899c9e13065b8a5e8a5b0d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": 
"dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "170724c29b2a23ffc1ff789c29f28e2a780eecb9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "17898335158b1b77f727c5e3ee58164e1ec91a9a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "17a548a953954eda6ecea290b90e073f5f02f5db", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "186c550eb602815828da6a967051a4254319a760", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "18a8622d86e5c71982c99155531898e9cc99865a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "18e0510c235115291ca135d93234a09b6914ea37", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "18ef35ce7c46d293810e92fb00ef1f615e43a6ef", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "18fd191db7036307a0cce4392f56281590a5a948", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "19252ac67d2fcc7b9845853a7f44f48c4e05f9e0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1935eefa553ffec37af16d95c5567f87d03980eb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1938c74bf6dcb6ecb39a3e7d46860c4ecc16b166", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1a22a7eba88a63f62c11e934c994acffee552f0a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1a8247ffb711e55d196fc180115f8eb7952e04ac", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1a9d6825f687ee03d60d72c4d9db8c2a45e7057b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1b9a9def21bb17f5fd8ecc640172726db3a01655", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1c0450e8593505ea2f82066f59ac72c2f93e977d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1c2d38546389f722171475f81048ffc2d554fbfa", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1c32674b6e225de7917e987d6ef1297a00294aa2", + "is_verified": false, + "line_number": 1 + }, + { + 
"type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1c83baa721fbb8647669647a20af101c79f8397d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1d3cdcd58729fa8af83275b376bea51195f5900d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1d70a543fe8c8fac9c8bdee6edda054ce44b2248", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1d77ef4909e2781ba4bf16ae358321b2a2159ac4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1d8d479c3ca13e389e3843b0fee5519dc4efb56c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1e0574c5a284f112999d777d5f4cbe00146f3ef4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1e1306cc982c8a25e0ea8b38b3854b6ec6e3a8dd", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1f36d7acff55a3609524434aa55e1c1b71297603", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1f500f164e5d8b0785a7566443a85a401f79459e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1f5d1b982c24f4b4ed56f87151b81f146b9994b1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1f9afd7f6d37af679d9a333186c7083fd2ea56f2", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "1fe8ed9607497902dde684f91074d3a4335522da", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2052728a2cbacd318d3739cf4786c3e004bbb19b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "206137ba0330b5c38ab7408cf6517679e9dfa735", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2065514afba5e936388748f788ed1c56f40ba444", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "20e32e89be377902c50b20eb7bfabbedd8222a51", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "21be5abf1cd575e11c0ad7900b9682a9ee142781", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "21c995b10b4391f0056f13d138c95499d36c200f", + 
"is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "224f5c6730e6306a91f24989a5291d807e2e873d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "227747ea283cfe2fbcc4cde2b8ecda5623810c3c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "227dae9386e156a2dcaf01fc079b1c4b4d2f3046", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "23669375475cd157cbad441af85f33b62583e774", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "239351f2ad56facf5b057bd0f8b227c71831df74", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "23aae19c4bfc1b9362f95d67cfb1e0c1d8ad55ee", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "241a348cf56790b79caee71f86b9bdddf7e6af5f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "244f421f896bdcdd2784dccf4eaf7c8dfd5189b5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "248d7e5e70ae0dcd7db71f1e7115ab827098c8ae", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "24fad32b882e2d51b621750a3bc81eaf4714b82f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "255a4a5eef8a355985d01a183ccecc8123b7e7d9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "25ebf2a954cf20a9f7c251be136639764720d3a5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "25f23d796004652e81c2872d845a5a42dbf777d6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "25f98fe940151afdc27e36f2a1e8fac081d1f8b1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "26302c2955ca12fa562f0b2427275eecf743ee7f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2634d0e32379b41440283e266def8351570623d6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "26cc3f21d635b2090c2cd5990077eb3e11217e06", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": 
"27a5aa4579eeaccdf3cb44e88968386ee8455100", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "28b4e687d30f0ad4a5b47c63cf2f8665908288cf", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "28f853183a709b81b10cc1bc594bfea698311a44", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "299352a88635df69c6cfe4a8d3f85b258f78a9f3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "29db7d92f7778aff88edccaa75d0d3a7493f8510", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "29f8d684618e0323a2ca61b6266e552d9fb5063a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2a183578ab209b24d407114d9765a67e33355473", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2a50d06bc59ff195fe1eb98ff43b40f0c4e830fa", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2a846e6bca9c53b2f029b1f5c59930ab2b6dabc4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2ab6326a8501ee7cb605fb0b7573ff78e3225a1b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2b6137e93654336b92c0dcc5e66d6a64e5275ffd", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2b671cf14c5e73212cb3327efd6904f2e314a17b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2b76d0af5b96a2605ec3efd1ea19c2129018e9b4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2c1ff45eaa0574ca5bc84e420a7e28a29e6f6ab5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2c7ae6db79fbb1b45ca4a649d1076db053cb9a46", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2cbb10030f40ba9b6098f3e56fb9d8628afe65b0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2cbbc48fd7941eb08f2cdb96c87bf7d64f1b653f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2d5501f8816dc7b6052088424d1e16c0cd076274", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": 
"dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2d71f1dbb70689ca16d70ec686092f19ec09ff49", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2dc1dbad450274b7c783f054707451da97ca8e6c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2dca168ae0800e7a7e5d37188ad09c151b95edc4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2dd9193eb5f8b1e213f686091bffc8657e1ba79c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2df4c8e9f5fd3ad7a084e478ce4a33e2d508b9de", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2e3169f3d7d538a6fbe3ce594b58972d7693fb9c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2e772397384f565f1c644d39639ca578d9e70f55", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2eccd2f6598592f0229ca444fe0661d70b7203fe", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2ef768475126dce371bf67eea529b4c650c96f45", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2f3d025b594c84395cf180be4a227a1816bc174a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "2fabb17c0dd52acc812c29a15f71d2f775df10ec", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "306b623d3c702f6e5ba80fa16efb23b2a81f8faa", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "30a66a89e12b2b762d9b77b9cfdd22d727146b5e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "313951e1876b0431b51bfac37be6a2aa2b86cb3b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "318733f67c986e0ee34bf54d5d1e3ee3d2b0b8a0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "319bb2e410f043242d74386f23beeb5c4a256cf9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "31c838c6cd0c1c4f41900dce41b3caae9e3cc6da", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "31ef22bb3f3d0246d31c470dbc3838501eaeb283", + "is_verified": false, + "line_number": 1 + }, + { + 
"type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "32114effb588416132a9968f2073a863d01aedae", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "323592715b0bc7790f7fbf5f298206290cba6519", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3263ee4f41df007aba2f85f641e6788cb44c3d94", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "329a71b39deb127b1e78a2d3b27507e07dd5c2a0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "32bfc182f02f833eb64f927d0c7a16f3685c3d64", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "342d67f2341f84514b2a52a50912b53ffee895f4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "34ba85d37be851dc97fef872591138e7ee87b634", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "351c9e5b3c874256deedb068d73747ade7f1bf84", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "35876825fca1406769fdecb6e709d812dca88ccd", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "35a288a55954a501171dfc950780bba7e48e9400", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "35b67656dc5c59a8defc53771b013464af6cd63e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "35e0c3a3e75b47709854e3a9643f927d80aff6c1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "35f0bdf28f9c2f850a05f65c115bbc90ff281ee1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "366b1630255cd4be529a9ccc796dee8ab08bb120", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "367e1d2904a0ce03286993e5ef60750705ca0ce4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "369ba60228550db794497001cd24435c818cd9c7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "36c390cadc3419bb29034380184049670d0585e6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "36c5bc182ae7b5acbf046b898f10b4a4a164b3ac", + 
"is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "36ec52b61ef4d6e7e80ccb529af1e48fa2856567", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "379ea223450027e119e42547327aad6bd1a57d16", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "37a241fbc74d3713de28a58e26690bd8bfe52925", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "37b4dddf1bc074ef5bfe4969a1b52559fb7f350c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "386cfc7bf7337a44f3343fa6263838399c768920", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "38a3a6b63b69498d48a97f2ec6c73377a5e6bb8f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3908e6db72766c3b5c9e3876dc6ff62945a9ede0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "392004aa43030d09fc2fa98d737c823dfecf3032", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "39267dd820e06a0e570c9707ac5e57e48944b7fb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "39b0d6f36ba46f302c460d6e9e903a68bba0d5c5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "39bcbf8f55181df69fd77bc1ebc60f954a66ddd3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3a2d054b7c87e550db11ebb89c400ad4c7e864ef", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3a6b88d8970139305dd96f71014f24fd5c9428c1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3b0d70dbba295078f6532b7694a0da9f54736a7a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3b8b5c2e9231cf57f37affb48a6ec4fcd0035652", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3bbed97526726967ed51c69fe14833759f0c0427", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3c130015bea047f9e1dadf22fd33bec79e78b3fe", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": 
"3ce0b3b16947fc090c24b5c8921a70ccb6606579", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3ce0fab71cdd77a3fc270beed6a4fba2f4bc3def", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3ce1da3dd15013c87f4b3fb891fc83ef149a72d1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3d0155f595210b2f25c3de654d22c53f42c8edb2", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3da6812c9ba6be17565a0e165e64de0639f94249", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3dcc1a360c419784dffa5ad767651e6883d53089", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3e92abc0a1736a025642ee8e96ff8652273e0695", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3eb2cd52f86785d8443f4ade7bc50753647df755", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3f8346d487759e37d35cda934661393c52c9252b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3f8999e19c76199ba244277eaff9464ca413c5af", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3f9148d29730ee6e9fa15a387efca359a1aeff8f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "3fffb186d7d69fe4b9155638f30f68364abe61c7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "404326b3658bcab35b8bfc99f0b36be0e98dfda3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4086183ec6ee3d0fbbe9228683ec34b8046d433b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "415dba92b5e970d421f341482a878dd5ea3f2856", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "417b90d84bc584d28b5ed729745f55787ed0003b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "41ffa300a9c42f3bfcdfc933197eeb82d878e9d5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "423c84c3b4eaaf41acc2cc42c5d6d5663d5663dc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": 
"dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "431148625045067d7c9870515be36be1f763a2aa", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "441ce9e71794d4731c88a613ebf0af0f0e3aae96", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "44514bd3ae0997d440ed06712ebf99d2082ce6d9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4459f9aed33f32cedb4c1c9cb6a6dc7d592683b0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "44abdf938491dc8effc03de9982196ebef3b0776", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "45e904ac1908040f15878d9ff895b755cde6e5c4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "464ed7771192584fa42f991d7b04bfdd1fa81c01", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "46cb6baaafbb707179ceb4bca386fd5e81d47f07", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "482ee75f0d0690cc32ce4ffb7ab2cc52e36e5497", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "485c808b07f8a7154b97d9e767f22c90106e8dfb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "48975aeb49d934585a3a4d8378e806285da62e90", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "48ad3a567997008a086bcce1933233d341b970ca", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "48c681f09f023c4c04bec70efebc2a53b05fcecb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "490eaa4144cef5b4b2b76d8d3442a51c523e4772", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "49252d58dc0f4a2468661ea60a507cfc004a01fc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "49376641d8f8b6c4c92717342d2279f771bbe46d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "496211c2c54f55a8d17dfb5a1a216c252f434083", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "496e0ad02b8544f0f7c5a45c3c22b1ede1d3b4e4", + "is_verified": false, + "line_number": 1 + }, + { + 
"type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4989a3fa1d99fbca0e21a0f858303fbc26ee3166", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "49d0dab757f170233ea2b89129a397e4c83a1b43", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4a0703e6079ebc74b3c065ba4668e43668cec8ba", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4a885ce636b64cf715d29e9dbc809decbf590e3e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4ab72253035072acf46a73f844944217d6139495", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4ad83423ff9894d5c6855388542f3152c86dc93e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4b2f445477448c69f7c0541aa99e4e0374e1ca31", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4b6f9921e142c83498e2b8b5316de7abb29a12be", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4c4c55c5e811a5004bbdb4f13b2f4b07c2ca98ab", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4cb6f787a8c4cbcba89b3e9bbed5b5a0eb351a63", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4cf0431559cb334ba269d76b84e3f11f42c665b0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4cf0f0dab0ecadad7b0d4b50b1cf341ea3950b55", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4d4bcb6a63a688c46d4db3b9cf08807aee07cfec", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4d54b54d56384f1d2cd09e2037133765db9f983b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4d95480c084a249dff097ad2179784c1d9d10490", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4e050c5f0d69745252f1e585ca6e6a1dde2d4a2a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4e0e0985a6e5d22a4fd0d2449567236321fc2ffa", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4e57f925c66984213733f0c942ea807d147828b7", + 
"is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4ea5f0f9a84a0a4d4a60537967c94cb1098e72f0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4f4093b957a72f1783b105072d9b029cd440ff1b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4fac1dd5b62c9e2e235ea2eb5ff554adc39a98df", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "4fcaedef9cad9efafe9003954eb407fb1e8d7f67", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "50114fecd96cb307e097e14b6f017ca86297d14c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "50a82fe3bc00de1275947459b25881a6241f380d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "51e8a0c49690506d428d4a0a447fb44c241be593", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "524399ff64170664f9cbfcfe7ae5ce78142c1e93", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "528c8d2e2a52398ef6fe3cb5cc72419aecbad305", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5401d1b1c2fd4becbe3a950cd1da034acd1d05c4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "541e1479c0ad0c83fcef7e04610dcc3db0e60a3a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "543d6a44b98a3bbc82333e8413583591170793fc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5457ecb87cb55affee3c4645d078c995d93f5abc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "55614f00dbc8c9c7383dfa3434d83943278f1c9b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5594bed0d8729257e4ac0f636c02d8e5d96c15ca", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "55eea634c3bd901d940e1d29e8c4eb532b0528a4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "55f16abf485c8c9a73d48064f100307e9fde3c25", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": 
"560c08e83124711e266aa48e1a010bfeaad3eaa2", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "56122b45a43f65a1ab405e69adca1857eb73836d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "565bd30e321a6fc5b164a3ae86fca3a7f24dab7b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "566832357040ecee11813d3c1274535b410455f5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "56d7f21f358d20a143a59053f6e203d17f53b882", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "575967f2d14e001966478833e46dfffab26adda8", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "57d7a621bfd993eb5ff65f6ae850130e86cd26e5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "57f4e93069449fa68256e04c8fedcd55142877e9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "585017759ea68ba0d110c46e3b5a5b63a8fc2fcc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "586cbb744926ea1795c1f2ccc2112dca368a6be9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "591d126686ad4fa5ed1149a79e9b8ba06868e693", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "59a99308aea13cf83c9e0255d6d68c6648b492bd", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "59e3f43427b4d6200428b1feae83b45b62c94a3d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5a2da23f6d7b269a26c11d5845472202f708091b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5a78fcca3108580f250ae1a05de1028551d96a0d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5b6caaac7a60f69eb3a7fc372e63c0fc6374868d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5ba64519cdbcc13f2d406a597661083eebc63454", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5babd86c3b7e6930bdfe48cc4c85b88d5051655e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": 
"dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5bda57ba376918485fb81a0e4d79d8ed634371a5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5c2369b2887505ef4952f88b5624fe055cfefa69", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5c5c7e63379335febdf8ba62ef71e90a916328ed", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5c7046f2441627d113410d65ee05dade1d72c321", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5d21d84987ae5946459fd366a725eb0281b0cc64", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5d2b82a08d0d7e97c7e521fde4c1834ed57972bb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5d30bdb238ee0e7d801855497f8d762b3d9cb103", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5d86b6c99a6e73a0e3a4b987619a5002f7b92bc0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5da7ad4814ecf1d3a21091ed9e95197cfcc27f45", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5dade386e849f79960bfd09e40e7bcf74e3ee43f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5deb8be46c2677e0022490a9268d1b83abf35181", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5dfb73c1451e7cdc252c07a42b8d9367b2ecb468", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5e0b619e5837832071e4581422b2738b349da313", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5e1dadfb1532d22084ec6e3834ad65e86ddbc147", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5e292b0b7886192220679c129fae9c7a388a0859", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5e448817f1fc1b90a199bf7350dc22e9522bd2d6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5e564629faa930150fba291e24f668432ecaf4c2", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5e91bb97a3d0cd5166b6d85418753ff6ece2f717", + "is_verified": false, + "line_number": 1 + }, + { + 
"type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5ef692945d2d36d20da15aadb19a235d500761bc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "5f0e8f919810d6e75b39d125f983286d735ca3d1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6043b749f5bf6e24a68bd5067848e4f01b0ed3ca", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "605ec8599a16f074652216e1c2518a746635d8d9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6094c74e7692bd83ca06d7571fe5202e59ce1afe", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "60eb3fe77ea606ae008d45f134ae8a7d50f93786", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6101ab0fd828294f40c312f52379c082593cd6eb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "61cf0131e31dfce7c268be0d779c4fde4ac1d79f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "62416b2caed3ca9fc924eb52823f34be0b242eef", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "625332d8a8440c20a45bb42f5cd8ba4c93154d99", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "630c1e1e843b840e480de5d66ebbe5d8d302b19f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "63202e93675527eb2f3963ab3d5ffe19d2cb4751", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6335d35bbfbe3f462a65b15a083713b696f9a071", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "639e5e3328c9e40e62f91362ea22556cbd917093", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "63bc71416c35945e5fa9c08d25412a9d5fb0a2aa", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "63e4e1fdc29bfb7d8043d38aa2eeda31ee5de6b8", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "647667dbc9835c651129a67f182c26021277bd80", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "647b0810cea519b71627ede4dadce13afa85cf21", + 
"is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "659098faa2302d93889fd8f4e8b2e274d17997ea", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "66380f157adeddea96d896aacd0816d4d164795e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6677fe3f964964e6436e92c71da7b0988709d0ca", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "66e6026eefe4faa3640ee2f7400604b1b30230c9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "677c914ff8e1c79fd74eb4c3e4a1d26baabc7c20", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "67c8200708d1ce600894cf933301d791123aab40", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "680b2bd7e3344ac96c1b5a8817a85e5f8bbfcbaa", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "68b985a7a2e920e9c1f204ca23827e3e0bef05cf", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6937cb64da65b9090b9d3edf330623d170011e56", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "694e3f431f39f558aaa064dcf5b5ef876b32615e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "69b6446b27973495bd9e2540ab93af1ebada657b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "69fa72d549a72094af308b21bb519b1ea3255409", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6a026024c9bbdc2e4020f6aa11aa55ea2010d3c6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6a46d53d2bd4b9e9f87f31c51527d9a4d1947311", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6aef44a677cdb1ddafc7ac0f89d45f3801f1686e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6afafa443ae80fead6898772effbd6487e4ca7dc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6bf952d9be52dc27dd20cf20c457b35e8a1f4b2a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": 
"6c9909a2b6ebbc048daa0590b9c3efec225b46ad", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6c9cd054aa88ab446edff24fc8b8d2e20bb6485b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6cd2b8bb0e7510f7b141f1da0de1b83310d45c00", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6e1957ce64792b64f5c4f54e52d28df37ed83d55", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6eafa49327b1e80f2056c4c9c626063fa9656404", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6f1fd23eb76bf6554bb610e3e4d738e5c5d9f994", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6f5e508e18de2ad3251d2b69076dcb9fdd0e627a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6f7cb4cf548fbdc0ff0830d9e0b3a2e1dc9dbd5c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "6fa347ba35a6f5d36fa54ad5551e968d099d3d73", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "703ea8b4b40754ea0011494bf860fd467cc3282d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "70a28d4537f3443e28c2fff8cae5dfa8e48beb9e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7129d74158796189ce55f51f15cfe1e3522d427f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "71bd7ba9efdccfb06e3c8d2fd4828ee8b83c75d7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "71e632b5164ee401e185d1fc2bc1f90b51ff32c9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "72ef6b7e1aab09787bf1a63af86fa541df117096", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "731fad87dec9341e7c1eb783fca398fb90e59a51", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "73b9c1e63f6ed1649e1d4d6f6cb09c387e1e0713", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "746085bd0ebec5d2d6d434ed5b183ce33de83319", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": 
"dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "74613bf27607d6219c6648c8b76509cb7fca3a83", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "74638cec7dbb4daa3710eb050dde6288a1585771", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "747b4d1e54e4447498d83f92e80e5ad4bb597325", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "74eee1fc4ee1a24c9940a0fcca20090bd84980fb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "75fd6ce990e3746134fab16aff0c928635afb707", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7633df9d5d4646ace50db491eeb5d5a14dc50d47", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "76d54d5c054ef47c5af4a3b0f4e78d2fed63ffd6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "76e163e7d16e72e9f6f8106f37583f88c07f1820", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "77f82c499e88b7cf448ffdf6aff1b31b584507e1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "78cad2132a92c3248ad4fdfc9c68074fb65d4a0b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "797b8bc1e434ce2c8983a32c889e14b9fe3ceffd", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "798a0570873c42baf918eae36172f74252566f11", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7a1336e6927c5318ea6f4908a83f4b25a3933fea", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7a18d1c0af5d55da57130614bb2fc2064ec5a596", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7adc78931f277d76f7853acc5b2f242b7eef724e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7b03c4d5858952a214e8b73ec50ac394f15129b6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7b0ad17c4fead322de943003b50e79811e42d41d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7bd97a3fba768a13efd5c1a603a3f9bd7b4e29f2", + "is_verified": false, + "line_number": 1 + }, + { + 
"type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7be1bb503875a1596a48d031cfd05a1450e70390", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7c9f18afb0fbe74073dffdce613fd5f69909b164", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7cc8690604a8341d089f607532f9b9ad14bb4376", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7cd9d6f6a527812d6a8e74265b95e9ba9ee01b4c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7ce2382a876526f0147b9d0de54360b4dbd8dae8", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7d40165ee6657cbac529818630ba2f4733200830", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7d594c64f70c18d4cfdc969410ef57a53993f8ae", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7d9ffb1550c26ffc77d81ab75169c02d593307e0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7e029f815984c9781d97a3cbdf7169727ef0c603", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7ef1dd21a38fcd7ae009d2e746642ae34aa97824", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7ef8c0bb23590a1a46273cfbcc28df32677e2fc2", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7efc43f9882c273215e18d95d7528dacdee26530", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7f607b391a3ce0da177d550a98fc3084bf4253a7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7f8455045f3b41c6c9bddc5844bc82c02f06edbf", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7fc6eb6618a3c6747a08208565f284a95705fbb0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7fd5e75d0f853e52c6866dd97b8949e9eadb32e6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "7ff514f854487b2aefd9e7ba2c41cfb4f65c31e3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8026697ce2971b80357fbe2bf158564711d6dd25", + 
"is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8043ae14830707fa51a7b2f18c5c3ed88d5871fd", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "805271c7f889f816a5729fb95b6ecae9d030781b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "80aad77f2da34092802da7d4e0201b335f32b874", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "80e0f8cb9f179214cab5481a0ca3c84676a1d2b7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "80e3b40fe09c62ca6a90a25c488011039bed53c0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "81590c0227a0c62fd1acf033b99787910cbf84f6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "81ab73a12f077d426c1326e17e88e8eb96af266c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8244d7ba4b33ec98afd7f16297a8cad1b2ce5de6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8258b9160f495e2d125fadd087f497ae4caf2138", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "82ba304538986d2a94618f664d726859371c4260", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "83b9f012c791f031e2bda2c59afc33494cfdf796", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "83d4ec96a61cece5c169d7db8aa464861b323330", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "83f7e5cd3b5e4cef1116595c880f25eb16657c1e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8429d7b02908c52cb7ce45a24976f568cc3d88bc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "849555321a351bab3b6518fea713548a5a67d997", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "849e3c627de4070c857b591ad29de7c79c9f6644", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "85e56d281535c09e7f8717c254d0a35b9d326417", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": 
"85f4aefe1cb7196dfc549654519eab95d859b0f0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "861b21fd304f7da4ea44f95332bad2dc8d8a986b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "866a165552e0124673eddff0aaba898f225f6b22", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "86b1423368d749a0d912320f673bae63f7b48372", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "86d7d08ab0841e81bbad9da57d2f01b3739c99ba", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "86e49a1846eb09bfdd5a81112246bfb7addade31", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "86f20cf17c972e0f9680bc18109427405eaca4ca", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8724e87587992dba7ac1e943988b847855297a56", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8734f13b780f46cccf92ba16ba9467454ccc8ff4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8814b8dca78fcac8f911a26101a2d57da781ad4c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "881b9fab7a4d745538232e151604b59e1d6d0807", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8871811d4ca669de0e1341e08139d0a71994e37e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "88d08fc8b6ecbe48ad344ffb0fb3122de40e164e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "893c551c545dcc379d83a38c9b803378bd470a7e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "89762c6dcce6809362c9fcf92caf6d80ecc171b2", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8981f07296bcabbe227608188bae4629255f70cb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8a304163f96e49f75478ef5cb44327db9e78d620", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8a35a8c4f5092531921b3ad7a5f2ab8cf1d2eb01", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": 
"dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8a76e45701a1aee9cf4825890b059cc681765184", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8add3d86794742477aefc0283f8775b6b4265b53", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8bf14d41594e65cf1fc3b7572730ee309d17f1ec", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8bf738e530f4efe0aed7619f1a390c69417ddca9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8c5b57109d2715a44726117b23971b0bef582561", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8c676ea1616abd3915232d527bc0f60c4255a80a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8cdff4cc8e48bb186c6efb5d22bf69fc3eafc4a9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8d664319c27f9d8bd6195f7e07925522be39ae3d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8da23adbf66d147df0cdb4a7e1f9362b43e513f1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8db73634d226108cf5a0b27d370b123100ecf301", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8e13f5ada948719d7a44e1e941a38cbdb6b3b58d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8e7230e2945df6274e6845979381c15176937ee7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8e92661431a8b0b23f184719119fe54b3f6f2680", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8eef53f39dee6b7fc42e270c5a95418f85485972", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8f46926fcf1645dee872fdf6199a4d9292001d18", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8f4957d6ad9ad3db0961b90f5689fe9e34d9a77c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8f5eaff483e69be92d792f3c3f1e3756c33b8177", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8f8ab8e62904cef07118ca9705adc777722bb800", + "is_verified": false, + "line_number": 1 + }, + { + 
"type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8ff2fb78e9eeecec6f51ffa45912bcdbb14da4aa", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "8ff5480c56f80c6877ad1560eddc96c8a73bbcd4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "906905e565e1f1597e35f3b9f2332ac4160b95c3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9075f76d042d395af83d4db44f67a7abe970be23", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "910d26082f66da805d6541fa3bbb8d72fe69fffa", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "91a419e8720fcd44b5344742290b3fceca015ccc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "92083b25f04b784be4d53d0280b862f3c1d8dad3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "930f55341e6c8344155a61014dc21af903378b06", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "937178cb108f01b25bada79e5516ac7e253e1534", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "939b85c40ead8bf47058cbb912c26c053be299c1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "944ab6a043378d56122dd57c3b4aabf74d2dfca0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "94e29d19b69f09d61d44e352e8197d7371d813fb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "95221a5fe396b1d0bbcb5e66673d9150308c29c1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "953f13dd1abf9bdaed944d08efd015f0409daa18", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "95b2da96bed2990807dcffea271a9107f740baf9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "95f2ac1fdff87b970c912ee76c865b4c53b393df", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "95f91e9cabcb2c9e2f0590d6f6a74bb1ce19b402", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "968922988fcc95201bb0081b53f70d19b9f9c05f", + 
"is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "969c47ddbd254bd76d48b683ff6a56bd646f5e0d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "96c347f4513e196f075c4fbde082a02f4cfe8a49", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "96c6b5af9e2dfe6f840b6d726bf1d196c5bcd7c0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "96e26b5e762e73028b18082fd9acb335f7b331ca", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9720ff7be9c9882a2e244099ca48fcac6c096504", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "97b495d16fa86a0d4a80b8f11a5f9a6ebf86022c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "97b88f7aa04155c18f0516798bbf418fe647fafe", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "97de5259949775462e9d868614f403f9db389f55", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "97ee038c15727b8d467c713fa339b0ae3ea2259d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "981d10cb37b108d4fd3ddb5ff26c101df45a3b8d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "982fc78817625d0fe1f0dbbb9a8490fc2941ccb7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "984dfd1baa4f5f6f78667a0d7a6621e292ed2d9d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "988bd08a27d5e36ce309b95d8d0f69179364760f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "98c7e1c61f8b929249cfd6fc322e01c8f1eb6861", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "992569961e8c71bf4970bb97dfa628c9ab7e6039", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9925fc8a5fd8592a495a90da7f29d749d6fb40a6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "99287110dad4059d46970573e5e13f804706949d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": 
"99b69cdb09e385ca0dd0ef3d0a71cb0b4ffedf01", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9a277a30f04b2d3c860499ea98bcba90979c54eb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9a29fa852f579c71c73199e04e0c6cfe9e993167", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9ab8513bafdcdc997aa79d6a11353eca65af5831", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9c6176187083d5da0d99aa4291455cf332527772", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9ddd7016fa1cdb6c074716723d9bc0075f5f363b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9e939c134890f5dffc0a098b4d836b3355a58c9b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9eaf08eea1c21c7b8720f750d86770a7f176e79d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9ed1916536feb6f170469b893f35d685b3ebb323", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9edd4ea24337797367ab5571a7c792c618ca3257", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9ede48e425098c4750ebded2f2481bf61dca063b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9f36cbbe7285c718a2de4f98db730df52497d0a7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9f69361da6968f01d47f9df4968159c4fd670ab8", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "9fbf44718a75db44ca11929a4b79f4dae848feb4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a017638ecb95154b480501556fadd22d8875a265", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a04c4aae97cbd200c9c8d643c438aae90a460fc5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a08bd192ec00bb9de439009b6838dbb3fa5be66c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a0f311702a5e7f85c744a954206f62d3ab9903cd", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": 
"dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a1293bb58d6338a7045d481b09017cec5f06a9c6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a1591762b5485a768450ff225a8f8ee0de0db21a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a15fe4c633998cc146f6bcc9786d3a87687044ac", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a18bed6850b2a2e6d671eb686ae50f86c3ca3ff3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a1922d8199b8ad982beb42460264a08176cad0cb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a1eb803227a33a36fe0dea8e9d7769bb4b34f132", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a203a1b9c8bc50c077ebbc8395969eb87ed51f9b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a2975d204971cff4c92ca67df49b917c4793687a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a2c4c21c96071b3aa1f085906b78b22eab9fd6e8", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a376671bac2842f38c5376ec282129274dc1fd58", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a3b74a1971c59fb144337938fd9077a9f843d537", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a3e8d1d4a64c25d5338344ea0bdd1c5258990ced", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a479c73472ad4bf22a7a467d36778c57e1acd659", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a51479f53d3bb6523f3bf263dabf408d7e8fd476", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a51c2b0032836c5ec70bc2031d87678687272a8c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a60948ed94cfdcd7dd27b642086ffc46ec594342", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a60cb16b77ce9a769fe35b58ff2645a8fa7b94be", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a625afd03d3b95319e5738a4aabd9274873fc0db", + "is_verified": false, + "line_number": 1 + }, + { + 
"type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a69f83d1f36687a9f0c114ebb8b82b73d2d76f8d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a7303600f759a14f2924d96fb34faad20c90cc9d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a758eeb00f48c380bb00b32449177fdb316368d1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a78c2ec8c682183fc77f670e784f1233283d505a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a7b7862e76d908b895181ed452e264ab6208fad9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a7c7b1129dfceeb84603f71ec320a3b7dec773c9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a7d2264d701b27d35e84fcde23735530b8044be2", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a7e2e02286f207979543393a961ed57cc6bdcf79", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a81f8de03fe46663dfff957fdcddccd69ecbbf26", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a843ab3103980681e5eba0c4870bd2eaf1ae4724", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a8d0e123f360ca6d516f9d7e194cdaf7e553e0fb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a911525300c7125cc4e2191530e8698e3b55d4bc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a97a554257696cdda984f09d865b5bc26d1277b6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "a9c03e9bc2690f7368546124d97475a3167a79cd", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "aba1d6b6d2b2eaa49e0946a6320314e0a2cf9f00", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "abadb22652e84636f7aa4ed83c2fa6719bf679c5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "abc19fa8e4a995cd01a7c9a7145d2fa2d0bc5fd6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "abc51f7d4835e33d58de73568e43703ddd1cedf8", + 
"is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "abda02489e740c8719ebb82c2da9c80a04328e06", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "abf9d521aecbadcb097740de2d4b918e13aafdf7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "aca5d2a982160c04fee12fec2ef57047ee3705e4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "acd3fc4a96fd5fd8c16b99844a498fee8c8a5fa0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ad1e3ae56356df7137580ccd1d071a03ff19d72f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ad2285c3516fb2cc445665b9c366e4cc3174e637", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "adbaba8a4ba64b5c29365e32c425ccef8bc61b86", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "add8b48065dd867bd75bdfa46125aa086e74b4a8", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ae4b3b921b6a9e31d0339dc4afd06ae12793c14d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b06762f0ad76fd2bf0aa45bbbf92ccca460d0223", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b0789fd2ce1cac548a7ea9d638a9862662966ab9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b0c8bd93b9a2187a3a3952a5b6ea925e1328cc39", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b0c93f715f30846522868231d7a987b78ee3f48a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b0df63cfc2cb4ba8997396608ac9e71426dc4c51", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b204400d64acbc091c83c90ef53b14c48f097bfa", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b238024c0e2348969ae57d2ab3b4ddd74582e7d6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b268af1b19da38c60b38450c0b6a41023dbb4668", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": 
"b299d107931784be8d38ce4d61eaa848fb76bc5c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b30602fb654c4c966ae30c3adec51419b25b9775", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b4cf3f49d5843d7027234ea2178cae247e177c8a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b4fc5921f5ec863f33f183c6e1026472e5239098", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b5480545b59f687317363df222c5862107e3da7f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b57644cbff1ce9945ec3469cbce429a806cfdbd7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b6421fb13e88e59b18d6c284e8f3947f74b2f6ac", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b6580168f26d5dc69835a2ef2831041b9ae6bb80", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b6a697aa9eee66299759fcf819ea2634767e7c05", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b835e22b3d061fbd0a5a3238dbd140dd55040447", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b87a1af7d1fa200281f3e864d68c79e6cc58ef44", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b87ec615807ddf9026eabd3cb9c6f997df5a9205", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b9705de83f95c9ce64ad87da980c5179687401ce", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "b9e954571332cb2f11a260e5fb9801e33ad87620", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ba76ae3d953777fdd67436926ba86d7c4e1f66ae", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bb149c8e205778a3623acd27ac2b4f9b047cbb53", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bb4baf304fab793ae45b0fa4c0161dd7aa90daf7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bc1af7ce2cd1273f87afd8a013ce4106a5df1839", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": 
"dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bc5e95286e79594d55524b3488b055c893773aba", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bc660cd62e6c970d5a84a3503fc7ce26b250474b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bc71ce69e95159cd6338402ee4147b1fbbe2fac6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bd31fe6c2e4598ea60b1fd58864879df6b03dc02", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bd5fe0b730c805eb956871fd158198c3cc3c5ea2", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bd78ba716204754d8e60105a08b84d369ecb3e45", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bde670579973fb22b022dab0750a8daf5fd183be", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "be0bcfc478dc28e411a4111986a241dfef69ea3f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "be1ba535e5c56e49f784344dc076f17793fa85ca", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "be361569c19fbe05f78fb08d0f092bf59e968d61", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "be86d1ff7c4bbcc589542ddd26341c795a399499", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bec09f400e7e277988a39f17eda6f297fdff9efe", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "becc6e4060d63f2f08587024f1946680f7df095b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bee240f6dc8d719e2fba9e10d748f59099f9df39", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bef0ab4d7af5e4ab73f763916cde1b23b15639a4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bf7e31c700d9cd0c349940397ede6870132a606e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "bfa81f0969d10297d66f442324c4ab06a60ad09c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c00b6177b7330610f62a2cafa23bff6e442441fb", + "is_verified": false, + "line_number": 1 + }, + { + 
"type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c00d661426004f2e2fdf68d6ebf975bfe92cacb0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c0185656b44a7912f40a93674e97fd765e3666cb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c1040ba6a634abfd63f72ae32960fecfa2785f04", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c14054da2ad869575516f46fd4696019333c325c", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c2f6e863f863c538d9a1ad4d0ae3b4a831e945ef", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c33314ffd58b9c85608600fc63b52c848f293254", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c3a7d3427a0c8a838de606c4dbf079afbba43758", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c3c8e600a1e3763168b423d4d96be12383c22f5f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c414758df5fc0661ed2a13e90fc52bdb07957a09", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c45edbf8ebc487153c806ae86f0e588417843aa1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c4d56a843fb129e7d2c1e26b0800402f5e744943", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c52385a6fcab7894e88645079d89a108095c44bc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c535dc4f4c2f6785553806932b7fbb7bb9143731", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c56893a99954315ddc5e9e849c3feb1f86e6e52a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c5abe2129e791283e1569dba55a8eca10b2b3619", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c5e19d9f18eaffc48619a3287891ecbe0fbef7ca", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c644b953b623d43ff0072cb34b8088c33ad70ea4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c7411f596fd474f3efd1949e99c1465e59ee6ae6", + 
"is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c7d553294b01332b0fd4c9b67c3d7582c7cd1b30", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c8a63dc4102a0db3099a92dedc208f7001ac50af", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c962428eb9291fe10abfd42fbab84aa943fc0a07", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c966cb8c2e59728f256c800c7c2b166342a53992", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "c9acf78fd5b59574621d06ffc348a151810be286", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cb222c8c4bb1ca24ec8e2e5fbaa8f602b87a1b2f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cb337a7e86465e00cd3041b89f90a4f201932dac", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cb9be6a943aae66a85fac743579d70a8096c16fc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cbc79f3921e138d0d83780b63ddbc99a0487e557", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cc168c3e9f90c0982a8fb0e1a9c3fc0fee05fe50", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cc6d9060fde17d14bace121d7995ed562cdeaed0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cc920c7b8d3711b3707d4b5b60101c8c2de05dfc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cca77f58330c2f108c332c54bd4fa17894d7a57d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cd0ee117594e4d7c5e133b152338e8ae6f2558f3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cd7a49238d087687ff9ae9c59b6bb5c43b55bf9a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cdc1f5d2962a5fce602949c075a531fc3180ba7b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cddc0abb57a5c3371d778e079564d898e0fe9d58", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": 
"ce1e224e84052a2230bf359ba4196770dba765c1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ce897d46d7090e509a0a1be87f4ccaca74e83737", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cf2cba531aba72684cab094d33380eb04e6178a2", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cf5b310e89f4e04fa51faa37a3165a0c76bc8ddb", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cf5e01090d6f372bc342f420d666ee240c7ee46d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cfe74c918ec44b5328ed8bf00790a30fbfee78ac", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "cff8efb6416ed08873f40daadbed8464811de0e7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d0265a3a03fd321a82353b08fc6be0490f0896e9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d04f06b9d8a38c07d7ec5038f05ac78d1fc3d51e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d1448b73a6dae41ad0148c682d1556a4bb382f8f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d16f189dd9743ab144f3517c36a9c95eb38510d0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d1a361643294b556e8cc4b779cd390f143988400", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d231474ef501b242b81b25b367455446a32e7c59", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d24f7ede21c8c3e4aa719a1c279725111825dac4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d4033afd5644d8cf11cced236c8fdd226b29527f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d46f8cbdd5face01f34dbf3459e8aa33d0dead08", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d4885f86b925eeebe48d22965296c952be7075fc", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d4889798017fe580c25373c63b9cf135873d1d92", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": 
"dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d4d995422f4bb0a86f1b42b2091d1174c1d4222a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d58beef2fe48d86b7fde2d9755eedebfd4b6c0e8", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d5d74109818c6579b190627c6a95cbc58a4b3051", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d656bcfc4d5f17768230fe9a523d92772cf52655", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d677101dd59c9c6a7c12a56dd77f385c30a83fc1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d685f615adb2b01bd9abd8a4de3bcdfa5cec0677", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d6c5c9df56e206cbad0171d822ffc4c9fd6330d9", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d72d23ed9b4aa389e2e2a3ec6f6eaccb405df8f2", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d78a071dbc9e4f64be8e705cf276f8927426cff0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d79c79387a25547c8f9054a9a4d22781100c78b3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d87331c7118363a59bef98b10dfb97f653464a0f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d903ea4af768f3f592166ed68db98e751ca71858", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d94585205a228beee6fd986776d6006b72f21219", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d9504b8f27dab4653626e4674a0dfd324d1740c8", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d96851a172f86a206e83cfa61945a8d706063946", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d972129f92278b512ed8b104b17ffe86976da4b8", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "d97c4bab77931b27c52e56de0fb0562d360e5a0e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "da0df2a3271e6810af965dc72560edaac845863e", + "is_verified": false, + "line_number": 1 + }, + { + 
"type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "da13620132c93e129b2485475185be4aaefba84a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "daaa29ea8b3ae106d14424734403300f7927b8ef", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "db11af4583690449b028add622762412ab3b7e10", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "dc118ce9785a6459a646bbd9c1e86be619a91eaa", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "dd10ef912df4e26a1324162e1459f6b71cdebae5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "dd439038bf0d04be23aff58a0a2449f7ff584e33", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "dda7b062adf6cbb52358c026d9c5dc8f731d942d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ddcbc86f5a1c42154a0ff7e32eca3554fee403de", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "deaf18a0214d592880becca6f810c96017548f58", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "dedc3ef553ee88d84d167bd1b4ce64f16365645e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "dee7d16ac5d3ba06505ca7c5a03ccf09cb85ec56", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e0236ca8c5edc4b19d988bbc65b7b5554dab4f83", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e02aa624096b8f21d567742590ae518ca984ff97", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e057eab635ee78cc9dda0bbc74957aa3d1689ba1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e07318265bac1618f77e6f7727b02a365f695c5f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e0a0169d69b234958d8dc9b836958924d247f5ce", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e0a92efa2bb187c3aee0eff5bec9d26845598619", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e13e051978398d405c8d7c04be0ac20571c5039c", + 
"is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e15bfdc9992fb3f39d50cbbadcca67572b01c339", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e165e31e544769755c4ae140b22385d87cb04d9b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e193e6e193acbf17643f9db0a5a5efa2bf66e93f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e1a3fc76c65f6fae2fdcc6e1e76374677e24bb04", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e1bac5565b2c7b1383c3a54b5f3ed8e9e6ce5917", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e1c1bdaa57853b7b917cfa74c3aa42eaddbe2e19", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e24a8e68525a5a9ebf8857f52a05e3ff1ebfbbb4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e2abe5732f02e93db9c778194356b50f901fc193", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e381abe630bdf8e575b6bf4824e6f10fa0cd93d7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e388917056c942475f8fc66c818b10b28825581f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e430acdedbadc5cc290286af0e68d588c415f941", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e43caa8147b344f15b2d006ad0b40547365adfec", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e47531bab8f35012c1a86ecc7290cb514cff390d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e4cae8f2a8949958258aa86506944a4ffb0be3e4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e65fe41d2abfbe9764b3437493bd5db4c5a8a870", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e730bc8d5fba8275406e24567cd0abce527d509e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e80366d275d375a3e50d681e7faf5f0df9f8a075", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": 
"e8075e7e2b6447e0a9be4675db5ebff82e024191", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e88094f56711429fd5abc1c6e7234f6cf8c67748", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e8e1b7d3a31b2c13d45f4b3371e82bb8a7d366a7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e8f922c5f3ca4cd9f0b04ba98894adbed4a199c6", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e90acfb468727532420c26132488c52bd247a5c2", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e9763d2f4c3d0423f9940b9cf5ec423ebc62228e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "e981aee1a3ee105c804827c6bd0b7a88c46d3701", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ea256b5681473852027bc180f38d43f9d9d3ad4a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ea4f7f4ec904d49ec97ad578f25d773444c553b4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ea53126f50cef81d733360a1229f9fdac6bab8d4", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ea6b50e8c7b2a4ab9c4c989f5e06d75609dfe77a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ea7d4c41eab7df28ee7e909f05655016d441c11d", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ea7e8f8c4c50b389fe866c5df2aa77acac9ef5ed", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "eaa1239c9e76be7c58cd669221a8c81e4fe4692a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "eb39462d8e2e2ce350b182da227ce78988f9adc1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "eb74401483f5c1b2d79f1325cc7ea46436bf25cd", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ebce46f2419628b02b10925b0fb55248f6ae6b3a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ec600efe30ce158a449810ac62ed3c7900382d47", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": 
"dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ec8082ffe6789edd9b504476481ec847bae8cda8", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ed3ac343fc0c14f4d3b4ec687ee2dec871e70a72", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ed5bfc4260e3f45503c277a7730b2817f2cbf481", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "edb358c930ecd7226c065e6cf19acb9c4b73c85b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ee66ec6c830dc7f4a042a1e5d5657d703fe62923", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ee8e5b29cd12587aab33a1b8e30d0df7baa1a391", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "eeb8b769c4ed1cd81b6b4da9a23b15b573001b68", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "eee62837bd14f8970479882a976ab5092a1ea957", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ef2158e8de388178e53f2bfc9b7f211949fdc155", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ef5a12275d225c17d077b0d62cf8e8c909507d25", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "efa188a29d7701d5c87134c6dee7731775aff4b0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "efe9a3603d460d03691de08753722c5d2165ee32", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f0aff2f9c2979e8b938071a69a8f5702c5f0c0a5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f0b936a8bae507cc5d55892b7885009a51002648", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f1d6dcd489b9ceb017270d82e4e9033b86757f6a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f3ce85c52d04d2352c6d777007cdfeddf4c5cab1", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f433b21b01e19f0768ef4e6de846edce063c7773", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f4956130b71095b510c05421cdec5d6508260e30", + "is_verified": false, + "line_number": 1 + }, + { + 
"type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f4b37f7ff89e4cff2c034b30a7ade32cb5941b8a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f60f8f83d565da433ce4c8f6a8c23e76d9d1ccc7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f6459f3097e5941e34dbe248357c52ed582712a5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f7a628586ff43a4d8aeb0b72055f0a99b99d0da0", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f7bb8fc2488c9e2edac2e7bbec8d79406737da44", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f7ceb0b80b2730c319bf9f6924447ef593b6be91", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f7ed2906b7e77c02087710460a374b18aa00eff3", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f8355330019453c99d6ca98fbca2312e27dc201f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f957c886ca6633508633223250354917f180c634", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f9c59f6f9dc57450f922b3ce2ee54f86e9b9bdea", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "f9d34575908aa76b599fd64f3392688ececf1203", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "fa12329125251338ca79453183602c18c632e76b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "fabe5e43649511d199fe9c6646e348c1506a1989", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "fb2f68bb0c56022dc54f5e46f38a430fc92bd706", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "fb63c784f81376c5f29871cd6c51c7c88dfc9f3e", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "fb945c807926ab516f3e6501b82a53333760d669", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "fbc4d07d1bb875f22a58f3fe164c159a5425474b", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "fdb01bee770da62e62ef237c3f451980512a5e57", + 
"is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "fe11c47ddb92a6185ab1f36d40f4ac1733427a8a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "fe3660dedbb9f4910da301b2564da14ba72e632a", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "feaf7c053d02d35f68dd95f3f867dd3ddf5d45c5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "feb8e2efc67b3f662c0001e17f78aae7a9522083", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "fec97c4cd3d8511b95b9c0361e7cbb74c1547823", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "fef78b73a58154ca75a06043798a7df847f7bab7", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ff1ead9be2c9b775f4536f2ca475e4a5af010251", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ff4968dacedadc630fd75fae2725fca769c5246f", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ff5ddd81d0261b33bd7414716ef6e2e6aceb5b55", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ff9968b9c9430851ddebfce4256ec252ad9b6aa5", + "is_verified": false, + "line_number": 1 + }, + { + "type": "Hex High Entropy String", + "filename": "dashboard/tsconfig.tsbuildinfo", + "hashed_secret": "ffede682b8747d0c10811565e3ade1b6654f569c", + "is_verified": false, + "line_number": 1 + } + ], + "demo/docker-compose.demo.yml": [ + { + "type": "Secret Keyword", + "filename": "demo/docker-compose.demo.yml", + "hashed_secret": "c4e4e4239f4120bfc6964d9bb2e7cf117ee98a29", + "is_verified": false, + "line_number": 9 + }, + { + "type": "Basic Auth Credentials", + "filename": "demo/docker-compose.demo.yml", + "hashed_secret": "c4e4e4239f4120bfc6964d9bb2e7cf117ee98a29", + "is_verified": false, + "line_number": 27 + } + ], + "docs/prompts/demo_prompt_2.md": [ + { + "type": "Basic Auth Credentials", + "filename": "docs/prompts/demo_prompt_2.md", + "hashed_secret": "c4e4e4239f4120bfc6964d9bb2e7cf117ee98a29", + "is_verified": false, + "line_number": 426 + } + ], + "justfile": [ + { + "type": "Basic Auth Credentials", + "filename": "justfile", + "hashed_secret": "c4e4e4239f4120bfc6964d9bb2e7cf117ee98a29", + "is_verified": false, + "line_number": 163 + } + ], + "tests/fixtures/data_sources.py": [ + { + "type": "Secret Keyword", + "filename": "tests/fixtures/data_sources.py", + "hashed_secret": "a5aa8c108715d08777130833538183a80e6aad92", + "is_verified": false, + "line_number": 89 + } + ], + "tests/unit/adapters/datasource/test_postgres.py": [ + { + "type": "Secret Keyword", + "filename": "tests/unit/adapters/datasource/test_postgres.py", + "hashed_secret": 
"9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684", + "is_verified": false, + "line_number": 111 + }, + { + "type": "Secret Keyword", + "filename": "tests/unit/adapters/datasource/test_postgres.py", + "hashed_secret": "f2b14f68eb995facb3a1c35287b778d5bd785511", + "is_verified": false, + "line_number": 151 } ], "tests/unit/adapters/llm/test_client.py": [ @@ -326,5 +5674,5 @@ } ] }, - "generated_at": "2026-01-03T19:49:38Z" + "generated_at": "2026-01-04T11:34:23Z" } diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..f2b6730c3 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,127 @@ +# CLAUDE.md + +DO NOT worry about legacy code or keeping backwards compatibility +We are pre-launch +The main focus should be on innovating forward, without regard for old logic +As long as you are making code better, that is fine + +## Pre-commit Guidelines + +To avoid pre-commit failures, follow these patterns: + +**Ruff:** +- All public methods need docstrings (D102) - add `"""Brief description."""` +- All `__init__` methods need docstrings (D107) - add `"""Initialize the class."""` +- Lines must be <= 100 characters (E501) - break long strings across lines +- Use `isinstance(x, A | B)` instead of `isinstance(x, (A, B))` (UP038) +- In except blocks, use `raise ... from e` or `raise ... from None` (B904) + +**Mypy:** +- Avoid returning `Any` - use explicit type annotations: `result: str = func()` then `return result` +- For untyped external library calls, add `# type: ignore[no-untyped-call]` +- Use `dict[str, Any]` for mixed-type dictionaries +- Logger methods don't accept kwargs - use f-strings: `logger.info(f"msg: {var}")` + +## Project Overview + +Dataing is an AI-powered autonomous data quality investigation platform. It automatically detects and diagnoses data anomalies by: +1. Gathering context (schema, lineage) +2. Generating hypotheses using LLM +3. Testing hypotheses via SQL queries in parallel +4. Synthesizing findings into root cause analysis + +## Development Commands + +```bash +# Setup (install all dependencies) +just setup + +# Run full demo (backend + frontend + PostgreSQL + seed data) +just demo +# Demo API key: dd_demo_12345 +# Frontend: http://localhost:3000, Backend: http://localhost:8000 + +# Development +just dev # Run backend + frontend +just dev-backend # Backend only (FastAPI on port 8000) +just dev-frontend # Frontend only (Vite on port 3000) + +# Testing +just test # Run all tests +just test-backend # Backend only +just test-frontend # Frontend only + +# Single test file +cd backend && uv run pytest tests/unit/core/test_orchestrator.py -v + +# Single test function +cd backend && uv run pytest tests/unit/core/test_orchestrator.py::test_name -v + +# Linting & Formatting +just lint # Run ruff + mypy (backend) + eslint (frontend) +just format # Format code +just typecheck # Type checking only + +# Generate OpenAPI client for frontend +just generate-client +``` + +## Architecture + +### Hexagonal Architecture (Ports & Adapters) + +The backend follows hexagonal architecture where the core domain depends only on protocol interfaces, never on concrete implementations. 
+ +**Core Domain** (`backend/src/dataing/core/`): +- `orchestrator.py` - Investigation workflow: Context -> Hypothesize -> Parallel Investigation -> Synthesis +- `interfaces.py` - Protocol definitions (DatabaseAdapter, LLMClient, ContextEngine) +- `domain_types.py` - Core domain types (AnomalyAlert, Hypothesis, Evidence, Finding) +- `state.py` - Event-sourced investigation state + +**Adapters** (`backend/src/dataing/adapters/`): +- `datasource/` - Unified data source adapters (SQL, Document, API, Filesystem) + - All adapters inherit from `BaseAdapter` and implement connection, schema discovery, and queries + - Supported: PostgreSQL, MySQL, Trino, Snowflake, BigQuery, Redshift, DuckDB, MongoDB, DynamoDB, Cassandra, S3, GCS, Salesforce, HubSpot, Stripe +- `context/` - Context gathering (schema, lineage, anomaly confirmation, correlations) +- `llm/` - LLM client (Anthropic Claude) +- `db/` - Application database (PostgreSQL for app state) +- `notifications/` - Slack, email, webhook notifications + +**Entrypoints** (`backend/src/dataing/entrypoints/`): +- `api/` - FastAPI application with routes, middleware (auth, rate limiting, audit) +- `mcp/` - Model Context Protocol server for IDE integration + +### Investigation Flow + +1. **Context Engine** gathers schema (required, fail-fast) and lineage (optional) +2. **LLM** generates hypotheses based on alert and context +3. **Orchestrator** investigates hypotheses in parallel with retry/reflexion loops +4. **Circuit Breaker** stops runaway investigations (query limits, stall detection) +5. **LLM** synthesizes evidence into root cause finding + +### Frontend + +React + TypeScript + Vite + TailwindCSS + shadcn/ui components. + +Key paths: +- `frontend/src/features/` - Feature-based organization (dashboard, investigations, datasources, settings) +- `frontend/src/components/ui/` - Reusable UI components (shadcn/ui) +- `frontend/src/lib/api/` - API client (generated via orval from OpenAPI) +- `frontend/src/lib/auth/` - Authentication context + +## Key Conventions + +- **Python**: Google docstring convention, strict mypy typing, ruff for linting +- **Frontend**: TypeScript strict mode, ESLint, Prettier +- **Tests**: pytest-asyncio with `asyncio_mode = "auto"` +- **Multi-tenancy**: All operations scoped to tenant via API key authentication + +## Demo Fixtures + +Pre-baked e-commerce data with anomalies in `demo/fixtures/`: +- `null_spike` - NULL values in user_id (mobile app bug) +- `volume_drop` - Missing EU events (CDN misconfiguration) +- `schema_drift` - Price stored as string +- `duplicates`, `late_arriving`, `orphaned_records` + +Generate: `cd demo && uv run python generate.py` diff --git a/README.md b/README.md index 7e01139b5..4ea893238 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DataDr +# Dataing Autonomous Data Quality Investigation - an AI-powered system that automatically detects and diagnoses data anomalies. 
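The investigation flow listed in CLAUDE.md above (context, hypotheses, parallel investigation, synthesis) can be pictured with the following purely illustrative sketch; every name is a placeholder rather than the project's actual API, which lives in `backend/src/dataing/core/orchestrator.py`:

```python
import asyncio
from typing import Any

# Illustrative stand-ins only; not the real Dataing orchestrator API.


async def gather_context(alert: dict[str, Any]) -> dict[str, Any]:
    """Stand-in for the Context Engine: schema is required, lineage is optional."""
    return {"schema": "<tables and columns>", "lineage": None}


async def generate_hypotheses(alert: dict[str, Any], context: dict[str, Any]) -> list[str]:
    """Stand-in for the LLM hypothesis step."""
    return ["upstream load failed", "schema changed upstream", "filter dropped rows"]


async def test_hypothesis(hypothesis: str) -> dict[str, Any]:
    """Stand-in for one parallel investigation branch (SQL probes, retries, reflexion)."""
    return {"hypothesis": hypothesis, "supported": False, "evidence": []}


async def synthesize(evidence: list[dict[str, Any]]) -> str:
    """Stand-in for the LLM synthesis step that produces the root cause finding."""
    return "root cause: <synthesized from evidence>"


async def run_investigation(alert: dict[str, Any]) -> str:
    context = await gather_context(alert)
    hypotheses = await generate_hypotheses(alert, context)
    # Hypotheses are investigated concurrently, mirroring the parallel step above.
    evidence = await asyncio.gather(*(test_hypothesis(h) for h in hypotheses))
    return await synthesize(list(evidence))


if __name__ == "__main__":
    print(asyncio.run(run_investigation({"dataset_id": "events", "metric": "row_count"})))
```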
diff --git a/backend/migrations/001_initial.sql b/backend/migrations/001_initial.sql index 57453064f..93a2af5b6 100644 --- a/backend/migrations/001_initial.sql +++ b/backend/migrations/001_initial.sql @@ -1,4 +1,4 @@ --- DataDr v2 Initial Schema Migration +-- Dataing v2 Initial Schema Migration -- This migration creates all tables for the application database -- Tenants diff --git a/backend/src/dataing/adapters/__init__.py b/backend/src/dataing/adapters/__init__.py index c83cda508..186b6cb84 100644 --- a/backend/src/dataing/adapters/__init__.py +++ b/backend/src/dataing/adapters/__init__.py @@ -4,24 +4,17 @@ Protocol interfaces defined in the core module. Adapters are organized by type: -- db/: Database adapters (Postgres, Trino, Mock) +- datasource/: Data source adapters (PostgreSQL, DuckDB, MongoDB, etc.) - llm/: LLM client adapters (Anthropic) - context/: Context gathering adapters """ from .context.engine import DefaultContextEngine from .context.lineage import LineageContext, OpenLineageClient -from .db.mock import MockDatabaseAdapter -from .db.postgres import PostgresAdapter -from .db.trino import TrinoAdapter from .llm.client import AnthropicClient from .llm.prompt_manager import PromptManager __all__ = [ - # Database adapters - "PostgresAdapter", - "TrinoAdapter", - "MockDatabaseAdapter", # LLM adapters "AnthropicClient", "PromptManager", diff --git a/backend/src/dataing/adapters/context/__init__.py b/backend/src/dataing/adapters/context/__init__.py index 251f3d281..24c6bff90 100644 --- a/backend/src/dataing/adapters/context/__init__.py +++ b/backend/src/dataing/adapters/context/__init__.py @@ -1,18 +1,19 @@ """Context gathering adapters. This package provides modular context gathering for investigations: -- DatabaseContext: Resolves tenant data source adapters - SchemaContextBuilder: Builds and formats schema context - QueryContext: Executes queries and formats results - AnomalyContext: Confirms anomalies in data - CorrelationContext: Finds cross-table patterns - ContextEngine: Thin coordinator for all modules + +Note: For resolving tenant data source adapters, use AdapterRegistry +from dataing.adapters.datasource instead of the old DatabaseContext. 
""" from .anomaly_context import AnomalyConfirmation, AnomalyContext, ColumnProfile from .correlation_context import Correlation, CorrelationContext, TimeSeriesPattern -from .database_context import DatabaseContext -from .engine import ContextEngine, DefaultContextEngine, EnrichedContext +from .engine import ContextEngine, DefaultContextEngine, EnrichedContext, InvestigationContext from .lineage import OpenLineageClient from .query_context import QueryContext, QueryExecutionError from .schema_context import SchemaContextBuilder @@ -22,8 +23,7 @@ "ContextEngine", "DefaultContextEngine", "EnrichedContext", - # Database resolution - "DatabaseContext", + "InvestigationContext", # Schema "SchemaContextBuilder", # Query execution diff --git a/backend/src/dataing/adapters/context/correlation_context.py b/backend/src/dataing/adapters/context/correlation_context.py index 048fed7a2..db308480c 100644 --- a/backend/src/dataing/adapters/context/correlation_context.py +++ b/backend/src/dataing/adapters/context/correlation_context.py @@ -12,9 +12,11 @@ import structlog +from dataing.adapters.datasource.types import SchemaResponse, Table + if TYPE_CHECKING: - from dataing.core.domain_types import AnomalyAlert, SchemaContext - from dataing.core.interfaces import DatabaseAdapter + from dataing.adapters.datasource.base import BaseAdapter + from dataing.core.domain_types import AnomalyAlert logger = structlog.get_logger() @@ -84,16 +86,16 @@ def __init__(self, lookback_days: int = 7) -> None: async def find_correlations( self, - adapter: DatabaseAdapter, + adapter: BaseAdapter, anomaly: AnomalyAlert, - schema: SchemaContext, + schema: SchemaResponse, ) -> list[Correlation]: """Find correlations between the anomaly and related tables. Args: - adapter: Connected database adapter. + adapter: Connected data source adapter. anomaly: The anomaly to investigate. - schema: Schema context with table information. + schema: SchemaResponse with table information. Returns: List of detected correlations. @@ -106,8 +108,8 @@ async def find_correlations( correlations: list[Correlation] = [] - # Get the target table schema - target_table = schema.get_table(anomaly.dataset_id) + # Get the target table from schema + target_table = self._get_table(schema, anomaly.dataset_id) if not target_table: logger.warning("target_table_not_found", table=anomaly.dataset_id) return correlations @@ -138,7 +140,7 @@ async def find_correlations( async def analyze_time_series( self, - adapter: DatabaseAdapter, + adapter: BaseAdapter, table_name: str, column_name: str, center_date: str, @@ -205,9 +207,9 @@ async def analyze_time_series( async def find_upstream_anomalies( self, - adapter: DatabaseAdapter, + adapter: BaseAdapter, anomaly: AnomalyAlert, - schema: SchemaContext, + schema: SchemaResponse, ) -> list[dict[str, Any]]: """Find anomalies in upstream/related tables. 
@@ -252,32 +254,51 @@ async def find_upstream_anomalies( return upstream_anomalies + def _get_all_tables(self, schema: SchemaResponse) -> list[Table]: + """Extract all tables from the nested schema structure.""" + tables = [] + for catalog in schema.catalogs: + for db_schema in catalog.schemas: + tables.extend(db_schema.tables) + return tables + + def _get_table(self, schema: SchemaResponse, table_name: str) -> Table | None: + """Get a table by name from the schema.""" + table_name_lower = table_name.lower() + for table in self._get_all_tables(schema): + if ( + table.native_path.lower() == table_name_lower + or table.name.lower() == table_name_lower + ): + return table + return None + def _find_related_tables( self, - schema: SchemaContext, + schema: SchemaResponse, target_table: str, ) -> list[dict[str, str]]: """Find tables related to the target table. Args: - schema: Schema context. + schema: SchemaResponse. target_table: The target table name. Returns: List of related table info with join columns. """ - target = schema.get_table(target_table) + target = self._get_table(schema, target_table) if not target: return [] - target_cols = set(target.columns) + target_cols = {col.name for col in target.columns} related = [] - for table in schema.tables: - if table.table_name == target_table: + for table in self._get_all_tables(schema): + if table.name == target.name: continue - table_cols = set(table.columns) + table_cols = {col.name for col in table.columns} shared = target_cols & table_cols # Look for ID columns that could be join keys @@ -285,7 +306,7 @@ def _find_related_tables( if col.endswith("_id") or col == "id": related.append( { - "table": table.table_name, + "table": table.native_path, "join_column": col, } ) @@ -295,7 +316,7 @@ def _find_related_tables( async def _analyze_table_correlation( self, - adapter: DatabaseAdapter, + adapter: BaseAdapter, anomaly: AnomalyAlert, source_table: str, related_table: str, diff --git a/backend/src/dataing/adapters/context/database_context.py b/backend/src/dataing/adapters/context/database_context.py deleted file mode 100644 index 28b263345..000000000 --- a/backend/src/dataing/adapters/context/database_context.py +++ /dev/null @@ -1,187 +0,0 @@ -"""Database Context - Resolves tenant data source adapters. - -This module handles the resolution of tenant-specific database adapters, -enabling investigations to query the actual data source (DuckDB, Postgres, etc.) -rather than just the application metadata database. -""" - -from __future__ import annotations - -import json -import os -from typing import TYPE_CHECKING, Any -from uuid import UUID - -import structlog -from cryptography.fernet import Fernet - -from dataing.adapters.db.duckdb import DuckDBAdapter -from dataing.adapters.db.postgres import PostgresAdapter - -if TYPE_CHECKING: - from dataing.adapters.db.app_db import AppDatabase - from dataing.core.interfaces import DatabaseAdapter - -logger = structlog.get_logger() - - -class DatabaseContext: - """Resolves and caches tenant data source adapters. - - This class is responsible for: - 1. Looking up data source configuration from the app database - 2. Decrypting connection credentials - 3. Creating the appropriate adapter (DuckDB, Postgres, etc.) - 4. Caching adapters for reuse within a request - - Attributes: - app_db: The application database for looking up data sources. - """ - - def __init__(self, app_db: AppDatabase) -> None: - """Initialize the database context. - - Args: - app_db: Application database for data source lookups. 
- """ - self.app_db = app_db - self._adapters: dict[str, DatabaseAdapter] = {} - self._encryption_key = os.getenv("ENCRYPTION_KEY") - - async def get_adapter( - self, - tenant_id: UUID, - data_source_id: UUID, - ) -> DatabaseAdapter: - """Get or create a database adapter for a tenant's data source. - - Args: - tenant_id: The tenant's UUID. - data_source_id: The data source UUID. - - Returns: - A connected DatabaseAdapter for the data source. - - Raises: - ValueError: If data source not found or type not supported. - RuntimeError: If decryption fails. - """ - cache_key = f"{tenant_id}:{data_source_id}" - - if cache_key in self._adapters: - logger.debug("adapter_cache_hit", cache_key=cache_key) - return self._adapters[cache_key] - - logger.info( - "resolving_data_source", - tenant_id=str(tenant_id), - data_source_id=str(data_source_id), - ) - - # Look up data source from app database - ds = await self.app_db.get_data_source(data_source_id, tenant_id) - if not ds: - raise ValueError(f"Data source {data_source_id} not found for tenant {tenant_id}") - - # Create and connect the adapter - adapter = await self._create_adapter(ds) - await adapter.connect() - - # Cache for reuse - self._adapters[cache_key] = adapter - logger.info("adapter_created", ds_type=ds["type"], ds_name=ds.get("name")) - - return adapter - - async def get_default_adapter(self, tenant_id: UUID) -> DatabaseAdapter: - """Get the default data source adapter for a tenant. - - Args: - tenant_id: The tenant's UUID. - - Returns: - A connected DatabaseAdapter for the tenant's default data source. - - Raises: - ValueError: If no data sources found for tenant. - """ - # Get tenant's data sources and use the first active one - data_sources = await self.app_db.list_data_sources(tenant_id) - active_sources = [ds for ds in data_sources if ds.get("is_active", True)] - - if not active_sources: - raise ValueError(f"No active data sources found for tenant {tenant_id}") - - ds = active_sources[0] - ds_id = ds["id"] if isinstance(ds["id"], UUID) else UUID(str(ds["id"])) - return await self.get_adapter(tenant_id, ds_id) - - async def _create_adapter(self, ds: dict[str, Any]) -> DatabaseAdapter: - """Create a database adapter from data source config. - - Args: - ds: Data source record from app database. - - Returns: - Unconnected DatabaseAdapter instance. - - Raises: - ValueError: If data source type not supported. - RuntimeError: If decryption fails. - """ - ds_type = ds["type"] - config = self._decrypt_config(ds["connection_config_encrypted"]) - - if ds_type == "duckdb": - return DuckDBAdapter( - path=config["path"], - read_only=config.get("read_only", True), - ) - elif ds_type == "postgres": - return PostgresAdapter( - host=config["host"], - port=config.get("port", 5432), - database=config["database"], - user=config["user"], - password=config["password"], - schema=config.get("schema", "public"), - ) - else: - raise ValueError(f"Unsupported data source type: {ds_type}") - - def _decrypt_config(self, encrypted_config: str) -> dict[str, Any]: - """Decrypt connection configuration. - - Args: - encrypted_config: Fernet-encrypted JSON config string. - - Returns: - Decrypted configuration dictionary. - - Raises: - RuntimeError: If decryption fails or no encryption key. 
- """ - if not self._encryption_key: - raise RuntimeError("ENCRYPTION_KEY not set") - - try: - f = Fernet(self._encryption_key.encode()) - decrypted = f.decrypt(encrypted_config.encode()).decode() - result: dict[str, Any] = json.loads(decrypted) - return result - except Exception as e: - raise RuntimeError(f"Failed to decrypt connection config: {e}") from e - - async def close_all(self) -> None: - """Close all cached adapters. - - Should be called during application shutdown. - """ - for cache_key, adapter in self._adapters.items(): - try: - await adapter.close() - logger.debug("adapter_closed", cache_key=cache_key) - except Exception as e: - logger.warning("adapter_close_failed", cache_key=cache_key, error=str(e)) - - self._adapters.clear() diff --git a/backend/src/dataing/adapters/context/engine.py b/backend/src/dataing/adapters/context/engine.py index 4cb3af4d0..499ccf353 100644 --- a/backend/src/dataing/adapters/context/engine.py +++ b/backend/src/dataing/adapters/context/engine.py @@ -3,6 +3,8 @@ This module orchestrates the various context modules to gather all information needed for an investigation. It's a thin coordinator that delegates to specialized modules. + +Uses the unified SchemaResponse from the datasource layer. """ from __future__ import annotations @@ -12,7 +14,7 @@ import structlog -from dataing.core.domain_types import InvestigationContext +from dataing.adapters.datasource.types import SchemaResponse from dataing.core.exceptions import SchemaDiscoveryError from .anomaly_context import AnomalyConfirmation, AnomalyContext @@ -20,14 +22,27 @@ from .schema_context import SchemaContextBuilder if TYPE_CHECKING: - from dataing.core.domain_types import AnomalyAlert - from dataing.core.interfaces import DatabaseAdapter + from dataing.adapters.datasource.base import BaseAdapter + from dataing.core.domain_types import AnomalyAlert, LineageContext from .lineage import OpenLineageClient logger = structlog.get_logger() +@dataclass +class InvestigationContext: + """Context gathered for an investigation. + + Attributes: + schema: Unified schema from the data source. + lineage: Optional lineage context. + """ + + schema: SchemaResponse + lineage: LineageContext | None = None + + @dataclass class EnrichedContext: """Extended context with anomaly confirmation and correlations. @@ -54,9 +69,6 @@ class ContextEngine: - SchemaContextBuilder: Schema discovery and formatting - AnomalyContext: Anomaly confirmation - CorrelationContext: Cross-table pattern detection - - It maintains backward compatibility with the existing - DefaultContextEngine interface while adding new capabilities. """ def __init__( @@ -79,19 +91,22 @@ def __init__( self.correlation_ctx = correlation_ctx or CorrelationContext() self.lineage_client = lineage_client + def _count_tables(self, schema: SchemaResponse) -> int: + """Count total tables in a schema response.""" + return sum( + len(db_schema.tables) for catalog in schema.catalogs for db_schema in catalog.schemas + ) + async def gather( self, alert: AnomalyAlert, - adapter: DatabaseAdapter, + adapter: BaseAdapter, ) -> InvestigationContext: """Gather schema and lineage context. - This method maintains backward compatibility with the - existing DefaultContextEngine.gather() interface. - Args: alert: The anomaly alert being investigated. - adapter: Connected database adapter. + adapter: Connected data source adapter. Returns: InvestigationContext with schema and optional lineage. 
@@ -109,7 +124,8 @@ async def gather( log.error("schema_discovery_failed", error=str(e)) raise SchemaDiscoveryError(f"Failed to discover schema: {e}") from e - if not schema.tables: + table_count = self._count_tables(schema) + if table_count == 0: log.error("no_tables_discovered") raise SchemaDiscoveryError( "No tables discovered. " @@ -117,7 +133,7 @@ async def gather( "Investigation cannot proceed without schema." ) - log.info("schema_discovered", tables_count=len(schema.tables)) + log.info("schema_discovered", tables_count=table_count) # 2. Lineage Discovery (OPTIONAL) lineage = None @@ -138,7 +154,7 @@ async def gather( async def gather_enriched( self, alert: AnomalyAlert, - adapter: DatabaseAdapter, + adapter: BaseAdapter, ) -> EnrichedContext: """Gather enriched context with anomaly confirmation. @@ -147,7 +163,7 @@ async def gather_enriched( Args: alert: The anomaly alert being investigated. - adapter: Connected database adapter. + adapter: Connected data source adapter. Returns: EnrichedContext with all available context. diff --git a/backend/src/dataing/adapters/context/query_context.py b/backend/src/dataing/adapters/context/query_context.py index df5ad9ba7..a5abb152d 100644 --- a/backend/src/dataing/adapters/context/query_context.py +++ b/backend/src/dataing/adapters/context/query_context.py @@ -11,7 +11,7 @@ import structlog -from dataing.core.domain_types import QueryResult +from dataing.adapters.datasource.types import QueryResult if TYPE_CHECKING: from dataing.core.interfaces import DatabaseAdapter diff --git a/backend/src/dataing/adapters/context/schema_context.py b/backend/src/dataing/adapters/context/schema_context.py index cb9dc5a66..0783cdf0f 100644 --- a/backend/src/dataing/adapters/context/schema_context.py +++ b/backend/src/dataing/adapters/context/schema_context.py @@ -3,6 +3,8 @@ This module handles schema discovery and formatting for the LLM, providing clear table and column information that helps the AI generate accurate SQL queries. + +Updated to use the unified SchemaResponse type from the datasource layer. """ from __future__ import annotations @@ -11,11 +13,10 @@ import structlog -from dataing.core.domain_types import SchemaContext as SchemaContextData -from dataing.core.domain_types import TableSchema +from dataing.adapters.datasource.types import SchemaResponse, Table if TYPE_CHECKING: - from dataing.core.interfaces import DatabaseAdapter + from dataing.adapters.datasource.base import BaseAdapter logger = structlog.get_logger() @@ -28,8 +29,7 @@ class SchemaContextBuilder: 2. Formatting schema information for LLM prompts 3. Filtering tables by pattern when needed - Note: Named SchemaContextBuilder to avoid conflict with - SchemaContext domain type. + Uses the unified SchemaResponse type from the datasource layer. """ def __init__(self, max_tables: int = 20, max_columns: int = 30) -> None: @@ -44,17 +44,17 @@ def __init__(self, max_tables: int = 20, max_columns: int = 30) -> None: async def build( self, - adapter: DatabaseAdapter, + adapter: BaseAdapter, table_filter: str | None = None, - ) -> SchemaContextData: + ) -> SchemaResponse: """Build schema context from a database adapter. Args: - adapter: Connected database adapter. - table_filter: Optional pattern to filter tables. + adapter: Connected data source adapter. + table_filter: Optional pattern to filter tables (not yet used). Returns: - SchemaContextData with discovered tables. + SchemaResponse with discovered catalogs, schemas, and tables. Raises: RuntimeError: If schema discovery fails. 
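# For orientation while reading these hunks: they assume SchemaResponse is a nested
# container (catalogs -> schemas -> tables -> columns) defined in
# dataing/adapters/datasource/types.py, which is not shown in this diff. The shape below
# is a rough reconstruction from the attributes the new code accesses (native_path,
# nullable, data_type.value, ...); the real definitions and names may differ.

from dataclasses import dataclass, field
from enum import Enum


class DataType(Enum):
    # Member names/values are placeholders; only the presence of `.value` is assumed.
    STRING = "string"
    INTEGER = "integer"


@dataclass
class Column:
    name: str
    data_type: DataType
    nullable: bool = True


@dataclass
class Table:
    name: str
    native_path: str  # e.g. "catalog.schema.table"
    columns: list[Column] = field(default_factory=list)


@dataclass
class DatabaseSchema:
    name: str
    tables: list[Table] = field(default_factory=list)


@dataclass
class Catalog:
    name: str
    schemas: list[DatabaseSchema] = field(default_factory=list)


@dataclass
class SchemaResponse:
    catalogs: list[Catalog] = field(default_factory=list)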
@@ -62,26 +62,42 @@ async def build( logger.info("discovering_schema", table_filter=table_filter) try: - schema = await adapter.get_schema(table_filter) - logger.info("schema_discovered", tables_count=len(schema.tables)) + schema = await adapter.get_schema() + table_count = sum( + len(table.columns) + for catalog in schema.catalogs + for db_schema in catalog.schemas + for table in db_schema.tables + ) + logger.info("schema_discovered", table_count=table_count) return schema except Exception as e: logger.error("schema_discovery_failed", error=str(e)) raise RuntimeError(f"Failed to discover schema: {e}") from e - def format_for_llm(self, schema: SchemaContextData) -> str: + def _get_all_tables(self, schema: SchemaResponse) -> list[Table]: + """Extract all tables from the nested schema structure.""" + tables = [] + for catalog in schema.catalogs: + for db_schema in catalog.schemas: + tables.extend(db_schema.tables) + return tables + + def format_for_llm(self, schema: SchemaResponse) -> str: """Format schema as markdown for LLM prompt. Creates a clear, structured representation of the schema that helps the LLM understand available tables and columns. Args: - schema: SchemaContextData to format. + schema: SchemaResponse to format. Returns: Markdown-formatted schema string. """ - if not schema.tables: + tables = self._get_all_tables(schema) + + if not tables: return "No tables available." lines = [ @@ -91,24 +107,24 @@ def format_for_llm(self, schema: SchemaContextData) -> str: "", ] - for table in schema.tables[: self.max_tables]: - lines.append(f"### {table.table_name}") + for table in tables[: self.max_tables]: + lines.append(f"### {table.native_path}") lines.append("") - lines.append("| Column | Type |") - lines.append("|--------|------|") + lines.append("| Column | Type | Nullable |") + lines.append("|--------|------|----------|") for col in table.columns[: self.max_columns]: - col_type = table.column_types.get(col, "unknown") - lines.append(f"| {col} | {col_type} |") + nullable = "Yes" if col.nullable else "No" + lines.append(f"| {col.name} | {col.data_type.value} | {nullable} |") if len(table.columns) > self.max_columns: remaining = len(table.columns) - self.max_columns - lines.append(f"| ... | ({remaining} more columns) |") + lines.append(f"| ... | ({remaining} more columns) | |") lines.append("") - if len(schema.tables) > self.max_tables: - remaining = len(schema.tables) - self.max_tables + if len(tables) > self.max_tables: + remaining = len(tables) - self.max_tables lines.append(f"*({remaining} more tables not shown)*") lines.append("") @@ -117,73 +133,87 @@ def format_for_llm(self, schema: SchemaContextData) -> str: return "\n".join(lines) - def format_compact(self, schema: SchemaContextData) -> str: + def format_compact(self, schema: SchemaResponse) -> str: """Format schema in compact form for smaller context windows. Args: - schema: SchemaContextData to format. + schema: SchemaResponse to format. Returns: Compact schema string. """ - if not schema.tables: + tables = self._get_all_tables(schema) + + if not tables: return "No tables." 
lines = ["Tables:"] - for table in schema.tables[: self.max_tables]: - cols = ", ".join(table.columns[: self.max_columns]) + for table in tables[: self.max_tables]: + col_names = [col.name for col in table.columns[: self.max_columns]] + cols = ", ".join(col_names) if len(table.columns) > self.max_columns: cols += f" (+{len(table.columns) - self.max_columns} more)" - lines.append(f" {table.table_name}: {cols}") + lines.append(f" {table.native_path}: {cols}") return "\n".join(lines) def get_table_info( self, - schema: SchemaContextData, + schema: SchemaResponse, table_name: str, - ) -> TableSchema | None: + ) -> Table | None: """Get detailed info for a specific table. Args: - schema: SchemaContextData to search. - table_name: Name of table to find. + schema: SchemaResponse to search. + table_name: Name of table to find (can be qualified or unqualified). Returns: - TableSchema if found, None otherwise. + Table if found, None otherwise. """ - return schema.get_table(table_name) + tables = self._get_all_tables(schema) + table_name_lower = table_name.lower() + + for table in tables: + # Match by native_path or just name + if ( + table.native_path.lower() == table_name_lower + or table.name.lower() == table_name_lower + ): + return table + return None def get_related_tables( self, - schema: SchemaContextData, + schema: SchemaResponse, table_name: str, - ) -> list[TableSchema]: + ) -> list[Table]: """Find tables that might be related to the given table. Uses simple heuristics like shared column names to identify potentially related tables. Args: - schema: SchemaContextData to search. + schema: SchemaResponse to search. table_name: Name of the primary table. Returns: - List of potentially related TableSchema objects. + List of potentially related Table objects. """ - target = schema.get_table(table_name) + target = self.get_table_info(schema, table_name) if not target: return [] - target_cols = set(target.columns) + target_cols = {col.name for col in target.columns} related = [] + tables = self._get_all_tables(schema) - for table in schema.tables: - if table.table_name == table_name: + for table in tables: + if table.name == target.name: continue # Check for shared column names (potential join keys) - table_cols = set(table.columns) + table_cols = {col.name for col in table.columns} shared = target_cols & table_cols # Look for common patterns like id, *_id columns diff --git a/backend/src/dataing/adapters/datasource/__init__.py b/backend/src/dataing/adapters/datasource/__init__.py new file mode 100644 index 000000000..69f87f4f7 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/__init__.py @@ -0,0 +1,128 @@ +"""Unified data source adapter layer. + +This module provides a pluggable adapter architecture that normalizes +heterogeneous data sources (SQL databases, NoSQL stores, APIs, file systems) +into a unified interface. + +Core Principle: All sources become "tables with columns" from the frontend's perspective. 
+""" + +from dataing.adapters.datasource.api.hubspot import HubSpotAdapter + +# API adapters +from dataing.adapters.datasource.api.salesforce import SalesforceAdapter +from dataing.adapters.datasource.api.stripe import StripeAdapter +from dataing.adapters.datasource.base import BaseAdapter +from dataing.adapters.datasource.document.cassandra import CassandraAdapter +from dataing.adapters.datasource.document.dynamodb import DynamoDBAdapter + +# Document/NoSQL adapters +from dataing.adapters.datasource.document.mongodb import MongoDBAdapter +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AdapterError, + AuthenticationFailedError, + ConnectionFailedError, + ConnectionTimeoutError, + QuerySyntaxError, + QueryTimeoutError, + RateLimitedError, + SchemaFetchFailedError, + TableNotFoundError, +) +from dataing.adapters.datasource.filesystem.gcs import GCSAdapter +from dataing.adapters.datasource.filesystem.hdfs import HDFSAdapter +from dataing.adapters.datasource.filesystem.local import LocalFileAdapter + +# Filesystem adapters +from dataing.adapters.datasource.filesystem.s3 import S3Adapter +from dataing.adapters.datasource.registry import AdapterRegistry, get_registry +from dataing.adapters.datasource.sql.bigquery import BigQueryAdapter +from dataing.adapters.datasource.sql.duckdb import DuckDBAdapter +from dataing.adapters.datasource.sql.mysql import MySQLAdapter + +# Import adapters to trigger registration via decorators +# SQL adapters +from dataing.adapters.datasource.sql.postgres import PostgresAdapter +from dataing.adapters.datasource.sql.redshift import RedshiftAdapter +from dataing.adapters.datasource.sql.snowflake import SnowflakeAdapter +from dataing.adapters.datasource.sql.trino import TrinoAdapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + Catalog, + Column, + ColumnStats, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + NormalizedType, + QueryResult, + Schema, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, + SourceTypeDefinition, + Table, +) + +__all__ = [ + # Base classes + "BaseAdapter", + "AdapterRegistry", + "get_registry", + # SQL Adapters + "PostgresAdapter", + "DuckDBAdapter", + "MySQLAdapter", + "TrinoAdapter", + "SnowflakeAdapter", + "BigQueryAdapter", + "RedshiftAdapter", + # Document/NoSQL Adapters + "MongoDBAdapter", + "DynamoDBAdapter", + "CassandraAdapter", + # API Adapters + "SalesforceAdapter", + "HubSpotAdapter", + "StripeAdapter", + # Filesystem Adapters + "S3Adapter", + "GCSAdapter", + "HDFSAdapter", + "LocalFileAdapter", + # Types + "AdapterCapabilities", + "Catalog", + "Column", + "ColumnStats", + "ConfigField", + "ConfigSchema", + "ConnectionTestResult", + "FieldGroup", + "NormalizedType", + "QueryResult", + "Schema", + "SchemaFilter", + "SchemaResponse", + "SourceCategory", + "SourceType", + "SourceTypeDefinition", + "Table", + # Functions + "normalize_type", + # Errors + "AdapterError", + "ConnectionFailedError", + "ConnectionTimeoutError", + "AuthenticationFailedError", + "AccessDeniedError", + "QuerySyntaxError", + "QueryTimeoutError", + "RateLimitedError", + "SchemaFetchFailedError", + "TableNotFoundError", +] diff --git a/backend/src/dataing/adapters/datasource/api/__init__.py b/backend/src/dataing/adapters/datasource/api/__init__.py new file mode 100644 index 000000000..ad3d20a03 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/api/__init__.py @@ -0,0 +1,11 @@ +"""API adapters. 
+ +This module provides adapters for API-based data sources: +- Salesforce +- HubSpot +- Stripe +""" + +from dataing.adapters.datasource.api.base import APIAdapter + +__all__ = ["APIAdapter"] diff --git a/backend/src/dataing/adapters/datasource/api/base.py b/backend/src/dataing/adapters/datasource/api/base.py new file mode 100644 index 000000000..70667a76d --- /dev/null +++ b/backend/src/dataing/adapters/datasource/api/base.py @@ -0,0 +1,117 @@ +"""Base class for API adapters. + +This module provides the abstract base class for all API-based +data source adapters. +""" + +from __future__ import annotations + +from abc import abstractmethod + +from dataing.adapters.datasource.base import BaseAdapter +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + QueryLanguage, + QueryResult, + Table, +) + + +class APIAdapter(BaseAdapter): + """Abstract base class for API adapters. + + Extends BaseAdapter with API-specific query capabilities. + """ + + @property + def capabilities(self) -> AdapterCapabilities: + """API adapters typically have rate limits.""" + return AdapterCapabilities( + supports_sql=False, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=False, + supports_preview=True, + supports_write=False, + rate_limit_requests_per_minute=100, + max_concurrent_queries=1, + query_language=QueryLanguage.SCAN_ONLY, + ) + + @abstractmethod + async def query_object( + self, + object_name: str, + query: str | None = None, + limit: int = 100, + ) -> QueryResult: + """Query an API object/entity. + + Args: + object_name: Name of the object to query. + query: Optional query string (e.g., SOQL for Salesforce). + limit: Maximum records to return. + + Returns: + QueryResult with records. + """ + ... + + @abstractmethod + async def describe_object( + self, + object_name: str, + ) -> Table: + """Get the schema of an API object. + + Args: + object_name: Name of the object. + + Returns: + Table with field definitions. + """ + ... + + @abstractmethod + async def list_objects(self) -> list[str]: + """List all available objects in the API. + + Returns: + List of object names. + """ + ... + + async def preview( + self, + object_name: str, + n: int = 100, + ) -> QueryResult: + """Get a preview of records from an object. + + Args: + object_name: Object name. + n: Number of records to preview. + + Returns: + QueryResult with preview records. + """ + return await self.query_object(object_name, limit=n) + + async def sample( + self, + object_name: str, + n: int = 100, + ) -> QueryResult: + """Get a sample of records from an object. + + Most APIs don't support true random sampling, so this + defaults to returning the first N records. + + Args: + object_name: Object name. + n: Number of records to sample. + + Returns: + QueryResult with sampled records. + """ + return await self.query_object(object_name, limit=n) diff --git a/backend/src/dataing/adapters/datasource/api/hubspot.py b/backend/src/dataing/adapters/datasource/api/hubspot.py new file mode 100644 index 000000000..ff068056a --- /dev/null +++ b/backend/src/dataing/adapters/datasource/api/hubspot.py @@ -0,0 +1,404 @@ +"""HubSpot API adapter implementation. + +This module provides a HubSpot adapter that implements the unified +data source interface with schema discovery and data querying via REST API. 
+""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.api.base import APIAdapter +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + RateLimitedError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + NormalizedType, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +HUBSPOT_TYPE_MAP = { + "string": NormalizedType.STRING, + "number": NormalizedType.DECIMAL, + "date": NormalizedType.DATE, + "datetime": NormalizedType.TIMESTAMP, + "enumeration": NormalizedType.STRING, + "bool": NormalizedType.BOOLEAN, + "phone_number": NormalizedType.STRING, + "email": NormalizedType.STRING, +} + +HUBSPOT_OBJECTS = [ + "contacts", + "companies", + "deals", + "tickets", + "products", + "line_items", + "quotes", + "calls", + "emails", + "meetings", + "notes", + "tasks", +] + +HUBSPOT_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="auth", label="Authentication", collapsed_by_default=False), + FieldGroup(id="advanced", label="Advanced", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="access_token", + label="Private App Access Token", + type="secret", + required=True, + group="auth", + description="HubSpot Private App access token", + help_url="https://developers.hubspot.com/docs/api/private-apps", + ), + ConfigField( + name="objects", + label="Objects to Include", + type="string", + required=False, + group="advanced", + placeholder="contacts,companies,deals", + description="Comma-separated list of objects (default: all standard objects)", + ), + ], +) + +HUBSPOT_CAPABILITIES = AdapterCapabilities( + supports_sql=False, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=False, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SCAN_ONLY, + rate_limit_requests_per_minute=100, + max_concurrent_queries=1, +) + + +@register_adapter( + source_type=SourceType.HUBSPOT, + display_name="HubSpot", + category=SourceCategory.API, + icon="hubspot", + description="Connect to HubSpot CRM data via REST API", + capabilities=HUBSPOT_CAPABILITIES, + config_schema=HUBSPOT_CONFIG_SCHEMA, +) +class HubSpotAdapter(APIAdapter): + """HubSpot API adapter. + + Provides schema discovery and data querying for HubSpot CRM objects. + Uses the HubSpot REST API with Private App authentication. + """ + + BASE_URL = "https://api.hubapi.com" + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize HubSpot adapter. 
+ + Args: + config: Configuration dictionary with: + - access_token: Private App access token + - objects: Comma-separated list of objects to include (optional) + """ + super().__init__(config) + self._session: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.HUBSPOT + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return HUBSPOT_CAPABILITIES + + def _get_headers(self) -> dict[str, str]: + """Get request headers with authentication.""" + return { + "Authorization": f"Bearer {self._config.get('access_token', '')}", + "Content-Type": "application/json", + } + + async def connect(self) -> None: + """Establish connection to HubSpot API.""" + try: + import httpx + except ImportError as e: + raise ConnectionFailedError( + message="httpx is not installed. Install with: pip install httpx", + details={"error": str(e)}, + ) from e + + try: + self._session = httpx.AsyncClient( + base_url=self.BASE_URL, + headers=self._get_headers(), + timeout=30.0, + ) + self._connected = True + except Exception as e: + raise ConnectionFailedError( + message=f"Failed to initialize HubSpot client: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close HubSpot connection.""" + if self._session: + await self._session.aclose() + self._session = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test HubSpot API connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + response = await self._session.get("/crm/v3/objects/contacts?limit=1") + latency_ms = int((time.time() - start_time) * 1000) + + if response.status_code == 200: + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version="HubSpot API v3", + message="Connection successful", + ) + elif response.status_code == 401: + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message="Invalid access token", + error_code="AUTHENTICATION_FAILED", + ) + else: + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=f"API error: {response.status_code}", + error_code="CONNECTION_FAILED", + ) + + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def list_objects(self) -> list[str]: + """List available HubSpot objects.""" + objects_config = self._config.get("objects", "") + if objects_config: + return [o.strip() for o in objects_config.split(",")] + return HUBSPOT_OBJECTS + + async def describe_object(self, object_name: str) -> dict[str, Any]: + """Get schema for a HubSpot object.""" + if not self._connected or not self._session: + raise ConnectionFailedError(message="Not connected to HubSpot") + + try: + response = await self._session.get(f"/crm/v3/properties/{object_name}") + + if response.status_code == 401: + raise AuthenticationFailedError(message="Invalid HubSpot access token") + elif response.status_code == 403: + raise AccessDeniedError(message=f"Access denied to {object_name} properties") + elif response.status_code == 429: + raise RateLimitedError( + message="HubSpot API rate limit exceeded", + retry_after_seconds=10, + ) + + response.raise_for_status() + data = response.json() + + columns = [] + for prop in 
data.get("results", []): + prop_type = prop.get("type", "string") + columns.append( + { + "name": prop.get("name"), + "data_type": HUBSPOT_TYPE_MAP.get(prop_type, NormalizedType.STRING), + "native_type": prop_type, + "nullable": True, + "is_primary_key": prop.get("name") == "hs_object_id", + "is_partition_key": False, + "description": prop.get("label"), + } + ) + + return { + "name": object_name, + "table_type": "object", + "native_type": "HUBSPOT_OBJECT", + "native_path": object_name, + "columns": columns, + } + + except (AuthenticationFailedError, AccessDeniedError, RateLimitedError): + raise + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to describe {object_name}: {str(e)}", + details={"error": str(e)}, + ) from e + + async def query_object( + self, + object_name: str, + limit: int = 100, + properties: list[str] | None = None, + ) -> QueryResult: + """Query records from a HubSpot object.""" + if not self._connected or not self._session: + raise ConnectionFailedError(message="Not connected to HubSpot") + + start_time = time.time() + try: + params: dict[str, Any] = {"limit": min(limit, 100)} + if properties: + params["properties"] = ",".join(properties) + + response = await self._session.get( + f"/crm/v3/objects/{object_name}", + params=params, + ) + + if response.status_code == 401: + raise AuthenticationFailedError(message="Invalid HubSpot access token") + elif response.status_code == 403: + raise AccessDeniedError(message=f"Access denied to {object_name}") + elif response.status_code == 429: + raise RateLimitedError( + message="HubSpot API rate limit exceeded", + retry_after_seconds=10, + ) + + response.raise_for_status() + data = response.json() + + execution_time_ms = int((time.time() - start_time) * 1000) + results = data.get("results", []) + + if not results: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + all_keys = set() + rows = [] + for record in results: + props = record.get("properties", {}) + props["id"] = record.get("id") + all_keys.update(props.keys()) + rows.append(props) + + columns = [{"name": key, "data_type": "string"} for key in sorted(all_keys)] + + return QueryResult( + columns=columns, + rows=rows, + row_count=len(rows), + truncated=data.get("paging") is not None, + execution_time_ms=execution_time_ms, + ) + + except (AuthenticationFailedError, AccessDeniedError, RateLimitedError): + raise + except Exception as e: + raise ConnectionFailedError( + message=f"Failed to query {object_name}: {str(e)}", + details={"error": str(e)}, + ) from e + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get HubSpot schema.""" + if not self._connected or not self._session: + raise ConnectionFailedError(message="Not connected to HubSpot") + + try: + objects = await self.list_objects() + + if filter and filter.table_pattern: + objects = [o for o in objects if filter.table_pattern in o] + + if filter and filter.max_tables: + objects = objects[: filter.max_tables] + + tables = [] + for obj_name in objects: + try: + table_def = await self.describe_object(obj_name) + tables.append(table_def) + except Exception: + tables.append( + { + "name": obj_name, + "table_type": "object", + "native_type": "HUBSPOT_OBJECT", + "native_path": obj_name, + "columns": [], + } + ) + + catalogs = [ + { + "name": "default", + "schemas": [ + { + "name": "crm", + "tables": tables, + } + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or 
"hubspot", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch HubSpot schema: {str(e)}", + details={"error": str(e)}, + ) from e diff --git a/backend/src/dataing/adapters/datasource/api/salesforce.py b/backend/src/dataing/adapters/datasource/api/salesforce.py new file mode 100644 index 000000000..572e4abe4 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/api/salesforce.py @@ -0,0 +1,453 @@ +"""Salesforce adapter implementation. + +This module provides a Salesforce adapter that implements the unified +data source interface with SOQL querying and object discovery. +""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.api.base import APIAdapter +from dataing.adapters.datasource.errors import ( + AuthenticationFailedError, + ConnectionFailedError, + QuerySyntaxError, + RateLimitedError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + Column, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, + Table, +) + +SALESFORCE_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="connection", label="Connection", collapsed_by_default=False), + FieldGroup(id="oauth", label="OAuth Credentials", collapsed_by_default=False), + ], + fields=[ + ConfigField( + name="instance_url", + label="Instance URL", + type="string", + required=True, + group="connection", + placeholder="https://yourcompany.salesforce.com", + pattern="^https://.*\\.salesforce\\.com$", + ), + ConfigField( + name="auth_type", + label="Authentication Type", + type="enum", + required=True, + group="oauth", + default_value="password", + options=[ + {"value": "oauth", "label": "OAuth 2.0 (Recommended)"}, + {"value": "password", "label": "Username/Password"}, + ], + ), + ConfigField( + name="client_id", + label="Consumer Key", + type="string", + required=False, + group="oauth", + show_if={"field": "auth_type", "value": "oauth"}, + ), + ConfigField( + name="client_secret", + label="Consumer Secret", + type="secret", + required=False, + group="oauth", + show_if={"field": "auth_type", "value": "oauth"}, + ), + ConfigField( + name="refresh_token", + label="Refresh Token", + type="secret", + required=False, + group="oauth", + show_if={"field": "auth_type", "value": "oauth"}, + ), + ConfigField( + name="username", + label="Username", + type="string", + required=False, + group="oauth", + show_if={"field": "auth_type", "value": "password"}, + ), + ConfigField( + name="password", + label="Password", + type="secret", + required=False, + group="oauth", + show_if={"field": "auth_type", "value": "password"}, + ), + ConfigField( + name="security_token", + label="Security Token", + type="secret", + required=False, + group="oauth", + show_if={"field": "auth_type", "value": "password"}, + ), + ], +) + +SALESFORCE_CAPABILITIES = AdapterCapabilities( + supports_sql=False, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=False, + supports_preview=True, + supports_write=False, + rate_limit_requests_per_minute=100, + max_concurrent_queries=1, + query_language=QueryLanguage.SOQL, +) + + +@register_adapter( + source_type=SourceType.SALESFORCE, + display_name="Salesforce", + 
category=SourceCategory.API, + icon="salesforce", + description="Connect to Salesforce for CRM data querying via SOQL", + capabilities=SALESFORCE_CAPABILITIES, + config_schema=SALESFORCE_CONFIG_SCHEMA, +) +class SalesforceAdapter(APIAdapter): + """Salesforce API adapter. + + Provides SOQL querying and object schema discovery for Salesforce. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize Salesforce adapter. + + Args: + config: Configuration dictionary with: + - instance_url: Salesforce instance URL + - auth_type: 'oauth' or 'password' + - For OAuth: client_id, client_secret, refresh_token + - For password: username, password, security_token + """ + super().__init__(config) + self._sf: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.SALESFORCE + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return SALESFORCE_CAPABILITIES + + async def connect(self) -> None: + """Establish connection to Salesforce.""" + try: + from simple_salesforce import Salesforce + except ImportError as e: + raise ConnectionFailedError( + message="simple-salesforce not installed. Install: pip install simple-salesforce", + details={"error": str(e)}, + ) from e + + try: + auth_type = self._config.get("auth_type", "password") + instance_url = self._config.get("instance_url", "") + + # Extract domain from instance URL + domain = instance_url.replace("https://", "").replace(".salesforce.com", "") + + if auth_type == "oauth": + client_id = self._config.get("client_id", "") + client_secret = self._config.get("client_secret", "") + refresh_token = self._config.get("refresh_token", "") + + self._sf = Salesforce( + instance_url=instance_url, + consumer_key=client_id, + consumer_secret=client_secret, + refresh_token=refresh_token, + ) + else: + username = self._config.get("username", "") + password = self._config.get("password", "") + security_token = self._config.get("security_token", "") + + self._sf = Salesforce( + username=username, + password=password, + security_token=security_token, + domain=domain if "sandbox" in domain else None, + ) + + self._connected = True + except Exception as e: + error_str = str(e).lower() + if "invalid_grant" in error_str or "authentication" in error_str: + raise AuthenticationFailedError( + message="Salesforce authentication failed", + details={"error": str(e)}, + ) from e + else: + raise ConnectionFailedError( + message=f"Failed to connect to Salesforce: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close Salesforce connection.""" + self._sf = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test Salesforce connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + # Query organization info + org_info = self._sf.query("SELECT Id, Name FROM Organization LIMIT 1") + org_name = org_info.get("records", [{}])[0].get("Name", "Unknown") + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version=f"Salesforce ({org_name})", + message="Connection successful", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def 
query_object( + self, + object_name: str, + query: str | None = None, + limit: int = 100, + ) -> QueryResult: + """Query a Salesforce object using SOQL.""" + if not self._connected or not self._sf: + raise ConnectionFailedError(message="Not connected to Salesforce") + + start_time = time.time() + try: + if query: + soql = query + else: + # Build default query + desc = self._sf.__getattr__(object_name).describe() + fields = [f["name"] for f in desc["fields"][:50]] # Limit fields + soql = f"SELECT {', '.join(fields)} FROM {object_name} LIMIT {limit}" + + result = self._sf.query(soql) + records = result.get("records", []) + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not records: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + # Get columns from first record + columns = [] + if records: + first = records[0] + for key in first.keys(): + if key != "attributes": + columns.append({"name": key, "data_type": "string"}) + + # Convert records to rows + row_dicts = [] + for record in records: + row = {} + for key, value in record.items(): + if key != "attributes": + row[key] = self._serialize_value(value) + row_dicts.append(row) + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=result.get("done", True) is False, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "malformed query" in error_str or "syntax" in error_str: + raise QuerySyntaxError( + message=str(e), + query=query[:200] if query else object_name, + ) from e + elif "request_limit_exceeded" in error_str: + raise RateLimitedError( + message="Salesforce API rate limit exceeded", + retry_after_seconds=60, + ) from e + else: + raise + + def _serialize_value(self, value: Any) -> Any: + """Convert Salesforce values to JSON-serializable format.""" + if isinstance(value, dict): + # Nested object reference + if "attributes" in value: + return {k: self._serialize_value(v) for k, v in value.items() if k != "attributes"} + return value + return value + + async def describe_object( + self, + object_name: str, + ) -> Table: + """Get the schema of a Salesforce object.""" + if not self._connected or not self._sf: + raise ConnectionFailedError(message="Not connected to Salesforce") + + desc = self._sf.__getattr__(object_name).describe() + + columns = [] + for field in desc["fields"]: + normalized_type = normalize_type(field["type"], SourceType.SALESFORCE) + columns.append( + Column( + name=field["name"], + data_type=normalized_type, + native_type=field["type"], + nullable=field.get("nillable", True), + is_primary_key=field.get("name") == "Id", + is_partition_key=False, + description=field.get("label"), + ) + ) + + return Table( + name=object_name, + table_type="object", + native_type="SALESFORCE_OBJECT", + native_path=object_name, + columns=columns, + description=desc.get("label"), + ) + + async def list_objects(self) -> list[str]: + """List all Salesforce objects.""" + if not self._connected or not self._sf: + raise ConnectionFailedError(message="Not connected to Salesforce") + + sobjects = self._sf.describe()["sobjects"] + return [obj["name"] for obj in sobjects if obj.get("queryable", False)] + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get Salesforce schema (queryable objects).""" + if not self._connected or not self._sf: + raise ConnectionFailedError(message="Not connected to Salesforce") + + try: + # List all 
objects + object_names = await self.list_objects() + + # Apply filter if provided + if filter and filter.table_pattern: + import fnmatch + + pattern = filter.table_pattern.replace("%", "*") + object_names = [o for o in object_names if fnmatch.fnmatch(o, pattern)] + + # Limit objects + max_tables = filter.max_tables if filter else 100 + object_names = object_names[:max_tables] + + # Get schema for each object + tables = [] + for obj_name in object_names: + try: + table = await self.describe_object(obj_name) + tables.append( + { + "name": table.name, + "table_type": table.table_type, + "native_type": table.native_type, + "native_path": table.native_path, + "columns": [ + { + "name": col.name, + "data_type": col.data_type, + "native_type": col.native_type, + "nullable": col.nullable, + "is_primary_key": col.is_primary_key, + "is_partition_key": col.is_partition_key, + "description": col.description, + } + for col in table.columns + ], + "description": table.description, + } + ) + except Exception: + # Skip objects we can't describe + continue + + # Build catalog structure + catalogs = [ + { + "name": "default", + "schemas": [ + { + "name": "salesforce", + "tables": tables, + } + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "salesforce", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch Salesforce schema: {str(e)}", + details={"error": str(e)}, + ) from e diff --git a/backend/src/dataing/adapters/datasource/api/stripe.py b/backend/src/dataing/adapters/datasource/api/stripe.py new file mode 100644 index 000000000..872107624 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/api/stripe.py @@ -0,0 +1,506 @@ +"""Stripe API adapter implementation. + +This module provides a Stripe adapter that implements the unified +data source interface with schema discovery and data querying via REST API. 
+""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.api.base import APIAdapter +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + RateLimitedError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + NormalizedType, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +STRIPE_OBJECTS: dict[str, dict[str, Any]] = { + "customers": { + "endpoint": "/v1/customers", + "columns": [ + {"name": "id", "type": NormalizedType.STRING, "pk": True}, + {"name": "email", "type": NormalizedType.STRING}, + {"name": "name", "type": NormalizedType.STRING}, + {"name": "phone", "type": NormalizedType.STRING}, + {"name": "description", "type": NormalizedType.STRING}, + {"name": "created", "type": NormalizedType.TIMESTAMP}, + {"name": "currency", "type": NormalizedType.STRING}, + {"name": "default_source", "type": NormalizedType.STRING}, + {"name": "delinquent", "type": NormalizedType.BOOLEAN}, + {"name": "balance", "type": NormalizedType.INTEGER}, + {"name": "livemode", "type": NormalizedType.BOOLEAN}, + {"name": "metadata", "type": NormalizedType.JSON}, + ], + }, + "charges": { + "endpoint": "/v1/charges", + "columns": [ + {"name": "id", "type": NormalizedType.STRING, "pk": True}, + {"name": "amount", "type": NormalizedType.INTEGER}, + {"name": "amount_captured", "type": NormalizedType.INTEGER}, + {"name": "amount_refunded", "type": NormalizedType.INTEGER}, + {"name": "currency", "type": NormalizedType.STRING}, + {"name": "customer", "type": NormalizedType.STRING}, + {"name": "description", "type": NormalizedType.STRING}, + {"name": "status", "type": NormalizedType.STRING}, + {"name": "created", "type": NormalizedType.TIMESTAMP}, + {"name": "paid", "type": NormalizedType.BOOLEAN}, + {"name": "refunded", "type": NormalizedType.BOOLEAN}, + {"name": "livemode", "type": NormalizedType.BOOLEAN}, + {"name": "metadata", "type": NormalizedType.JSON}, + ], + }, + "invoices": { + "endpoint": "/v1/invoices", + "columns": [ + {"name": "id", "type": NormalizedType.STRING, "pk": True}, + {"name": "customer", "type": NormalizedType.STRING}, + {"name": "subscription", "type": NormalizedType.STRING}, + {"name": "status", "type": NormalizedType.STRING}, + {"name": "amount_due", "type": NormalizedType.INTEGER}, + {"name": "amount_paid", "type": NormalizedType.INTEGER}, + {"name": "amount_remaining", "type": NormalizedType.INTEGER}, + {"name": "currency", "type": NormalizedType.STRING}, + {"name": "created", "type": NormalizedType.TIMESTAMP}, + {"name": "due_date", "type": NormalizedType.TIMESTAMP}, + {"name": "paid", "type": NormalizedType.BOOLEAN}, + {"name": "livemode", "type": NormalizedType.BOOLEAN}, + {"name": "metadata", "type": NormalizedType.JSON}, + ], + }, + "subscriptions": { + "endpoint": "/v1/subscriptions", + "columns": [ + {"name": "id", "type": NormalizedType.STRING, "pk": True}, + {"name": "customer", "type": NormalizedType.STRING}, + {"name": "status", "type": NormalizedType.STRING}, + {"name": "current_period_start", "type": NormalizedType.TIMESTAMP}, + {"name": "current_period_end", "type": NormalizedType.TIMESTAMP}, + {"name": "cancel_at_period_end", "type": NormalizedType.BOOLEAN}, + {"name": "canceled_at", "type": 
NormalizedType.TIMESTAMP}, + {"name": "created", "type": NormalizedType.TIMESTAMP}, + {"name": "livemode", "type": NormalizedType.BOOLEAN}, + {"name": "metadata", "type": NormalizedType.JSON}, + ], + }, + "products": { + "endpoint": "/v1/products", + "columns": [ + {"name": "id", "type": NormalizedType.STRING, "pk": True}, + {"name": "name", "type": NormalizedType.STRING}, + {"name": "description", "type": NormalizedType.STRING}, + {"name": "active", "type": NormalizedType.BOOLEAN}, + {"name": "created", "type": NormalizedType.TIMESTAMP}, + {"name": "updated", "type": NormalizedType.TIMESTAMP}, + {"name": "livemode", "type": NormalizedType.BOOLEAN}, + {"name": "metadata", "type": NormalizedType.JSON}, + ], + }, + "prices": { + "endpoint": "/v1/prices", + "columns": [ + {"name": "id", "type": NormalizedType.STRING, "pk": True}, + {"name": "product", "type": NormalizedType.STRING}, + {"name": "unit_amount", "type": NormalizedType.INTEGER}, + {"name": "currency", "type": NormalizedType.STRING}, + {"name": "type", "type": NormalizedType.STRING}, + {"name": "recurring", "type": NormalizedType.JSON}, + {"name": "active", "type": NormalizedType.BOOLEAN}, + {"name": "created", "type": NormalizedType.TIMESTAMP}, + {"name": "livemode", "type": NormalizedType.BOOLEAN}, + {"name": "metadata", "type": NormalizedType.JSON}, + ], + }, + "payment_intents": { + "endpoint": "/v1/payment_intents", + "columns": [ + {"name": "id", "type": NormalizedType.STRING, "pk": True}, + {"name": "amount", "type": NormalizedType.INTEGER}, + {"name": "amount_received", "type": NormalizedType.INTEGER}, + {"name": "currency", "type": NormalizedType.STRING}, + {"name": "customer", "type": NormalizedType.STRING}, + {"name": "status", "type": NormalizedType.STRING}, + {"name": "created", "type": NormalizedType.TIMESTAMP}, + {"name": "livemode", "type": NormalizedType.BOOLEAN}, + {"name": "metadata", "type": NormalizedType.JSON}, + ], + }, + "refunds": { + "endpoint": "/v1/refunds", + "columns": [ + {"name": "id", "type": NormalizedType.STRING, "pk": True}, + {"name": "amount", "type": NormalizedType.INTEGER}, + {"name": "charge", "type": NormalizedType.STRING}, + {"name": "currency", "type": NormalizedType.STRING}, + {"name": "status", "type": NormalizedType.STRING}, + {"name": "reason", "type": NormalizedType.STRING}, + {"name": "created", "type": NormalizedType.TIMESTAMP}, + {"name": "metadata", "type": NormalizedType.JSON}, + ], + }, + "balance_transactions": { + "endpoint": "/v1/balance_transactions", + "columns": [ + {"name": "id", "type": NormalizedType.STRING, "pk": True}, + {"name": "amount", "type": NormalizedType.INTEGER}, + {"name": "currency", "type": NormalizedType.STRING}, + {"name": "fee", "type": NormalizedType.INTEGER}, + {"name": "net", "type": NormalizedType.INTEGER}, + {"name": "type", "type": NormalizedType.STRING}, + {"name": "status", "type": NormalizedType.STRING}, + {"name": "created", "type": NormalizedType.TIMESTAMP}, + ], + }, + "payouts": { + "endpoint": "/v1/payouts", + "columns": [ + {"name": "id", "type": NormalizedType.STRING, "pk": True}, + {"name": "amount", "type": NormalizedType.INTEGER}, + {"name": "currency", "type": NormalizedType.STRING}, + {"name": "status", "type": NormalizedType.STRING}, + {"name": "arrival_date", "type": NormalizedType.TIMESTAMP}, + {"name": "created", "type": NormalizedType.TIMESTAMP}, + {"name": "livemode", "type": NormalizedType.BOOLEAN}, + {"name": "metadata", "type": NormalizedType.JSON}, + ], + }, +} + +STRIPE_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + 
FieldGroup(id="auth", label="Authentication", collapsed_by_default=False), + FieldGroup(id="advanced", label="Advanced", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="api_key", + label="Secret API Key", + type="secret", + required=True, + group="auth", + placeholder="sk_live_... or sk_test_...", + description="Stripe secret API key (starts with sk_live_ or sk_test_)", + help_url="https://stripe.com/docs/keys", + ), + ConfigField( + name="objects", + label="Objects to Include", + type="string", + required=False, + group="advanced", + placeholder="customers,charges,invoices", + description="Comma-separated list of objects (default: all standard objects)", + ), + ], +) + +STRIPE_CAPABILITIES = AdapterCapabilities( + supports_sql=False, + supports_sampling=True, + supports_row_count=False, + supports_column_stats=False, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SCAN_ONLY, + rate_limit_requests_per_minute=100, + max_concurrent_queries=1, +) + + +@register_adapter( + source_type=SourceType.STRIPE, + display_name="Stripe", + category=SourceCategory.API, + icon="stripe", + description="Connect to Stripe payment data via REST API", + capabilities=STRIPE_CAPABILITIES, + config_schema=STRIPE_CONFIG_SCHEMA, +) +class StripeAdapter(APIAdapter): + """Stripe API adapter. + + Provides schema discovery and data querying for Stripe payment objects. + Uses the Stripe REST API with API key authentication. + """ + + BASE_URL = "https://api.stripe.com" + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize Stripe adapter. + + Args: + config: Configuration dictionary with: + - api_key: Stripe secret API key + - objects: Comma-separated list of objects to include (optional) + """ + super().__init__(config) + self._session: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.STRIPE + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return STRIPE_CAPABILITIES + + def _get_headers(self) -> dict[str, str]: + """Get request headers with authentication.""" + return { + "Authorization": f"Bearer {self._config.get('api_key', '')}", + "Content-Type": "application/x-www-form-urlencoded", + } + + async def connect(self) -> None: + """Establish connection to Stripe API.""" + try: + import httpx + except ImportError as e: + raise ConnectionFailedError( + message="httpx is not installed. 
Install with: pip install httpx", + details={"error": str(e)}, + ) from e + + try: + self._session = httpx.AsyncClient( + base_url=self.BASE_URL, + headers=self._get_headers(), + timeout=30.0, + ) + self._connected = True + except Exception as e: + raise ConnectionFailedError( + message=f"Failed to initialize Stripe client: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close Stripe connection.""" + if self._session: + await self._session.aclose() + self._session = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test Stripe API connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + response = await self._session.get("/v1/balance") + latency_ms = int((time.time() - start_time) * 1000) + + if response.status_code == 200: + api_key = self._config.get("api_key", "") + mode = "Test" if "test" in api_key else "Live" + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version=f"Stripe API ({mode} mode)", + message="Connection successful", + ) + elif response.status_code == 401: + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message="Invalid API key", + error_code="AUTHENTICATION_FAILED", + ) + else: + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=f"API error: {response.status_code}", + error_code="CONNECTION_FAILED", + ) + + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def list_objects(self) -> list[str]: + """List available Stripe objects.""" + objects_config = self._config.get("objects", "") + if objects_config: + return [o.strip() for o in objects_config.split(",")] + return list(STRIPE_OBJECTS.keys()) + + async def describe_object(self, object_name: str) -> dict[str, Any]: + """Get schema for a Stripe object.""" + obj_def = STRIPE_OBJECTS.get(object_name) + if not obj_def: + return { + "name": object_name, + "table_type": "object", + "native_type": "STRIPE_OBJECT", + "native_path": object_name, + "columns": [], + } + + columns = [] + for col in obj_def["columns"]: + columns.append( + { + "name": col["name"], + "data_type": col["type"], + "native_type": col["type"].value, + "nullable": not col.get("pk", False), + "is_primary_key": col.get("pk", False), + "is_partition_key": False, + } + ) + + return { + "name": object_name, + "table_type": "object", + "native_type": "STRIPE_OBJECT", + "native_path": object_name, + "columns": columns, + } + + async def query_object( + self, + object_name: str, + limit: int = 100, + ) -> QueryResult: + """Query records from a Stripe object.""" + if not self._connected or not self._session: + raise ConnectionFailedError(message="Not connected to Stripe") + + obj_def = STRIPE_OBJECTS.get(object_name) + if not obj_def: + raise ConnectionFailedError(message=f"Unknown Stripe object: {object_name}") + + start_time = time.time() + try: + response = await self._session.get( + obj_def["endpoint"], + params={"limit": min(limit, 100)}, + ) + + if response.status_code == 401: + raise AuthenticationFailedError(message="Invalid Stripe API key") + elif response.status_code == 403: + raise AccessDeniedError(message=f"Access denied to {object_name}") + elif response.status_code == 429: + raise RateLimitedError( + message="Stripe API rate limit exceeded", + 
retry_after_seconds=1, + ) + + response.raise_for_status() + data = response.json() + + execution_time_ms = int((time.time() - start_time) * 1000) + results = data.get("data", []) + + if not results: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + col_names = [c["name"] for c in obj_def["columns"]] + columns = [{"name": name, "data_type": "string"} for name in col_names] + + rows = [] + for record in results: + row = {} + for col_name in col_names: + value = record.get(col_name) + if isinstance(value, dict): + row[col_name] = value + else: + row[col_name] = value + rows.append(row) + + return QueryResult( + columns=columns, + rows=rows, + row_count=len(rows), + truncated=data.get("has_more", False), + execution_time_ms=execution_time_ms, + ) + + except (AuthenticationFailedError, AccessDeniedError, RateLimitedError): + raise + except Exception as e: + raise ConnectionFailedError( + message=f"Failed to query {object_name}: {str(e)}", + details={"error": str(e)}, + ) from e + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get Stripe schema.""" + if not self._connected or not self._session: + raise ConnectionFailedError(message="Not connected to Stripe") + + try: + objects = await self.list_objects() + + if filter and filter.table_pattern: + objects = [o for o in objects if filter.table_pattern in o] + + if filter and filter.max_tables: + objects = objects[: filter.max_tables] + + tables = [] + for obj_name in objects: + table_def = await self.describe_object(obj_name) + tables.append(table_def) + + catalogs = [ + { + "name": "default", + "schemas": [ + { + "name": "payments", + "tables": tables, + } + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "stripe", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch Stripe schema: {str(e)}", + details={"error": str(e)}, + ) from e diff --git a/backend/src/dataing/adapters/datasource/base.py b/backend/src/dataing/adapters/datasource/base.py new file mode 100644 index 000000000..7f78b076b --- /dev/null +++ b/backend/src/dataing/adapters/datasource/base.py @@ -0,0 +1,216 @@ +"""Base adapter interface and abstract base classes. + +This module defines the abstract base class that all adapters must implement, +providing a consistent interface for connecting to and querying data sources. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from datetime import datetime +from typing import Any, Self + +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConnectionTestResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + + +class BaseAdapter(ABC): + """Abstract base class for all data source adapters. + + All adapters must implement this interface to provide: + - Connection management (connect/disconnect) + - Connection testing + - Schema discovery + - Context manager support + + Attributes: + config: Configuration dictionary for the adapter. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize the adapter with configuration. + + Args: + config: Configuration dictionary specific to the adapter type. + """ + self._config = config + self._connected = False + + @property + @abstractmethod + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + ... 
+ + @property + @abstractmethod + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + ... + + @abstractmethod + async def connect(self) -> None: + """Establish connection to the data source. + + Should be called before any other operations. + + Raises: + ConnectionFailedError: If connection cannot be established. + AuthenticationFailedError: If credentials are invalid. + """ + ... + + @abstractmethod + async def disconnect(self) -> None: + """Close connection to the data source. + + Should be called during cleanup. + """ + ... + + @abstractmethod + async def test_connection(self) -> ConnectionTestResult: + """Test connectivity to the data source. + + Returns: + ConnectionTestResult with success status and details. + """ + ... + + @abstractmethod + async def get_schema(self, filter: SchemaFilter | None = None) -> SchemaResponse: + """Discover schema from the data source. + + Args: + filter: Optional filter for schema discovery. + + Returns: + SchemaResponse with all discovered catalogs, schemas, and tables. + + Raises: + SchemaFetchFailedError: If schema cannot be retrieved. + """ + ... + + async def __aenter__(self) -> Self: + """Async context manager entry.""" + await self.connect() + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: Any, + ) -> None: + """Async context manager exit.""" + await self.disconnect() + + @property + def is_connected(self) -> bool: + """Check if adapter is currently connected.""" + return self._connected + + def _build_schema_response( + self, + source_id: str, + catalogs: list[dict[str, Any]], + ) -> SchemaResponse: + """Helper to build a SchemaResponse from catalog data. + + Args: + source_id: ID of the data source. + catalogs: List of catalog dictionaries. + + Returns: + Properly formatted SchemaResponse. 
+ """ + from dataing.adapters.datasource.types import ( + Catalog, + Column, + Schema, + Table, + ) + + parsed_catalogs = [] + for cat_data in catalogs: + schemas = [] + for schema_data in cat_data.get("schemas", []): + tables = [] + for table_data in schema_data.get("tables", []): + columns = [Column(**col_data) for col_data in table_data.get("columns", [])] + tables.append( + Table( + name=table_data["name"], + table_type=table_data.get("table_type", "table"), + native_type=table_data.get("native_type", "TABLE"), + native_path=table_data.get("native_path", table_data["name"]), + columns=columns, + row_count=table_data.get("row_count"), + size_bytes=table_data.get("size_bytes"), + last_modified=table_data.get("last_modified"), + description=table_data.get("description"), + ) + ) + schemas.append( + Schema( + name=schema_data.get("name", "default"), + tables=tables, + ) + ) + parsed_catalogs.append( + Catalog( + name=cat_data.get("name", "default"), + schemas=schemas, + ) + ) + + # Determine source category + source_category = self._get_source_category() + + return SchemaResponse( + source_id=source_id, + source_type=self.source_type, + source_category=source_category, + fetched_at=datetime.now(), + catalogs=parsed_catalogs, + ) + + def _get_source_category(self) -> SourceCategory: + """Determine source category based on source type.""" + from dataing.adapters.datasource.types import SourceCategory, SourceType + + sql_types = { + SourceType.POSTGRESQL, + SourceType.MYSQL, + SourceType.TRINO, + SourceType.SNOWFLAKE, + SourceType.BIGQUERY, + SourceType.REDSHIFT, + SourceType.DUCKDB, + SourceType.MONGODB, + SourceType.DYNAMODB, + SourceType.CASSANDRA, + } + api_types = {SourceType.SALESFORCE, SourceType.HUBSPOT, SourceType.STRIPE} + filesystem_types = { + SourceType.S3, + SourceType.GCS, + SourceType.HDFS, + SourceType.LOCAL_FILE, + } + + if self.source_type in sql_types: + return SourceCategory.DATABASE + elif self.source_type in api_types: + return SourceCategory.API + elif self.source_type in filesystem_types: + return SourceCategory.FILESYSTEM + else: + return SourceCategory.DATABASE diff --git a/backend/src/dataing/adapters/datasource/document/__init__.py b/backend/src/dataing/adapters/datasource/document/__init__.py new file mode 100644 index 000000000..4f0845248 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/document/__init__.py @@ -0,0 +1,11 @@ +"""Document/NoSQL database adapters. + +This module provides adapters for document-oriented data sources: +- MongoDB +- DynamoDB +- Cassandra +""" + +from dataing.adapters.datasource.document.base import DocumentAdapter + +__all__ = ["DocumentAdapter"] diff --git a/backend/src/dataing/adapters/datasource/document/base.py b/backend/src/dataing/adapters/datasource/document/base.py new file mode 100644 index 000000000..68bd0b654 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/document/base.py @@ -0,0 +1,143 @@ +"""Base class for document/NoSQL database adapters. + +This module provides the abstract base class for all document-oriented +data source adapters, adding scan and aggregation capabilities. +""" + +from __future__ import annotations + +from abc import abstractmethod +from typing import Any + +from dataing.adapters.datasource.base import BaseAdapter +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + QueryLanguage, + QueryResult, +) + + +class DocumentAdapter(BaseAdapter): + """Abstract base class for document/NoSQL database adapters. 
+ + Extends BaseAdapter with document scanning and aggregation capabilities. + """ + + @property + def capabilities(self) -> AdapterCapabilities: + """Document adapters typically don't support SQL.""" + return AdapterCapabilities( + supports_sql=False, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=False, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SCAN_ONLY, + max_concurrent_queries=5, + ) + + @abstractmethod + async def scan_collection( + self, + collection: str, + filter: dict[str, Any] | None = None, + limit: int = 100, + skip: int = 0, + ) -> QueryResult: + """Scan documents from a collection. + + Args: + collection: Collection/table name. + filter: Optional filter criteria. + limit: Maximum documents to return. + skip: Number of documents to skip. + + Returns: + QueryResult with scanned documents. + """ + ... + + @abstractmethod + async def sample( + self, + collection: str, + n: int = 100, + ) -> QueryResult: + """Get a random sample of documents from a collection. + + Args: + collection: Collection name. + n: Number of documents to sample. + + Returns: + QueryResult with sampled documents. + """ + ... + + @abstractmethod + async def count_documents( + self, + collection: str, + filter: dict[str, Any] | None = None, + ) -> int: + """Count documents in a collection. + + Args: + collection: Collection name. + filter: Optional filter criteria. + + Returns: + Number of matching documents. + """ + ... + + async def preview( + self, + collection: str, + n: int = 100, + ) -> QueryResult: + """Get a preview of documents from a collection. + + Args: + collection: Collection name. + n: Number of documents to preview. + + Returns: + QueryResult with preview documents. + """ + return await self.scan_collection(collection, limit=n) + + @abstractmethod + async def aggregate( + self, + collection: str, + pipeline: list[dict[str, Any]], + ) -> QueryResult: + """Execute an aggregation pipeline. + + Args: + collection: Collection name. + pipeline: Aggregation pipeline stages. + + Returns: + QueryResult with aggregation results. + """ + ... + + @abstractmethod + async def infer_schema( + self, + collection: str, + sample_size: int = 100, + ) -> dict[str, Any]: + """Infer schema from document samples. + + Args: + collection: Collection name. + sample_size: Number of documents to sample for inference. + + Returns: + Dictionary describing inferred schema. + """ + ... diff --git a/backend/src/dataing/adapters/datasource/document/cassandra.py b/backend/src/dataing/adapters/datasource/document/cassandra.py new file mode 100644 index 000000000..2ecb5049b --- /dev/null +++ b/backend/src/dataing/adapters/datasource/document/cassandra.py @@ -0,0 +1,470 @@ +"""Apache Cassandra adapter implementation. + +This module provides a Cassandra adapter that implements the unified +data source interface with schema discovery and CQL query capabilities. 
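+
+Example (an illustrative sketch; host names, keyspace, and table name are
+placeholders):
+
+    adapter = CassandraAdapter(
+        {"hosts": "cassandra-1.internal,cassandra-2.internal", "port": 9042,
+         "keyspace": "analytics"}
+    )
+    await adapter.connect()
+    schema = await adapter.get_schema()
+    rows = await adapter.scan_collection("events", limit=50)
+    await adapter.disconnect()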
+""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.document.base import DocumentAdapter +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + ConnectionTimeoutError, + QuerySyntaxError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + NormalizedType, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +CASSANDRA_TYPE_MAP = { + "ascii": NormalizedType.STRING, + "bigint": NormalizedType.INTEGER, + "blob": NormalizedType.BINARY, + "boolean": NormalizedType.BOOLEAN, + "counter": NormalizedType.INTEGER, + "date": NormalizedType.DATE, + "decimal": NormalizedType.DECIMAL, + "double": NormalizedType.FLOAT, + "duration": NormalizedType.STRING, + "float": NormalizedType.FLOAT, + "inet": NormalizedType.STRING, + "int": NormalizedType.INTEGER, + "smallint": NormalizedType.INTEGER, + "text": NormalizedType.STRING, + "time": NormalizedType.TIME, + "timestamp": NormalizedType.TIMESTAMP, + "timeuuid": NormalizedType.STRING, + "tinyint": NormalizedType.INTEGER, + "uuid": NormalizedType.STRING, + "varchar": NormalizedType.STRING, + "varint": NormalizedType.INTEGER, + "list": NormalizedType.ARRAY, + "set": NormalizedType.ARRAY, + "map": NormalizedType.MAP, + "tuple": NormalizedType.STRUCT, + "frozen": NormalizedType.STRUCT, +} + +CASSANDRA_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="connection", label="Connection", collapsed_by_default=False), + FieldGroup(id="auth", label="Authentication", collapsed_by_default=False), + FieldGroup(id="advanced", label="Advanced", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="hosts", + label="Contact Points", + type="string", + required=True, + group="connection", + placeholder="host1.example.com,host2.example.com", + description="Comma-separated list of Cassandra hosts", + ), + ConfigField( + name="port", + label="Port", + type="integer", + required=True, + group="connection", + default_value=9042, + min_value=1, + max_value=65535, + ), + ConfigField( + name="keyspace", + label="Keyspace", + type="string", + required=True, + group="connection", + placeholder="my_keyspace", + description="Default keyspace to connect to", + ), + ConfigField( + name="username", + label="Username", + type="string", + required=False, + group="auth", + description="Username for authentication (optional)", + ), + ConfigField( + name="password", + label="Password", + type="secret", + required=False, + group="auth", + description="Password for authentication (optional)", + ), + ConfigField( + name="ssl_enabled", + label="Enable SSL", + type="boolean", + required=False, + group="advanced", + default_value=False, + ), + ConfigField( + name="connection_timeout", + label="Connection Timeout (seconds)", + type="integer", + required=False, + group="advanced", + default_value=10, + min_value=1, + max_value=120, + ), + ConfigField( + name="request_timeout", + label="Request Timeout (seconds)", + type="integer", + required=False, + group="advanced", + default_value=10, + min_value=1, + max_value=300, + ), + ], +) + +CASSANDRA_CAPABILITIES = AdapterCapabilities( + supports_sql=False, + supports_sampling=True, + supports_row_count=False, + supports_column_stats=False, + 
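+    # Row counts and per-column statistics are left disabled: Cassandra has no
+    # inexpensive COUNT(*) or stats path, so both would require full scans.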
supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SCAN_ONLY, + max_concurrent_queries=5, +) + + +@register_adapter( + source_type=SourceType.CASSANDRA, + display_name="Apache Cassandra", + category=SourceCategory.DATABASE, + icon="cassandra", + description="Connect to Apache Cassandra or ScyllaDB clusters", + capabilities=CASSANDRA_CAPABILITIES, + config_schema=CASSANDRA_CONFIG_SCHEMA, +) +class CassandraAdapter(DocumentAdapter): + """Apache Cassandra adapter. + + Provides schema discovery and CQL query execution for Cassandra clusters. + Uses cassandra-driver for connection. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize Cassandra adapter. + + Args: + config: Configuration dictionary with: + - hosts: Comma-separated contact points + - port: Native protocol port + - keyspace: Default keyspace + - username: Username (optional) + - password: Password (optional) + - ssl_enabled: Enable SSL (optional) + - connection_timeout: Connect timeout (optional) + - request_timeout: Request timeout (optional) + """ + super().__init__(config) + self._cluster: Any = None + self._session: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.CASSANDRA + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return CASSANDRA_CAPABILITIES + + async def connect(self) -> None: + """Establish connection to Cassandra.""" + try: + from cassandra.auth import PlainTextAuthProvider + from cassandra.cluster import Cluster + except ImportError as e: + raise ConnectionFailedError( + message="cassandra-driver not installed. Install: pip install cassandra-driver", + details={"error": str(e)}, + ) from e + + try: + hosts_str = self._config.get("hosts", "localhost") + hosts = [h.strip() for h in hosts_str.split(",")] + port = self._config.get("port", 9042) + keyspace = self._config.get("keyspace") + username = self._config.get("username") + password = self._config.get("password") + connect_timeout = self._config.get("connection_timeout", 10) + + auth_provider = None + if username and password: + auth_provider = PlainTextAuthProvider( + username=username, + password=password, + ) + + self._cluster = Cluster( + contact_points=hosts, + port=port, + auth_provider=auth_provider, + connect_timeout=connect_timeout, + ) + + self._session = self._cluster.connect(keyspace) + self._connected = True + + except Exception as e: + error_str = str(e).lower() + if "authentication" in error_str or "credentials" in error_str: + raise AuthenticationFailedError( + message="Cassandra authentication failed", + details={"error": str(e)}, + ) from e + elif "timeout" in error_str: + raise ConnectionTimeoutError( + message="Connection to Cassandra timed out", + timeout_seconds=self._config.get("connection_timeout", 10), + ) from e + else: + raise ConnectionFailedError( + message=f"Failed to connect to Cassandra: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close Cassandra connection.""" + if self._session: + self._session.shutdown() + self._session = None + if self._cluster: + self._cluster.shutdown() + self._cluster = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test Cassandra connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + row = self._session.execute("SELECT release_version FROM 
system.local").one() + version = row.release_version if row else "Unknown" + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version=f"Cassandra {version}", + message="Connection successful", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def scan_collection( + self, + name: str, + filter: dict[str, Any] | None = None, + limit: int = 100, + ) -> QueryResult: + """Scan a Cassandra table.""" + if not self._connected or not self._session: + raise ConnectionFailedError(message="Not connected to Cassandra") + + start_time = time.time() + try: + keyspace = self._config.get("keyspace", "") + full_table = f"{keyspace}.{name}" if keyspace and "." not in name else name + + cql = f"SELECT * FROM {full_table}" + + if filter: + where_parts = [] + for key, value in filter.items(): + if isinstance(value, str): + where_parts.append(f"{key} = '{value}'") + else: + where_parts.append(f"{key} = {value}") + if where_parts: + cql += " WHERE " + " AND ".join(where_parts) + " ALLOW FILTERING" + + cql += f" LIMIT {limit}" + + rows = self._session.execute(cql) + execution_time_ms = int((time.time() - start_time) * 1000) + + rows_list = list(rows) + if not rows_list: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [{"name": col, "data_type": "string"} for col in rows_list[0]._fields] + + row_dicts = [dict(row._asdict()) for row in rows_list] + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=len(row_dicts) >= limit, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str: + raise QuerySyntaxError(message=str(e), query=cql[:200]) from e + elif "unauthorized" in error_str or "permission" in error_str: + raise AccessDeniedError(message=str(e)) from e + elif "timeout" in error_str: + raise QueryTimeoutError(message=str(e), timeout_seconds=30) from e + raise + + async def sample( + self, + name: str, + n: int = 100, + ) -> QueryResult: + """Sample rows from a Cassandra table.""" + return await self.scan_collection(name, limit=n) + + def _normalize_type(self, cql_type: str) -> NormalizedType: + """Normalize a CQL type to our standard types.""" + cql_type_lower = cql_type.lower() + + for type_prefix, normalized in CASSANDRA_TYPE_MAP.items(): + if cql_type_lower.startswith(type_prefix): + return normalized + + return NormalizedType.UNKNOWN + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get Cassandra schema.""" + if not self._connected or not self._session: + raise ConnectionFailedError(message="Not connected to Cassandra") + + try: + keyspace = self._config.get("keyspace") + + if keyspace: + keyspaces = [keyspace] + else: + ks_rows = self._session.execute("SELECT keyspace_name FROM system_schema.keyspaces") + keyspaces = [ + row.keyspace_name + for row in ks_rows + if not row.keyspace_name.startswith("system") + ] + + schemas = [] + for ks in keyspaces: + tables_cql = f""" + SELECT table_name + FROM system_schema.tables + WHERE keyspace_name = '{ks}' + """ + table_rows = self._session.execute(tables_cql) + table_names = [row.table_name for row in table_rows] + + if filter and filter.table_pattern: + table_names = [t for 
t in table_names if filter.table_pattern in t] + + if filter and filter.max_tables: + table_names = table_names[: filter.max_tables] + + tables = [] + for table_name in table_names: + columns_cql = f""" + SELECT column_name, type, kind + FROM system_schema.columns + WHERE keyspace_name = '{ks}' AND table_name = '{table_name}' + """ + col_rows = self._session.execute(columns_cql) + + columns = [] + for col in col_rows: + columns.append( + { + "name": col.column_name, + "data_type": self._normalize_type(col.type), + "native_type": col.type, + "nullable": col.kind not in ("partition_key", "clustering"), + "is_primary_key": col.kind == "partition_key", + "is_partition_key": col.kind == "clustering", + } + ) + + tables.append( + { + "name": table_name, + "table_type": "table", + "native_type": "CASSANDRA_TABLE", + "native_path": f"{ks}.{table_name}", + "columns": columns, + } + ) + + schemas.append( + { + "name": ks, + "tables": tables, + } + ) + + catalogs = [ + { + "name": "default", + "schemas": schemas, + } + ] + + return self._build_schema_response( + source_id=self._source_id or "cassandra", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch Cassandra schema: {str(e)}", + details={"error": str(e)}, + ) from e diff --git a/backend/src/dataing/adapters/datasource/document/dynamodb.py b/backend/src/dataing/adapters/datasource/document/dynamodb.py new file mode 100644 index 000000000..7f0055498 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/document/dynamodb.py @@ -0,0 +1,503 @@ +"""Amazon DynamoDB adapter implementation. + +This module provides a DynamoDB adapter that implements the unified +data source interface with schema inference and scan capabilities. +""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.document.base import DocumentAdapter +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + NormalizedType, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +DYNAMODB_TYPE_MAP = { + "S": NormalizedType.STRING, + "N": NormalizedType.DECIMAL, + "B": NormalizedType.BINARY, + "SS": NormalizedType.ARRAY, + "NS": NormalizedType.ARRAY, + "BS": NormalizedType.ARRAY, + "M": NormalizedType.MAP, + "L": NormalizedType.ARRAY, + "BOOL": NormalizedType.BOOLEAN, + "NULL": NormalizedType.UNKNOWN, +} + +DYNAMODB_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="connection", label="Connection", collapsed_by_default=False), + FieldGroup(id="auth", label="AWS Credentials", collapsed_by_default=False), + FieldGroup(id="advanced", label="Advanced", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="region", + label="AWS Region", + type="enum", + required=True, + group="connection", + default_value="us-east-1", + options=[ + {"value": "us-east-1", "label": "US East (N. Virginia)"}, + {"value": "us-east-2", "label": "US East (Ohio)"}, + {"value": "us-west-1", "label": "US West (N. 
California)"}, + {"value": "us-west-2", "label": "US West (Oregon)"}, + {"value": "eu-west-1", "label": "EU (Ireland)"}, + {"value": "eu-west-2", "label": "EU (London)"}, + {"value": "eu-central-1", "label": "EU (Frankfurt)"}, + {"value": "ap-northeast-1", "label": "Asia Pacific (Tokyo)"}, + {"value": "ap-southeast-1", "label": "Asia Pacific (Singapore)"}, + {"value": "ap-southeast-2", "label": "Asia Pacific (Sydney)"}, + ], + ), + ConfigField( + name="access_key_id", + label="Access Key ID", + type="string", + required=True, + group="auth", + description="AWS Access Key ID", + ), + ConfigField( + name="secret_access_key", + label="Secret Access Key", + type="secret", + required=True, + group="auth", + description="AWS Secret Access Key", + ), + ConfigField( + name="endpoint_url", + label="Endpoint URL", + type="string", + required=False, + group="advanced", + placeholder="http://localhost:8000", + description="Custom endpoint URL (for local DynamoDB)", + ), + ConfigField( + name="table_prefix", + label="Table Prefix", + type="string", + required=False, + group="advanced", + placeholder="prod_", + description="Only show tables with this prefix", + ), + ], +) + +DYNAMODB_CAPABILITIES = AdapterCapabilities( + supports_sql=False, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=False, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SCAN_ONLY, + max_concurrent_queries=5, +) + + +@register_adapter( + source_type=SourceType.DYNAMODB, + display_name="Amazon DynamoDB", + category=SourceCategory.DATABASE, + icon="dynamodb", + description="Connect to Amazon DynamoDB NoSQL tables", + capabilities=DYNAMODB_CAPABILITIES, + config_schema=DYNAMODB_CONFIG_SCHEMA, +) +class DynamoDBAdapter(DocumentAdapter): + """Amazon DynamoDB adapter. + + Provides schema discovery and scan capabilities for DynamoDB tables. + Uses boto3 for AWS API access. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize DynamoDB adapter. + + Args: + config: Configuration dictionary with: + - region: AWS region + - access_key_id: AWS access key + - secret_access_key: AWS secret key + - endpoint_url: Optional custom endpoint + - table_prefix: Optional table name prefix filter + """ + super().__init__(config) + self._client: Any = None + self._resource: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.DYNAMODB + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return DYNAMODB_CAPABILITIES + + async def connect(self) -> None: + """Establish connection to DynamoDB.""" + try: + import boto3 + except ImportError as e: + raise ConnectionFailedError( + message="boto3 is not installed. 
Install with: pip install boto3", + details={"error": str(e)}, + ) from e + + try: + session = boto3.Session( + aws_access_key_id=self._config.get("access_key_id"), + aws_secret_access_key=self._config.get("secret_access_key"), + region_name=self._config.get("region", "us-east-1"), + ) + + endpoint_url = self._config.get("endpoint_url") + if endpoint_url: + self._client = session.client("dynamodb", endpoint_url=endpoint_url) + self._resource = session.resource("dynamodb", endpoint_url=endpoint_url) + else: + self._client = session.client("dynamodb") + self._resource = session.resource("dynamodb") + + self._connected = True + except Exception as e: + error_str = str(e).lower() + if "credentials" in error_str or "access" in error_str: + raise AuthenticationFailedError( + message="AWS authentication failed", + details={"error": str(e)}, + ) from e + raise ConnectionFailedError( + message=f"Failed to connect to DynamoDB: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close DynamoDB connection.""" + self._client = None + self._resource = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test DynamoDB connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + self._client.list_tables(Limit=1) + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version="DynamoDB", + message="Connection successful", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def scan_collection( + self, + name: str, + filter: dict[str, Any] | None = None, + limit: int = 100, + ) -> QueryResult: + """Scan a DynamoDB table.""" + if not self._connected or not self._client: + raise ConnectionFailedError(message="Not connected to DynamoDB") + + start_time = time.time() + try: + scan_params = {"TableName": name, "Limit": limit} + + if filter: + filter_expression_parts = [] + expression_values = {} + expression_names = {} + + for i, (key, value) in enumerate(filter.items()): + placeholder = f":val{i}" + name_placeholder = f"#attr{i}" + filter_expression_parts.append(f"{name_placeholder} = {placeholder}") + expression_values[placeholder] = self._serialize_value(value) + expression_names[name_placeholder] = key + + if filter_expression_parts: + scan_params["FilterExpression"] = " AND ".join(filter_expression_parts) + scan_params["ExpressionAttributeValues"] = expression_values + scan_params["ExpressionAttributeNames"] = expression_names + + response = self._client.scan(**scan_params) + items = response.get("Items", []) + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not items: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + all_keys = set() + for item in items: + all_keys.update(item.keys()) + + columns = [{"name": key, "data_type": "string"} for key in sorted(all_keys)] + rows = [self._deserialize_item(item) for item in items] + + return QueryResult( + columns=columns, + rows=rows, + row_count=len(rows), + truncated=len(items) >= limit, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "accessdenied" in error_str or "not authorized" in error_str: + raise AccessDeniedError(message=str(e)) from e + elif 
"timeout" in error_str: + raise QueryTimeoutError(message=str(e), timeout_seconds=30) from e + raise + + def _serialize_value(self, value: Any) -> dict[str, Any]: + """Serialize a Python value to DynamoDB format.""" + if isinstance(value, str): + return {"S": value} + elif isinstance(value, bool): + return {"BOOL": value} + elif isinstance(value, int | float): + return {"N": str(value)} + elif isinstance(value, bytes): + return {"B": value} + elif isinstance(value, list): + return {"L": [self._serialize_value(v) for v in value]} + elif isinstance(value, dict): + return {"M": {k: self._serialize_value(v) for k, v in value.items()}} + elif value is None: + return {"NULL": True} + return {"S": str(value)} + + def _deserialize_item(self, item: dict[str, Any]) -> dict[str, Any]: + """Deserialize a DynamoDB item to Python dict.""" + result = {} + for key, value in item.items(): + result[key] = self._deserialize_value(value) + return result + + def _deserialize_value(self, value: dict[str, Any]) -> Any: + """Deserialize a DynamoDB value.""" + if "S" in value: + return value["S"] + elif "N" in value: + num_str = value["N"] + return float(num_str) if "." in num_str else int(num_str) + elif "B" in value: + return value["B"] + elif "BOOL" in value: + return value["BOOL"] + elif "NULL" in value: + return None + elif "L" in value: + return [self._deserialize_value(v) for v in value["L"]] + elif "M" in value: + return {k: self._deserialize_value(v) for k, v in value["M"].items()} + elif "SS" in value: + return value["SS"] + elif "NS" in value: + return [float(n) if "." in n else int(n) for n in value["NS"]] + elif "BS" in value: + return value["BS"] + return str(value) + + def _infer_type(self, value: dict[str, Any]) -> NormalizedType: + """Infer normalized type from DynamoDB value.""" + for dynamo_type, normalized in DYNAMODB_TYPE_MAP.items(): + if dynamo_type in value: + return normalized + return NormalizedType.UNKNOWN + + async def sample( + self, + name: str, + n: int = 100, + ) -> QueryResult: + """Sample documents from a DynamoDB table.""" + return await self.scan_collection(name, limit=n) + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get DynamoDB schema by listing tables and inferring column types.""" + if not self._connected or not self._client: + raise ConnectionFailedError(message="Not connected to DynamoDB") + + try: + tables_list = [] + exclusive_start = None + table_prefix = self._config.get("table_prefix", "") + + while True: + params = {"Limit": 100} + if exclusive_start: + params["ExclusiveStartTableName"] = exclusive_start + + response = self._client.list_tables(**params) + table_names = response.get("TableNames", []) + + for table_name in table_names: + if table_prefix and not table_name.startswith(table_prefix): + continue + + if filter and filter.table_pattern: + if filter.table_pattern not in table_name: + continue + + tables_list.append(table_name) + + exclusive_start = response.get("LastEvaluatedTableName") + if not exclusive_start: + break + + if filter and filter.max_tables and len(tables_list) >= filter.max_tables: + tables_list = tables_list[: filter.max_tables] + break + + tables = [] + for table_name in tables_list: + try: + desc_response = self._client.describe_table(TableName=table_name) + table_desc = desc_response.get("Table", {}) + + key_schema = table_desc.get("KeySchema", []) + pk_names = {k["AttributeName"] for k in key_schema if k["KeyType"] == "HASH"} + sk_names = {k["AttributeName"] for k in key_schema if 
k["KeyType"] == "RANGE"} + + attr_defs = table_desc.get("AttributeDefinitions", []) + attr_types = {a["AttributeName"]: a["AttributeType"] for a in attr_defs} + + columns = [] + for attr_name, attr_type in attr_types.items(): + columns.append( + { + "name": attr_name, + "data_type": DYNAMODB_TYPE_MAP.get( + attr_type, NormalizedType.UNKNOWN + ), + "native_type": attr_type, + "nullable": attr_name not in pk_names, + "is_primary_key": attr_name in pk_names, + "is_partition_key": attr_name in sk_names, + } + ) + + scan_response = self._client.scan(TableName=table_name, Limit=10) + sample_items = scan_response.get("Items", []) + + inferred_columns = set() + for item in sample_items: + for key, value in item.items(): + if key not in attr_types and key not in inferred_columns: + inferred_columns.add(key) + columns.append( + { + "name": key, + "data_type": self._infer_type(value), + "native_type": list(value.keys())[0] + if value + else "UNKNOWN", + "nullable": True, + "is_primary_key": False, + "is_partition_key": False, + } + ) + + item_count = table_desc.get("ItemCount") + table_size = table_desc.get("TableSizeBytes") + + tables.append( + { + "name": table_name, + "table_type": "collection", + "native_type": "DYNAMODB_TABLE", + "native_path": table_name, + "columns": columns, + "row_count": item_count, + "size_bytes": table_size, + } + ) + + except Exception: + tables.append( + { + "name": table_name, + "table_type": "collection", + "native_type": "DYNAMODB_TABLE", + "native_path": table_name, + "columns": [], + } + ) + + catalogs = [ + { + "name": "default", + "schemas": [ + { + "name": self._config.get("region", "default"), + "tables": tables, + } + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "dynamodb", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch DynamoDB schema: {str(e)}", + details={"error": str(e)}, + ) from e diff --git a/backend/src/dataing/adapters/datasource/document/mongodb.py b/backend/src/dataing/adapters/datasource/document/mongodb.py new file mode 100644 index 000000000..41cf6a642 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/document/mongodb.py @@ -0,0 +1,507 @@ +"""MongoDB adapter implementation. + +This module provides a MongoDB adapter that implements the unified +data source interface with schema inference and document scanning. 
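+
+Example (an illustrative sketch; the URI, database, and collection names are
+placeholders):
+
+    adapter = MongoDBAdapter({"connection_string": "mongodb://localhost:27017",
+                              "database": "app"})
+    await adapter.connect()
+    inferred = await adapter.infer_schema("orders", sample_size=200)
+    by_status = await adapter.aggregate(
+        "orders", [{"$group": {"_id": "$status", "count": {"$sum": 1}}}]
+    )
+    await adapter.disconnect()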
+""" + +from __future__ import annotations + +import time +from datetime import datetime +from typing import Any + +from dataing.adapters.datasource.document.base import DocumentAdapter +from dataing.adapters.datasource.errors import ( + AuthenticationFailedError, + ConnectionFailedError, + ConnectionTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +MONGODB_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="connection", label="Connection", collapsed_by_default=False), + ], + fields=[ + ConfigField( + name="connection_string", + label="Connection String", + type="secret", + required=True, + group="connection", + placeholder="mongodb+srv://user:pass@cluster.mongodb.net/db", + description="Full MongoDB connection URI", + ), + ConfigField( + name="database", + label="Database", + type="string", + required=True, + group="connection", + description="Database to connect to", + ), + ], +) + +MONGODB_CAPABILITIES = AdapterCapabilities( + supports_sql=False, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=False, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.MQL, + max_concurrent_queries=5, +) + + +@register_adapter( + source_type=SourceType.MONGODB, + display_name="MongoDB", + category=SourceCategory.DATABASE, + icon="mongodb", + description="Connect to MongoDB for document-oriented data querying", + capabilities=MONGODB_CAPABILITIES, + config_schema=MONGODB_CONFIG_SCHEMA, +) +class MongoDBAdapter(DocumentAdapter): + """MongoDB database adapter. + + Provides schema inference and document scanning for MongoDB. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize MongoDB adapter. + + Args: + config: Configuration dictionary with: + - connection_string: MongoDB connection URI + - database: Database name + """ + super().__init__(config) + self._client: Any = None + self._db: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.MONGODB + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return MONGODB_CAPABILITIES + + async def connect(self) -> None: + """Establish connection to MongoDB.""" + try: + from motor.motor_asyncio import AsyncIOMotorClient + except ImportError as e: + raise ConnectionFailedError( + message="motor is not installed. 
Install with: pip install motor", + details={"error": str(e)}, + ) from e + + try: + connection_string = self._config.get("connection_string", "") + database = self._config.get("database", "") + + self._client = AsyncIOMotorClient( + connection_string, + serverSelectionTimeoutMS=30000, + ) + self._db = self._client[database] + + # Test connection + await self._client.admin.command("ping") + self._connected = True + except Exception as e: + error_str = str(e).lower() + if "authentication" in error_str: + raise AuthenticationFailedError( + message="Authentication failed for MongoDB", + details={"error": str(e)}, + ) from e + elif "timeout" in error_str or "timed out" in error_str: + raise ConnectionTimeoutError( + message="Connection to MongoDB timed out", + ) from e + else: + raise ConnectionFailedError( + message=f"Failed to connect to MongoDB: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close MongoDB connection.""" + if self._client: + self._client.close() + self._client = None + self._db = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test MongoDB connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + # Get server info + info = await self._client.server_info() + version = info.get("version", "Unknown") + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version=f"MongoDB {version}", + message="Connection successful", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def scan_collection( + self, + collection: str, + filter: dict[str, Any] | None = None, + limit: int = 100, + skip: int = 0, + ) -> QueryResult: + """Scan documents from a collection.""" + if not self._connected or not self._db: + raise ConnectionFailedError(message="Not connected to MongoDB") + + start_time = time.time() + coll = self._db[collection] + + query_filter = filter or {} + cursor = coll.find(query_filter).skip(skip).limit(limit) + docs = await cursor.to_list(length=limit) + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not docs: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + # Get all unique keys from documents + all_keys: set[str] = set() + for doc in docs: + all_keys.update(doc.keys()) + + columns = [{"name": key, "data_type": "json"} for key in sorted(all_keys)] + + # Convert documents to serializable dicts + row_dicts = [] + for doc in docs: + row = {} + for key, value in doc.items(): + row[key] = self._serialize_value(value) + row_dicts.append(row) + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + execution_time_ms=execution_time_ms, + ) + + def _serialize_value(self, value: Any) -> Any: + """Convert MongoDB values to JSON-serializable format.""" + from bson import ObjectId + + if isinstance(value, ObjectId): + return str(value) + elif isinstance(value, datetime): + return value.isoformat() + elif isinstance(value, bytes): + return value.decode("utf-8", errors="replace") + elif isinstance(value, dict): + return {k: self._serialize_value(v) for k, v in value.items()} + elif isinstance(value, list): + return [self._serialize_value(v) for v in value] + else: + return value + + async def 
sample( + self, + collection: str, + n: int = 100, + ) -> QueryResult: + """Get a random sample of documents.""" + if not self._connected or not self._db: + raise ConnectionFailedError(message="Not connected to MongoDB") + + start_time = time.time() + coll = self._db[collection] + + # Use $sample aggregation + pipeline = [{"$sample": {"size": n}}] + cursor = coll.aggregate(pipeline) + docs = await cursor.to_list(length=n) + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not docs: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + # Get all unique keys + all_keys: set[str] = set() + for doc in docs: + all_keys.update(doc.keys()) + + columns = [{"name": key, "data_type": "json"} for key in sorted(all_keys)] + + row_dicts = [] + for doc in docs: + row = {key: self._serialize_value(value) for key, value in doc.items()} + row_dicts.append(row) + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + execution_time_ms=execution_time_ms, + ) + + async def count_documents( + self, + collection: str, + filter: dict[str, Any] | None = None, + ) -> int: + """Count documents in a collection.""" + if not self._connected or not self._db: + raise ConnectionFailedError(message="Not connected to MongoDB") + + coll = self._db[collection] + query_filter = filter or {} + count: int = await coll.count_documents(query_filter) + return count + + async def aggregate( + self, + collection: str, + pipeline: list[dict[str, Any]], + ) -> QueryResult: + """Execute an aggregation pipeline.""" + if not self._connected or not self._db: + raise ConnectionFailedError(message="Not connected to MongoDB") + + start_time = time.time() + coll = self._db[collection] + + cursor = coll.aggregate(pipeline) + docs = await cursor.to_list(length=1000) + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not docs: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + # Get all unique keys + all_keys: set[str] = set() + for doc in docs: + all_keys.update(doc.keys()) + + columns = [{"name": key, "data_type": "json"} for key in sorted(all_keys)] + + row_dicts = [] + for doc in docs: + row = {key: self._serialize_value(value) for key, value in doc.items()} + row_dicts.append(row) + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + execution_time_ms=execution_time_ms, + ) + + async def infer_schema( + self, + collection: str, + sample_size: int = 100, + ) -> dict[str, Any]: + """Infer schema from document samples.""" + if not self._connected or not self._db: + raise ConnectionFailedError(message="Not connected to MongoDB") + + sample_result = await self.sample(collection, sample_size) + + # Track field types across all documents + field_types: dict[str, set[str]] = {} + + for doc in sample_result.rows: + for key, value in doc.items(): + if key not in field_types: + field_types[key] = set() + field_types[key].add(self._infer_type(value)) + + # Build schema + schema: dict[str, Any] = { + "collection": collection, + "fields": {}, + } + + for field, types in field_types.items(): + # If multiple types, use the most common or 'mixed' + if len(types) == 1: + schema["fields"][field] = list(types)[0] + else: + schema["fields"][field] = "mixed" + + return schema + + def _infer_type(self, value: Any) -> str: + """Infer the type of a value.""" + if value is None: + return "null" + elif isinstance(value, bool): + return "boolean" + 
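+        # bool is checked before int: bool is a subclass of int in Python, so
+        # the integer branch would otherwise swallow True/False values.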
elif isinstance(value, int): + return "integer" + elif isinstance(value, float): + return "float" + elif isinstance(value, str): + return "string" + elif isinstance(value, list): + return "array" + elif isinstance(value, dict): + return "object" + else: + return "unknown" + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get MongoDB schema (collections with inferred types).""" + if not self._connected or not self._db: + raise ConnectionFailedError(message="Not connected to MongoDB") + + try: + # List collections + collections = await self._db.list_collection_names() + + # Apply filter if provided + if filter and filter.table_pattern: + import fnmatch + + pattern = filter.table_pattern.replace("%", "*") + collections = [c for c in collections if fnmatch.fnmatch(c, pattern)] + + # Limit collections + max_tables = filter.max_tables if filter else 1000 + collections = collections[:max_tables] + + # Build tables with inferred schemas + tables = [] + for coll_name in collections: + # Skip system collections + if coll_name.startswith("system."): + continue + + try: + # Sample documents to infer schema + schema_info = await self.infer_schema(coll_name, sample_size=50) + + # Get document count + count = await self.count_documents(coll_name) + + # Build columns from inferred schema + columns = [] + for field_name, field_type in schema_info.get("fields", {}).items(): + normalized_type = normalize_type(field_type, SourceType.MONGODB) + columns.append( + { + "name": field_name, + "data_type": normalized_type, + "native_type": field_type, + "nullable": True, + "is_primary_key": field_name == "_id", + "is_partition_key": False, + } + ) + + tables.append( + { + "name": coll_name, + "table_type": "collection", + "native_type": "COLLECTION", + "native_path": f"{self._config.get('database', 'db')}.{coll_name}", + "columns": columns, + "row_count": count, + } + ) + except Exception: + # If we can't infer schema, add empty table + tables.append( + { + "name": coll_name, + "table_type": "collection", + "native_type": "COLLECTION", + "native_path": f"{self._config.get('database', 'db')}.{coll_name}", + "columns": [], + } + ) + + # Build catalog structure + catalogs = [ + { + "name": "default", + "schemas": [ + { + "name": self._config.get("database", "default"), + "tables": tables, + } + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "mongodb", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch MongoDB schema: {str(e)}", + details={"error": str(e)}, + ) from e diff --git a/backend/src/dataing/adapters/datasource/errors.py b/backend/src/dataing/adapters/datasource/errors.py new file mode 100644 index 000000000..2b7defe4b --- /dev/null +++ b/backend/src/dataing/adapters/datasource/errors.py @@ -0,0 +1,406 @@ +"""Error definitions for the adapter layer. + +This module defines all adapter-specific exceptions with consistent +error codes that can be mapped across all source types. 
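+
+Example (an illustrative call-site sketch; ``adapter`` is any adapter instance
+and ``schedule_retry`` is a hypothetical helper):
+
+    try:
+        await adapter.connect()
+    except AdapterError as exc:
+        payload = exc.to_dict()  # {"error": {"code": ..., "message": ..., ...}}
+        if exc.retryable:
+            schedule_retry(exc.retry_after_seconds)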
+""" + +from __future__ import annotations + +from enum import Enum +from typing import Any + + +class ErrorCode(str, Enum): + """Standardized error codes for all adapters.""" + + # Connection errors + CONNECTION_FAILED = "CONNECTION_FAILED" + CONNECTION_TIMEOUT = "CONNECTION_TIMEOUT" + AUTHENTICATION_FAILED = "AUTHENTICATION_FAILED" + SSL_ERROR = "SSL_ERROR" + + # Permission errors + ACCESS_DENIED = "ACCESS_DENIED" + INSUFFICIENT_PERMISSIONS = "INSUFFICIENT_PERMISSIONS" + + # Query errors + QUERY_SYNTAX_ERROR = "QUERY_SYNTAX_ERROR" + QUERY_TIMEOUT = "QUERY_TIMEOUT" + QUERY_CANCELLED = "QUERY_CANCELLED" + RESOURCE_EXHAUSTED = "RESOURCE_EXHAUSTED" + + # Rate limiting + RATE_LIMITED = "RATE_LIMITED" + + # Schema errors + TABLE_NOT_FOUND = "TABLE_NOT_FOUND" + COLUMN_NOT_FOUND = "COLUMN_NOT_FOUND" + SCHEMA_FETCH_FAILED = "SCHEMA_FETCH_FAILED" + + # Configuration errors + INVALID_CONFIG = "INVALID_CONFIG" + MISSING_REQUIRED_FIELD = "MISSING_REQUIRED_FIELD" + + # Internal errors + INTERNAL_ERROR = "INTERNAL_ERROR" + NOT_IMPLEMENTED = "NOT_IMPLEMENTED" + + +class AdapterError(Exception): + """Base exception for all adapter errors. + + Attributes: + code: Standardized error code. + message: Human-readable error message. + details: Additional error details. + retryable: Whether the operation can be retried. + retry_after_seconds: Suggested wait time before retry. + """ + + def __init__( + self, + code: ErrorCode, + message: str, + details: dict[str, Any] | None = None, + retryable: bool = False, + retry_after_seconds: int | None = None, + ) -> None: + """Initialize the adapter error.""" + super().__init__(message) + self.code = code + self.message = message + self.details = details or {} + self.retryable = retryable + self.retry_after_seconds = retry_after_seconds + + def to_dict(self) -> dict[str, Any]: + """Convert error to dictionary for API response.""" + return { + "error": { + "code": self.code.value, + "message": self.message, + "details": self.details if self.details else None, + "retryable": self.retryable, + "retry_after_seconds": self.retry_after_seconds, + } + } + + +class ConnectionFailedError(AdapterError): + """Failed to establish connection to data source.""" + + def __init__( + self, + message: str = "Failed to connect to data source", + details: dict[str, Any] | None = None, + ) -> None: + """Initialize connection failed error.""" + super().__init__( + code=ErrorCode.CONNECTION_FAILED, + message=message, + details=details, + retryable=True, + ) + + +class ConnectionTimeoutError(AdapterError): + """Connection attempt timed out.""" + + def __init__( + self, + message: str = "Connection timed out", + timeout_seconds: int | None = None, + ) -> None: + """Initialize connection timeout error.""" + super().__init__( + code=ErrorCode.CONNECTION_TIMEOUT, + message=message, + details={"timeout_seconds": timeout_seconds} if timeout_seconds else None, + retryable=True, + ) + + +class AuthenticationFailedError(AdapterError): + """Authentication credentials were rejected.""" + + def __init__( + self, + message: str = "Authentication failed", + details: dict[str, Any] | None = None, + ) -> None: + """Initialize authentication failed error.""" + super().__init__( + code=ErrorCode.AUTHENTICATION_FAILED, + message=message, + details=details, + retryable=False, + ) + + +class SSLError(AdapterError): + """SSL/TLS connection error.""" + + def __init__( + self, + message: str = "SSL connection error", + details: dict[str, Any] | None = None, + ) -> None: + """Initialize SSL error.""" + 
super().__init__( + code=ErrorCode.SSL_ERROR, + message=message, + details=details, + retryable=False, + ) + + +class AccessDeniedError(AdapterError): + """Access to resource was denied.""" + + def __init__( + self, + message: str = "Access denied", + resource: str | None = None, + ) -> None: + """Initialize access denied error.""" + super().__init__( + code=ErrorCode.ACCESS_DENIED, + message=message, + details={"resource": resource} if resource else None, + retryable=False, + ) + + +class InsufficientPermissionsError(AdapterError): + """User lacks required permissions.""" + + def __init__( + self, + message: str = "Insufficient permissions", + required_permission: str | None = None, + ) -> None: + """Initialize insufficient permissions error.""" + super().__init__( + code=ErrorCode.INSUFFICIENT_PERMISSIONS, + message=message, + details={"required_permission": required_permission} if required_permission else None, + retryable=False, + ) + + +class QuerySyntaxError(AdapterError): + """Query syntax is invalid.""" + + def __init__( + self, + message: str = "Query syntax error", + query: str | None = None, + position: int | None = None, + ) -> None: + """Initialize query syntax error.""" + details: dict[str, Any] = {} + if query: + details["query_preview"] = query[:200] if len(query) > 200 else query + if position: + details["position"] = position + super().__init__( + code=ErrorCode.QUERY_SYNTAX_ERROR, + message=message, + details=details if details else None, + retryable=False, + ) + + +class QueryTimeoutError(AdapterError): + """Query execution timed out.""" + + def __init__( + self, + message: str = "Query timed out", + timeout_seconds: int | None = None, + ) -> None: + """Initialize query timeout error.""" + super().__init__( + code=ErrorCode.QUERY_TIMEOUT, + message=message, + details={"timeout_seconds": timeout_seconds} if timeout_seconds else None, + retryable=True, + ) + + +class QueryCancelledError(AdapterError): + """Query was cancelled.""" + + def __init__( + self, + message: str = "Query was cancelled", + details: dict[str, Any] | None = None, + ) -> None: + """Initialize query cancelled error.""" + super().__init__( + code=ErrorCode.QUERY_CANCELLED, + message=message, + details=details, + retryable=True, + ) + + +class ResourceExhaustedError(AdapterError): + """Resource limits exceeded.""" + + def __init__( + self, + message: str = "Resource limits exceeded", + resource_type: str | None = None, + ) -> None: + """Initialize resource exhausted error.""" + super().__init__( + code=ErrorCode.RESOURCE_EXHAUSTED, + message=message, + details={"resource_type": resource_type} if resource_type else None, + retryable=True, + retry_after_seconds=60, + ) + + +class RateLimitedError(AdapterError): + """Request was rate limited.""" + + def __init__( + self, + message: str = "Rate limit exceeded", + retry_after_seconds: int = 60, + ) -> None: + """Initialize rate limited error.""" + super().__init__( + code=ErrorCode.RATE_LIMITED, + message=message, + retryable=True, + retry_after_seconds=retry_after_seconds, + ) + + +class TableNotFoundError(AdapterError): + """Table or collection not found.""" + + def __init__( + self, + table_name: str, + message: str | None = None, + ) -> None: + """Initialize table not found error.""" + super().__init__( + code=ErrorCode.TABLE_NOT_FOUND, + message=message or f"Table not found: {table_name}", + details={"table_name": table_name}, + retryable=False, + ) + + +class ColumnNotFoundError(AdapterError): + """Column not found in table.""" + + def __init__( + self, 
+ column_name: str, + table_name: str | None = None, + message: str | None = None, + ) -> None: + """Initialize column not found error.""" + details: dict[str, Any] = {"column_name": column_name} + if table_name: + details["table_name"] = table_name + super().__init__( + code=ErrorCode.COLUMN_NOT_FOUND, + message=message or f"Column not found: {column_name}", + details=details, + retryable=False, + ) + + +class SchemaFetchFailedError(AdapterError): + """Failed to fetch schema from data source.""" + + def __init__( + self, + message: str = "Failed to fetch schema", + details: dict[str, Any] | None = None, + ) -> None: + """Initialize schema fetch failed error.""" + super().__init__( + code=ErrorCode.SCHEMA_FETCH_FAILED, + message=message, + details=details, + retryable=True, + ) + + +class InvalidConfigError(AdapterError): + """Configuration is invalid.""" + + def __init__( + self, + message: str = "Invalid configuration", + field: str | None = None, + ) -> None: + """Initialize invalid config error.""" + super().__init__( + code=ErrorCode.INVALID_CONFIG, + message=message, + details={"field": field} if field else None, + retryable=False, + ) + + +class MissingRequiredFieldError(AdapterError): + """Required configuration field is missing.""" + + def __init__( + self, + field: str, + message: str | None = None, + ) -> None: + """Initialize missing required field error.""" + super().__init__( + code=ErrorCode.MISSING_REQUIRED_FIELD, + message=message or f"Missing required field: {field}", + details={"field": field}, + retryable=False, + ) + + +class NotImplementedError(AdapterError): + """Feature is not implemented for this adapter.""" + + def __init__( + self, + feature: str, + adapter_type: str | None = None, + ) -> None: + """Initialize not implemented error.""" + message = f"Feature not implemented: {feature}" + if adapter_type: + message = f"Feature not implemented for {adapter_type}: {feature}" + super().__init__( + code=ErrorCode.NOT_IMPLEMENTED, + message=message, + details={"feature": feature, "adapter_type": adapter_type}, + retryable=False, + ) + + +class InternalError(AdapterError): + """Internal adapter error.""" + + def __init__( + self, + message: str = "Internal error", + details: dict[str, Any] | None = None, + ) -> None: + """Initialize internal error.""" + super().__init__( + code=ErrorCode.INTERNAL_ERROR, + message=message, + details=details, + retryable=False, + ) diff --git a/backend/src/dataing/adapters/datasource/filesystem/__init__.py b/backend/src/dataing/adapters/datasource/filesystem/__init__.py new file mode 100644 index 000000000..780ad027c --- /dev/null +++ b/backend/src/dataing/adapters/datasource/filesystem/__init__.py @@ -0,0 +1,12 @@ +"""File system adapters. + +This module provides adapters for file system data sources: +- S3 +- GCS +- HDFS +- Local files +""" + +from dataing.adapters.datasource.filesystem.base import FileSystemAdapter + +__all__ = ["FileSystemAdapter"] diff --git a/backend/src/dataing/adapters/datasource/filesystem/base.py b/backend/src/dataing/adapters/datasource/filesystem/base.py new file mode 100644 index 000000000..46f2858c6 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/filesystem/base.py @@ -0,0 +1,139 @@ +"""Base class for file system adapters. + +This module provides the abstract base class for all file system +data source adapters. 
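+
+Example (an illustrative sketch against the abstract contract below; ``fs`` is a
+concrete FileSystemAdapter and the object path is a placeholder):
+
+    result = await fs.read_file("warehouse/events/2024-01-01.parquet", limit=100)
+    table = await fs.infer_schema("warehouse/events/2024-01-01.parquet")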
+""" + +from __future__ import annotations + +from abc import abstractmethod +from dataclasses import dataclass + +from dataing.adapters.datasource.base import BaseAdapter +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + QueryLanguage, + QueryResult, + Table, +) + + +@dataclass +class FileInfo: + """Information about a file.""" + + path: str + name: str + size_bytes: int + last_modified: str | None = None + file_format: str | None = None + + +class FileSystemAdapter(BaseAdapter): + """Abstract base class for file system adapters. + + Extends BaseAdapter with file listing and reading capabilities. + File system adapters typically delegate actual reading to DuckDB. + """ + + @property + def capabilities(self) -> AdapterCapabilities: + """File system adapters support SQL via DuckDB.""" + return AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=5, + ) + + @abstractmethod + async def list_files( + self, + pattern: str = "*", + recursive: bool = True, + ) -> list[FileInfo]: + """List files matching a pattern. + + Args: + pattern: Glob pattern to match files. + recursive: Whether to search recursively. + + Returns: + List of FileInfo objects. + """ + ... + + @abstractmethod + async def read_file( + self, + path: str, + file_format: str | None = None, + limit: int = 100, + ) -> QueryResult: + """Read a file and return as QueryResult. + + Args: + path: Path to the file. + file_format: Format (parquet, csv, json). Auto-detected if None. + limit: Maximum rows to return. + + Returns: + QueryResult with file contents. + """ + ... + + @abstractmethod + async def infer_schema( + self, + path: str, + file_format: str | None = None, + ) -> Table: + """Infer schema from a file. + + Args: + path: Path to the file. + file_format: Format (parquet, csv, json). Auto-detected if None. + + Returns: + Table with column definitions. + """ + ... + + async def preview( + self, + path: str, + n: int = 100, + ) -> QueryResult: + """Get a preview of a file. + + Args: + path: Path to the file. + n: Number of rows to preview. + + Returns: + QueryResult with preview data. + """ + return await self.read_file(path, limit=n) + + async def sample( + self, + path: str, + n: int = 100, + ) -> QueryResult: + """Get a sample from a file. + + For most file formats, sampling is equivalent to preview + unless the underlying system supports random sampling. + + Args: + path: Path to the file. + n: Number of rows to sample. + + Returns: + QueryResult with sampled data. + """ + return await self.read_file(path, limit=n) diff --git a/backend/src/dataing/adapters/datasource/filesystem/gcs.py b/backend/src/dataing/adapters/datasource/filesystem/gcs.py new file mode 100644 index 000000000..424024ba8 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/filesystem/gcs.py @@ -0,0 +1,540 @@ +"""Google Cloud Storage adapter implementation. + +This module provides a GCS adapter that implements the unified +data source interface by using DuckDB to query files stored in GCS. 
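+
+Example (an illustrative sketch; the bucket, prefix, and ``sa_json`` credentials
+string are placeholders):
+
+    adapter = GCSAdapter({"bucket": "analytics-exports", "prefix": "warehouse/",
+                          "credentials_json": sa_json})
+    await adapter.connect()
+    files = await adapter.list_files("*.parquet")
+    if files:
+        preview = await adapter.read_file(files[0]["path"], limit=100)
+    await adapter.disconnect()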
+""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + QuerySyntaxError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.filesystem.base import FileSystemAdapter +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +GCS_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="location", label="Bucket Location", collapsed_by_default=False), + FieldGroup(id="auth", label="GCP Credentials", collapsed_by_default=False), + FieldGroup(id="format", label="File Format", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="bucket", + label="Bucket Name", + type="string", + required=True, + group="location", + placeholder="my-data-bucket", + ), + ConfigField( + name="prefix", + label="Path Prefix", + type="string", + required=False, + group="location", + placeholder="data/warehouse/", + description="Optional path prefix to limit scope", + ), + ConfigField( + name="credentials_json", + label="Service Account JSON", + type="secret", + required=True, + group="auth", + description="Service account credentials JSON content", + ), + ConfigField( + name="file_format", + label="Default File Format", + type="enum", + required=False, + group="format", + default_value="auto", + options=[ + {"value": "auto", "label": "Auto-detect"}, + {"value": "parquet", "label": "Parquet"}, + {"value": "csv", "label": "CSV"}, + {"value": "json", "label": "JSON/JSONL"}, + ], + ), + ], +) + +GCS_CAPABILITIES = AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=5, +) + + +@register_adapter( + source_type=SourceType.GCS, + display_name="Google Cloud Storage", + category=SourceCategory.FILESYSTEM, + icon="gcs", + description="Query Parquet, CSV, and JSON files stored in Google Cloud Storage", + capabilities=GCS_CAPABILITIES, + config_schema=GCS_CONFIG_SCHEMA, +) +class GCSAdapter(FileSystemAdapter): + """Google Cloud Storage adapter. + + Uses DuckDB with GCS extension to query files stored in GCS buckets. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize GCS adapter. 
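+
+        The adapter holds a single in-memory DuckDB connection; ``connect()``
+        installs and loads the ``httpfs`` extension and registers the service
+        account key before any files are read.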
+ + Args: + config: Configuration dictionary with: + - bucket: GCS bucket name + - prefix: Optional path prefix + - credentials_json: Service account JSON credentials + - file_format: Default file format (auto, parquet, csv, json) + """ + super().__init__(config) + self._conn: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.GCS + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return GCS_CAPABILITIES + + def _get_gcs_path(self, path: str = "") -> str: + """Construct full GCS path.""" + bucket = self._config.get("bucket", "") + prefix = self._config.get("prefix", "").strip("/") + + if path: + if prefix: + return f"gs://{bucket}/{prefix}/{path}" + return f"gs://{bucket}/{path}" + elif prefix: + return f"gs://{bucket}/{prefix}/" + return f"gs://{bucket}/" + + async def connect(self) -> None: + """Establish connection to GCS via DuckDB.""" + try: + import duckdb + except ImportError as e: + raise ConnectionFailedError( + message="duckdb is not installed. Install with: pip install duckdb", + details={"error": str(e)}, + ) from e + + try: + self._conn = duckdb.connect(":memory:") + + self._conn.execute("INSTALL httpfs") + self._conn.execute("LOAD httpfs") + + credentials_json = self._config.get("credentials_json", "") + if credentials_json: + import json + import os + import tempfile + + creds = ( + json.loads(credentials_json) + if isinstance(credentials_json, str) + else credentials_json + ) + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + json.dump(creds, f) + creds_path = f.name + + try: + self._conn.execute(f"SET gcs_service_account_key_file = '{creds_path}'") + finally: + os.unlink(creds_path) + + self._connected = True + + except Exception as e: + error_str = str(e).lower() + if "credentials" in error_str or "authentication" in error_str: + raise AuthenticationFailedError( + message="GCS authentication failed", + details={"error": str(e)}, + ) from e + raise ConnectionFailedError( + message=f"Failed to connect to GCS: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close GCS connection.""" + if self._conn: + self._conn.close() + self._conn = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test GCS connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + self._config.get("bucket", "") + self._config.get("prefix", "") + + gcs_path = self._get_gcs_path() + + try: + self._conn.execute(f"SELECT * FROM glob('{gcs_path}*.parquet') LIMIT 1") + except Exception: + pass + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version="GCS via DuckDB", + message="Connection successful", + ) + + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + error_str = str(e).lower() + + if "accessdenied" in error_str or "forbidden" in error_str: + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message="Access denied to GCS bucket", + error_code="ACCESS_DENIED", + ) + elif "nosuchbucket" in error_str or "not found" in error_str: + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message="GCS bucket not found", + error_code="CONNECTION_FAILED", + ) + + return ConnectionTestResult( + success=False, + 
latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def list_files(self, pattern: str = "*") -> list[dict[str, Any]]: + """List files in the GCS bucket.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to GCS") + + try: + gcs_path = self._get_gcs_path() + full_pattern = f"{gcs_path}{pattern}" + + result = self._conn.execute(f"SELECT * FROM glob('{full_pattern}')").fetchall() + + files = [] + for row in result: + filepath = row[0] + filename = filepath.split("/")[-1] + files.append( + { + "path": filepath, + "name": filename, + "size": None, + } + ) + + return files + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to list GCS files: {str(e)}", + details={"error": str(e)}, + ) from e + + async def read_file( + self, + path: str, + format: str | None = None, + limit: int = 100, + ) -> QueryResult: + """Read a file from GCS.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to GCS") + + start_time = time.time() + try: + file_format = format or self._config.get("file_format", "auto") + + if file_format == "auto": + if path.endswith(".parquet"): + file_format = "parquet" + elif path.endswith(".csv"): + file_format = "csv" + elif path.endswith(".json") or path.endswith(".jsonl"): + file_format = "json" + else: + file_format = "parquet" + + if file_format == "parquet": + sql = f"SELECT * FROM read_parquet('{path}') LIMIT {limit}" + elif file_format == "csv": + sql = f"SELECT * FROM read_csv_auto('{path}') LIMIT {limit}" + else: + sql = f"SELECT * FROM read_json_auto('{path}') LIMIT {limit}" + + result = self._conn.execute(sql) + columns_info = result.description + rows = result.fetchall() + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not columns_info: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [ + {"name": col[0], "data_type": self._map_duckdb_type(col[1])} for col in columns_info + ] + column_names = [col[0] for col in columns_info] + row_dicts = [dict(zip(column_names, row, strict=False)) for row in rows] + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=len(rows) >= limit, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str or "parser error" in error_str: + raise QuerySyntaxError(message=str(e), query=path) from e + elif "accessdenied" in error_str: + raise AccessDeniedError(message=str(e)) from e + raise + + def _map_duckdb_type(self, type_code: Any) -> str: + """Map DuckDB type code to string representation.""" + if type_code is None: + return "unknown" + type_str = str(type_code).lower() + result: str = normalize_type(type_str, SourceType.DUCKDB).value + return result + + async def infer_schema(self, path: str) -> dict[str, Any]: + """Infer schema from a GCS file.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to GCS") + + try: + file_format = self._config.get("file_format", "auto") + + if file_format == "auto": + if path.endswith(".parquet"): + file_format = "parquet" + elif path.endswith(".csv"): + file_format = "csv" + else: + file_format = "json" + + if file_format == "parquet": + sql = f"DESCRIBE SELECT * FROM read_parquet('{path}')" + elif file_format == "csv": + sql = f"DESCRIBE SELECT * FROM read_csv_auto('{path}')" + else: + sql = 
f"DESCRIBE SELECT * FROM read_json_auto('{path}')" + + result = self._conn.execute(sql) + rows = result.fetchall() + + columns = [] + for row in rows: + col_name = row[0] + col_type = row[1] + columns.append( + { + "name": col_name, + "data_type": normalize_type(col_type, SourceType.DUCKDB), + "native_type": col_type, + "nullable": True, + "is_primary_key": False, + "is_partition_key": False, + } + ) + + filename = path.split("/")[-1] + table_name = filename.rsplit(".", 1)[0].replace("-", "_").replace(" ", "_") + + return { + "name": table_name, + "table_type": "file", + "native_type": f"GCS_{file_format.upper()}_FILE", + "native_path": path, + "columns": columns, + } + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to infer schema from {path}: {str(e)}", + details={"error": str(e)}, + ) from e + + async def execute_query( + self, + sql: str, + params: dict[str, Any] | None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query against GCS files.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to GCS") + + start_time = time.time() + try: + result = self._conn.execute(sql) + columns_info = result.description + rows = result.fetchall() + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not columns_info: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [ + {"name": col[0], "data_type": self._map_duckdb_type(col[1])} for col in columns_info + ] + column_names = [col[0] for col in columns_info] + row_dicts = [dict(zip(column_names, row, strict=False)) for row in rows] + + truncated = False + if limit and len(row_dicts) > limit: + row_dicts = row_dicts[:limit] + truncated = True + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=truncated, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str or "parser error" in error_str: + raise QuerySyntaxError(message=str(e), query=sql[:200]) from e + elif "timeout" in error_str: + raise QueryTimeoutError(message=str(e), timeout_seconds=timeout_seconds) from e + raise + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get GCS schema by discovering files.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to GCS") + + try: + file_extensions = ["*.parquet", "*.csv", "*.json", "*.jsonl"] + all_files = [] + + for ext in file_extensions: + try: + files = await self.list_files(ext) + all_files.extend(files) + except Exception: + pass + + if filter and filter.table_pattern: + all_files = [f for f in all_files if filter.table_pattern in f["name"]] + + if filter and filter.max_tables: + all_files = all_files[: filter.max_tables] + + tables = [] + for file_info in all_files: + try: + table_def = await self.infer_schema(file_info["path"]) + tables.append(table_def) + except Exception: + tables.append( + { + "name": file_info["name"].rsplit(".", 1)[0], + "table_type": "file", + "native_type": "GCS_FILE", + "native_path": file_info["path"], + "columns": [], + } + ) + + bucket = self._config.get("bucket", "default") + catalogs = [ + { + "name": "default", + "schemas": [ + { + "name": bucket, + "tables": tables, + } + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "gcs", + catalogs=catalogs, + ) 
+ + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch GCS schema: {str(e)}", + details={"error": str(e)}, + ) from e diff --git a/backend/src/dataing/adapters/datasource/filesystem/hdfs.py b/backend/src/dataing/adapters/datasource/filesystem/hdfs.py new file mode 100644 index 000000000..fee10257d --- /dev/null +++ b/backend/src/dataing/adapters/datasource/filesystem/hdfs.py @@ -0,0 +1,556 @@ +"""HDFS (Hadoop Distributed File System) adapter implementation. + +This module provides an HDFS adapter that implements the unified +data source interface by using DuckDB to query files stored in HDFS. +""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + QuerySyntaxError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.filesystem.base import FileSystemAdapter +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +HDFS_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="connection", label="HDFS Connection", collapsed_by_default=False), + FieldGroup(id="auth", label="Authentication", collapsed_by_default=True), + FieldGroup(id="format", label="File Format", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="namenode_host", + label="NameNode Host", + type="string", + required=True, + group="connection", + placeholder="namenode.example.com", + description="HDFS NameNode hostname", + ), + ConfigField( + name="namenode_port", + label="NameNode Port", + type="integer", + required=True, + group="connection", + default_value=9000, + min_value=1, + max_value=65535, + description="HDFS NameNode port (typically 9000 or 8020)", + ), + ConfigField( + name="path", + label="Base Path", + type="string", + required=True, + group="connection", + placeholder="/user/data/warehouse", + description="Base HDFS path to query", + ), + ConfigField( + name="username", + label="Username", + type="string", + required=False, + group="auth", + description="HDFS username (for simple auth)", + ), + ConfigField( + name="kerberos_enabled", + label="Kerberos Authentication", + type="boolean", + required=False, + group="auth", + default_value=False, + ), + ConfigField( + name="kerberos_principal", + label="Kerberos Principal", + type="string", + required=False, + group="auth", + placeholder="user@REALM.COM", + show_if={"field": "kerberos_enabled", "value": True}, + ), + ConfigField( + name="file_format", + label="Default File Format", + type="enum", + required=False, + group="format", + default_value="auto", + options=[ + {"value": "auto", "label": "Auto-detect"}, + {"value": "parquet", "label": "Parquet"}, + {"value": "csv", "label": "CSV"}, + {"value": "json", "label": "JSON/JSONL"}, + {"value": "orc", "label": "ORC"}, + ], + ), + ], +) + +HDFS_CAPABILITIES = AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=5, +) + + +@register_adapter( + source_type=SourceType.HDFS, + 
display_name="HDFS", + category=SourceCategory.FILESYSTEM, + icon="hdfs", + description="Query Parquet, ORC, CSV, and JSON files stored in HDFS", + capabilities=HDFS_CAPABILITIES, + config_schema=HDFS_CONFIG_SCHEMA, +) +class HDFSAdapter(FileSystemAdapter): + """HDFS (Hadoop Distributed File System) adapter. + + Uses DuckDB with httpfs extension to query files stored in HDFS. + Note: Requires WebHDFS REST API to be enabled on the cluster. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize HDFS adapter. + + Args: + config: Configuration dictionary with: + - namenode_host: NameNode hostname + - namenode_port: NameNode port + - path: Base HDFS path + - username: Username for simple auth (optional) + - kerberos_enabled: Use Kerberos auth (optional) + - kerberos_principal: Kerberos principal (optional) + - file_format: Default file format (auto, parquet, csv, json, orc) + """ + super().__init__(config) + self._conn: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.HDFS + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return HDFS_CAPABILITIES + + def _get_hdfs_url(self, path: str = "") -> str: + """Construct HDFS URL for DuckDB access via WebHDFS.""" + host = self._config.get("namenode_host", "localhost") + port = self._config.get("namenode_port", 9000) + base_path = self._config.get("path", "/").strip("/") + username = self._config.get("username", "") + + if path: + full_path = f"{base_path}/{path}".strip("/") + else: + full_path = base_path + + if username: + return f"hdfs://{host}:{port}/{full_path}?user.name={username}" + return f"hdfs://{host}:{port}/{full_path}" + + async def connect(self) -> None: + """Establish connection to HDFS via DuckDB.""" + try: + import duckdb + except ImportError as e: + raise ConnectionFailedError( + message="duckdb is not installed. 
Install with: pip install duckdb", + details={"error": str(e)}, + ) from e + + try: + self._conn = duckdb.connect(":memory:") + + self._conn.execute("INSTALL httpfs") + self._conn.execute("LOAD httpfs") + + self._connected = True + + except Exception as e: + error_str = str(e).lower() + if "authentication" in error_str or "kerberos" in error_str: + raise AuthenticationFailedError( + message="HDFS authentication failed", + details={"error": str(e)}, + ) from e + raise ConnectionFailedError( + message=f"Failed to connect to HDFS: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close HDFS connection.""" + if self._conn: + self._conn.close() + self._conn = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test HDFS connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version="HDFS via DuckDB", + message="Connection successful", + ) + + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + error_str = str(e).lower() + + if "permission" in error_str or "access" in error_str: + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message="Access denied to HDFS", + error_code="ACCESS_DENIED", + ) + elif "connection" in error_str or "refused" in error_str: + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message="Cannot connect to HDFS NameNode", + error_code="CONNECTION_FAILED", + ) + + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def list_files(self, pattern: str = "*") -> list[dict[str, Any]]: + """List files in the HDFS directory.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to HDFS") + + try: + hdfs_path = self._get_hdfs_url() + full_pattern = f"{hdfs_path}/{pattern}" + + try: + result = self._conn.execute(f"SELECT * FROM glob('{full_pattern}')").fetchall() + + files = [] + for row in result: + filepath = row[0] + filename = filepath.split("/")[-1] + files.append( + { + "path": filepath, + "name": filename, + "size": None, + } + ) + return files + except Exception: + return [] + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to list HDFS files: {str(e)}", + details={"error": str(e)}, + ) from e + + async def read_file( + self, + path: str, + format: str | None = None, + limit: int = 100, + ) -> QueryResult: + """Read a file from HDFS.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to HDFS") + + start_time = time.time() + try: + file_format = format or self._config.get("file_format", "auto") + + if file_format == "auto": + if path.endswith(".parquet"): + file_format = "parquet" + elif path.endswith(".csv"): + file_format = "csv" + elif path.endswith(".json") or path.endswith(".jsonl"): + file_format = "json" + elif path.endswith(".orc"): + file_format = "orc" + else: + file_format = "parquet" + + if file_format == "parquet": + sql = f"SELECT * FROM read_parquet('{path}') LIMIT {limit}" + elif file_format == "csv": + sql = f"SELECT * FROM read_csv_auto('{path}') LIMIT {limit}" + elif file_format == "orc": + sql = f"SELECT * FROM read_orc('{path}') LIMIT {limit}" + else: + sql = f"SELECT * FROM read_json_auto('{path}') LIMIT 
{limit}" + + result = self._conn.execute(sql) + columns_info = result.description + rows = result.fetchall() + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not columns_info: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [ + {"name": col[0], "data_type": self._map_duckdb_type(col[1])} for col in columns_info + ] + column_names = [col[0] for col in columns_info] + row_dicts = [dict(zip(column_names, row, strict=False)) for row in rows] + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=len(rows) >= limit, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str or "parser error" in error_str: + raise QuerySyntaxError(message=str(e), query=path) from e + elif "permission" in error_str or "access" in error_str: + raise AccessDeniedError(message=str(e)) from e + raise + + def _map_duckdb_type(self, type_code: Any) -> str: + """Map DuckDB type code to string representation.""" + if type_code is None: + return "unknown" + type_str = str(type_code).lower() + result: str = normalize_type(type_str, SourceType.DUCKDB).value + return result + + async def infer_schema(self, path: str) -> dict[str, Any]: + """Infer schema from an HDFS file.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to HDFS") + + try: + file_format = self._config.get("file_format", "auto") + + if file_format == "auto": + if path.endswith(".parquet"): + file_format = "parquet" + elif path.endswith(".csv"): + file_format = "csv" + elif path.endswith(".orc"): + file_format = "orc" + else: + file_format = "json" + + if file_format == "parquet": + sql = f"DESCRIBE SELECT * FROM read_parquet('{path}')" + elif file_format == "csv": + sql = f"DESCRIBE SELECT * FROM read_csv_auto('{path}')" + elif file_format == "orc": + sql = f"DESCRIBE SELECT * FROM read_orc('{path}')" + else: + sql = f"DESCRIBE SELECT * FROM read_json_auto('{path}')" + + result = self._conn.execute(sql) + rows = result.fetchall() + + columns = [] + for row in rows: + col_name = row[0] + col_type = row[1] + columns.append( + { + "name": col_name, + "data_type": normalize_type(col_type, SourceType.DUCKDB), + "native_type": col_type, + "nullable": True, + "is_primary_key": False, + "is_partition_key": False, + } + ) + + filename = path.split("/")[-1] + table_name = filename.rsplit(".", 1)[0].replace("-", "_").replace(" ", "_") + + return { + "name": table_name, + "table_type": "file", + "native_type": f"HDFS_{file_format.upper()}_FILE", + "native_path": path, + "columns": columns, + } + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to infer schema from {path}: {str(e)}", + details={"error": str(e)}, + ) from e + + async def execute_query( + self, + sql: str, + params: dict[str, Any] | None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query against HDFS files.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to HDFS") + + start_time = time.time() + try: + result = self._conn.execute(sql) + columns_info = result.description + rows = result.fetchall() + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not columns_info: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [ + {"name": 
col[0], "data_type": self._map_duckdb_type(col[1])} for col in columns_info + ] + column_names = [col[0] for col in columns_info] + row_dicts = [dict(zip(column_names, row, strict=False)) for row in rows] + + truncated = False + if limit and len(row_dicts) > limit: + row_dicts = row_dicts[:limit] + truncated = True + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=truncated, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str or "parser error" in error_str: + raise QuerySyntaxError(message=str(e), query=sql[:200]) from e + elif "timeout" in error_str: + raise QueryTimeoutError(message=str(e), timeout_seconds=timeout_seconds) from e + raise + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get HDFS schema by discovering files.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to HDFS") + + try: + file_extensions = ["*.parquet", "*.csv", "*.json", "*.jsonl", "*.orc"] + all_files = [] + + for ext in file_extensions: + try: + files = await self.list_files(ext) + all_files.extend(files) + except Exception: + pass + + if filter and filter.table_pattern: + all_files = [f for f in all_files if filter.table_pattern in f["name"]] + + if filter and filter.max_tables: + all_files = all_files[: filter.max_tables] + + tables = [] + for file_info in all_files: + try: + table_def = await self.infer_schema(file_info["path"]) + tables.append(table_def) + except Exception: + tables.append( + { + "name": file_info["name"].rsplit(".", 1)[0], + "table_type": "file", + "native_type": "HDFS_FILE", + "native_path": file_info["path"], + "columns": [], + } + ) + + path = self._config.get("path", "/") + catalogs = [ + { + "name": "default", + "schemas": [ + { + "name": path.strip("/").replace("/", "_") or "root", + "tables": tables, + } + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "hdfs", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch HDFS schema: {str(e)}", + details={"error": str(e)}, + ) from e diff --git a/backend/src/dataing/adapters/datasource/filesystem/local.py b/backend/src/dataing/adapters/datasource/filesystem/local.py new file mode 100644 index 000000000..d01570bf9 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/filesystem/local.py @@ -0,0 +1,521 @@ +"""Local file system adapter implementation. + +This module provides a local file system adapter that implements the unified +data source interface by using DuckDB to query local Parquet, CSV, and JSON files. 
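+
+A minimal usage sketch (illustrative only; the directory and file name are
+assumptions):
+
+    adapter = LocalFileAdapter({"path": "/data/exports", "recursive": True})
+    await adapter.connect()
+    schema = await adapter.get_schema()
+    rows = await adapter.execute_query(
+        "SELECT count(*) FROM read_parquet('/data/exports/orders.parquet')"
+    )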
+""" + +from __future__ import annotations + +import os +import time +from typing import Any + +from dataing.adapters.datasource.errors import ( + ConnectionFailedError, + QuerySyntaxError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.filesystem.base import FileSystemAdapter +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +LOCAL_FILE_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="location", label="File Location", collapsed_by_default=False), + FieldGroup(id="format", label="File Format", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="path", + label="Directory Path", + type="string", + required=True, + group="location", + placeholder="/path/to/data", + description="Path to directory containing data files", + ), + ConfigField( + name="recursive", + label="Include Subdirectories", + type="boolean", + required=False, + group="location", + default_value=False, + description="Search for files in subdirectories", + ), + ConfigField( + name="file_format", + label="Default File Format", + type="enum", + required=False, + group="format", + default_value="auto", + options=[ + {"value": "auto", "label": "Auto-detect"}, + {"value": "parquet", "label": "Parquet"}, + {"value": "csv", "label": "CSV"}, + {"value": "json", "label": "JSON/JSONL"}, + ], + ), + ], +) + +LOCAL_FILE_CAPABILITIES = AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=5, +) + + +@register_adapter( + source_type=SourceType.LOCAL_FILE, + display_name="Local Files", + category=SourceCategory.FILESYSTEM, + icon="folder", + description="Query Parquet, CSV, and JSON files from local filesystem", + capabilities=LOCAL_FILE_CAPABILITIES, + config_schema=LOCAL_FILE_CONFIG_SCHEMA, +) +class LocalFileAdapter(FileSystemAdapter): + """Local file system adapter. + + Uses DuckDB to query files stored on the local filesystem. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize local file adapter. + + Args: + config: Configuration dictionary with: + - path: Directory path containing data files + - recursive: Search subdirectories (optional) + - file_format: Default file format (auto, parquet, csv, json) + """ + super().__init__(config) + self._conn: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.LOCAL_FILE + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return LOCAL_FILE_CAPABILITIES + + def _get_base_path(self) -> str: + """Get the configured base path.""" + path = self._config.get("path", ".") + result: str = os.path.abspath(os.path.expanduser(path)) + return result + + async def connect(self) -> None: + """Establish connection to local file system via DuckDB.""" + try: + import duckdb + except ImportError as e: + raise ConnectionFailedError( + message="duckdb is not installed. 
Install with: pip install duckdb", + details={"error": str(e)}, + ) from e + + try: + base_path = self._get_base_path() + + if not os.path.exists(base_path): + raise ConnectionFailedError( + message=f"Directory does not exist: {base_path}", + details={"path": base_path}, + ) + + if not os.path.isdir(base_path): + raise ConnectionFailedError( + message=f"Path is not a directory: {base_path}", + details={"path": base_path}, + ) + + self._conn = duckdb.connect(":memory:") + self._connected = True + + except ConnectionFailedError: + raise + except Exception as e: + raise ConnectionFailedError( + message=f"Failed to connect to local filesystem: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close DuckDB connection.""" + if self._conn: + self._conn.close() + self._conn = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test local filesystem connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + base_path = self._get_base_path() + + file_count = 0 + for entry in os.listdir(base_path): + if entry.endswith((".parquet", ".csv", ".json", ".jsonl")): + file_count += 1 + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version="Local FS via DuckDB", + message=f"Connection successful. Found {file_count} data files.", + ) + + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def list_files(self, pattern: str = "*") -> list[dict[str, Any]]: + """List files in the local directory.""" + if not self._connected: + raise ConnectionFailedError(message="Not connected to local filesystem") + + try: + base_path = self._get_base_path() + recursive = self._config.get("recursive", False) + + files = [] + + if recursive: + for root, _, filenames in os.walk(base_path): + for filename in filenames: + if self._matches_pattern(filename, pattern): + filepath = os.path.join(root, filename) + try: + size = os.path.getsize(filepath) + except Exception: + size = None + files.append( + { + "path": filepath, + "name": filename, + "size": size, + } + ) + else: + for entry in os.listdir(base_path): + filepath = os.path.join(base_path, entry) + if os.path.isfile(filepath) and self._matches_pattern(entry, pattern): + try: + size = os.path.getsize(filepath) + except Exception: + size = None + files.append( + { + "path": filepath, + "name": entry, + "size": size, + } + ) + + return files + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to list files: {str(e)}", + details={"error": str(e)}, + ) from e + + def _matches_pattern(self, filename: str, pattern: str) -> bool: + """Check if filename matches the pattern.""" + import fnmatch + + return fnmatch.fnmatch(filename, pattern) + + async def read_file( + self, + path: str, + format: str | None = None, + limit: int = 100, + ) -> QueryResult: + """Read a local file.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to local filesystem") + + start_time = time.time() + try: + file_format = format or self._config.get("file_format", "auto") + + if file_format == "auto": + if path.endswith(".parquet"): + file_format = "parquet" + elif path.endswith(".csv"): + file_format = "csv" + elif path.endswith(".json") or 
path.endswith(".jsonl"): + file_format = "json" + else: + file_format = "parquet" + + if file_format == "parquet": + sql = f"SELECT * FROM read_parquet('{path}') LIMIT {limit}" + elif file_format == "csv": + sql = f"SELECT * FROM read_csv_auto('{path}') LIMIT {limit}" + else: + sql = f"SELECT * FROM read_json_auto('{path}') LIMIT {limit}" + + result = self._conn.execute(sql) + columns_info = result.description + rows = result.fetchall() + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not columns_info: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [ + {"name": col[0], "data_type": self._map_duckdb_type(col[1])} for col in columns_info + ] + column_names = [col[0] for col in columns_info] + row_dicts = [dict(zip(column_names, row, strict=False)) for row in rows] + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=len(rows) >= limit, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str or "parser error" in error_str: + raise QuerySyntaxError(message=str(e), query=path) from e + raise + + def _map_duckdb_type(self, type_code: Any) -> str: + """Map DuckDB type code to string representation.""" + if type_code is None: + return "unknown" + type_str = str(type_code).lower() + result: str = normalize_type(type_str, SourceType.DUCKDB).value + return result + + async def infer_schema(self, path: str) -> dict[str, Any]: + """Infer schema from a local file.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to local filesystem") + + try: + file_format = self._config.get("file_format", "auto") + + if file_format == "auto": + if path.endswith(".parquet"): + file_format = "parquet" + elif path.endswith(".csv"): + file_format = "csv" + else: + file_format = "json" + + if file_format == "parquet": + sql = f"DESCRIBE SELECT * FROM read_parquet('{path}')" + elif file_format == "csv": + sql = f"DESCRIBE SELECT * FROM read_csv_auto('{path}')" + else: + sql = f"DESCRIBE SELECT * FROM read_json_auto('{path}')" + + result = self._conn.execute(sql) + rows = result.fetchall() + + columns = [] + for row in rows: + col_name = row[0] + col_type = row[1] + columns.append( + { + "name": col_name, + "data_type": normalize_type(col_type, SourceType.DUCKDB), + "native_type": col_type, + "nullable": True, + "is_primary_key": False, + "is_partition_key": False, + } + ) + + filename = os.path.basename(path) + table_name = filename.rsplit(".", 1)[0].replace("-", "_").replace(" ", "_") + + try: + size = os.path.getsize(path) + except Exception: + size = None + + return { + "name": table_name, + "table_type": "file", + "native_type": f"LOCAL_{file_format.upper()}_FILE", + "native_path": path, + "columns": columns, + "size_bytes": size, + } + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to infer schema from {path}: {str(e)}", + details={"error": str(e)}, + ) from e + + async def execute_query( + self, + sql: str, + params: dict[str, Any] | None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query against local files.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to local filesystem") + + start_time = time.time() + try: + result = self._conn.execute(sql) + columns_info = result.description + rows = result.fetchall() + + 
execution_time_ms = int((time.time() - start_time) * 1000) + + if not columns_info: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [ + {"name": col[0], "data_type": self._map_duckdb_type(col[1])} for col in columns_info + ] + column_names = [col[0] for col in columns_info] + row_dicts = [dict(zip(column_names, row, strict=False)) for row in rows] + + truncated = False + if limit and len(row_dicts) > limit: + row_dicts = row_dicts[:limit] + truncated = True + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=truncated, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str or "parser error" in error_str: + raise QuerySyntaxError(message=str(e), query=sql[:200]) from e + elif "timeout" in error_str: + raise QueryTimeoutError(message=str(e), timeout_seconds=timeout_seconds) from e + raise + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get local filesystem schema by discovering files.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to local filesystem") + + try: + file_extensions = ["*.parquet", "*.csv", "*.json", "*.jsonl"] + all_files = [] + + for ext in file_extensions: + try: + files = await self.list_files(ext) + all_files.extend(files) + except Exception: + pass + + if filter and filter.table_pattern: + all_files = [f for f in all_files if filter.table_pattern in f["name"]] + + if filter and filter.max_tables: + all_files = all_files[: filter.max_tables] + + tables = [] + for file_info in all_files: + try: + table_def = await self.infer_schema(file_info["path"]) + tables.append(table_def) + except Exception: + tables.append( + { + "name": file_info["name"].rsplit(".", 1)[0], + "table_type": "file", + "native_type": "LOCAL_FILE", + "native_path": file_info["path"], + "columns": [], + "size_bytes": file_info.get("size"), + } + ) + + base_path = self._get_base_path() + dir_name = os.path.basename(base_path) or "root" + + catalogs = [ + { + "name": "default", + "schemas": [ + { + "name": dir_name, + "tables": tables, + } + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "local", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch local filesystem schema: {str(e)}", + details={"error": str(e)}, + ) from e diff --git a/backend/src/dataing/adapters/datasource/filesystem/s3.py b/backend/src/dataing/adapters/datasource/filesystem/s3.py new file mode 100644 index 000000000..c282ac8e5 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/filesystem/s3.py @@ -0,0 +1,570 @@ +"""S3 adapter implementation. + +This module provides an S3 adapter that implements the unified +data source interface using DuckDB for file querying. 
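+
+A minimal usage sketch (illustrative only; the credentials and bucket contents
+are assumptions):
+
+    adapter = S3Adapter(
+        {
+            "bucket": "my-data-bucket",
+            "region": "us-east-1",
+            "access_key_id": "AKIA...",
+            "secret_access_key": "...",
+        }
+    )
+    await adapter.connect()
+    files = await adapter.list_files("*.parquet")
+    result = await adapter.read_file(files[0].path, limit=10)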
+""" + +from __future__ import annotations + +import time +from datetime import datetime +from typing import Any + +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.filesystem.base import FileInfo, FileSystemAdapter +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + Column, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, + Table, +) + +S3_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="location", label="Bucket Location", collapsed_by_default=False), + FieldGroup(id="auth", label="AWS Credentials", collapsed_by_default=False), + FieldGroup(id="format", label="File Format", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="bucket", + label="Bucket Name", + type="string", + required=True, + group="location", + placeholder="my-data-bucket", + ), + ConfigField( + name="prefix", + label="Path Prefix", + type="string", + required=False, + group="location", + placeholder="data/warehouse/", + description="Optional path prefix to limit scope", + ), + ConfigField( + name="region", + label="AWS Region", + type="enum", + required=True, + group="location", + default_value="us-east-1", + options=[ + {"value": "us-east-1", "label": "US East (N. Virginia)"}, + {"value": "us-east-2", "label": "US East (Ohio)"}, + {"value": "us-west-1", "label": "US West (N. California)"}, + {"value": "us-west-2", "label": "US West (Oregon)"}, + {"value": "eu-west-1", "label": "EU (Ireland)"}, + {"value": "eu-west-2", "label": "EU (London)"}, + {"value": "eu-central-1", "label": "EU (Frankfurt)"}, + {"value": "ap-northeast-1", "label": "Asia Pacific (Tokyo)"}, + {"value": "ap-southeast-1", "label": "Asia Pacific (Singapore)"}, + ], + ), + ConfigField( + name="access_key_id", + label="Access Key ID", + type="string", + required=True, + group="auth", + ), + ConfigField( + name="secret_access_key", + label="Secret Access Key", + type="secret", + required=True, + group="auth", + ), + ConfigField( + name="file_format", + label="Default File Format", + type="enum", + required=False, + group="format", + default_value="auto", + options=[ + {"value": "auto", "label": "Auto-detect"}, + {"value": "parquet", "label": "Parquet"}, + {"value": "csv", "label": "CSV"}, + {"value": "json", "label": "JSON/JSONL"}, + ], + ), + ], +) + +S3_CAPABILITIES = AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=5, +) + + +@register_adapter( + source_type=SourceType.S3, + display_name="Amazon S3", + category=SourceCategory.FILESYSTEM, + icon="aws-s3", + description="Query parquet, CSV, and JSON files directly from S3 using SQL", + capabilities=S3_CAPABILITIES, + config_schema=S3_CONFIG_SCHEMA, +) +class S3Adapter(FileSystemAdapter): + """S3 file system adapter. + + Uses DuckDB with httpfs extension for querying files directly from S3. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize S3 adapter. 
+ + Args: + config: Configuration dictionary with: + - bucket: S3 bucket name + - prefix: Path prefix (optional) + - region: AWS region + - access_key_id: AWS access key + - secret_access_key: AWS secret key + - file_format: Default format (optional) + """ + super().__init__(config) + self._duckdb_conn: Any = None + self._s3_client: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.S3 + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return S3_CAPABILITIES + + async def connect(self) -> None: + """Establish connection to S3.""" + try: + import boto3 + import duckdb + except ImportError as e: + raise ConnectionFailedError( + message="boto3 and duckdb are required. Install with: pip install boto3 duckdb", + details={"error": str(e)}, + ) from e + + try: + region = self._config.get("region", "us-east-1") + access_key = self._config.get("access_key_id", "") + secret_key = self._config.get("secret_access_key", "") + + # Initialize S3 client for listing + self._s3_client = boto3.client( + "s3", + region_name=region, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + ) + + # Initialize DuckDB with S3 credentials + self._duckdb_conn = duckdb.connect(":memory:") + self._duckdb_conn.execute("INSTALL httpfs") + self._duckdb_conn.execute("LOAD httpfs") + self._duckdb_conn.execute(f"SET s3_region = '{region}'") + self._duckdb_conn.execute(f"SET s3_access_key_id = '{access_key}'") + self._duckdb_conn.execute(f"SET s3_secret_access_key = '{secret_key}'") + + # Test connection by listing bucket + bucket = self._config.get("bucket", "") + self._s3_client.head_bucket(Bucket=bucket) + + self._connected = True + except Exception as e: + error_str = str(e).lower() + if "accessdenied" in error_str or "403" in error_str: + raise AccessDeniedError( + message="Access denied to S3 bucket", + ) from e + elif "invalidaccesskeyid" in error_str or "signaturemismatch" in error_str: + raise AuthenticationFailedError( + message="Invalid AWS credentials", + details={"error": str(e)}, + ) from e + elif "nosuchbucket" in error_str: + raise ConnectionFailedError( + message=f"S3 bucket not found: {self._config.get('bucket')}", + details={"error": str(e)}, + ) from e + else: + raise ConnectionFailedError( + message=f"Failed to connect to S3: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close S3 connection.""" + if self._duckdb_conn: + self._duckdb_conn.close() + self._duckdb_conn = None + self._s3_client = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test S3 connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + bucket = self._config.get("bucket", "") + prefix = self._config.get("prefix", "") + + # List objects to verify access + response = self._s3_client.list_objects_v2( + Bucket=bucket, + Prefix=prefix, + MaxKeys=1, + ) + key_count = response.get("KeyCount", 0) + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version=f"S3 ({bucket})", + message=f"Connection successful, found {key_count}+ objects", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) 
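+
+    # Listing below walks every object under the configured bucket/prefix via
+    # the boto3 list_objects_v2 paginator; on very large prefixes this can be
+    # slow, so narrowing the "prefix" field in the source config is usually
+    # the cheaper way to scope discovery.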
+ + async def list_files( + self, + pattern: str = "*", + recursive: bool = True, + ) -> list[FileInfo]: + """List files in S3 bucket.""" + if not self._connected or not self._s3_client: + raise ConnectionFailedError(message="Not connected to S3") + + bucket = self._config.get("bucket", "") + prefix = self._config.get("prefix", "") + + files = [] + paginator = self._s3_client.get_paginator("list_objects_v2") + + for page in paginator.paginate(Bucket=bucket, Prefix=prefix): + for obj in page.get("Contents", []): + key = obj["Key"] + name = key.split("/")[-1] + + # Skip directories + if key.endswith("/"): + continue + + # Match pattern + if pattern != "*": + import fnmatch + + if not fnmatch.fnmatch(name, pattern): + continue + + # Detect file format + file_format = None + if name.endswith(".parquet"): + file_format = "parquet" + elif name.endswith(".csv"): + file_format = "csv" + elif name.endswith(".json") or name.endswith(".jsonl"): + file_format = "json" + + files.append( + FileInfo( + path=f"s3://{bucket}/{key}", + name=name, + size_bytes=obj.get("Size", 0), + last_modified=obj.get("LastModified", datetime.now()).isoformat(), + file_format=file_format, + ) + ) + + return files + + async def read_file( + self, + path: str, + file_format: str | None = None, + limit: int = 100, + ) -> QueryResult: + """Read a file from S3.""" + if not self._connected or not self._duckdb_conn: + raise ConnectionFailedError(message="Not connected to S3") + + start_time = time.time() + + # Auto-detect format if not specified + if not file_format: + file_format = self._config.get("file_format", "auto") + if file_format == "auto": + if path.endswith(".parquet"): + file_format = "parquet" + elif path.endswith(".csv"): + file_format = "csv" + elif path.endswith(".json") or path.endswith(".jsonl"): + file_format = "json" + else: + file_format = "parquet" # Default + + # Build query based on format + if file_format == "parquet": + sql = f"SELECT * FROM read_parquet('{path}') LIMIT {limit}" + elif file_format == "csv": + sql = f"SELECT * FROM read_csv_auto('{path}') LIMIT {limit}" + elif file_format == "json": + sql = f"SELECT * FROM read_json_auto('{path}') LIMIT {limit}" + else: + sql = f"SELECT * FROM read_parquet('{path}') LIMIT {limit}" + + result = self._duckdb_conn.execute(sql) + columns_info = result.description + rows = result.fetchall() + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not columns_info: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [ + {"name": col[0], "data_type": self._map_duckdb_type(col[1])} for col in columns_info + ] + column_names = [col[0] for col in columns_info] + + row_dicts = [dict(zip(column_names, row, strict=False)) for row in rows] + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + execution_time_ms=execution_time_ms, + ) + + def _map_duckdb_type(self, type_code: Any) -> str: + """Map DuckDB type to normalized type.""" + if type_code is None: + return "unknown" + type_str = str(type_code).lower() + result: str = normalize_type(type_str, SourceType.DUCKDB).value + return result + + async def infer_schema( + self, + path: str, + file_format: str | None = None, + ) -> Table: + """Infer schema from a file.""" + if not self._connected or not self._duckdb_conn: + raise ConnectionFailedError(message="Not connected to S3") + + # Auto-detect format + if not file_format: + if path.endswith(".parquet"): + file_format = "parquet" + elif 
path.endswith(".csv"): + file_format = "csv" + else: + file_format = "parquet" + + # Get schema using DESCRIBE + if file_format == "parquet": + sql = f"DESCRIBE SELECT * FROM read_parquet('{path}')" + elif file_format == "csv": + sql = f"DESCRIBE SELECT * FROM read_csv_auto('{path}')" + else: + sql = f"DESCRIBE SELECT * FROM read_parquet('{path}')" + + result = self._duckdb_conn.execute(sql) + rows = result.fetchall() + + columns = [] + for row in rows: + col_name = row[0] + col_type = row[1] + columns.append( + Column( + name=col_name, + data_type=normalize_type(col_type, SourceType.DUCKDB), + native_type=col_type, + nullable=True, + is_primary_key=False, + is_partition_key=False, + ) + ) + + # Get file name for table name + name = path.split("/")[-1].split(".")[0] + + return Table( + name=name, + table_type="file", + native_type="PARQUET_FILE" if file_format == "parquet" else "CSV_FILE", + native_path=path, + columns=columns, + ) + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get S3 schema (files as tables).""" + if not self._connected: + raise ConnectionFailedError(message="Not connected to S3") + + try: + # List files + files = await self.list_files() + + # Apply filter if provided + if filter and filter.table_pattern: + import fnmatch + + pattern = filter.table_pattern.replace("%", "*") + files = [f for f in files if fnmatch.fnmatch(f.name, pattern)] + + # Limit files + max_tables = filter.max_tables if filter else 100 + files = files[:max_tables] + + # Infer schema for each file + tables = [] + for file_info in files: + try: + table = await self.infer_schema(file_info.path, file_info.file_format) + tables.append( + { + "name": table.name, + "table_type": table.table_type, + "native_type": table.native_type, + "native_path": table.native_path, + "columns": [ + { + "name": col.name, + "data_type": col.data_type, + "native_type": col.native_type, + "nullable": col.nullable, + "is_primary_key": col.is_primary_key, + "is_partition_key": col.is_partition_key, + } + for col in table.columns + ], + "size_bytes": file_info.size_bytes, + "last_modified": file_info.last_modified, + } + ) + except Exception: + # Skip files we can't read + continue + + bucket = self._config.get("bucket", "") + prefix = self._config.get("prefix", "") + + # Build catalog structure + catalogs = [ + { + "name": bucket, + "schemas": [ + { + "name": prefix or "root", + "tables": tables, + } + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "s3", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch S3 schema: {str(e)}", + details={"error": str(e)}, + ) from e + + async def execute_query( + self, + sql: str, + params: dict[str, Any] | None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query against S3 files using DuckDB.""" + if not self._connected or not self._duckdb_conn: + raise ConnectionFailedError(message="Not connected to S3") + + start_time = time.time() + + result = self._duckdb_conn.execute(sql) + columns_info = result.description + rows = result.fetchall() + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not columns_info: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [ + {"name": col[0], "data_type": self._map_duckdb_type(col[1])} for col in columns_info + ] + column_names = [col[0] for col in columns_info] + + 
row_dicts = [dict(zip(column_names, row, strict=False)) for row in rows] + + truncated = False + if limit and len(row_dicts) > limit: + row_dicts = row_dicts[:limit] + truncated = True + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=truncated, + execution_time_ms=execution_time_ms, + ) diff --git a/backend/src/dataing/adapters/datasource/registry.py b/backend/src/dataing/adapters/datasource/registry.py new file mode 100644 index 000000000..cfd3426ad --- /dev/null +++ b/backend/src/dataing/adapters/datasource/registry.py @@ -0,0 +1,224 @@ +"""Adapter registry for managing data source adapters. + +This module provides a singleton registry for registering and creating +data source adapters by type. +""" + +from __future__ import annotations + +from collections.abc import Callable +from typing import Any, TypeVar + +from dataing.adapters.datasource.base import BaseAdapter +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigSchema, + SourceCategory, + SourceType, + SourceTypeDefinition, +) + +T = TypeVar("T", bound=BaseAdapter) + + +class AdapterRegistry: + """Singleton registry for data source adapters. + + This registry maintains a mapping of source types to adapter classes, + allowing dynamic creation of adapters based on configuration. + """ + + _instance: AdapterRegistry | None = None + _adapters: dict[SourceType, type[BaseAdapter]] + _definitions: dict[SourceType, SourceTypeDefinition] + + def __new__(cls) -> AdapterRegistry: + """Create or return the singleton instance.""" + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._adapters = {} + cls._instance._definitions = {} + return cls._instance + + @classmethod + def get_instance(cls) -> AdapterRegistry: + """Get the singleton instance.""" + return cls() + + def register( + self, + source_type: SourceType, + adapter_class: type[BaseAdapter], + display_name: str, + category: SourceCategory, + icon: str, + description: str, + capabilities: AdapterCapabilities, + config_schema: ConfigSchema, + ) -> None: + """Register an adapter class for a source type. + + Args: + source_type: The source type to register. + adapter_class: The adapter class to register. + display_name: Human-readable name for the source type. + category: Category of the source (database, api, filesystem). + icon: Icon identifier for the source type. + description: Description of the source type. + capabilities: Capabilities of the adapter. + config_schema: Configuration schema for connection forms. + """ + self._adapters[source_type] = adapter_class + self._definitions[source_type] = SourceTypeDefinition( + type=source_type, + display_name=display_name, + category=category, + icon=icon, + description=description, + capabilities=capabilities, + config_schema=config_schema, + ) + + def unregister(self, source_type: SourceType) -> None: + """Unregister an adapter for a source type. + + Args: + source_type: The source type to unregister. + """ + self._adapters.pop(source_type, None) + self._definitions.pop(source_type, None) + + def create( + self, + source_type: SourceType | str, + config: dict[str, Any], + ) -> BaseAdapter: + """Create an adapter instance for a source type. + + Args: + source_type: The source type (can be string or enum). + config: Configuration dictionary for the adapter. + + Returns: + Instance of the appropriate adapter. + + Raises: + ValueError: If source type is not registered. 
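+
+        Example (illustrative; assumes the S3 adapter module has been
+        imported so its @register_adapter decorator has run):
+            adapter = get_registry().create(
+                SourceType.S3, {"bucket": "my-data-bucket"}
+            )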
+ """ + if isinstance(source_type, str): + source_type = SourceType(source_type) + + adapter_class = self._adapters.get(source_type) + if adapter_class is None: + raise ValueError(f"No adapter registered for source type: {source_type}") + + return adapter_class(config) + + def get_adapter_class(self, source_type: SourceType) -> type[BaseAdapter] | None: + """Get the adapter class for a source type. + + Args: + source_type: The source type. + + Returns: + The adapter class, or None if not registered. + """ + return self._adapters.get(source_type) + + def get_definition(self, source_type: SourceType) -> SourceTypeDefinition | None: + """Get the source type definition. + + Args: + source_type: The source type. + + Returns: + The source type definition, or None if not registered. + """ + return self._definitions.get(source_type) + + def list_types(self) -> list[SourceTypeDefinition]: + """List all registered source type definitions. + + Returns: + List of all source type definitions. + """ + return list(self._definitions.values()) + + def is_registered(self, source_type: SourceType) -> bool: + """Check if a source type is registered. + + Args: + source_type: The source type to check. + + Returns: + True if registered, False otherwise. + """ + return source_type in self._adapters + + @property + def registered_types(self) -> list[SourceType]: + """Get list of all registered source types.""" + return list(self._adapters.keys()) + + +def register_adapter( + source_type: SourceType, + display_name: str, + category: SourceCategory, + icon: str, + description: str, + capabilities: AdapterCapabilities, + config_schema: ConfigSchema, +) -> Callable[[type[T]], type[T]]: + """Decorator to register an adapter class. + + Usage: + @register_adapter( + source_type=SourceType.POSTGRESQL, + display_name="PostgreSQL", + category=SourceCategory.DATABASE, + icon="postgresql", + description="PostgreSQL database", + capabilities=AdapterCapabilities(...), + config_schema=ConfigSchema(...), + ) + class PostgresAdapter(SQLAdapter): + ... + + Args: + source_type: The source type to register. + display_name: Human-readable name. + category: Source category. + icon: Icon identifier. + description: Source description. + capabilities: Adapter capabilities. + config_schema: Configuration schema. + + Returns: + Decorator function. + """ + + def decorator(cls: type[T]) -> type[T]: + registry = AdapterRegistry.get_instance() + registry.register( + source_type=source_type, + adapter_class=cls, + display_name=display_name, + category=category, + icon=icon, + description=description, + capabilities=capabilities, + config_schema=config_schema, + ) + return cls + + return decorator + + +# Global registry instance +_registry = AdapterRegistry.get_instance() + + +def get_registry() -> AdapterRegistry: + """Get the global adapter registry instance.""" + return _registry diff --git a/backend/src/dataing/adapters/datasource/sql/__init__.py b/backend/src/dataing/adapters/datasource/sql/__init__.py new file mode 100644 index 000000000..54d57399e --- /dev/null +++ b/backend/src/dataing/adapters/datasource/sql/__init__.py @@ -0,0 +1,15 @@ +"""SQL database adapters. 
+ +This module provides adapters for SQL-speaking data sources: +- PostgreSQL +- MySQL +- Trino +- Snowflake +- BigQuery +- Redshift +- DuckDB +""" + +from dataing.adapters.datasource.sql.base import SQLAdapter + +__all__ = ["SQLAdapter"] diff --git a/backend/src/dataing/adapters/datasource/sql/base.py b/backend/src/dataing/adapters/datasource/sql/base.py new file mode 100644 index 000000000..f66535a8c --- /dev/null +++ b/backend/src/dataing/adapters/datasource/sql/base.py @@ -0,0 +1,213 @@ +"""Base class for SQL database adapters. + +This module provides the abstract base class for all SQL-speaking +data source adapters, adding query execution capabilities. +""" + +from __future__ import annotations + +from abc import abstractmethod +from typing import Any + +from dataing.adapters.datasource.base import BaseAdapter +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + QueryLanguage, + QueryResult, +) + + +class SQLAdapter(BaseAdapter): + """Abstract base class for SQL database adapters. + + Extends BaseAdapter with SQL query execution capabilities. + All SQL adapters must implement: + - execute_query: Execute arbitrary SQL + - _get_schema_query: Return SQL to fetch schema metadata + - _get_tables_query: Return SQL to list tables + """ + + @property + def capabilities(self) -> AdapterCapabilities: + """SQL adapters support SQL queries by default.""" + return AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=10, + ) + + @abstractmethod + async def execute_query( + self, + sql: str, + params: dict[str, Any] | None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query against the data source. + + Args: + sql: The SQL query to execute. + params: Optional query parameters. + timeout_seconds: Query timeout in seconds. + limit: Optional row limit (may be applied via LIMIT clause). + + Returns: + QueryResult with columns, rows, and metadata. + + Raises: + QuerySyntaxError: If the query syntax is invalid. + QueryTimeoutError: If the query times out. + AccessDeniedError: If access is denied. + """ + ... + + async def sample( + self, + table: str, + n: int = 100, + schema: str | None = None, + ) -> QueryResult: + """Get a random sample of rows from a table. + + Args: + table: Table name. + n: Number of rows to sample. + schema: Optional schema name. + + Returns: + QueryResult with sampled rows. + """ + full_table = f"{schema}.{table}" if schema else table + sql = self._build_sample_query(full_table, n) + return await self.execute_query(sql, limit=n) + + async def preview( + self, + table: str, + n: int = 100, + schema: str | None = None, + ) -> QueryResult: + """Get a preview of rows from a table (first N rows). + + Args: + table: Table name. + n: Number of rows to preview. + schema: Optional schema name. + + Returns: + QueryResult with preview rows. + """ + full_table = f"{schema}.{table}" if schema else table + sql = f"SELECT * FROM {full_table} LIMIT {n}" + return await self.execute_query(sql, limit=n) + + async def count_rows( + self, + table: str, + schema: str | None = None, + ) -> int: + """Get the row count for a table. + + Args: + table: Table name. + schema: Optional schema name. + + Returns: + Number of rows in the table. 
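+ + Example (illustrative; table and schema names are placeholders): + n_rows = await adapter.count_rows("orders", schema="public")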
+ """ + full_table = f"{schema}.{table}" if schema else table + sql = f"SELECT COUNT(*) as cnt FROM {full_table}" + result = await self.execute_query(sql) + if result.rows: + return int(result.rows[0].get("cnt", 0)) + return 0 + + def _build_sample_query(self, table: str, n: int) -> str: + """Build a sampling query for the database type. + + Default implementation uses TABLESAMPLE if available, + otherwise falls back to ORDER BY RANDOM(). + Subclasses should override for optimal sampling. + + Args: + table: Full table name (schema.table). + n: Number of rows to sample. + + Returns: + SQL query string. + """ + return f"SELECT * FROM {table} ORDER BY RANDOM() LIMIT {n}" + + @abstractmethod + async def _fetch_table_metadata(self) -> list[dict[str, Any]]: + """Fetch table metadata from the database. + + Returns: + List of dictionaries with table metadata: + - catalog: Catalog name + - schema: Schema name + - table_name: Table name + - table_type: Type (table, view, etc.) + - columns: List of column dictionaries + """ + ... + + async def get_column_stats( + self, + table: str, + columns: list[str], + schema: str | None = None, + ) -> dict[str, dict[str, Any]]: + """Get statistics for specific columns. + + Args: + table: Table name. + columns: List of column names. + schema: Optional schema name. + + Returns: + Dictionary mapping column names to their statistics. + """ + full_table = f"{schema}.{table}" if schema else table + stats = {} + + for col in columns: + sql = f""" + SELECT + COUNT(*) as total_count, + COUNT({col}) as non_null_count, + COUNT(DISTINCT {col}) as distinct_count, + MIN({col}::text) as min_value, + MAX({col}::text) as max_value + FROM {full_table} + """ + try: + result = await self.execute_query(sql, timeout_seconds=60) + if result.rows: + row = result.rows[0] + total = row.get("total_count", 0) + non_null = row.get("non_null_count", 0) + null_count = total - non_null if total else 0 + stats[col] = { + "null_count": null_count, + "null_rate": null_count / total if total > 0 else 0.0, + "distinct_count": row.get("distinct_count"), + "min_value": row.get("min_value"), + "max_value": row.get("max_value"), + } + except Exception: + stats[col] = { + "null_count": 0, + "null_rate": 0.0, + "distinct_count": None, + "min_value": None, + "max_value": None, + } + + return stats diff --git a/backend/src/dataing/adapters/datasource/sql/bigquery.py b/backend/src/dataing/adapters/datasource/sql/bigquery.py new file mode 100644 index 000000000..1b07f66f8 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/sql/bigquery.py @@ -0,0 +1,561 @@ +"""BigQuery adapter implementation. + +This module provides a BigQuery adapter that implements the unified +data source interface with full schema discovery and query capabilities. 
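+ + The google-cloud-bigquery package is an optional dependency; connect() imports it lazily and raises ConnectionFailedError when it is missing.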
+""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + QuerySyntaxError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.sql.base import SQLAdapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +BIGQUERY_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="project", label="Project", collapsed_by_default=False), + FieldGroup(id="auth", label="Authentication", collapsed_by_default=False), + FieldGroup(id="advanced", label="Advanced", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="project_id", + label="Project ID", + type="string", + required=True, + group="project", + placeholder="my-gcp-project", + description="Google Cloud project ID", + ), + ConfigField( + name="dataset", + label="Default Dataset", + type="string", + required=False, + group="project", + placeholder="my_dataset", + description="Default dataset to query (optional)", + ), + ConfigField( + name="credentials_json", + label="Service Account JSON", + type="secret", + required=True, + group="auth", + description="Service account credentials JSON (paste full JSON)", + ), + ConfigField( + name="location", + label="Location", + type="enum", + required=False, + group="advanced", + default_value="US", + options=[ + {"value": "US", "label": "US (multi-region)"}, + {"value": "EU", "label": "EU (multi-region)"}, + {"value": "us-central1", "label": "us-central1"}, + {"value": "us-east1", "label": "us-east1"}, + {"value": "europe-west1", "label": "europe-west1"}, + {"value": "asia-east1", "label": "asia-east1"}, + ], + ), + ConfigField( + name="query_timeout", + label="Query Timeout (seconds)", + type="integer", + required=False, + group="advanced", + default_value=300, + min_value=30, + max_value=3600, + ), + ], +) + +BIGQUERY_CAPABILITIES = AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=5, +) + + +@register_adapter( + source_type=SourceType.BIGQUERY, + display_name="BigQuery", + category=SourceCategory.DATABASE, + icon="bigquery", + description="Connect to Google BigQuery for serverless data warehouse querying", + capabilities=BIGQUERY_CAPABILITIES, + config_schema=BIGQUERY_CONFIG_SCHEMA, +) +class BigQueryAdapter(SQLAdapter): + """BigQuery database adapter. + + Provides full schema discovery and query execution for BigQuery. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize BigQuery adapter. 
+ + Args: + config: Configuration dictionary with: + - project_id: GCP project ID + - dataset: Default dataset (optional) + - credentials_json: Service account JSON + - location: Data location (optional) + - query_timeout: Timeout in seconds (optional) + """ + super().__init__(config) + self._client: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.BIGQUERY + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return BIGQUERY_CAPABILITIES + + async def connect(self) -> None: + """Establish connection to BigQuery.""" + try: + from google.cloud import bigquery + from google.oauth2 import service_account + except ImportError as e: + raise ConnectionFailedError( + message="google-cloud-bigquery not installed. pip install google-cloud-bigquery", + details={"error": str(e)}, + ) from e + + try: + import json + + project_id = self._config.get("project_id", "") + credentials_json = self._config.get("credentials_json", "") + location = self._config.get("location", "US") + + # Parse credentials JSON + if isinstance(credentials_json, str): + credentials_info = json.loads(credentials_json) + else: + credentials_info = credentials_json + + credentials = service_account.Credentials.from_service_account_info( # type: ignore[no-untyped-call] + credentials_info + ) + + self._client = bigquery.Client( + project=project_id, + credentials=credentials, + location=location, + ) + self._connected = True + except json.JSONDecodeError as e: + raise AuthenticationFailedError( + message="Invalid credentials JSON format", + details={"error": str(e)}, + ) from e + except Exception as e: + error_str = str(e).lower() + if "permission" in error_str or "forbidden" in error_str or "403" in error_str: + raise AccessDeniedError( + message="Access denied to BigQuery project", + ) from e + elif "invalid" in error_str and "credential" in error_str: + raise AuthenticationFailedError( + message="Invalid BigQuery credentials", + details={"error": str(e)}, + ) from e + else: + raise ConnectionFailedError( + message=f"Failed to connect to BigQuery: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close BigQuery client.""" + if self._client: + self._client.close() + self._client = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test BigQuery connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + # Run a simple query to test connection + query = "SELECT 1" + query_job = self._client.query(query) + query_job.result() + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version="Google BigQuery", + message="Connection successful", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def execute_query( + self, + sql: str, + params: dict[str, Any] | None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query against BigQuery.""" + if not self._connected or not self._client: + raise ConnectionFailedError(message="Not connected to BigQuery") + + start_time = time.time() + try: + from google.cloud import bigquery + + job_config = 
bigquery.QueryJobConfig() + job_config.timeout_ms = timeout_seconds * 1000 + + # Set default dataset if configured + dataset = self._config.get("dataset") + if dataset: + project_id = self._config.get("project_id", "") + job_config.default_dataset = f"{project_id}.{dataset}" + + query_job = self._client.query(sql, job_config=job_config) + results = query_job.result(timeout=timeout_seconds) + + execution_time_ms = int((time.time() - start_time) * 1000) + + # Get schema from result + schema = results.schema + if not schema: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [ + {"name": field.name, "data_type": self._map_bq_type(field.field_type)} + for field in schema + ] + column_names = [field.name for field in schema] + + # Convert rows to dicts + row_dicts = [] + for row in results: + row_dict = {} + for name in column_names: + value = row[name] + # Convert non-serializable types to strings + if hasattr(value, "isoformat"): + value = value.isoformat() + elif hasattr(value, "__iter__") and not isinstance(value, str | dict | list): + value = list(value) + row_dict[name] = value + row_dicts.append(row_dict) + + # Apply limit if needed + truncated = False + if limit and len(row_dicts) > limit: + row_dicts = row_dicts[:limit] + truncated = True + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=truncated, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str or "400" in error_str: + raise QuerySyntaxError( + message=str(e), + query=sql[:200], + ) from e + elif "permission" in error_str or "403" in error_str: + raise AccessDeniedError( + message=str(e), + ) from e + elif "timeout" in error_str or "deadline exceeded" in error_str: + raise QueryTimeoutError( + message=str(e), + timeout_seconds=timeout_seconds, + ) from e + else: + raise + + def _map_bq_type(self, bq_type: str) -> str: + """Map BigQuery type to normalized type.""" + result: str = normalize_type(bq_type, SourceType.BIGQUERY).value + return result + + async def _fetch_table_metadata(self) -> list[dict[str, Any]]: + """Fetch table metadata from BigQuery.""" + project_id = self._config.get("project_id", "") + dataset = self._config.get("dataset", "") + + if dataset: + sql = f""" + SELECT + '{project_id}' as table_catalog, + table_schema, + table_name, + table_type + FROM `{project_id}.{dataset}.INFORMATION_SCHEMA.TABLES` + ORDER BY table_name + """ + else: + sql = f""" + SELECT + '{project_id}' as table_catalog, + schema_name as table_schema, + '' as table_name, + 'SCHEMA' as table_type + FROM `{project_id}.INFORMATION_SCHEMA.SCHEMATA` + """ + result = await self.execute_query(sql) + return list(result.rows) + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get BigQuery schema.""" + if not self._connected or not self._client: + raise ConnectionFailedError(message="Not connected to BigQuery") + + try: + project_id = self._config.get("project_id", "") + dataset = self._config.get("dataset", "") + + # If dataset specified, get tables from that dataset + if dataset: + return await self._get_dataset_schema(project_id, dataset, filter) + else: + # List all datasets and their tables + return await self._get_project_schema(project_id, filter) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch BigQuery schema: {str(e)}", + details={"error": str(e)}, + ) from e + + 
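+ # Internal helper: list tables and columns for a single dataset via INFORMATION_SCHEMA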
async def _get_dataset_schema( + self, + project_id: str, + dataset: str, + filter: SchemaFilter | None, + ) -> SchemaResponse: + """Get schema for a specific dataset.""" + # Build filter conditions + conditions = [] + if filter: + if filter.table_pattern: + conditions.append(f"table_name LIKE '{filter.table_pattern}'") + if not filter.include_views: + conditions.append("table_type = 'BASE TABLE'") + + where_clause = f"WHERE {' AND '.join(conditions)}" if conditions else "" + limit_clause = f"LIMIT {filter.max_tables}" if filter else "LIMIT 1000" + + # Get tables + tables_sql = f""" + SELECT + table_schema, + table_name, + table_type + FROM `{project_id}.{dataset}.INFORMATION_SCHEMA.TABLES` + {where_clause} + ORDER BY table_name + {limit_clause} + """ + tables_result = await self.execute_query(tables_sql) + + # Get columns + columns_sql = f""" + SELECT + table_schema, + table_name, + column_name, + data_type, + is_nullable, + ordinal_position + FROM `{project_id}.{dataset}.INFORMATION_SCHEMA.COLUMNS` + {where_clause} + ORDER BY table_name, ordinal_position + """ + columns_result = await self.execute_query(columns_sql) + + # Organize into schema response + schema_map: dict[str, dict[str, dict[str, Any]]] = {} + for row in tables_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + table_type_raw = row["table_type"] + + table_type = "view" if "view" in table_type_raw.lower() else "table" + + if schema_name not in schema_map: + schema_map[schema_name] = {} + schema_map[schema_name][table_name] = { + "name": table_name, + "table_type": table_type, + "native_type": table_type_raw, + "native_path": f"{project_id}.{schema_name}.{table_name}", + "columns": [], + } + + # Add columns + for row in columns_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + if schema_name in schema_map and table_name in schema_map[schema_name]: + col_data = { + "name": row["column_name"], + "data_type": normalize_type(row["data_type"], SourceType.BIGQUERY), + "native_type": row["data_type"], + "nullable": row["is_nullable"] == "YES", + "is_primary_key": False, + "is_partition_key": False, + } + schema_map[schema_name][table_name]["columns"].append(col_data) + + # Build catalog structure + catalogs = [ + { + "name": project_id, + "schemas": [ + { + "name": schema_name, + "tables": list(tables.values()), + } + for schema_name, tables in schema_map.items() + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "bigquery", + catalogs=catalogs, + ) + + async def _get_project_schema( + self, + project_id: str, + filter: SchemaFilter | None, + ) -> SchemaResponse: + """Get schema for entire project (all datasets).""" + # List all datasets + datasets = list(self._client.list_datasets()) + + schema_map: dict[str, dict[str, dict[str, Any]]] = {} + + for ds in datasets: + dataset_id = ds.dataset_id + + # Skip if filter doesn't match + if filter and filter.schema_pattern: + if filter.schema_pattern not in dataset_id: + continue + + try: + # Get tables for this dataset + tables_sql = f""" + SELECT + table_schema, + table_name, + table_type + FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.TABLES` + ORDER BY table_name + LIMIT 100 + """ + tables_result = await self.execute_query(tables_sql) + + schema_map[dataset_id] = {} + for row in tables_result.rows: + table_name = row["table_name"] + table_type_raw = row["table_type"] + table_type = "view" if "view" in table_type_raw.lower() else "table" + + schema_map[dataset_id][table_name] = { + 
"name": table_name, + "table_type": table_type, + "native_type": table_type_raw, + "native_path": f"{project_id}.{dataset_id}.{table_name}", + "columns": [], + } + + except Exception: + # Skip datasets we can't access + continue + + # Build catalog structure + catalogs = [ + { + "name": project_id, + "schemas": [ + { + "name": schema_name, + "tables": list(tables.values()), + } + for schema_name, tables in schema_map.items() + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "bigquery", + catalogs=catalogs, + ) + + def _build_sample_query(self, table: str, n: int) -> str: + """Build BigQuery-specific sampling query using TABLESAMPLE.""" + return f"SELECT * FROM {table} TABLESAMPLE SYSTEM (10 PERCENT) LIMIT {n}" diff --git a/backend/src/dataing/adapters/datasource/sql/duckdb.py b/backend/src/dataing/adapters/datasource/sql/duckdb.py new file mode 100644 index 000000000..91a3ea16a --- /dev/null +++ b/backend/src/dataing/adapters/datasource/sql/duckdb.py @@ -0,0 +1,431 @@ +"""DuckDB adapter implementation. + +This module provides a DuckDB adapter that implements the unified +data source interface with full schema discovery and query capabilities. +DuckDB can also be used to query parquet files and other file formats. +""" + +from __future__ import annotations + +import os +import time +from typing import Any + +from dataing.adapters.datasource.errors import ( + ConnectionFailedError, + QuerySyntaxError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.sql.base import SQLAdapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +DUCKDB_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="source", label="Data Source", collapsed_by_default=False), + ], + fields=[ + ConfigField( + name="source_type", + label="Source Type", + type="enum", + required=True, + group="source", + default_value="directory", + options=[ + {"value": "directory", "label": "Directory of files"}, + {"value": "database", "label": "DuckDB database file"}, + ], + ), + ConfigField( + name="path", + label="Path", + type="string", + required=True, + group="source", + placeholder="/path/to/data or /path/to/db.duckdb", + description="Path to directory with parquet/CSV files, or .duckdb file", + ), + ConfigField( + name="read_only", + label="Read Only", + type="boolean", + required=False, + group="source", + default_value=True, + description="Open database in read-only mode", + ), + ], +) + +DUCKDB_CAPABILITIES = AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=5, +) + + +@register_adapter( + source_type=SourceType.DUCKDB, + display_name="DuckDB", + category=SourceCategory.DATABASE, + icon="duckdb", + description="Connect to DuckDB databases or query parquet/CSV files directly", + capabilities=DUCKDB_CAPABILITIES, + config_schema=DUCKDB_CONFIG_SCHEMA, +) +class DuckDBAdapter(SQLAdapter): + """DuckDB database adapter. + + Provides schema discovery and query execution for DuckDB databases + and direct file querying (parquet, CSV, etc.). 
+ """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize DuckDB adapter. + + Args: + config: Configuration dictionary with: + - path: Path to database file or directory + - source_type: "database" or "directory" + - read_only: Whether to open read-only (default: True) + """ + super().__init__(config) + self._conn: Any = None + self._source_id: str = "" + self._is_directory_mode = config.get("source_type", "directory") == "directory" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.DUCKDB + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return DUCKDB_CAPABILITIES + + async def connect(self) -> None: + """Establish connection to DuckDB.""" + try: + import duckdb + except ImportError as e: + raise ConnectionFailedError( + message="duckdb is not installed. Install with: pip install duckdb", + details={"error": str(e)}, + ) from e + + path = self._config.get("path", ":memory:") + read_only = self._config.get("read_only", True) + + try: + if self._is_directory_mode: + # In directory mode, use in-memory database + self._conn = duckdb.connect(":memory:") + # Register parquet files as views + await self._register_directory_files() + elif path == ":memory:": + # In-memory mode - cannot be read-only + self._conn = duckdb.connect(":memory:") + else: + # Database file mode + if not os.path.exists(path): + raise ConnectionFailedError( + message=f"Database file not found: {path}", + details={"path": path}, + ) + self._conn = duckdb.connect(path, read_only=read_only) + + self._connected = True + except Exception as e: + if "ConnectionFailedError" in type(e).__name__: + raise + raise ConnectionFailedError( + message=f"Failed to connect to DuckDB: {str(e)}", + details={"error": str(e), "path": path}, + ) from e + + async def _register_directory_files(self) -> None: + """Register files in directory as DuckDB views.""" + path = self._config.get("path", "") + if not path or not os.path.isdir(path): + return + + # Find all parquet and CSV files + for filename in os.listdir(path): + filepath = os.path.join(path, filename) + if not os.path.isfile(filepath): + continue + + # Create view name from filename (without extension) + view_name = os.path.splitext(filename)[0] + # Clean up view name to be valid SQL identifier + view_name = view_name.replace("-", "_").replace(" ", "_") + + if filename.endswith(".parquet"): + sql = f"CREATE VIEW IF NOT EXISTS {view_name} AS " + sql += f"SELECT * FROM read_parquet('{filepath}')" + self._conn.execute(sql) + elif filename.endswith(".csv"): + sql = f"CREATE VIEW IF NOT EXISTS {view_name} AS " + sql += f"SELECT * FROM read_csv_auto('{filepath}')" + self._conn.execute(sql) + elif filename.endswith(".json") or filename.endswith(".jsonl"): + sql = f"CREATE VIEW IF NOT EXISTS {view_name} AS " + sql += f"SELECT * FROM read_json_auto('{filepath}')" + self._conn.execute(sql) + + async def disconnect(self) -> None: + """Close DuckDB connection.""" + if self._conn: + self._conn.close() + self._conn = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test DuckDB connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + result = self._conn.execute("SELECT version()").fetchone() + version = result[0] if result else "Unknown" + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, 
+ server_version=f"DuckDB {version}", + message="Connection successful", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def execute_query( + self, + sql: str, + params: dict[str, Any] | None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query against DuckDB.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to DuckDB") + + start_time = time.time() + try: + result = self._conn.execute(sql) + columns_info = result.description + rows = result.fetchall() + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not columns_info: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + # Build column metadata + columns = [ + {"name": col[0], "data_type": self._map_duckdb_type(col[1])} for col in columns_info + ] + column_names = [col[0] for col in columns_info] + + # Convert rows to dicts + row_dicts = [dict(zip(column_names, row, strict=False)) for row in rows] + + # Apply limit if needed + truncated = False + if limit and len(row_dicts) > limit: + row_dicts = row_dicts[:limit] + truncated = True + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=truncated, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str or "parser error" in error_str: + raise QuerySyntaxError( + message=str(e), + query=sql[:200], + ) from e + elif "timeout" in error_str: + raise QueryTimeoutError( + message=str(e), + timeout_seconds=timeout_seconds, + ) from e + else: + raise + + def _map_duckdb_type(self, type_code: Any) -> str: + """Map DuckDB type code to string representation.""" + if type_code is None: + return "unknown" + type_str = str(type_code).lower() + result: str = normalize_type(type_str, SourceType.DUCKDB).value + return result + + async def _fetch_table_metadata(self) -> list[dict[str, Any]]: + """Fetch table metadata from DuckDB.""" + sql = """ + SELECT + database_name as table_catalog, + schema_name as table_schema, + table_name, + table_type + FROM information_schema.tables + WHERE table_schema NOT IN ('pg_catalog', 'information_schema') + ORDER BY table_schema, table_name + """ + result = await self.execute_query(sql) + return list(result.rows) + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get DuckDB schema.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to DuckDB") + + try: + # Build filter conditions + conditions = ["table_schema NOT IN ('pg_catalog', 'information_schema')"] + if filter: + if filter.table_pattern: + conditions.append(f"table_name LIKE '{filter.table_pattern}'") + if filter.schema_pattern: + conditions.append(f"table_schema LIKE '{filter.schema_pattern}'") + if not filter.include_views: + conditions.append("table_type = 'BASE TABLE'") + + where_clause = " AND ".join(conditions) + limit_clause = f"LIMIT {filter.max_tables}" if filter else "LIMIT 1000" + + # Get tables + tables_sql = f""" + SELECT + table_schema, + table_name, + table_type + FROM information_schema.tables + WHERE {where_clause} + ORDER BY table_schema, table_name + {limit_clause} + """ + tables_result = await self.execute_query(tables_sql) + + # 
Get columns + columns_sql = f""" + SELECT + table_schema, + table_name, + column_name, + data_type, + is_nullable, + column_default, + ordinal_position + FROM information_schema.columns + WHERE {where_clause} + ORDER BY table_schema, table_name, ordinal_position + """ + columns_result = await self.execute_query(columns_sql) + + # Organize into schema response + schema_map: dict[str, dict[str, dict[str, Any]]] = {} + for row in tables_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + table_type_raw = row["table_type"] + + table_type = "view" if "view" in table_type_raw.lower() else "table" + + if schema_name not in schema_map: + schema_map[schema_name] = {} + schema_map[schema_name][table_name] = { + "name": table_name, + "table_type": table_type, + "native_type": table_type_raw, + "native_path": f"{schema_name}.{table_name}", + "columns": [], + } + + # Add columns + for row in columns_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + if schema_name in schema_map and table_name in schema_map[schema_name]: + col_data = { + "name": row["column_name"], + "data_type": normalize_type(row["data_type"], SourceType.DUCKDB), + "native_type": row["data_type"], + "nullable": row["is_nullable"] == "YES", + "is_primary_key": False, + "is_partition_key": False, + "default_value": row["column_default"], + } + schema_map[schema_name][table_name]["columns"].append(col_data) + + # Build catalog structure + catalogs = [ + { + "name": "default", + "schemas": [ + { + "name": schema_name, + "tables": list(tables.values()), + } + for schema_name, tables in schema_map.items() + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "duckdb", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch DuckDB schema: {str(e)}", + details={"error": str(e)}, + ) from e + + def _build_sample_query(self, table: str, n: int) -> str: + """Build DuckDB-specific sampling query using TABLESAMPLE.""" + return f"SELECT * FROM {table} USING SAMPLE {n} ROWS" diff --git a/backend/src/dataing/adapters/datasource/sql/mysql.py b/backend/src/dataing/adapters/datasource/sql/mysql.py new file mode 100644 index 000000000..1b8ebb8a9 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/sql/mysql.py @@ -0,0 +1,472 @@ +"""MySQL adapter implementation. + +This module provides a MySQL adapter that implements the unified +data source interface with full schema discovery and query capabilities. 
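+ + The aiomysql package is an optional dependency; connect() imports it lazily and raises ConnectionFailedError when it is missing.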
+""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + ConnectionTimeoutError, + QuerySyntaxError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.sql.base import SQLAdapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +MYSQL_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="connection", label="Connection", collapsed_by_default=False), + FieldGroup(id="auth", label="Authentication", collapsed_by_default=False), + FieldGroup(id="ssl", label="SSL/TLS", collapsed_by_default=True), + FieldGroup(id="advanced", label="Advanced", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="host", + label="Host", + type="string", + required=True, + group="connection", + placeholder="localhost", + description="MySQL server hostname or IP address", + ), + ConfigField( + name="port", + label="Port", + type="integer", + required=True, + group="connection", + default_value=3306, + min_value=1, + max_value=65535, + ), + ConfigField( + name="database", + label="Database", + type="string", + required=True, + group="connection", + placeholder="mydb", + description="Name of the database to connect to", + ), + ConfigField( + name="username", + label="Username", + type="string", + required=True, + group="auth", + ), + ConfigField( + name="password", + label="Password", + type="secret", + required=True, + group="auth", + ), + ConfigField( + name="ssl", + label="Use SSL", + type="boolean", + required=False, + group="ssl", + default_value=False, + ), + ConfigField( + name="connection_timeout", + label="Connection Timeout (seconds)", + type="integer", + required=False, + group="advanced", + default_value=30, + min_value=5, + max_value=300, + ), + ], +) + +MYSQL_CAPABILITIES = AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=10, +) + + +@register_adapter( + source_type=SourceType.MYSQL, + display_name="MySQL", + category=SourceCategory.DATABASE, + icon="mysql", + description="Connect to MySQL databases for schema discovery and querying", + capabilities=MYSQL_CAPABILITIES, + config_schema=MYSQL_CONFIG_SCHEMA, +) +class MySQLAdapter(SQLAdapter): + """MySQL database adapter. + + Provides full schema discovery and query execution for MySQL databases. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize MySQL adapter. 
+ + Args: + config: Configuration dictionary with: + - host: Server hostname + - port: Server port + - database: Database name + - username: Username + - password: Password + - ssl: Whether to use SSL (optional) + - connection_timeout: Timeout in seconds (optional) + """ + super().__init__(config) + self._pool: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.MYSQL + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return MYSQL_CAPABILITIES + + async def connect(self) -> None: + """Establish connection to MySQL.""" + try: + import aiomysql + except ImportError as e: + raise ConnectionFailedError( + message="aiomysql is not installed. Install with: pip install aiomysql", + details={"error": str(e)}, + ) from e + + try: + host = self._config.get("host", "localhost") + port = self._config.get("port", 3306) + database = self._config.get("database", "") + username = self._config.get("username", "") + password = self._config.get("password", "") + use_ssl = self._config.get("ssl", False) + timeout = self._config.get("connection_timeout", 30) + + ssl_context = None + if use_ssl: + import ssl + + ssl_context = ssl.create_default_context() + + self._pool = await aiomysql.create_pool( + host=host, + port=port, + user=username, + password=password, + db=database, + ssl=ssl_context, + connect_timeout=timeout, + minsize=1, + maxsize=10, + autocommit=True, + ) + self._connected = True + except Exception as e: + error_str = str(e).lower() + if "access denied" in error_str: + raise AuthenticationFailedError( + message="Access denied for MySQL user", + details={"error": str(e)}, + ) from e + elif "unknown database" in error_str: + raise ConnectionFailedError( + message=f"Database does not exist: {self._config.get('database')}", + details={"error": str(e)}, + ) from e + elif "timeout" in error_str or "timed out" in error_str: + raise ConnectionTimeoutError( + message="Connection to MySQL timed out", + timeout_seconds=self._config.get("connection_timeout", 30), + ) from e + else: + raise ConnectionFailedError( + message=f"Failed to connect to MySQL: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close MySQL connection pool.""" + if self._pool: + self._pool.close() + await self._pool.wait_closed() + self._pool = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test MySQL connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + async with self._pool.acquire() as conn: + async with conn.cursor() as cur: + await cur.execute("SELECT VERSION()") + result = await cur.fetchone() + version = result[0] if result else "Unknown" + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version=f"MySQL {version}", + message="Connection successful", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def execute_query( + self, + sql: str, + params: dict[str, Any] | None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query against MySQL.""" + if not self._connected or not self._pool: + raise 
ConnectionFailedError(message="Not connected to MySQL") + + start_time = time.time() + try: + import aiomysql + + async with self._pool.acquire() as conn: + async with conn.cursor(aiomysql.DictCursor) as cur: + # Set query timeout + await cur.execute(f"SET max_execution_time = {timeout_seconds * 1000}") + + # Execute query + await cur.execute(sql) + rows = await cur.fetchall() + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not rows: + # Get columns from cursor description + columns = [] + if cur.description: + columns = [ + {"name": col[0], "data_type": "string"} for col in cur.description + ] + return QueryResult( + columns=columns, + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + # Get column info + columns = [{"name": col[0], "data_type": "string"} for col in cur.description] + + # Convert rows to dicts (already dicts with DictCursor) + row_dicts = list(rows) + + # Apply limit if needed + truncated = False + if limit and len(row_dicts) > limit: + row_dicts = row_dicts[:limit] + truncated = True + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=truncated, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax" in error_str: + raise QuerySyntaxError( + message=str(e), + query=sql[:200], + ) from e + elif "access denied" in error_str: + raise AccessDeniedError( + message=str(e), + ) from e + elif "timeout" in error_str or "max_execution_time" in error_str: + raise QueryTimeoutError( + message=str(e), + timeout_seconds=timeout_seconds, + ) from e + else: + raise + + async def _fetch_table_metadata(self) -> list[dict[str, Any]]: + """Fetch table metadata from MySQL.""" + database = self._config.get("database", "") + sql = f""" + SELECT + TABLE_CATALOG as table_catalog, + TABLE_SCHEMA as table_schema, + TABLE_NAME as table_name, + TABLE_TYPE as table_type + FROM information_schema.TABLES + WHERE TABLE_SCHEMA = '{database}' + ORDER BY TABLE_NAME + """ + result = await self.execute_query(sql) + return list(result.rows) + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get MySQL schema.""" + if not self._connected or not self._pool: + raise ConnectionFailedError(message="Not connected to MySQL") + + try: + database = self._config.get("database", "") + + # Build filter conditions + conditions = [f"TABLE_SCHEMA = '{database}'"] + if filter: + if filter.table_pattern: + conditions.append(f"TABLE_NAME LIKE '{filter.table_pattern}'") + if not filter.include_views: + conditions.append("TABLE_TYPE = 'BASE TABLE'") + + where_clause = " AND ".join(conditions) + limit_clause = f"LIMIT {filter.max_tables}" if filter else "LIMIT 1000" + + # Get tables + tables_sql = f""" + SELECT + TABLE_SCHEMA as table_schema, + TABLE_NAME as table_name, + TABLE_TYPE as table_type + FROM information_schema.TABLES + WHERE {where_clause} + ORDER BY TABLE_NAME + {limit_clause} + """ + tables_result = await self.execute_query(tables_sql) + + # Get columns + columns_sql = f""" + SELECT + TABLE_SCHEMA as table_schema, + TABLE_NAME as table_name, + COLUMN_NAME as column_name, + DATA_TYPE as data_type, + IS_NULLABLE as is_nullable, + COLUMN_DEFAULT as column_default, + ORDINAL_POSITION as ordinal_position, + COLUMN_KEY as column_key + FROM information_schema.COLUMNS + WHERE {where_clause} + ORDER BY TABLE_NAME, ORDINAL_POSITION + """ + columns_result = await self.execute_query(columns_sql) + + # Organize into schema response + 
schema_map: dict[str, dict[str, dict[str, Any]]] = {} + for row in tables_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + table_type_raw = row["table_type"] + + table_type = "view" if "view" in table_type_raw.lower() else "table" + + if schema_name not in schema_map: + schema_map[schema_name] = {} + schema_map[schema_name][table_name] = { + "name": table_name, + "table_type": table_type, + "native_type": table_type_raw, + "native_path": f"{schema_name}.{table_name}", + "columns": [], + } + + # Add columns + for row in columns_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + if schema_name in schema_map and table_name in schema_map[schema_name]: + is_pk = row.get("column_key") == "PRI" + col_data = { + "name": row["column_name"], + "data_type": normalize_type(row["data_type"], SourceType.MYSQL), + "native_type": row["data_type"], + "nullable": row["is_nullable"] == "YES", + "is_primary_key": is_pk, + "is_partition_key": False, + "default_value": row["column_default"], + } + schema_map[schema_name][table_name]["columns"].append(col_data) + + # Build catalog structure + catalogs = [ + { + "name": "default", + "schemas": [ + { + "name": schema_name, + "tables": list(tables.values()), + } + for schema_name, tables in schema_map.items() + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "mysql", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch MySQL schema: {str(e)}", + details={"error": str(e)}, + ) from e + + def _build_sample_query(self, table: str, n: int) -> str: + """Build MySQL-specific sampling query.""" + # MySQL doesn't have TABLESAMPLE, use ORDER BY RAND() + return f"SELECT * FROM {table} ORDER BY RAND() LIMIT {n}" diff --git a/backend/src/dataing/adapters/datasource/sql/postgres.py b/backend/src/dataing/adapters/datasource/sql/postgres.py new file mode 100644 index 000000000..d80cb9663 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/sql/postgres.py @@ -0,0 +1,507 @@ +"""PostgreSQL adapter implementation. + +This module provides a PostgreSQL adapter that implements the unified +data source interface with full schema discovery and query capabilities. 
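+ + The asyncpg package is an optional dependency; connect() imports it lazily and raises ConnectionFailedError when it is missing.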
+""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + ConnectionTimeoutError, + QuerySyntaxError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.sql.base import SQLAdapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +# PostgreSQL configuration schema for frontend forms +POSTGRES_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="connection", label="Connection", collapsed_by_default=False), + FieldGroup(id="auth", label="Authentication", collapsed_by_default=False), + FieldGroup(id="ssl", label="SSL/TLS", collapsed_by_default=True), + FieldGroup(id="advanced", label="Advanced", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="host", + label="Host", + type="string", + required=True, + group="connection", + placeholder="localhost", + description="PostgreSQL server hostname or IP address", + ), + ConfigField( + name="port", + label="Port", + type="integer", + required=True, + group="connection", + default_value=5432, + min_value=1, + max_value=65535, + ), + ConfigField( + name="database", + label="Database", + type="string", + required=True, + group="connection", + placeholder="mydb", + description="Name of the database to connect to", + ), + ConfigField( + name="username", + label="Username", + type="string", + required=True, + group="auth", + ), + ConfigField( + name="password", + label="Password", + type="secret", + required=True, + group="auth", + ), + ConfigField( + name="ssl_mode", + label="SSL Mode", + type="enum", + required=False, + group="ssl", + default_value="prefer", + options=[ + {"value": "disable", "label": "Disable"}, + {"value": "prefer", "label": "Prefer"}, + {"value": "require", "label": "Require"}, + {"value": "verify-ca", "label": "Verify CA"}, + {"value": "verify-full", "label": "Verify Full"}, + ], + ), + ConfigField( + name="connection_timeout", + label="Connection Timeout (seconds)", + type="integer", + required=False, + group="advanced", + default_value=30, + min_value=5, + max_value=300, + ), + ConfigField( + name="schemas", + label="Schemas to Include", + type="string", + required=False, + group="advanced", + placeholder="public,analytics", + description="Comma-separated list of schemas to include (default: all)", + ), + ], +) + +POSTGRES_CAPABILITIES = AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=10, +) + + +@register_adapter( + source_type=SourceType.POSTGRESQL, + display_name="PostgreSQL", + category=SourceCategory.DATABASE, + icon="postgresql", + description="Connect to PostgreSQL databases for schema discovery and querying", + capabilities=POSTGRES_CAPABILITIES, + config_schema=POSTGRES_CONFIG_SCHEMA, +) +class PostgresAdapter(SQLAdapter): + """PostgreSQL database adapter. + + Provides full schema discovery and query execution for PostgreSQL databases. 
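+ + Connections are managed through an asyncpg pool (1-10 connections) created in connect().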
+ """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize PostgreSQL adapter. + + Args: + config: Configuration dictionary with: + - host: Server hostname + - port: Server port + - database: Database name + - username: Username + - password: Password + - ssl_mode: SSL mode (optional) + - connection_timeout: Timeout in seconds (optional) + - schemas: Comma-separated schemas to include (optional) + """ + super().__init__(config) + self._pool: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.POSTGRESQL + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return POSTGRES_CAPABILITIES + + def _build_dsn(self) -> str: + """Build PostgreSQL DSN from config.""" + host = self._config.get("host", "localhost") + port = self._config.get("port", 5432) + database = self._config.get("database", "postgres") + username = self._config.get("username", "") + password = self._config.get("password", "") + ssl_mode = self._config.get("ssl_mode", "prefer") + + return f"postgresql://{username}:{password}@{host}:{port}/{database}?sslmode={ssl_mode}" + + async def connect(self) -> None: + """Establish connection to PostgreSQL.""" + try: + import asyncpg + except ImportError as e: + raise ConnectionFailedError( + message="asyncpg is not installed. Install with: pip install asyncpg", + details={"error": str(e)}, + ) from e + + try: + timeout = self._config.get("connection_timeout", 30) + self._pool = await asyncpg.create_pool( + self._build_dsn(), + min_size=1, + max_size=10, + command_timeout=timeout, + ) + self._connected = True + except asyncpg.InvalidPasswordError as e: + raise AuthenticationFailedError( + message="Password authentication failed for PostgreSQL", + details={"error": str(e)}, + ) from e + except asyncpg.InvalidCatalogNameError as e: + raise ConnectionFailedError( + message=f"Database does not exist: {self._config.get('database')}", + details={"error": str(e)}, + ) from e + except asyncpg.CannotConnectNowError as e: + raise ConnectionFailedError( + message="Cannot connect to PostgreSQL server", + details={"error": str(e)}, + ) from e + except TimeoutError as e: + raise ConnectionTimeoutError( + message="Connection to PostgreSQL timed out", + timeout_seconds=self._config.get("connection_timeout", 30), + ) from e + except Exception as e: + raise ConnectionFailedError( + message=f"Failed to connect to PostgreSQL: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close PostgreSQL connection pool.""" + if self._pool: + await self._pool.close() + self._pool = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test PostgreSQL connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + async with self._pool.acquire() as conn: + result = await conn.fetchrow("SELECT version()") + version = result[0] if result else "Unknown" + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version=version, + message="Connection successful", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def execute_query( + self, + sql: str, + params: dict[str, Any] | 
None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query.""" + if not self._connected or not self._pool: + raise ConnectionFailedError(message="Not connected to PostgreSQL") + + start_time = time.time() + try: + async with self._pool.acquire() as conn: + # Set statement timeout + await conn.execute(f"SET statement_timeout = {timeout_seconds * 1000}") + + # Execute query + rows = await conn.fetch(sql) + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not rows: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + # Get column info + columns = [{"name": key, "data_type": "string"} for key in rows[0].keys()] + + # Convert rows to dicts + row_dicts = [dict(row) for row in rows] + + # Apply limit if needed + truncated = False + if limit and len(row_dicts) > limit: + row_dicts = row_dicts[:limit] + truncated = True + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=truncated, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str: + raise QuerySyntaxError( + message=str(e), + query=sql[:200], + ) from e + elif "permission denied" in error_str: + raise AccessDeniedError( + message=str(e), + ) from e + elif "canceling statement" in error_str or "timeout" in error_str: + raise QueryTimeoutError( + message=str(e), + timeout_seconds=timeout_seconds, + ) from e + else: + raise + + async def _fetch_table_metadata(self) -> list[dict[str, Any]]: + """Fetch table metadata from PostgreSQL.""" + schemas_filter = self._config.get("schemas", "") + if schemas_filter: + schema_list = [s.strip() for s in schemas_filter.split(",")] + schema_condition = f"AND table_schema IN ({','.join(repr(s) for s in schema_list)})" + else: + schema_condition = "AND table_schema NOT IN ('pg_catalog', 'information_schema')" + + sql = f""" + SELECT + table_catalog, + table_schema, + table_name, + table_type + FROM information_schema.tables + WHERE 1=1 + {schema_condition} + ORDER BY table_schema, table_name + """ + + result = await self.execute_query(sql) + return list(result.rows) + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get database schema.""" + if not self._connected or not self._pool: + raise ConnectionFailedError(message="Not connected to PostgreSQL") + + try: + # Build filter conditions + conditions = ["table_schema NOT IN ('pg_catalog', 'information_schema')"] + if filter: + if filter.table_pattern: + conditions.append(f"table_name LIKE '{filter.table_pattern}'") + if filter.schema_pattern: + conditions.append(f"table_schema LIKE '{filter.schema_pattern}'") + if not filter.include_views: + conditions.append("table_type = 'BASE TABLE'") + + where_clause = " AND ".join(conditions) + limit_clause = f"LIMIT {filter.max_tables}" if filter else "LIMIT 1000" + + # Get tables + tables_sql = f""" + SELECT + table_schema, + table_name, + table_type + FROM information_schema.tables + WHERE {where_clause} + ORDER BY table_schema, table_name + {limit_clause} + """ + tables_result = await self.execute_query(tables_sql) + + # Get columns for all tables + columns_sql = f""" + SELECT + table_schema, + table_name, + column_name, + data_type, + is_nullable, + column_default, + ordinal_position + FROM information_schema.columns + WHERE {where_clause} + ORDER BY table_schema, table_name, ordinal_position + """ + columns_result = 
await self.execute_query(columns_sql) + + # Get primary keys + pk_sql = f""" + SELECT + kcu.table_schema, + kcu.table_name, + kcu.column_name + FROM information_schema.table_constraints tc + JOIN information_schema.key_column_usage kcu + ON tc.constraint_name = kcu.constraint_name + AND tc.table_schema = kcu.table_schema + WHERE tc.constraint_type = 'PRIMARY KEY' + AND {where_clause.replace('table_schema', 'tc.table_schema') + .replace('table_name', 'tc.table_name') + .replace('table_type', "'BASE TABLE'")} + """ + try: + pk_result = await self.execute_query(pk_sql) + pk_set = { + (row["table_schema"], row["table_name"], row["column_name"]) + for row in pk_result.rows + } + except Exception: + pk_set = set() + + # Organize into schema response + schema_map: dict[str, dict[str, dict[str, Any]]] = {} + for row in tables_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + table_type_raw = row["table_type"] + + table_type = "view" if "view" in table_type_raw.lower() else "table" + + if schema_name not in schema_map: + schema_map[schema_name] = {} + schema_map[schema_name][table_name] = { + "name": table_name, + "table_type": table_type, + "native_type": table_type_raw, + "native_path": f"{schema_name}.{table_name}", + "columns": [], + } + + # Add columns + for row in columns_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + if schema_name in schema_map and table_name in schema_map[schema_name]: + is_pk = (schema_name, table_name, row["column_name"]) in pk_set + col_data = { + "name": row["column_name"], + "data_type": normalize_type(row["data_type"], SourceType.POSTGRESQL), + "native_type": row["data_type"], + "nullable": row["is_nullable"] == "YES", + "is_primary_key": is_pk, + "is_partition_key": False, + "default_value": row["column_default"], + } + schema_map[schema_name][table_name]["columns"].append(col_data) + + # Build catalog structure + catalogs = [ + { + "name": self._config.get("database", "default"), + "schemas": [ + { + "name": schema_name, + "tables": list(tables.values()), + } + for schema_name, tables in schema_map.items() + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "postgres", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch PostgreSQL schema: {str(e)}", + details={"error": str(e)}, + ) from e + + def _build_sample_query(self, table: str, n: int) -> str: + """Build PostgreSQL-specific sampling query using TABLESAMPLE.""" + # Use TABLESAMPLE SYSTEM for larger tables, random for smaller + return f""" + SELECT * FROM {table} + TABLESAMPLE SYSTEM (10) + LIMIT {n} + """ diff --git a/backend/src/dataing/adapters/datasource/sql/redshift.py b/backend/src/dataing/adapters/datasource/sql/redshift.py new file mode 100644 index 000000000..912c29f16 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/sql/redshift.py @@ -0,0 +1,450 @@ +"""Amazon Redshift adapter implementation. + +This module provides an Amazon Redshift adapter that implements the unified +data source interface with full schema discovery and query capabilities. 
+""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + ConnectionTimeoutError, + QuerySyntaxError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.sql.base import SQLAdapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +REDSHIFT_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="connection", label="Connection", collapsed_by_default=False), + FieldGroup(id="auth", label="Authentication", collapsed_by_default=False), + FieldGroup(id="ssl", label="SSL/TLS", collapsed_by_default=True), + FieldGroup(id="advanced", label="Advanced", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="host", + label="Host", + type="string", + required=True, + group="connection", + placeholder="cluster-name.region.redshift.amazonaws.com", + description="Redshift cluster endpoint", + ), + ConfigField( + name="port", + label="Port", + type="integer", + required=True, + group="connection", + default_value=5439, + min_value=1, + max_value=65535, + ), + ConfigField( + name="database", + label="Database", + type="string", + required=True, + group="connection", + placeholder="dev", + description="Name of the database to connect to", + ), + ConfigField( + name="username", + label="Username", + type="string", + required=True, + group="auth", + ), + ConfigField( + name="password", + label="Password", + type="secret", + required=True, + group="auth", + ), + ConfigField( + name="ssl_mode", + label="SSL Mode", + type="enum", + required=False, + group="ssl", + default_value="require", + options=[ + {"value": "disable", "label": "Disable"}, + {"value": "require", "label": "Require"}, + {"value": "verify-ca", "label": "Verify CA"}, + {"value": "verify-full", "label": "Verify Full"}, + ], + ), + ConfigField( + name="connection_timeout", + label="Connection Timeout (seconds)", + type="integer", + required=False, + group="advanced", + default_value=30, + min_value=5, + max_value=300, + ), + ConfigField( + name="schemas", + label="Schemas to Include", + type="string", + required=False, + group="advanced", + placeholder="public,analytics", + description="Comma-separated list of schemas to include (default: all)", + ), + ], +) + +REDSHIFT_CAPABILITIES = AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=10, +) + + +@register_adapter( + source_type=SourceType.REDSHIFT, + display_name="Amazon Redshift", + category=SourceCategory.DATABASE, + icon="redshift", + description="Connect to Amazon Redshift data warehouses", + capabilities=REDSHIFT_CAPABILITIES, + config_schema=REDSHIFT_CONFIG_SCHEMA, +) +class RedshiftAdapter(SQLAdapter): + """Amazon Redshift database adapter. + + Provides full schema discovery and query execution for Redshift clusters. + Uses asyncpg for connection as Redshift is PostgreSQL-compatible. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize Redshift adapter. 
+ + Args: + config: Configuration dictionary with: + - host: Cluster endpoint + - port: Server port (default: 5439) + - database: Database name + - username: Username + - password: Password + - ssl_mode: SSL mode (optional) + - connection_timeout: Timeout in seconds (optional) + - schemas: Comma-separated schemas to include (optional) + """ + super().__init__(config) + self._pool: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.REDSHIFT + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return REDSHIFT_CAPABILITIES + + def _build_dsn(self) -> str: + """Build PostgreSQL-compatible DSN from config.""" + host = self._config.get("host", "localhost") + port = self._config.get("port", 5439) + database = self._config.get("database", "dev") + username = self._config.get("username", "") + password = self._config.get("password", "") + ssl_mode = self._config.get("ssl_mode", "require") + + return f"postgresql://{username}:{password}@{host}:{port}/{database}?sslmode={ssl_mode}" + + async def connect(self) -> None: + """Establish connection to Redshift.""" + try: + import asyncpg + except ImportError as e: + raise ConnectionFailedError( + message="asyncpg is not installed. Install with: pip install asyncpg", + details={"error": str(e)}, + ) from e + + try: + timeout = self._config.get("connection_timeout", 30) + self._pool = await asyncpg.create_pool( + self._build_dsn(), + min_size=1, + max_size=10, + command_timeout=timeout, + ) + self._connected = True + except Exception as e: + error_str = str(e).lower() + if "password" in error_str or "authentication" in error_str: + raise AuthenticationFailedError( + message="Authentication failed for Redshift", + details={"error": str(e)}, + ) from e + elif "timeout" in error_str: + raise ConnectionTimeoutError( + message="Connection to Redshift timed out", + timeout_seconds=self._config.get("connection_timeout", 30), + ) from e + else: + raise ConnectionFailedError( + message=f"Failed to connect to Redshift: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close Redshift connection pool.""" + if self._pool: + await self._pool.close() + self._pool = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test Redshift connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + async with self._pool.acquire() as conn: + result = await conn.fetchrow("SELECT version()") + version = result[0] if result else "Unknown" + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version=version, + message="Connection successful", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def execute_query( + self, + sql: str, + params: dict[str, Any] | None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query.""" + if not self._connected or not self._pool: + raise ConnectionFailedError(message="Not connected to Redshift") + + start_time = time.time() + try: + async with self._pool.acquire() as conn: + await conn.execute(f"SET statement_timeout = {timeout_seconds * 1000}") + rows = 
await conn.fetch(sql) + execution_time_ms = int((time.time() - start_time) * 1000) + + if not rows: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [{"name": key, "data_type": "string"} for key in rows[0].keys()] + row_dicts = [dict(row) for row in rows] + + truncated = False + if limit and len(row_dicts) > limit: + row_dicts = row_dicts[:limit] + truncated = True + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=truncated, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str: + raise QuerySyntaxError( + message=str(e), + query=sql[:200], + ) from e + elif "permission denied" in error_str: + raise AccessDeniedError( + message=str(e), + ) from e + elif "canceling statement" in error_str or "timeout" in error_str: + raise QueryTimeoutError( + message=str(e), + timeout_seconds=timeout_seconds, + ) from e + else: + raise + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get Redshift schema.""" + if not self._connected or not self._pool: + raise ConnectionFailedError(message="Not connected to Redshift") + + try: + conditions = ["table_schema NOT IN ('pg_catalog', 'information_schema', 'pg_internal')"] + if filter: + if filter.table_pattern: + conditions.append(f"table_name LIKE '{filter.table_pattern}'") + if filter.schema_pattern: + conditions.append(f"table_schema LIKE '{filter.schema_pattern}'") + if not filter.include_views: + conditions.append("table_type = 'BASE TABLE'") + + where_clause = " AND ".join(conditions) + limit_clause = f"LIMIT {filter.max_tables}" if filter else "LIMIT 1000" + + tables_sql = f""" + SELECT + table_schema, + table_name, + table_type + FROM information_schema.tables + WHERE {where_clause} + ORDER BY table_schema, table_name + {limit_clause} + """ + tables_result = await self.execute_query(tables_sql) + + columns_sql = f""" + SELECT + table_schema, + table_name, + column_name, + data_type, + is_nullable, + column_default, + ordinal_position + FROM information_schema.columns + WHERE {where_clause} + ORDER BY table_schema, table_name, ordinal_position + """ + columns_result = await self.execute_query(columns_sql) + + pk_sql = """ + SELECT + schemaname as table_schema, + tablename as table_name, + columnname as column_name + FROM svv_table_info ti + JOIN pg_attribute a ON ti.table_id = a.attrelid + WHERE a.attnum > 0 + AND a.attisdropped = false + """ + try: + pk_result = await self.execute_query(pk_sql) + pk_set = { + (row["table_schema"], row["table_name"], row["column_name"]) + for row in pk_result.rows + } + except Exception: + pk_set = set() + + schema_map: dict[str, dict[str, dict[str, Any]]] = {} + for row in tables_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + table_type_raw = row["table_type"] + + table_type = "view" if "view" in table_type_raw.lower() else "table" + + if schema_name not in schema_map: + schema_map[schema_name] = {} + schema_map[schema_name][table_name] = { + "name": table_name, + "table_type": table_type, + "native_type": table_type_raw, + "native_path": f"{schema_name}.{table_name}", + "columns": [], + } + + for row in columns_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + if schema_name in schema_map and table_name in schema_map[schema_name]: + is_pk = (schema_name, table_name, row["column_name"]) in pk_set + col_data 
= { + "name": row["column_name"], + "data_type": normalize_type(row["data_type"], SourceType.REDSHIFT), + "native_type": row["data_type"], + "nullable": row["is_nullable"] == "YES", + "is_primary_key": is_pk, + "is_partition_key": False, + "default_value": row["column_default"], + } + schema_map[schema_name][table_name]["columns"].append(col_data) + + catalogs = [ + { + "name": self._config.get("database", "default"), + "schemas": [ + { + "name": schema_name, + "tables": list(tables.values()), + } + for schema_name, tables in schema_map.items() + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "redshift", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch Redshift schema: {str(e)}", + details={"error": str(e)}, + ) from e + + def _build_sample_query(self, table: str, n: int) -> str: + """Build Redshift-specific sampling query.""" + return f"SELECT * FROM {table} ORDER BY RANDOM() LIMIT {n}" diff --git a/backend/src/dataing/adapters/datasource/sql/snowflake.py b/backend/src/dataing/adapters/datasource/sql/snowflake.py new file mode 100644 index 000000000..4caa88e0d --- /dev/null +++ b/backend/src/dataing/adapters/datasource/sql/snowflake.py @@ -0,0 +1,478 @@ +"""Snowflake adapter implementation. + +This module provides a Snowflake adapter that implements the unified +data source interface with full schema discovery and query capabilities. +""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + ConnectionTimeoutError, + QuerySyntaxError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.sql.base import SQLAdapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +SNOWFLAKE_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="connection", label="Connection", collapsed_by_default=False), + FieldGroup(id="auth", label="Authentication", collapsed_by_default=False), + FieldGroup(id="advanced", label="Advanced", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="account", + label="Account", + type="string", + required=True, + group="connection", + placeholder="xy12345.us-east-1", + description="Snowflake account identifier (e.g., xy12345.us-east-1)", + ), + ConfigField( + name="warehouse", + label="Warehouse", + type="string", + required=True, + group="connection", + placeholder="COMPUTE_WH", + description="Virtual warehouse to use", + ), + ConfigField( + name="database", + label="Database", + type="string", + required=True, + group="connection", + placeholder="MY_DATABASE", + ), + ConfigField( + name="schema", + label="Schema", + type="string", + required=False, + group="connection", + placeholder="PUBLIC", + default_value="PUBLIC", + ), + ConfigField( + name="user", + label="User", + type="string", + required=True, + group="auth", + ), + ConfigField( + name="password", + label="Password", + type="secret", + required=True, + group="auth", + ), + ConfigField( + name="role", + label="Role", + type="string", + required=False, + group="advanced", + 
placeholder="ACCOUNTADMIN", + description="Role to use for the session", + ), + ConfigField( + name="login_timeout", + label="Login Timeout (seconds)", + type="integer", + required=False, + group="advanced", + default_value=60, + min_value=10, + max_value=300, + ), + ], +) + +SNOWFLAKE_CAPABILITIES = AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=10, +) + + +@register_adapter( + source_type=SourceType.SNOWFLAKE, + display_name="Snowflake", + category=SourceCategory.DATABASE, + icon="snowflake", + description="Connect to Snowflake data warehouse for analytics and querying", + capabilities=SNOWFLAKE_CAPABILITIES, + config_schema=SNOWFLAKE_CONFIG_SCHEMA, +) +class SnowflakeAdapter(SQLAdapter): + """Snowflake database adapter. + + Provides full schema discovery and query execution for Snowflake. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize Snowflake adapter. + + Args: + config: Configuration dictionary with: + - account: Snowflake account identifier + - warehouse: Virtual warehouse + - database: Database name + - schema: Schema name (optional) + - user: Username + - password: Password + - role: Role (optional) + - login_timeout: Timeout in seconds (optional) + """ + super().__init__(config) + self._conn: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.SNOWFLAKE + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return SNOWFLAKE_CAPABILITIES + + async def connect(self) -> None: + """Establish connection to Snowflake.""" + try: + import snowflake.connector + except ImportError as e: + raise ConnectionFailedError( + message="snowflake-connector-python not installed. 
pip install it", + details={"error": str(e)}, + ) from e + + try: + account = self._config.get("account", "") + user = self._config.get("user", "") + password = self._config.get("password", "") + warehouse = self._config.get("warehouse", "") + database = self._config.get("database", "") + schema = self._config.get("schema", "PUBLIC") + role = self._config.get("role") + login_timeout = self._config.get("login_timeout", 60) + + connect_params = { + "account": account, + "user": user, + "password": password, + "warehouse": warehouse, + "database": database, + "schema": schema, + "login_timeout": login_timeout, + } + + if role: + connect_params["role"] = role + + self._conn = snowflake.connector.connect(**connect_params) + self._connected = True + except Exception as e: + error_str = str(e).lower() + if "incorrect username or password" in error_str or "authentication" in error_str: + raise AuthenticationFailedError( + message="Authentication failed for Snowflake", + details={"error": str(e)}, + ) from e + elif "timeout" in error_str: + raise ConnectionTimeoutError( + message="Connection to Snowflake timed out", + timeout_seconds=self._config.get("login_timeout", 60), + ) from e + else: + raise ConnectionFailedError( + message=f"Failed to connect to Snowflake: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close Snowflake connection.""" + if self._conn: + self._conn.close() + self._conn = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test Snowflake connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + cursor = self._conn.cursor() + cursor.execute("SELECT CURRENT_VERSION()") + result = cursor.fetchone() + version = result[0] if result else "Unknown" + cursor.close() + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version=f"Snowflake {version}", + message="Connection successful", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def execute_query( + self, + sql: str, + params: dict[str, Any] | None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query against Snowflake.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to Snowflake") + + start_time = time.time() + cursor = None + try: + cursor = self._conn.cursor() + + # Set query timeout + cursor.execute(f"ALTER SESSION SET STATEMENT_TIMEOUT_IN_SECONDS = {timeout_seconds}") + + # Execute query + cursor.execute(sql) + + # Get column info + columns_info = cursor.description + rows = cursor.fetchall() + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not columns_info: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [{"name": col[0], "data_type": "string"} for col in columns_info] + column_names = [col[0] for col in columns_info] + + # Convert rows to dicts + row_dicts = [dict(zip(column_names, row, strict=False)) for row in rows] + + # Apply limit if needed + truncated = False + if limit and len(row_dicts) > limit: + row_dicts = row_dicts[:limit] + truncated = True + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), 
+ truncated=truncated, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str or "sql compilation error" in error_str: + raise QuerySyntaxError( + message=str(e), + query=sql[:200], + ) from e + elif "insufficient privileges" in error_str or "access denied" in error_str: + raise AccessDeniedError( + message=str(e), + ) from e + elif "timeout" in error_str or "statement timeout" in error_str: + raise QueryTimeoutError( + message=str(e), + timeout_seconds=timeout_seconds, + ) from e + else: + raise + finally: + if cursor: + cursor.close() + + async def _fetch_table_metadata(self) -> list[dict[str, Any]]: + """Fetch table metadata from Snowflake.""" + database = self._config.get("database", "") + schema = self._config.get("schema", "PUBLIC") + + sql = f""" + SELECT + TABLE_CATALOG as table_catalog, + TABLE_SCHEMA as table_schema, + TABLE_NAME as table_name, + TABLE_TYPE as table_type + FROM {database}.INFORMATION_SCHEMA.TABLES + WHERE TABLE_SCHEMA = '{schema}' + ORDER BY TABLE_NAME + """ + result = await self.execute_query(sql) + return list(result.rows) + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get Snowflake schema.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to Snowflake") + + try: + database = self._config.get("database", "") + schema = self._config.get("schema", "PUBLIC") + + # Build filter conditions + conditions = [f"TABLE_SCHEMA = '{schema}'"] + if filter: + if filter.table_pattern: + conditions.append(f"TABLE_NAME LIKE '{filter.table_pattern}'") + if filter.schema_pattern: + conditions.append(f"TABLE_SCHEMA LIKE '{filter.schema_pattern}'") + if not filter.include_views: + conditions.append("TABLE_TYPE = 'BASE TABLE'") + + where_clause = " AND ".join(conditions) + limit_clause = f"LIMIT {filter.max_tables}" if filter else "LIMIT 1000" + + # Get tables + tables_sql = f""" + SELECT + TABLE_SCHEMA as table_schema, + TABLE_NAME as table_name, + TABLE_TYPE as table_type, + ROW_COUNT as row_count, + BYTES as size_bytes + FROM {database}.INFORMATION_SCHEMA.TABLES + WHERE {where_clause} + ORDER BY TABLE_NAME + {limit_clause} + """ + tables_result = await self.execute_query(tables_sql) + + # Get columns + columns_sql = f""" + SELECT + TABLE_SCHEMA as table_schema, + TABLE_NAME as table_name, + COLUMN_NAME as column_name, + DATA_TYPE as data_type, + IS_NULLABLE as is_nullable, + COLUMN_DEFAULT as column_default, + ORDINAL_POSITION as ordinal_position + FROM {database}.INFORMATION_SCHEMA.COLUMNS + WHERE {where_clause} + ORDER BY TABLE_NAME, ORDINAL_POSITION + """ + columns_result = await self.execute_query(columns_sql) + + # Organize into schema response + schema_map: dict[str, dict[str, dict[str, Any]]] = {} + for row in tables_result.rows: + schema_name = row["TABLE_SCHEMA"] or row.get("table_schema", "") + table_name = row["TABLE_NAME"] or row.get("table_name", "") + table_type_raw = row["TABLE_TYPE"] or row.get("table_type", "") + + table_type = "view" if "view" in table_type_raw.lower() else "table" + + if schema_name not in schema_map: + schema_map[schema_name] = {} + schema_map[schema_name][table_name] = { + "name": table_name, + "table_type": table_type, + "native_type": table_type_raw, + "native_path": f"{database}.{schema_name}.{table_name}", + "columns": [], + "row_count": row.get("ROW_COUNT") or row.get("row_count"), + "size_bytes": row.get("BYTES") or row.get("size_bytes"), + } + + # Add columns 
+ for row in columns_result.rows: + schema_name = row["TABLE_SCHEMA"] or row.get("table_schema", "") + table_name = row["TABLE_NAME"] or row.get("table_name", "") + if schema_name in schema_map and table_name in schema_map[schema_name]: + col_data = { + "name": row["COLUMN_NAME"] or row.get("column_name", ""), + "data_type": normalize_type( + row["DATA_TYPE"] or row.get("data_type", ""), SourceType.SNOWFLAKE + ), + "native_type": row["DATA_TYPE"] or row.get("data_type", ""), + "nullable": (row["IS_NULLABLE"] or row.get("is_nullable", "YES")) == "YES", + "is_primary_key": False, + "is_partition_key": False, + "default_value": row["COLUMN_DEFAULT"] or row.get("column_default"), + } + schema_map[schema_name][table_name]["columns"].append(col_data) + + # Build catalog structure + catalogs = [ + { + "name": database, + "schemas": [ + { + "name": schema_name, + "tables": list(tables.values()), + } + for schema_name, tables in schema_map.items() + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "snowflake", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch Snowflake schema: {str(e)}", + details={"error": str(e)}, + ) from e + + def _build_sample_query(self, table: str, n: int) -> str: + """Build Snowflake-specific sampling query using TABLESAMPLE.""" + return f"SELECT * FROM {table} SAMPLE ({n} ROWS)" diff --git a/backend/src/dataing/adapters/datasource/sql/trino.py b/backend/src/dataing/adapters/datasource/sql/trino.py new file mode 100644 index 000000000..bcd146de0 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/sql/trino.py @@ -0,0 +1,477 @@ +"""Trino adapter implementation. + +This module provides a Trino adapter that implements the unified +data source interface with full schema discovery and query capabilities. 
+""" + +from __future__ import annotations + +import time +from typing import Any + +from dataing.adapters.datasource.errors import ( + AccessDeniedError, + AuthenticationFailedError, + ConnectionFailedError, + ConnectionTimeoutError, + QuerySyntaxError, + QueryTimeoutError, + SchemaFetchFailedError, +) +from dataing.adapters.datasource.registry import register_adapter +from dataing.adapters.datasource.sql.base import SQLAdapter +from dataing.adapters.datasource.type_mapping import normalize_type +from dataing.adapters.datasource.types import ( + AdapterCapabilities, + ConfigField, + ConfigSchema, + ConnectionTestResult, + FieldGroup, + QueryLanguage, + QueryResult, + SchemaFilter, + SchemaResponse, + SourceCategory, + SourceType, +) + +TRINO_CONFIG_SCHEMA = ConfigSchema( + field_groups=[ + FieldGroup(id="connection", label="Connection", collapsed_by_default=False), + FieldGroup(id="auth", label="Authentication", collapsed_by_default=False), + FieldGroup(id="advanced", label="Advanced", collapsed_by_default=True), + ], + fields=[ + ConfigField( + name="host", + label="Host", + type="string", + required=True, + group="connection", + placeholder="localhost", + description="Trino coordinator hostname or IP address", + ), + ConfigField( + name="port", + label="Port", + type="integer", + required=True, + group="connection", + default_value=8080, + min_value=1, + max_value=65535, + ), + ConfigField( + name="catalog", + label="Catalog", + type="string", + required=True, + group="connection", + placeholder="hive", + description="Default catalog to use", + ), + ConfigField( + name="schema", + label="Schema", + type="string", + required=False, + group="connection", + placeholder="default", + description="Default schema to use", + ), + ConfigField( + name="user", + label="User", + type="string", + required=True, + group="auth", + placeholder="trino", + ), + ConfigField( + name="password", + label="Password", + type="secret", + required=False, + group="auth", + description="Password (if authentication is enabled)", + ), + ConfigField( + name="http_scheme", + label="HTTP Scheme", + type="enum", + required=False, + group="advanced", + default_value="http", + options=[ + {"value": "http", "label": "HTTP"}, + {"value": "https", "label": "HTTPS"}, + ], + ), + ConfigField( + name="verify", + label="Verify SSL", + type="boolean", + required=False, + group="advanced", + default_value=True, + ), + ], +) + +TRINO_CAPABILITIES = AdapterCapabilities( + supports_sql=True, + supports_sampling=True, + supports_row_count=True, + supports_column_stats=True, + supports_preview=True, + supports_write=False, + query_language=QueryLanguage.SQL, + max_concurrent_queries=5, +) + + +@register_adapter( + source_type=SourceType.TRINO, + display_name="Trino", + category=SourceCategory.DATABASE, + icon="trino", + description="Connect to Trino clusters for distributed SQL querying", + capabilities=TRINO_CAPABILITIES, + config_schema=TRINO_CONFIG_SCHEMA, +) +class TrinoAdapter(SQLAdapter): + """Trino database adapter. + + Provides full schema discovery and query execution for Trino clusters. + """ + + def __init__(self, config: dict[str, Any]) -> None: + """Initialize Trino adapter. 
+ + Args: + config: Configuration dictionary with: + - host: Coordinator hostname + - port: Coordinator port + - catalog: Default catalog + - schema: Default schema (optional) + - user: Username + - password: Password (optional) + - http_scheme: http or https (optional) + - verify: Verify SSL certificates (optional) + """ + super().__init__(config) + self._conn: Any = None + self._cursor: Any = None + self._source_id: str = "" + + @property + def source_type(self) -> SourceType: + """Get the source type for this adapter.""" + return SourceType.TRINO + + @property + def capabilities(self) -> AdapterCapabilities: + """Get the capabilities of this adapter.""" + return TRINO_CAPABILITIES + + async def connect(self) -> None: + """Establish connection to Trino.""" + try: + from trino.auth import BasicAuthentication + from trino.dbapi import connect + except ImportError as e: + raise ConnectionFailedError( + message="trino is not installed. Install with: pip install trino", + details={"error": str(e)}, + ) from e + + try: + host = self._config.get("host", "localhost") + port = self._config.get("port", 8080) + catalog = self._config.get("catalog", "hive") + schema = self._config.get("schema", "default") + user = self._config.get("user", "trino") + password = self._config.get("password") + http_scheme = self._config.get("http_scheme", "http") + verify = self._config.get("verify", True) + + auth = None + if password: + auth = BasicAuthentication(user, password) + + self._conn = connect( + host=host, + port=port, + user=user, + catalog=catalog, + schema=schema, + http_scheme=http_scheme, + auth=auth, + verify=verify, + ) + self._connected = True + except Exception as e: + error_str = str(e).lower() + if "authentication" in error_str or "401" in error_str: + raise AuthenticationFailedError( + message="Authentication failed for Trino", + details={"error": str(e)}, + ) from e + elif "connection refused" in error_str or "timeout" in error_str: + raise ConnectionTimeoutError( + message="Connection to Trino timed out", + ) from e + else: + raise ConnectionFailedError( + message=f"Failed to connect to Trino: {str(e)}", + details={"error": str(e)}, + ) from e + + async def disconnect(self) -> None: + """Close Trino connection.""" + if self._cursor: + self._cursor.close() + self._cursor = None + if self._conn: + self._conn.close() + self._conn = None + self._connected = False + + async def test_connection(self) -> ConnectionTestResult: + """Test Trino connectivity.""" + start_time = time.time() + try: + if not self._connected: + await self.connect() + + cursor = self._conn.cursor() + cursor.execute("SELECT 'test'") + cursor.fetchall() + cursor.close() + + # Get server info + catalog = self._config.get("catalog", "") + version = f"Trino (catalog: {catalog})" + + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=True, + latency_ms=latency_ms, + server_version=version, + message="Connection successful", + ) + except Exception as e: + latency_ms = int((time.time() - start_time) * 1000) + return ConnectionTestResult( + success=False, + latency_ms=latency_ms, + message=str(e), + error_code="CONNECTION_FAILED", + ) + + async def execute_query( + self, + sql: str, + params: dict[str, Any] | None = None, + timeout_seconds: int = 30, + limit: int | None = None, + ) -> QueryResult: + """Execute a SQL query against Trino.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to Trino") + + start_time = time.time() + cursor = None + 
try: + cursor = self._conn.cursor() + cursor.execute(sql) + + # Get column info + columns_info = cursor.description + rows = cursor.fetchall() + + execution_time_ms = int((time.time() - start_time) * 1000) + + if not columns_info: + return QueryResult( + columns=[], + rows=[], + row_count=0, + execution_time_ms=execution_time_ms, + ) + + columns = [{"name": col[0], "data_type": "string"} for col in columns_info] + column_names = [col[0] for col in columns_info] + + # Convert rows to dicts + row_dicts = [dict(zip(column_names, row, strict=False)) for row in rows] + + # Apply limit if needed + truncated = False + if limit and len(row_dicts) > limit: + row_dicts = row_dicts[:limit] + truncated = True + + return QueryResult( + columns=columns, + rows=row_dicts, + row_count=len(row_dicts), + truncated=truncated, + execution_time_ms=execution_time_ms, + ) + + except Exception as e: + error_str = str(e).lower() + if "syntax error" in error_str or "mismatched input" in error_str: + raise QuerySyntaxError( + message=str(e), + query=sql[:200], + ) from e + elif "permission denied" in error_str or "access denied" in error_str: + raise AccessDeniedError( + message=str(e), + ) from e + elif "timeout" in error_str or "exceeded" in error_str: + raise QueryTimeoutError( + message=str(e), + timeout_seconds=timeout_seconds, + ) from e + else: + raise + finally: + if cursor: + cursor.close() + + async def _fetch_table_metadata(self) -> list[dict[str, Any]]: + """Fetch table metadata from Trino.""" + catalog = self._config.get("catalog", "hive") + schema = self._config.get("schema", "default") + + sql = f""" + SELECT + table_catalog, + table_schema, + table_name, + table_type + FROM {catalog}.information_schema.tables + WHERE table_schema = '{schema}' + ORDER BY table_name + """ + result = await self.execute_query(sql) + return list(result.rows) + + async def get_schema( + self, + filter: SchemaFilter | None = None, + ) -> SchemaResponse: + """Get Trino schema.""" + if not self._connected or not self._conn: + raise ConnectionFailedError(message="Not connected to Trino") + + try: + catalog = self._config.get("catalog", "hive") + schema = self._config.get("schema", "default") + + # Build filter conditions + conditions = [f"table_schema = '{schema}'"] + if filter: + if filter.table_pattern: + conditions.append(f"table_name LIKE '{filter.table_pattern}'") + if filter.schema_pattern: + conditions.append(f"table_schema LIKE '{filter.schema_pattern}'") + if not filter.include_views: + conditions.append("table_type = 'BASE TABLE'") + + where_clause = " AND ".join(conditions) + limit_clause = f"LIMIT {filter.max_tables}" if filter else "LIMIT 1000" + + # Get tables + tables_sql = f""" + SELECT + table_schema, + table_name, + table_type + FROM {catalog}.information_schema.tables + WHERE {where_clause} + ORDER BY table_name + {limit_clause} + """ + tables_result = await self.execute_query(tables_sql) + + # Get columns + columns_sql = f""" + SELECT + table_schema, + table_name, + column_name, + data_type, + is_nullable, + ordinal_position + FROM {catalog}.information_schema.columns + WHERE {where_clause} + ORDER BY table_name, ordinal_position + """ + columns_result = await self.execute_query(columns_sql) + + # Organize into schema response + schema_map: dict[str, dict[str, dict[str, Any]]] = {} + for row in tables_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + table_type_raw = row["table_type"] + + table_type = "view" if "view" in table_type_raw.lower() else "table" + + if 
schema_name not in schema_map: + schema_map[schema_name] = {} + schema_map[schema_name][table_name] = { + "name": table_name, + "table_type": table_type, + "native_type": table_type_raw, + "native_path": f"{catalog}.{schema_name}.{table_name}", + "columns": [], + } + + # Add columns + for row in columns_result.rows: + schema_name = row["table_schema"] + table_name = row["table_name"] + if schema_name in schema_map and table_name in schema_map[schema_name]: + col_data = { + "name": row["column_name"], + "data_type": normalize_type(row["data_type"], SourceType.TRINO), + "native_type": row["data_type"], + "nullable": row["is_nullable"] == "YES", + "is_primary_key": False, + "is_partition_key": False, + } + schema_map[schema_name][table_name]["columns"].append(col_data) + + # Build catalog structure + catalogs = [ + { + "name": catalog, + "schemas": [ + { + "name": schema_name, + "tables": list(tables.values()), + } + for schema_name, tables in schema_map.items() + ], + } + ] + + return self._build_schema_response( + source_id=self._source_id or "trino", + catalogs=catalogs, + ) + + except Exception as e: + raise SchemaFetchFailedError( + message=f"Failed to fetch Trino schema: {str(e)}", + details={"error": str(e)}, + ) from e + + def _build_sample_query(self, table: str, n: int) -> str: + """Build Trino-specific sampling query using TABLESAMPLE.""" + return f"SELECT * FROM {table} TABLESAMPLE BERNOULLI(10) LIMIT {n}" diff --git a/backend/src/dataing/adapters/datasource/type_mapping.py b/backend/src/dataing/adapters/datasource/type_mapping.py new file mode 100644 index 000000000..166045f30 --- /dev/null +++ b/backend/src/dataing/adapters/datasource/type_mapping.py @@ -0,0 +1,495 @@ +"""Type normalization mappings for all data sources. + +This module provides mappings from native data types to normalized types, +ensuring consistent type representation across all source types. 
+""" + +from __future__ import annotations + +import re + +from dataing.adapters.datasource.types import NormalizedType, SourceType + +# PostgreSQL type mappings +POSTGRESQL_TYPE_MAP: dict[str, NormalizedType] = { + # String types + "varchar": NormalizedType.STRING, + "character varying": NormalizedType.STRING, + "text": NormalizedType.STRING, + "char": NormalizedType.STRING, + "character": NormalizedType.STRING, + "name": NormalizedType.STRING, + "uuid": NormalizedType.STRING, + "citext": NormalizedType.STRING, + # Integer types + "smallint": NormalizedType.INTEGER, + "integer": NormalizedType.INTEGER, + "int": NormalizedType.INTEGER, + "int2": NormalizedType.INTEGER, + "int4": NormalizedType.INTEGER, + "bigint": NormalizedType.INTEGER, + "int8": NormalizedType.INTEGER, + "serial": NormalizedType.INTEGER, + "bigserial": NormalizedType.INTEGER, + "smallserial": NormalizedType.INTEGER, + # Float types + "real": NormalizedType.FLOAT, + "float4": NormalizedType.FLOAT, + "double precision": NormalizedType.FLOAT, + "float8": NormalizedType.FLOAT, + # Decimal types + "numeric": NormalizedType.DECIMAL, + "decimal": NormalizedType.DECIMAL, + "money": NormalizedType.DECIMAL, + # Boolean + "boolean": NormalizedType.BOOLEAN, + "bool": NormalizedType.BOOLEAN, + # Date/Time types + "date": NormalizedType.DATE, + "time": NormalizedType.TIME, + "time without time zone": NormalizedType.TIME, + "time with time zone": NormalizedType.TIME, + "timestamp": NormalizedType.TIMESTAMP, + "timestamp without time zone": NormalizedType.TIMESTAMP, + "timestamp with time zone": NormalizedType.TIMESTAMP, + "timestamptz": NormalizedType.TIMESTAMP, + "interval": NormalizedType.STRING, + # Binary + "bytea": NormalizedType.BINARY, + # JSON types + "json": NormalizedType.JSON, + "jsonb": NormalizedType.JSON, + # Array type (handled specially) + "array": NormalizedType.ARRAY, + # Geometric types (map to string for now) + "point": NormalizedType.STRING, + "line": NormalizedType.STRING, + "lseg": NormalizedType.STRING, + "box": NormalizedType.STRING, + "path": NormalizedType.STRING, + "polygon": NormalizedType.STRING, + "circle": NormalizedType.STRING, + # Network types + "inet": NormalizedType.STRING, + "cidr": NormalizedType.STRING, + "macaddr": NormalizedType.STRING, + "macaddr8": NormalizedType.STRING, + # Bit strings + "bit": NormalizedType.STRING, + "bit varying": NormalizedType.STRING, + # Other + "xml": NormalizedType.STRING, + "oid": NormalizedType.INTEGER, +} + +# MySQL type mappings +MYSQL_TYPE_MAP: dict[str, NormalizedType] = { + # String types + "varchar": NormalizedType.STRING, + "char": NormalizedType.STRING, + "text": NormalizedType.STRING, + "tinytext": NormalizedType.STRING, + "mediumtext": NormalizedType.STRING, + "longtext": NormalizedType.STRING, + "enum": NormalizedType.STRING, + "set": NormalizedType.STRING, + # Integer types + "tinyint": NormalizedType.INTEGER, + "smallint": NormalizedType.INTEGER, + "mediumint": NormalizedType.INTEGER, + "int": NormalizedType.INTEGER, + "integer": NormalizedType.INTEGER, + "bigint": NormalizedType.INTEGER, + # Float types + "float": NormalizedType.FLOAT, + "double": NormalizedType.FLOAT, + "double precision": NormalizedType.FLOAT, + # Decimal types + "decimal": NormalizedType.DECIMAL, + "numeric": NormalizedType.DECIMAL, + # Boolean (MySQL uses TINYINT(1)) + "bit": NormalizedType.BOOLEAN, + # Date/Time types + "date": NormalizedType.DATE, + "time": NormalizedType.TIME, + "datetime": NormalizedType.DATETIME, + "timestamp": NormalizedType.TIMESTAMP, + "year": 
NormalizedType.INTEGER, + # Binary types + "binary": NormalizedType.BINARY, + "varbinary": NormalizedType.BINARY, + "tinyblob": NormalizedType.BINARY, + "blob": NormalizedType.BINARY, + "mediumblob": NormalizedType.BINARY, + "longblob": NormalizedType.BINARY, + # JSON + "json": NormalizedType.JSON, + # Spatial types + "geometry": NormalizedType.STRING, + "point": NormalizedType.STRING, + "linestring": NormalizedType.STRING, + "polygon": NormalizedType.STRING, +} + +# Snowflake type mappings +SNOWFLAKE_TYPE_MAP: dict[str, NormalizedType] = { + # String types + "varchar": NormalizedType.STRING, + "char": NormalizedType.STRING, + "character": NormalizedType.STRING, + "string": NormalizedType.STRING, + "text": NormalizedType.STRING, + # Integer types + "number": NormalizedType.DECIMAL, # NUMBER can be decimal + "int": NormalizedType.INTEGER, + "integer": NormalizedType.INTEGER, + "bigint": NormalizedType.INTEGER, + "smallint": NormalizedType.INTEGER, + "tinyint": NormalizedType.INTEGER, + "byteint": NormalizedType.INTEGER, + # Float types + "float": NormalizedType.FLOAT, + "float4": NormalizedType.FLOAT, + "float8": NormalizedType.FLOAT, + "double": NormalizedType.FLOAT, + "double precision": NormalizedType.FLOAT, + "real": NormalizedType.FLOAT, + # Decimal types + "decimal": NormalizedType.DECIMAL, + "numeric": NormalizedType.DECIMAL, + # Boolean + "boolean": NormalizedType.BOOLEAN, + # Date/Time types + "date": NormalizedType.DATE, + "time": NormalizedType.TIME, + "datetime": NormalizedType.DATETIME, + "timestamp": NormalizedType.TIMESTAMP, + "timestamp_ntz": NormalizedType.TIMESTAMP, + "timestamp_ltz": NormalizedType.TIMESTAMP, + "timestamp_tz": NormalizedType.TIMESTAMP, + # Binary + "binary": NormalizedType.BINARY, + "varbinary": NormalizedType.BINARY, + # Semi-structured types + "variant": NormalizedType.JSON, + "object": NormalizedType.MAP, + "array": NormalizedType.ARRAY, + # Geography + "geography": NormalizedType.STRING, + "geometry": NormalizedType.STRING, +} + +# BigQuery type mappings +BIGQUERY_TYPE_MAP: dict[str, NormalizedType] = { + # String types + "string": NormalizedType.STRING, + "bytes": NormalizedType.BINARY, + # Integer types + "int64": NormalizedType.INTEGER, + "int": NormalizedType.INTEGER, + "smallint": NormalizedType.INTEGER, + "integer": NormalizedType.INTEGER, + "bigint": NormalizedType.INTEGER, + "tinyint": NormalizedType.INTEGER, + "byteint": NormalizedType.INTEGER, + # Float types + "float64": NormalizedType.FLOAT, + "float": NormalizedType.FLOAT, + # Decimal types + "numeric": NormalizedType.DECIMAL, + "bignumeric": NormalizedType.DECIMAL, + "decimal": NormalizedType.DECIMAL, + "bigdecimal": NormalizedType.DECIMAL, + # Boolean + "bool": NormalizedType.BOOLEAN, + "boolean": NormalizedType.BOOLEAN, + # Date/Time types + "date": NormalizedType.DATE, + "time": NormalizedType.TIME, + "datetime": NormalizedType.DATETIME, + "timestamp": NormalizedType.TIMESTAMP, + # Complex types + "struct": NormalizedType.STRUCT, + "record": NormalizedType.STRUCT, + "array": NormalizedType.ARRAY, + "json": NormalizedType.JSON, + # Geography + "geography": NormalizedType.STRING, + "interval": NormalizedType.STRING, +} + +# Trino type mappings (similar to Presto) +TRINO_TYPE_MAP: dict[str, NormalizedType] = { + # String types + "varchar": NormalizedType.STRING, + "char": NormalizedType.STRING, + "varbinary": NormalizedType.BINARY, + "json": NormalizedType.JSON, + # Integer types + "tinyint": NormalizedType.INTEGER, + "smallint": NormalizedType.INTEGER, + "integer": 
NormalizedType.INTEGER, + "bigint": NormalizedType.INTEGER, + # Float types + "real": NormalizedType.FLOAT, + "double": NormalizedType.FLOAT, + # Decimal types + "decimal": NormalizedType.DECIMAL, + # Boolean + "boolean": NormalizedType.BOOLEAN, + # Date/Time types + "date": NormalizedType.DATE, + "time": NormalizedType.TIME, + "time with time zone": NormalizedType.TIME, + "timestamp": NormalizedType.TIMESTAMP, + "timestamp with time zone": NormalizedType.TIMESTAMP, + "interval year to month": NormalizedType.STRING, + "interval day to second": NormalizedType.STRING, + # Complex types + "array": NormalizedType.ARRAY, + "map": NormalizedType.MAP, + "row": NormalizedType.STRUCT, + # Other + "uuid": NormalizedType.STRING, + "ipaddress": NormalizedType.STRING, +} + +# DuckDB type mappings +DUCKDB_TYPE_MAP: dict[str, NormalizedType] = { + # String types + "varchar": NormalizedType.STRING, + "char": NormalizedType.STRING, + "bpchar": NormalizedType.STRING, + "text": NormalizedType.STRING, + "string": NormalizedType.STRING, + "uuid": NormalizedType.STRING, + # Integer types + "tinyint": NormalizedType.INTEGER, + "smallint": NormalizedType.INTEGER, + "integer": NormalizedType.INTEGER, + "int": NormalizedType.INTEGER, + "bigint": NormalizedType.INTEGER, + "hugeint": NormalizedType.INTEGER, + "utinyint": NormalizedType.INTEGER, + "usmallint": NormalizedType.INTEGER, + "uinteger": NormalizedType.INTEGER, + "ubigint": NormalizedType.INTEGER, + # Float types + "real": NormalizedType.FLOAT, + "float": NormalizedType.FLOAT, + "double": NormalizedType.FLOAT, + # Decimal types + "decimal": NormalizedType.DECIMAL, + "numeric": NormalizedType.DECIMAL, + # Boolean + "boolean": NormalizedType.BOOLEAN, + "bool": NormalizedType.BOOLEAN, + # Date/Time types + "date": NormalizedType.DATE, + "time": NormalizedType.TIME, + "timestamp": NormalizedType.TIMESTAMP, + "timestamptz": NormalizedType.TIMESTAMP, + "timestamp with time zone": NormalizedType.TIMESTAMP, + "interval": NormalizedType.STRING, + # Binary + "blob": NormalizedType.BINARY, + "bytea": NormalizedType.BINARY, + # Complex types + "list": NormalizedType.ARRAY, + "struct": NormalizedType.STRUCT, + "map": NormalizedType.MAP, + "json": NormalizedType.JSON, +} + +# MongoDB type mappings +MONGODB_TYPE_MAP: dict[str, NormalizedType] = { + "string": NormalizedType.STRING, + "int": NormalizedType.INTEGER, + "int32": NormalizedType.INTEGER, + "long": NormalizedType.INTEGER, + "int64": NormalizedType.INTEGER, + "double": NormalizedType.FLOAT, + "decimal": NormalizedType.DECIMAL, + "decimal128": NormalizedType.DECIMAL, + "bool": NormalizedType.BOOLEAN, + "boolean": NormalizedType.BOOLEAN, + "date": NormalizedType.TIMESTAMP, + "timestamp": NormalizedType.TIMESTAMP, + "objectid": NormalizedType.STRING, + "object": NormalizedType.JSON, + "array": NormalizedType.ARRAY, + "bindata": NormalizedType.BINARY, + "null": NormalizedType.UNKNOWN, + "regex": NormalizedType.STRING, + "javascript": NormalizedType.STRING, + "symbol": NormalizedType.STRING, + "minkey": NormalizedType.STRING, + "maxkey": NormalizedType.STRING, +} + +# DynamoDB type mappings +DYNAMODB_TYPE_MAP: dict[str, NormalizedType] = { + "s": NormalizedType.STRING, # String + "n": NormalizedType.DECIMAL, # Number + "b": NormalizedType.BINARY, # Binary + "bool": NormalizedType.BOOLEAN, + "null": NormalizedType.UNKNOWN, + "m": NormalizedType.MAP, # Map + "l": NormalizedType.ARRAY, # List + "ss": NormalizedType.ARRAY, # String Set + "ns": NormalizedType.ARRAY, # Number Set + "bs": NormalizedType.ARRAY, # Binary Set +} 
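+# Note: DynamoDB exposes a single arbitrary-precision numeric attribute type ("N"),
+# so it is mapped to DECIMAL above rather than being split into INTEGER/FLOAT.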
+ +# Salesforce type mappings +SALESFORCE_TYPE_MAP: dict[str, NormalizedType] = { + "id": NormalizedType.STRING, + "string": NormalizedType.STRING, + "textarea": NormalizedType.STRING, + "phone": NormalizedType.STRING, + "email": NormalizedType.STRING, + "url": NormalizedType.STRING, + "picklist": NormalizedType.STRING, + "multipicklist": NormalizedType.STRING, + "combobox": NormalizedType.STRING, + "reference": NormalizedType.STRING, + "int": NormalizedType.INTEGER, + "double": NormalizedType.DECIMAL, + "currency": NormalizedType.DECIMAL, + "percent": NormalizedType.DECIMAL, + "boolean": NormalizedType.BOOLEAN, + "date": NormalizedType.DATE, + "datetime": NormalizedType.TIMESTAMP, + "time": NormalizedType.TIME, + "base64": NormalizedType.BINARY, + "location": NormalizedType.JSON, + "address": NormalizedType.JSON, + "encryptedstring": NormalizedType.STRING, +} + +# HubSpot type mappings +HUBSPOT_TYPE_MAP: dict[str, NormalizedType] = { + "string": NormalizedType.STRING, + "number": NormalizedType.DECIMAL, + "date": NormalizedType.DATE, + "datetime": NormalizedType.TIMESTAMP, + "enumeration": NormalizedType.STRING, + "bool": NormalizedType.BOOLEAN, + "phone_number": NormalizedType.STRING, +} + +# Parquet/Arrow type mappings (for file systems) +PARQUET_TYPE_MAP: dict[str, NormalizedType] = { + "utf8": NormalizedType.STRING, + "string": NormalizedType.STRING, + "large_string": NormalizedType.STRING, + "int8": NormalizedType.INTEGER, + "int16": NormalizedType.INTEGER, + "int32": NormalizedType.INTEGER, + "int64": NormalizedType.INTEGER, + "uint8": NormalizedType.INTEGER, + "uint16": NormalizedType.INTEGER, + "uint32": NormalizedType.INTEGER, + "uint64": NormalizedType.INTEGER, + "float": NormalizedType.FLOAT, + "float16": NormalizedType.FLOAT, + "float32": NormalizedType.FLOAT, + "double": NormalizedType.FLOAT, + "float64": NormalizedType.FLOAT, + "decimal": NormalizedType.DECIMAL, + "decimal128": NormalizedType.DECIMAL, + "decimal256": NormalizedType.DECIMAL, + "bool": NormalizedType.BOOLEAN, + "boolean": NormalizedType.BOOLEAN, + "date": NormalizedType.DATE, + "date32": NormalizedType.DATE, + "date64": NormalizedType.DATE, + "time": NormalizedType.TIME, + "time32": NormalizedType.TIME, + "time64": NormalizedType.TIME, + "timestamp": NormalizedType.TIMESTAMP, + "binary": NormalizedType.BINARY, + "large_binary": NormalizedType.BINARY, + "fixed_size_binary": NormalizedType.BINARY, + "list": NormalizedType.ARRAY, + "large_list": NormalizedType.ARRAY, + "fixed_size_list": NormalizedType.ARRAY, + "map": NormalizedType.MAP, + "struct": NormalizedType.STRUCT, + "dictionary": NormalizedType.STRING, + "null": NormalizedType.UNKNOWN, +} + +# Master mapping from source type to type map +SOURCE_TYPE_MAPS: dict[SourceType, dict[str, NormalizedType]] = { + SourceType.POSTGRESQL: POSTGRESQL_TYPE_MAP, + SourceType.MYSQL: MYSQL_TYPE_MAP, + SourceType.SNOWFLAKE: SNOWFLAKE_TYPE_MAP, + SourceType.BIGQUERY: BIGQUERY_TYPE_MAP, + SourceType.TRINO: TRINO_TYPE_MAP, + SourceType.REDSHIFT: POSTGRESQL_TYPE_MAP, # Redshift is PostgreSQL-based + SourceType.DUCKDB: DUCKDB_TYPE_MAP, + SourceType.MONGODB: MONGODB_TYPE_MAP, + SourceType.DYNAMODB: DYNAMODB_TYPE_MAP, + SourceType.CASSANDRA: POSTGRESQL_TYPE_MAP, # Similar enough + SourceType.SALESFORCE: SALESFORCE_TYPE_MAP, + SourceType.HUBSPOT: HUBSPOT_TYPE_MAP, + SourceType.STRIPE: HUBSPOT_TYPE_MAP, # Similar type system + SourceType.S3: PARQUET_TYPE_MAP, + SourceType.GCS: PARQUET_TYPE_MAP, + SourceType.HDFS: PARQUET_TYPE_MAP, + SourceType.LOCAL_FILE: PARQUET_TYPE_MAP, +} + + 
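+# For example, normalize_type() below resolves native types against these maps as:
+#   normalize_type("varchar(255)", SourceType.POSTGRESQL)  -> NormalizedType.STRING
+#   normalize_type("integer[]", SourceType.POSTGRESQL)     -> NormalizedType.ARRAY
+#   normalize_type("TIMESTAMP_NTZ", SourceType.SNOWFLAKE)  -> NormalizedType.TIMESTAMP
+#   normalize_type("tsvector", SourceType.POSTGRESQL)      -> NormalizedType.UNKNOWN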
+def normalize_type( + native_type: str, + source_type: SourceType, +) -> NormalizedType: + """Normalize a native type to the standard type system. + + Args: + native_type: The native type string from the data source. + source_type: The source type to use for mapping. + + Returns: + Normalized type enum value. + """ + if not native_type: + return NormalizedType.UNKNOWN + + # Get the type map for this source + type_map = SOURCE_TYPE_MAPS.get(source_type, {}) + + # Clean up the native type + clean_type = native_type.lower().strip() + + # Handle array types (e.g., "integer[]", "ARRAY") + if "[]" in clean_type or clean_type.startswith("array"): + return NormalizedType.ARRAY + + # Handle parameterized types (e.g., "varchar(255)", "decimal(10,2)") + base_type = re.sub(r"\(.*\)", "", clean_type).strip() + + # Try exact match first + if base_type in type_map: + return type_map[base_type] + + # Try partial match + for key, value in type_map.items(): + if key in base_type or base_type in key: + return value + + return NormalizedType.UNKNOWN + + +def get_type_map(source_type: SourceType) -> dict[str, NormalizedType]: + """Get the type mapping dictionary for a source type. + + Args: + source_type: The source type. + + Returns: + Dictionary mapping native types to normalized types. + """ + return SOURCE_TYPE_MAPS.get(source_type, {}) diff --git a/backend/src/dataing/adapters/datasource/types.py b/backend/src/dataing/adapters/datasource/types.py new file mode 100644 index 000000000..9e81e306b --- /dev/null +++ b/backend/src/dataing/adapters/datasource/types.py @@ -0,0 +1,365 @@ +"""Type definitions for the unified data source layer. + +This module defines all the data structures used across all adapters, +ensuring consistent JSON output regardless of the underlying source. 
+""" + +from __future__ import annotations + +from datetime import datetime +from enum import Enum +from typing import Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + + +class SourceType(str, Enum): + """Supported data source types.""" + + # SQL Databases + POSTGRESQL = "postgresql" + MYSQL = "mysql" + TRINO = "trino" + SNOWFLAKE = "snowflake" + BIGQUERY = "bigquery" + REDSHIFT = "redshift" + DUCKDB = "duckdb" + + # NoSQL Databases + MONGODB = "mongodb" + DYNAMODB = "dynamodb" + CASSANDRA = "cassandra" + + # APIs + SALESFORCE = "salesforce" + HUBSPOT = "hubspot" + STRIPE = "stripe" + + # File Systems + S3 = "s3" + GCS = "gcs" + HDFS = "hdfs" + LOCAL_FILE = "local_file" + + +class SourceCategory(str, Enum): + """Categories of data sources.""" + + DATABASE = "database" + API = "api" + FILESYSTEM = "filesystem" + + +class NormalizedType(str, Enum): + """Normalized type system that maps all source types.""" + + STRING = "string" + INTEGER = "integer" + FLOAT = "float" + DECIMAL = "decimal" + BOOLEAN = "boolean" + DATE = "date" + DATETIME = "datetime" + TIME = "time" + TIMESTAMP = "timestamp" + BINARY = "binary" + JSON = "json" + ARRAY = "array" + MAP = "map" + STRUCT = "struct" + UNKNOWN = "unknown" + + +class QueryLanguage(str, Enum): + """Query languages supported by adapters.""" + + SQL = "sql" + SOQL = "soql" # Salesforce Object Query Language + MQL = "mql" # MongoDB Query Language + SCAN_ONLY = "scan_only" # No query language, scan only + + +class ColumnStats(BaseModel): + """Statistics for a column.""" + + model_config = ConfigDict(frozen=True) + + null_count: int + null_rate: float + distinct_count: int | None = None + min_value: str | None = None + max_value: str | None = None + sample_values: list[str] = Field(default_factory=list) + + +class Column(BaseModel): + """Unified column representation.""" + + model_config = ConfigDict(frozen=True) + + name: str + data_type: NormalizedType + native_type: str + nullable: bool = True + is_primary_key: bool = False + is_partition_key: bool = False + description: str | None = None + default_value: str | None = None + stats: ColumnStats | None = None + + +class Table(BaseModel): + """Unified table representation.""" + + model_config = ConfigDict(frozen=True) + + name: str + table_type: Literal["table", "view", "external", "object", "collection", "file"] + native_type: str + native_path: str + columns: list[Column] + row_count: int | None = None + size_bytes: int | None = None + last_modified: datetime | None = None + description: str | None = None + + +class Schema(BaseModel): + """Schema within a catalog.""" + + model_config = ConfigDict(frozen=True) + + name: str + tables: list[Table] + + +class Catalog(BaseModel): + """Catalog containing schemas.""" + + model_config = ConfigDict(frozen=True) + + name: str + schemas: list[Schema] + + +class SchemaResponse(BaseModel): + """Unified schema response from any adapter.""" + + model_config = ConfigDict(frozen=True) + + source_id: str + source_type: SourceType + source_category: SourceCategory + fetched_at: datetime + catalogs: list[Catalog] + + def get_all_tables(self) -> list[Table]: + """Get all tables from the nested catalog/schema structure.""" + tables = [] + for catalog in self.catalogs: + for schema in catalog.schemas: + tables.extend(schema.tables) + return tables + + def table_count(self) -> int: + """Count total tables across all catalogs and schemas.""" + return sum(len(schema.tables) for catalog in self.catalogs for schema in catalog.schemas) + + def is_empty(self) -> 
bool: + """Check if schema has no tables. Used for fail-fast validation.""" + return self.table_count() == 0 + + def to_prompt_string(self, max_tables: int = 10, max_columns: int = 15) -> str: + """Format schema for LLM prompt. + + Args: + max_tables: Maximum tables to include. + max_columns: Maximum columns per table. + + Returns: + Formatted string for LLM consumption. + """ + tables = self.get_all_tables() + if not tables: + return "No tables available." + + lines = ["AVAILABLE TABLES AND COLUMNS (USE ONLY THESE):"] + + for table in tables[:max_tables]: + lines.append(f"\n{table.native_path}") + for col in table.columns[:max_columns]: + lines.append(f" - {col.name} ({col.data_type.value})") + if len(table.columns) > max_columns: + lines.append(f" ... and {len(table.columns) - max_columns} more columns") + + if len(tables) > max_tables: + lines.append(f"\n... and {len(tables) - max_tables} more tables") + + lines.append("\nCRITICAL: Use ONLY the tables and columns listed above.") + lines.append("DO NOT invent tables or columns.") + + return "\n".join(lines) + + def get_table_names(self) -> list[str]: + """Get list of all table names for LLM context.""" + return [table.native_path for table in self.get_all_tables()] + + +class SchemaFilter(BaseModel): + """Filter for schema discovery.""" + + model_config = ConfigDict(frozen=True) + + table_pattern: str | None = None + schema_pattern: str | None = None + catalog_pattern: str | None = None + include_views: bool = True + max_tables: int = 1000 + + +class QueryResult(BaseModel): + """Result of executing a query.""" + + model_config = ConfigDict(frozen=True) + + columns: list[dict[str, Any]] # [{"name": "col", "data_type": "string"}] + rows: list[dict[str, Any]] + row_count: int + truncated: bool = False + execution_time_ms: int | None = None + + def to_summary(self, max_rows: int = 5) -> str: + """Create a summary of the query results for LLM interpretation. + + Args: + max_rows: Maximum number of rows to include in the summary. + + Returns: + Formatted summary string. + """ + if not self.rows: + return "No rows returned" + + col_names = [col.get("name", "?") for col in self.columns] + lines = [f"Columns: {', '.join(col_names)}"] + lines.append(f"Total rows: {self.row_count}") + if self.truncated: + lines.append("(Results truncated)") + lines.append("\nSample rows:") + + for row in self.rows[:max_rows]: + row_str = ", ".join(f"{k}={v}" for k, v in row.items()) + lines.append(f" {row_str}") + + if len(self.rows) > max_rows: + lines.append(f" ... 
and {len(self.rows) - max_rows} more rows") + + return "\n".join(lines) + + +class ConnectionTestResult(BaseModel): + """Result of testing a connection.""" + + model_config = ConfigDict(frozen=True) + + success: bool + latency_ms: int | None = None + server_version: str | None = None + message: str + error_code: str | None = None + + +class AdapterCapabilities(BaseModel): + """Capabilities of an adapter.""" + + model_config = ConfigDict(frozen=True) + + supports_sql: bool = False + supports_sampling: bool = False + supports_row_count: bool = False + supports_column_stats: bool = False + supports_preview: bool = False + supports_write: bool = False + rate_limit_requests_per_minute: int | None = None + max_concurrent_queries: int = 1 + query_language: QueryLanguage = QueryLanguage.SCAN_ONLY + + +class FieldGroup(BaseModel): + """Group of configuration fields.""" + + model_config = ConfigDict(frozen=True) + + id: str + label: str + description: str | None = None + collapsed_by_default: bool = False + + +class ConfigField(BaseModel): + """Configuration field for connection forms.""" + + model_config = ConfigDict(frozen=True) + + name: str + label: str + type: Literal["string", "integer", "boolean", "enum", "secret", "file", "json"] + required: bool + group: str + default_value: Any | None = None + placeholder: str | None = None + min_value: int | None = None + max_value: int | None = None + pattern: str | None = None + options: list[dict[str, str]] | None = None + show_if: dict[str, Any] | None = None + description: str | None = None + help_url: str | None = None + + +class ConfigSchema(BaseModel): + """Configuration schema for an adapter.""" + + model_config = ConfigDict(frozen=True) + + fields: list[ConfigField] + field_groups: list[FieldGroup] + + +class SourceTypeDefinition(BaseModel): + """Complete definition of a source type.""" + + model_config = ConfigDict(frozen=True) + + type: SourceType + display_name: str + category: SourceCategory + icon: str + description: str + capabilities: AdapterCapabilities + config_schema: ConfigSchema + + +class DataSourceStats(BaseModel): + """Statistics for a data source.""" + + model_config = ConfigDict(frozen=True) + + table_count: int + total_row_count: int | None = None + total_size_bytes: int | None = None + + +class DataSourceResponse(BaseModel): + """Response for a data source.""" + + model_config = ConfigDict(frozen=True) + + id: str + name: str + source_type: SourceType + source_category: SourceCategory + status: Literal["connected", "disconnected", "error"] + created_at: datetime + last_synced_at: datetime | None = None + stats: DataSourceStats | None = None diff --git a/backend/src/dataing/adapters/db/__init__.py b/backend/src/dataing/adapters/db/__init__.py index c0fab5262..798933587 100644 --- a/backend/src/dataing/adapters/db/__init__.py +++ b/backend/src/dataing/adapters/db/__init__.py @@ -1,8 +1,14 @@ -"""Database adapters implementing the DatabaseAdapter protocol.""" +"""Application database adapters. -from .duckdb import DuckDBAdapter +This package contains adapters for the application's own databases, +NOT data source adapters for tenant data. For data source adapters, +see dataing.adapters.datasource. 
+ +Contents: +- app_db: Application metadata database (tenants, data sources, API keys) +""" + +from .app_db import AppDatabase from .mock import MockDatabaseAdapter -from .postgres import PostgresAdapter -from .trino import TrinoAdapter -__all__ = ["PostgresAdapter", "TrinoAdapter", "MockDatabaseAdapter", "DuckDBAdapter"] +__all__ = ["AppDatabase", "MockDatabaseAdapter"] diff --git a/backend/src/dataing/adapters/db/app_db.py b/backend/src/dataing/adapters/db/app_db.py index 3ac30d08b..8a2a7e441 100644 --- a/backend/src/dataing/adapters/db/app_db.py +++ b/backend/src/dataing/adapters/db/app_db.py @@ -1,5 +1,7 @@ """Application database adapter using asyncpg.""" +from __future__ import annotations + import json from collections.abc import AsyncIterator from contextlib import asynccontextmanager @@ -173,6 +175,7 @@ async def list_data_sources(self, tenant_id: UUID) -> list[dict[str, Any]]: """List all data sources for a tenant.""" return await self.fetch_all( """SELECT id, name, type, is_default, is_active, + connection_config_encrypted, last_health_check_at, last_health_check_status, created_at FROM data_sources WHERE tenant_id = $1 AND is_active = true diff --git a/backend/src/dataing/adapters/db/duckdb.py b/backend/src/dataing/adapters/db/duckdb.py deleted file mode 100644 index e76065416..000000000 --- a/backend/src/dataing/adapters/db/duckdb.py +++ /dev/null @@ -1,276 +0,0 @@ -"""DuckDB implementation of DatabaseAdapter. - -Supports two modes: -1. Parquet directory: Auto-registers all .parquet files as views -2. DuckDB file: Opens existing .duckdb database - -Always read-only for safety. -""" - -from __future__ import annotations - -import asyncio -from pathlib import Path -from typing import TYPE_CHECKING - -import duckdb - -from dataing.core.domain_types import QueryResult, SchemaContext, TableSchema - -if TYPE_CHECKING: - pass - - -class DuckDBAdapter: - """DuckDB implementation of DatabaseAdapter. - - Uses DuckDB for fast in-memory analytics, particularly useful - for demo scenarios with parquet files. - - Attributes: - path: Path to .duckdb file or directory of parquet files. - read_only: Always True for safety. - """ - - def __init__(self, path: str, read_only: bool = True) -> None: - """Initialize the DuckDB adapter. - - Args: - path: Path to .duckdb file or directory containing parquet files. - read_only: Whether to open in read-only mode (always True for safety). - """ - self.path = Path(path) - self.read_only = True # Always read-only for safety - self._conn: duckdb.DuckDBPyConnection | None = None - self._is_parquet_dir = False - - async def connect(self) -> None: - """Establish DuckDB connection. - - If path is a directory, creates an in-memory database and - registers all .parquet files as views. - - Should be called during application startup. 
- """ - # Run in thread pool since DuckDB operations are synchronous - loop = asyncio.get_event_loop() - await loop.run_in_executor(None, self._connect_sync) - - def _connect_sync(self) -> None: - """Synchronous connection logic.""" - if self.path.is_dir(): - # Parquet directory mode - create in-memory database - self._is_parquet_dir = True - self._conn = duckdb.connect(":memory:") - - # Register each .parquet file as a table - for parquet_file in self.path.glob("*.parquet"): - table_name = parquet_file.stem # filename without extension - self._conn.execute( - f"CREATE TABLE {table_name} AS SELECT * FROM read_parquet('{parquet_file}')" - ) - else: - # DuckDB file mode - open in read-only mode - self._conn = duckdb.connect(str(self.path), read_only=True) - - async def close(self) -> None: - """Close DuckDB connection. - - Should be called during application shutdown. - """ - if self._conn: - loop = asyncio.get_event_loop() - await loop.run_in_executor(None, self._conn.close) - self._conn = None - - async def execute_query(self, sql: str, timeout_seconds: int = 30) -> QueryResult: - """Execute a read-only SQL query. - - Args: - sql: The SQL query to execute. - timeout_seconds: Maximum time to wait for query completion. - - Returns: - QueryResult with columns, rows, and row count. - - Raises: - RuntimeError: If connection not initialized. - asyncio.TimeoutError: If query exceeds timeout. - """ - if not self._conn: - raise RuntimeError("Connection not initialized. Call connect() first.") - - loop = asyncio.get_event_loop() - - try: - result = await asyncio.wait_for( - loop.run_in_executor(None, self._execute_query_sync, sql), - timeout=timeout_seconds, - ) - return result - except TimeoutError as err: - raise TimeoutError(f"Query timed out after {timeout_seconds} seconds") from err - - def _execute_query_sync(self, sql: str) -> QueryResult: - """Synchronous query execution.""" - if not self._conn: - raise RuntimeError("Connection not initialized") - - result = self._conn.execute(sql) - rows = result.fetchall() - columns = [desc[0] for desc in result.description] if result.description else [] - - if not rows: - return QueryResult( - columns=tuple(columns), - rows=(), - row_count=0, - ) - - # Convert rows to list of dicts - result_rows = tuple(dict(zip(columns, row, strict=False)) for row in rows) - - return QueryResult( - columns=tuple(columns), - rows=result_rows, - row_count=len(rows), - ) - - async def get_schema(self, table_pattern: str | None = None) -> SchemaContext: - """Discover available tables and columns. - - Args: - table_pattern: Optional pattern to filter tables. - - Returns: - SchemaContext with all discovered tables. - - Raises: - RuntimeError: If connection not initialized. - """ - if not self._conn: - raise RuntimeError("Connection not initialized. 
Call connect() first.") - - loop = asyncio.get_event_loop() - return await loop.run_in_executor(None, self._get_schema_sync, table_pattern) - - def _get_schema_sync(self, table_pattern: str | None = None) -> SchemaContext: - """Synchronous schema discovery.""" - if not self._conn: - raise RuntimeError("Connection not initialized") - - # Get all tables - tables_result = self._conn.execute("SHOW TABLES").fetchall() - - tables = [] - for (table_name,) in tables_result: - # Apply filter if provided - if table_pattern and table_pattern.lower() not in table_name.lower(): - continue - - # Get column info for each table - columns_result = self._conn.execute(f"DESCRIBE {table_name}").fetchall() - - columns = [] - column_types = {} - for col_info in columns_result: - col_name = col_info[0] - col_type = col_info[1] - columns.append(col_name) - column_types[col_name] = col_type - - tables.append( - TableSchema( - table_name=table_name, - columns=tuple(columns), - column_types=column_types, - ) - ) - - return SchemaContext(tables=tuple(tables)) - - async def get_column_statistics( - self, table_name: str, column_name: str - ) -> dict[str, float | int | str | None]: - """Get statistics for a specific column. - - Args: - table_name: Name of the table. - column_name: Name of the column. - - Returns: - Dictionary with statistics (count, null_count, null_rate, - distinct_count, min, max, avg for numerics). - """ - if not self._conn: - raise RuntimeError("Connection not initialized. Call connect() first.") - - loop = asyncio.get_event_loop() - return await loop.run_in_executor( - None, self._get_column_statistics_sync, table_name, column_name - ) - - def _get_column_statistics_sync( - self, table_name: str, column_name: str - ) -> dict[str, float | int | str | None]: - """Synchronous column statistics.""" - if not self._conn: - raise RuntimeError("Connection not initialized") - - stats: dict[str, float | int | str | None] = {} - - # Basic counts - count_result = self._conn.execute( - f""" - SELECT - COUNT(*) as total_count, - COUNT({column_name}) as non_null_count, - COUNT(*) - COUNT({column_name}) as null_count, - ROUND(100.0 * (COUNT(*) - COUNT({column_name})) / COUNT(*), 2) as null_rate, - APPROX_COUNT_DISTINCT({column_name}) as distinct_count - FROM {table_name} - """ - ).fetchone() - - if count_result: - stats["total_count"] = count_result[0] - stats["non_null_count"] = count_result[1] - stats["null_count"] = count_result[2] - stats["null_rate"] = count_result[3] - stats["distinct_count"] = count_result[4] - - # Try to get min/max/avg for numeric columns - try: - numeric_result = self._conn.execute( - f""" - SELECT - MIN({column_name})::VARCHAR as min_val, - MAX({column_name})::VARCHAR as max_val, - AVG(TRY_CAST({column_name} AS DOUBLE)) as avg_val - FROM {table_name} - """ - ).fetchone() - - if numeric_result: - stats["min"] = numeric_result[0] - stats["max"] = numeric_result[1] - stats["avg"] = numeric_result[2] - except Exception: - # Column might not support numeric operations - pass - - return stats - - async def get_table_row_count(self, table_name: str) -> int: - """Get approximate row count for a table. - - Args: - table_name: Name of the table. - - Returns: - Approximate row count. 
- """ - result = await self.execute_query(f"SELECT COUNT(*) FROM {table_name}") - if result.rows: - return list(result.rows[0].values())[0] # type: ignore - return 0 diff --git a/backend/src/dataing/adapters/db/mock.py b/backend/src/dataing/adapters/db/mock.py index 0f091ca91..3999fb33d 100644 --- a/backend/src/dataing/adapters/db/mock.py +++ b/backend/src/dataing/adapters/db/mock.py @@ -2,7 +2,19 @@ from __future__ import annotations -from dataing.core.domain_types import QueryResult, SchemaContext, TableSchema +from datetime import UTC, datetime + +from dataing.adapters.datasource.types import ( + Catalog, + Column, + NormalizedType, + QueryResult, + Schema, + SchemaResponse, + SourceCategory, + SourceType, + Table, +) class MockDatabaseAdapter: @@ -21,7 +33,7 @@ class MockDatabaseAdapter: def __init__( self, responses: dict[str, QueryResult] | None = None, - schema: SchemaContext | None = None, + schema: SchemaResponse | None = None, ) -> None: """Initialize the mock adapter. @@ -33,42 +45,114 @@ def __init__( self._mock_schema = schema or self._default_schema() self.executed_queries: list[str] = [] - def _default_schema(self) -> SchemaContext: + def _default_schema(self) -> SchemaResponse: """Create a default mock schema for testing.""" - return SchemaContext( - tables=( - TableSchema( - table_name="public.users", - columns=("id", "email", "created_at", "updated_at"), - column_types={ - "id": "integer", - "email": "varchar", - "created_at": "timestamp", - "updated_at": "timestamp", - }, - ), - TableSchema( - table_name="public.orders", - columns=("id", "user_id", "total", "status", "created_at"), - column_types={ - "id": "integer", - "user_id": "integer", - "total": "numeric", - "status": "varchar", - "created_at": "timestamp", - }, - ), - TableSchema( - table_name="public.products", - columns=("id", "name", "price", "category"), - column_types={ - "id": "integer", - "name": "varchar", - "price": "numeric", - "category": "varchar", - }, - ), - ) + return SchemaResponse( + source_id="mock", + source_type=SourceType.POSTGRESQL, + source_category=SourceCategory.DATABASE, + fetched_at=datetime.now(UTC), + catalogs=[ + Catalog( + name="main", + schemas=[ + Schema( + name="public", + tables=[ + Table( + name="users", + table_type="table", + native_type="table", + native_path="public.users", + columns=[ + Column( + name="id", + data_type=NormalizedType.INTEGER, + native_type="integer", + ), + Column( + name="email", + data_type=NormalizedType.STRING, + native_type="varchar", + ), + Column( + name="created_at", + data_type=NormalizedType.TIMESTAMP, + native_type="timestamp", + ), + Column( + name="updated_at", + data_type=NormalizedType.TIMESTAMP, + native_type="timestamp", + ), + ], + ), + Table( + name="orders", + table_type="table", + native_type="table", + native_path="public.orders", + columns=[ + Column( + name="id", + data_type=NormalizedType.INTEGER, + native_type="integer", + ), + Column( + name="user_id", + data_type=NormalizedType.INTEGER, + native_type="integer", + ), + Column( + name="total", + data_type=NormalizedType.DECIMAL, + native_type="numeric", + ), + Column( + name="status", + data_type=NormalizedType.STRING, + native_type="varchar", + ), + Column( + name="created_at", + data_type=NormalizedType.TIMESTAMP, + native_type="timestamp", + ), + ], + ), + Table( + name="products", + table_type="table", + native_type="table", + native_path="public.products", + columns=[ + Column( + name="id", + data_type=NormalizedType.INTEGER, + native_type="integer", + ), + Column( + 
name="name", + data_type=NormalizedType.STRING, + native_type="varchar", + ), + Column( + name="price", + data_type=NormalizedType.DECIMAL, + native_type="numeric", + ), + Column( + name="category", + data_type=NormalizedType.STRING, + native_type="varchar", + ), + ], + ), + ], + ) + ], + ) + ], ) async def connect(self) -> None: @@ -100,22 +184,38 @@ async def execute_query(self, sql: str, timeout_seconds: int = 30) -> QueryResul return response # Default empty response - return QueryResult(columns=(), rows=(), row_count=0) + return QueryResult(columns=[], rows=[], row_count=0) - async def get_schema(self, table_pattern: str | None = None) -> SchemaContext: + async def get_schema(self, table_pattern: str | None = None) -> SchemaResponse: """Return mock schema. Args: table_pattern: Optional filter pattern. Returns: - Mock SchemaContext. + Mock SchemaResponse. """ if table_pattern: - filtered_tables = tuple( - t for t in self._mock_schema.tables if table_pattern.lower() in t.table_name.lower() + # Filter tables by pattern + filtered_catalogs = [] + for catalog in self._mock_schema.catalogs: + filtered_schemas = [] + for schema in catalog.schemas: + filtered_tables = [ + t for t in schema.tables if table_pattern.lower() in t.native_path.lower() + ] + if filtered_tables: + filtered_schemas.append(Schema(name=schema.name, tables=filtered_tables)) + if filtered_schemas: + filtered_catalogs.append(Catalog(name=catalog.name, schemas=filtered_schemas)) + + return SchemaResponse( + source_id=self._mock_schema.source_id, + source_type=self._mock_schema.source_type, + source_category=self._mock_schema.source_category, + fetched_at=self._mock_schema.fetched_at, + catalogs=filtered_catalogs, ) - return SchemaContext(tables=filtered_tables) return self._mock_schema def add_response(self, pattern: str, response: QueryResult) -> None: @@ -139,8 +239,8 @@ def add_row_count_response( count: Row count to return. """ self.responses[pattern] = QueryResult( - columns=("count",), - rows=({"count": count},), + columns=[{"name": "count", "data_type": "integer"}], + rows=[{"count": count}], row_count=1, ) diff --git a/backend/src/dataing/adapters/db/postgres.py b/backend/src/dataing/adapters/db/postgres.py deleted file mode 100644 index 2fcf7ea3c..000000000 --- a/backend/src/dataing/adapters/db/postgres.py +++ /dev/null @@ -1,142 +0,0 @@ -"""PostgreSQL implementation of DatabaseAdapter.""" - -from __future__ import annotations - -import asyncio -from typing import TYPE_CHECKING, Any - -import asyncpg - -from dataing.core.domain_types import QueryResult, SchemaContext, TableSchema - -if TYPE_CHECKING: - pass - - -class PostgresAdapter: - """PostgreSQL implementation of DatabaseAdapter. - - Uses asyncpg for async PostgreSQL connections with - connection pooling for efficiency. - - Attributes: - connection_string: PostgreSQL connection URL. - """ - - def __init__(self, connection_string: str) -> None: - """Initialize the Postgres adapter. - - Args: - connection_string: PostgreSQL connection URL. - """ - self.connection_string = connection_string - self._pool: asyncpg.Pool | None = None - - async def connect(self) -> None: - """Establish connection pool. - - Should be called during application startup. - """ - self._pool = await asyncpg.create_pool(self.connection_string) - - async def close(self) -> None: - """Close connection pool. - - Should be called during application shutdown. 
- """ - if self._pool: - await self._pool.close() - self._pool = None - - async def execute_query(self, sql: str, timeout_seconds: int = 30) -> QueryResult: - """Execute a read-only SQL query. - - Args: - sql: The SQL query to execute. - timeout_seconds: Maximum time to wait for query completion. - - Returns: - QueryResult with columns, rows, and row count. - - Raises: - RuntimeError: If connection pool not initialized. - asyncio.TimeoutError: If query exceeds timeout. - """ - if not self._pool: - raise RuntimeError("Connection pool not initialized. Call connect() first.") - - async with self._pool.acquire() as conn: - rows = await asyncio.wait_for( - conn.fetch(sql), - timeout=timeout_seconds, - ) - - if not rows: - return QueryResult( - columns=(), - rows=(), - row_count=0, - ) - - columns = tuple(rows[0].keys()) - result_rows = tuple(dict(r) for r in rows) - - return QueryResult( - columns=columns, - rows=result_rows, - row_count=len(rows), - ) - - async def get_schema(self, table_pattern: str | None = None) -> SchemaContext: - """Discover available tables and columns. - - Args: - table_pattern: Optional pattern to filter tables. - - Returns: - SchemaContext with all discovered tables. - - Raises: - RuntimeError: If connection pool not initialized. - """ - if not self._pool: - raise RuntimeError("Connection pool not initialized. Call connect() first.") - - query = """ - SELECT table_schema, table_name, column_name, data_type - FROM information_schema.columns - WHERE table_schema NOT IN ('pg_catalog', 'information_schema') - ORDER BY table_schema, table_name, ordinal_position - """ - - async with self._pool.acquire() as conn: - rows = await conn.fetch(query) - - # Group by table - use dict[str, Any] for mixed value types - tables_dict: dict[str, dict[str, Any]] = {} - for row in rows: - full_name = f"{row['table_schema']}.{row['table_name']}" - - # Apply filter if provided - if table_pattern and table_pattern.lower() not in full_name.lower(): - continue - - if full_name not in tables_dict: - tables_dict[full_name] = { - "columns": [], - "column_types": {}, - } - tables_dict[full_name]["columns"].append(row["column_name"]) - tables_dict[full_name]["column_types"][row["column_name"]] = row["data_type"] - - # Convert to TableSchema objects - tables = tuple( - TableSchema( - table_name=name, - columns=tuple(data["columns"]), - column_types=dict(data["column_types"]), - ) - for name, data in tables_dict.items() - ) - - return SchemaContext(tables=tables) diff --git a/backend/src/dataing/adapters/db/trino.py b/backend/src/dataing/adapters/db/trino.py deleted file mode 100644 index e70a4fac7..000000000 --- a/backend/src/dataing/adapters/db/trino.py +++ /dev/null @@ -1,183 +0,0 @@ -"""Trino implementation of DatabaseAdapter.""" - -from __future__ import annotations - -import asyncio -from concurrent.futures import ThreadPoolExecutor -from typing import Any - -from trino.dbapi import connect - -from dataing.core.domain_types import QueryResult, SchemaContext, TableSchema - - -class TrinoAdapter: - """Trino implementation of DatabaseAdapter. - - Trino's Python client is synchronous, so we wrap - calls in an executor for async compatibility. - - Attributes: - host: Trino server host. - port: Trino server port. - catalog: Trino catalog to use. - schema: Trino schema to use. - """ - - def __init__( - self, - host: str, - port: int, - catalog: str, - schema: str, - user: str = "dataing", - ) -> None: - """Initialize the Trino adapter. - - Args: - host: Trino server host. - port: Trino server port. 
- catalog: Trino catalog to use. - schema: Trino schema to use. - user: User for authentication. - """ - self.host = host - self.port = port - self.catalog = catalog - self.schema = schema - self.user = user - self._executor = ThreadPoolExecutor(max_workers=4) - - async def connect(self) -> None: - """Initialize connection (no-op for Trino as connections are per-query).""" - pass - - async def close(self) -> None: - """Cleanup executor.""" - self._executor.shutdown(wait=True) - - async def execute_query(self, sql: str, timeout_seconds: int = 30) -> QueryResult: - """Execute a read-only SQL query. - - Args: - sql: The SQL query to execute. - timeout_seconds: Maximum time to wait for query completion. - - Returns: - QueryResult with columns, rows, and row count. - - Raises: - asyncio.TimeoutError: If query exceeds timeout. - """ - loop = asyncio.get_event_loop() - return await asyncio.wait_for( - loop.run_in_executor(self._executor, self._execute_sync, sql), - timeout=timeout_seconds, - ) - - def _execute_sync(self, sql: str) -> QueryResult: - """Execute query synchronously. - - Args: - sql: The SQL query to execute. - - Returns: - QueryResult with columns, rows, and row count. - """ - conn = connect( - host=self.host, - port=self.port, - catalog=self.catalog, - schema=self.schema, - user=self.user, - ) - try: - cursor = conn.cursor() - cursor.execute(sql) - rows = cursor.fetchall() - columns = tuple(desc[0] for desc in cursor.description) if cursor.description else () - - result_rows = tuple(dict(zip(columns, row, strict=False)) for row in rows) - - return QueryResult( - columns=columns, - rows=result_rows, - row_count=len(rows), - ) - finally: - conn.close() - - async def get_schema(self, table_pattern: str | None = None) -> SchemaContext: - """Discover available tables and columns. - - Args: - table_pattern: Optional pattern to filter tables. - - Returns: - SchemaContext with all discovered tables. - """ - query = f""" - SELECT table_schema, table_name, column_name, data_type - FROM {self.catalog}.information_schema.columns - WHERE table_schema = '{self.schema}' - ORDER BY table_name, ordinal_position - """ - - loop = asyncio.get_event_loop() - rows: list[dict[str, Any]] = await loop.run_in_executor( - self._executor, self._fetch_schema_sync, query - ) - - # Group by table - use TypedDict-like structure - tables_dict: dict[str, dict[str, Any]] = {} - for row in rows: - full_name = f"{row['table_schema']}.{row['table_name']}" - - # Apply filter if provided - if table_pattern and table_pattern.lower() not in full_name.lower(): - continue - - if full_name not in tables_dict: - tables_dict[full_name] = { - "columns": [], - "column_types": {}, - } - tables_dict[full_name]["columns"].append(row["column_name"]) - tables_dict[full_name]["column_types"][row["column_name"]] = row["data_type"] - - # Convert to TableSchema objects - tables = tuple( - TableSchema( - table_name=name, - columns=tuple(data["columns"]), - column_types=dict(data["column_types"]), - ) - for name, data in tables_dict.items() - ) - - return SchemaContext(tables=tables) - - def _fetch_schema_sync(self, query: str) -> list[dict[str, Any]]: - """Fetch schema information synchronously. - - Args: - query: Schema query to execute. - - Returns: - List of row dictionaries. 
- """ - conn = connect( - host=self.host, - port=self.port, - catalog=self.catalog, - schema=self.schema, - user=self.user, - ) - try: - cursor = conn.cursor() - cursor.execute(query) - rows = cursor.fetchall() - columns = [desc[0] for desc in cursor.description] if cursor.description else [] - return [dict(zip(columns, row, strict=False)) for row in rows] - finally: - conn.close() diff --git a/backend/src/dataing/adapters/llm/client.py b/backend/src/dataing/adapters/llm/client.py index 8b5a6d7ad..ab23e4dac 100644 --- a/backend/src/dataing/adapters/llm/client.py +++ b/backend/src/dataing/adapters/llm/client.py @@ -6,11 +6,12 @@ import json import re import uuid -from typing import TYPE_CHECKING, Any, cast +from typing import Any, cast import anthropic from anthropic.types import MessageParam +from dataing.adapters.datasource.types import QueryResult, SchemaResponse from dataing.core.domain_types import ( AnomalyAlert, Evidence, @@ -18,16 +19,11 @@ Hypothesis, HypothesisCategory, InvestigationContext, - QueryResult, - SchemaContext, ) from dataing.core.exceptions import LLMError from .prompt_manager import PromptManager -if TYPE_CHECKING: - pass - class AnthropicClient: """Anthropic Claude implementation of LLMClient. @@ -92,7 +88,7 @@ async def generate_hypotheses( async def generate_query( self, hypothesis: Hypothesis, - schema: SchemaContext, + schema: SchemaResponse, previous_error: str | None = None, ) -> str: """Generate SQL query to test a hypothesis. @@ -115,7 +111,7 @@ async def generate_query( template, hypothesis=hypothesis, schema_context=schema.to_prompt_string(), - available_tables=[t.table_name for t in schema.tables], + available_tables=schema.get_table_names(), previous_error=previous_error, previous_query=hypothesis.suggested_query if previous_error else None, error_message=previous_error, diff --git a/backend/src/dataing/adapters/notifications/email.py b/backend/src/dataing/adapters/notifications/email.py index dad6b3d69..ad123f82d 100644 --- a/backend/src/dataing/adapters/notifications/email.py +++ b/backend/src/dataing/adapters/notifications/email.py @@ -19,8 +19,8 @@ class EmailConfig: smtp_port: int = 587 smtp_user: str | None = None smtp_password: str | None = None - from_email: str = "datadr@example.com" - from_name: str = "DataDr" + from_email: str = "dataing@example.com" + from_name: str = "Dataing" use_tls: bool = True @@ -123,7 +123,7 @@ def send_investigation_completed(

- This email was sent by DataDr. Please do not reply to this email.
+ This email was sent by Dataing. Please do not reply to this email.

@@ -141,7 +141,7 @@ def send_investigation_completed(
 {summary}
 ---
-This email was sent by DataDr. Please do not reply to this email.
+This email was sent by Dataing. Please do not reply to this email.
 """
 return self.send(to_emails, subject, body_html, body_text)

@@ -179,7 +179,7 @@ def send_approval_required(

- This email was sent by DataDr. Please do not reply to this email.
+ This email was sent by Dataing. Please do not reply to this email.

@@ -195,7 +195,7 @@ def send_approval_required( Please review and approve at: {approval_url} --- -This email was sent by DataDr. Please do not reply to this email. +This email was sent by Dataing. Please do not reply to this email. """ return self.send(to_emails, subject, body_html, body_text) diff --git a/backend/src/dataing/core/__init__.py b/backend/src/dataing/core/__init__.py index af8ce7b2f..ad4280bbf 100644 --- a/backend/src/dataing/core/__init__.py +++ b/backend/src/dataing/core/__init__.py @@ -7,9 +7,7 @@ Hypothesis, HypothesisCategory, InvestigationContext, - QueryResult, - SchemaContext, - TableSchema, + LineageContext, ) from .exceptions import ( CircuitBreakerTripped, @@ -31,9 +29,7 @@ "Hypothesis", "HypothesisCategory", "InvestigationContext", - "QueryResult", - "SchemaContext", - "TableSchema", + "LineageContext", # Exceptions "DataingError", "SchemaDiscoveryError", diff --git a/backend/src/dataing/core/domain_types.py b/backend/src/dataing/core/domain_types.py index faf5560eb..ce47d2f9f 100644 --- a/backend/src/dataing/core/domain_types.py +++ b/backend/src/dataing/core/domain_types.py @@ -7,13 +7,16 @@ from __future__ import annotations -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import datetime from enum import Enum -from typing import Any +from typing import TYPE_CHECKING, Any from pydantic import BaseModel, ConfigDict +if TYPE_CHECKING: + from dataing.adapters.datasource.types import SchemaResponse + class AnomalyAlert(BaseModel): """Input: The anomaly that triggered the investigation. @@ -119,71 +122,6 @@ class Finding(BaseModel): duration_seconds: float -@dataclass(frozen=True) -class TableSchema: - """Schema information for a single table. - - Attributes: - table_name: Fully qualified table name (schema.table). - columns: List of column names. - column_types: Mapping of column names to data types. - """ - - table_name: str - columns: tuple[str, ...] - column_types: dict[str, str] = field(default_factory=dict) - - -@dataclass(frozen=True) -class SchemaContext: - """Container for discovered database schema. - - Attributes: - tables: List of discovered tables with their schemas. - """ - - tables: tuple[TableSchema, ...] - - def get_table(self, name: str) -> TableSchema | None: - """Get table by name (case-insensitive). - - Args: - name: Table name to look up. - - Returns: - TableSchema if found, None otherwise. - """ - name_lower = name.lower() - for table in self.tables: - if table.table_name.lower() == name_lower: - return table - return None - - def to_prompt_string(self) -> str: - """Format schema for LLM prompt. - - Returns: - Formatted string representation of the schema. - """ - lines = ["AVAILABLE TABLES AND COLUMNS (USE ONLY THESE):"] - - for table in self.tables[:10]: - lines.append(f"\n{table.table_name}") - for col in table.columns[:15]: - col_type = table.column_types.get(col, "") - if col_type: - lines.append(f" - {col} ({col_type})") - else: - lines.append(f" - {col}") - if len(table.columns) > 15: - lines.append(f" ... and {len(table.columns) - 15} more columns") - - lines.append("\nCRITICAL: Use ONLY the tables and columns listed above.") - lines.append("DO NOT invent tables or columns.") - - return "\n".join(lines) - - @dataclass(frozen=True) class LineageContext: """Upstream and downstream dependencies for a dataset. @@ -224,54 +162,14 @@ class InvestigationContext: """Combined context for an investigation. Attributes: - schema: Database schema context. 
+ schema: Database schema from the unified datasource layer. lineage: Optional lineage context. """ - schema: SchemaContext + schema: SchemaResponse lineage: LineageContext | None = None -@dataclass(frozen=True) -class QueryResult: - """Result of executing a SQL query. - - Attributes: - columns: List of column names in the result. - rows: List of row dictionaries. - row_count: Total number of rows returned. - """ - - columns: tuple[str, ...] - rows: tuple[dict[str, str | int | float | bool | None], ...] - row_count: int - - def to_summary(self, max_rows: int = 5) -> str: - """Create a summary of the query results. - - Args: - max_rows: Maximum number of rows to include. - - Returns: - Formatted summary string. - """ - if not self.rows: - return "No rows returned" - - lines = [f"Columns: {', '.join(self.columns)}"] - lines.append(f"Total rows: {self.row_count}") - lines.append("\nSample rows:") - - for row in self.rows[:max_rows]: - row_str = ", ".join(f"{k}={v}" for k, v in row.items()) - lines.append(f" {row_str}") - - if self.row_count > max_rows: - lines.append(f" ... and {self.row_count - max_rows} more rows") - - return "\n".join(lines) - - class ApprovalRequestType(str, Enum): """Types of approval requests.""" diff --git a/backend/src/dataing/core/interfaces.py b/backend/src/dataing/core/interfaces.py index 52d6ac0c8..7f325e015 100644 --- a/backend/src/dataing/core/interfaces.py +++ b/backend/src/dataing/core/interfaces.py @@ -12,14 +12,14 @@ from typing import TYPE_CHECKING, Protocol, runtime_checkable if TYPE_CHECKING: + from dataing.adapters.datasource.types import QueryResult, SchemaResponse + from .domain_types import ( AnomalyAlert, Evidence, Finding, Hypothesis, InvestigationContext, - QueryResult, - SchemaContext, ) @@ -50,14 +50,14 @@ async def execute_query(self, sql: str, timeout_seconds: int = 30) -> QueryResul """ ... - async def get_schema(self, table_pattern: str | None = None) -> SchemaContext: + async def get_schema(self, table_pattern: str | None = None) -> SchemaResponse: """Discover available tables and columns. Args: table_pattern: Optional pattern to filter tables. Returns: - SchemaContext with all discovered tables. + SchemaResponse with all discovered tables. """ ... @@ -99,7 +99,7 @@ async def generate_hypotheses( async def generate_query( self, hypothesis: Hypothesis, - schema: SchemaContext, + schema: SchemaResponse, previous_error: str | None = None, ) -> str: """Generate SQL query to test a hypothesis. diff --git a/backend/src/dataing/core/orchestrator.py b/backend/src/dataing/core/orchestrator.py index b73ead345..be116ef8c 100644 --- a/backend/src/dataing/core/orchestrator.py +++ b/backend/src/dataing/core/orchestrator.py @@ -131,7 +131,7 @@ async def run_investigation( state = await self._gather_context(state) if state.schema_context is None: raise SchemaDiscoveryError("Schema context is None after gathering") - log.info("Context gathered", tables_found=len(state.schema_context.tables)) + log.info("Context gathered", tables_found=state.schema_context.table_count()) # 2. 
Generate Hypotheses state, hypotheses = await self._generate_hypotheses(state) @@ -210,7 +210,7 @@ async def _gather_context(self, state: InvestigationState) -> InvestigationState raise SchemaDiscoveryError(f"Context gathering failed: {e}") from e # FAIL FAST: Empty schema means DB connectivity issue or permissions problem - if not context.schema.tables: + if context.schema.is_empty(): state = state.append_event( Event( type="schema_discovery_failed", @@ -233,7 +233,7 @@ async def _gather_context(self, state: InvestigationState) -> InvestigationState type="context_gathered", timestamp=datetime.now(UTC), data={ - "tables_found": len(context.schema.tables), + "tables_found": context.schema.table_count(), "has_lineage": context.lineage is not None, }, ) diff --git a/backend/src/dataing/core/state.py b/backend/src/dataing/core/state.py index 6d98c9364..bf0b470eb 100644 --- a/backend/src/dataing/core/state.py +++ b/backend/src/dataing/core/state.py @@ -17,7 +17,9 @@ from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: - from .domain_types import AnomalyAlert, LineageContext, SchemaContext + from dataing.adapters.datasource.types import SchemaResponse + + from .domain_types import AnomalyAlert, LineageContext EventType = Literal[ @@ -75,7 +77,7 @@ class InvestigationState: id: str alert: AnomalyAlert events: list[Event] = field(default_factory=list) - schema_context: SchemaContext | None = None + schema_context: SchemaResponse | None = None lineage_context: LineageContext | None = None @property @@ -198,7 +200,7 @@ def append_event(self, event: Event) -> InvestigationState: def with_context( self, - schema_context: SchemaContext | None = None, + schema_context: SchemaResponse | None = None, lineage_context: LineageContext | None = None, ) -> InvestigationState: """Return new state with updated context. 
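Aside (not part of the patch): a minimal sketch of how the normalize_type helper at the top of this excerpt is expected to behave. Its module path and the concrete SOURCE_TYPE_MAPS entries are not visible in this excerpt, so the import and the varchar/integer mappings below are assumptions; only the parenthesis-stripping, array, and empty-string behavior come directly from the function body above.

# Hypothetical import path; the module that defines normalize_type is not named in this excerpt.
from dataing.adapters.datasource.normalization import normalize_type
from dataing.adapters.datasource.types import NormalizedType, SourceType

# Parameterized native types are reduced to their base type before the map lookup
# (assumes the PostgreSQL map contains "varchar" -> STRING).
assert normalize_type("varchar(255)", SourceType.POSTGRESQL) == NormalizedType.STRING

# "[]" suffixes and "array" prefixes short-circuit to ARRAY regardless of the element type.
assert normalize_type("integer[]", SourceType.POSTGRESQL) == NormalizedType.ARRAY

# Empty or unmapped native types fall back to UNKNOWN instead of raising.
assert normalize_type("", SourceType.MYSQL) == NormalizedType.UNKNOWN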
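Similarly, a small self-contained sketch (again not part of the patch) of the SchemaResponse helpers that the orchestrator and state changes above now rely on, built only from models defined in backend/src/dataing/adapters/datasource/types.py; the table and column values are made up.

from datetime import UTC, datetime

from dataing.adapters.datasource.types import (
    Catalog,
    Column,
    NormalizedType,
    Schema,
    SchemaResponse,
    SourceCategory,
    SourceType,
    Table,
)

schema = SchemaResponse(
    source_id="demo",
    source_type=SourceType.POSTGRESQL,
    source_category=SourceCategory.DATABASE,
    fetched_at=datetime.now(UTC),
    catalogs=[
        Catalog(
            name="main",
            schemas=[
                Schema(
                    name="public",
                    tables=[
                        Table(
                            name="users",
                            table_type="table",
                            native_type="table",
                            native_path="public.users",
                            columns=[
                                Column(name="id", data_type=NormalizedType.INTEGER, native_type="integer"),
                                Column(name="email", data_type=NormalizedType.STRING, native_type="varchar"),
                            ],
                        )
                    ],
                )
            ],
        )
    ],
)

# These helpers replace len(schema.tables) in the orchestrator's logging and fail-fast check.
assert schema.table_count() == 1
assert not schema.is_empty()

# to_prompt_string() renders the "AVAILABLE TABLES AND COLUMNS" block that is handed to the LLM.
print(schema.to_prompt_string(max_tables=10, max_columns=15))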
diff --git a/backend/src/dataing/demo/__init__.py b/backend/src/dataing/demo/__init__.py index b5c852568..8e838c31f 100644 --- a/backend/src/dataing/demo/__init__.py +++ b/backend/src/dataing/demo/__init__.py @@ -1,4 +1,4 @@ -"""Demo module for DataDr demo mode.""" +"""Demo module for Dataing demo mode.""" from .seed import seed_demo_data diff --git a/backend/src/dataing/demo/seed.py b/backend/src/dataing/demo/seed.py index c57b541fc..c5ac3c332 100644 --- a/backend/src/dataing/demo/seed.py +++ b/backend/src/dataing/demo/seed.py @@ -96,8 +96,9 @@ async def seed_demo_data(session: AsyncSession) -> None: fixture_path = get_fixture_path() encryption_key = get_encryption_key() - # For DuckDB, the config just needs the path + # For DuckDB directory mode, specify source_type and path connection_config = { + "source_type": "directory", "path": fixture_path, "read_only": True, } @@ -157,7 +158,7 @@ async def main() -> None: """Run demo seeding with a temporary database session.""" # Get database URL from env db_url = os.getenv( - "DATADR_DB_URL", "postgresql+asyncpg://datadr:datadr@localhost:5432/datadr_demo" + "DATADR_DB_URL", "postgresql+asyncpg://dataing:dataing@localhost:5432/dataing_demo" ) engine = create_async_engine(db_url) diff --git a/backend/src/dataing/entrypoints/api/deps.py b/backend/src/dataing/entrypoints/api/deps.py index 051e65ff7..86032442c 100644 --- a/backend/src/dataing/entrypoints/api/deps.py +++ b/backend/src/dataing/entrypoints/api/deps.py @@ -2,17 +2,20 @@ from __future__ import annotations +import json import logging import os from collections.abc import AsyncIterator from contextlib import asynccontextmanager from typing import TYPE_CHECKING, Any +from uuid import UUID +from cryptography.fernet import Fernet from fastapi import Request -from dataing.adapters.context import ContextEngine, DatabaseContext +from dataing.adapters.context import ContextEngine +from dataing.adapters.datasource import BaseAdapter, get_registry from dataing.adapters.db.app_db import AppDatabase -from dataing.adapters.db.postgres import PostgresAdapter from dataing.adapters.llm.client import AnthropicClient from dataing.core.orchestrator import InvestigationOrchestrator, OrchestratorConfig from dataing.safety.circuit_breaker import CircuitBreaker, CircuitBreakerConfig @@ -51,10 +54,6 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: - LLM client initialization - Orchestrator configuration """ - # Setup data warehouse adapter - db = PostgresAdapter(settings.database_url) - await db.connect() - # Setup application database app_db = AppDatabase(settings.app_database_url) await app_db.connect() @@ -64,10 +63,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: model=settings.llm_model, ) - # Create database context for resolving tenant data sources - database_context = DatabaseContext(app_db) - - # Create context engine (no longer needs db passed directly) + # Create context engine context_engine = ContextEngine() circuit_breaker = CircuitBreaker( @@ -78,8 +74,10 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: ) ) + # Note: Orchestrator now receives adapters per-request instead of at startup + # The db parameter is now optional and will be resolved per-tenant orchestrator = InvestigationOrchestrator( - db=db, # Fallback adapter + db=None, # Will be set per-request based on tenant's data source llm=llm, context_engine=context_engine, circuit_breaker=circuit_breaker, @@ -87,26 +85,47 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: ) # Store in app state - 
app.state.db = db app.state.app_db = app_db app.state.llm = llm - app.state.database_context = database_context app.state.context_engine = context_engine app.state.circuit_breaker = circuit_breaker app.state.orchestrator = orchestrator + # Check DATADR_ENCRYPTION_KEY first (used by demo), then ENCRYPTION_KEY + app.state.encryption_key = os.getenv("DATADR_ENCRYPTION_KEY") or os.getenv("ENCRYPTION_KEY") + + # Cache for active adapters (tenant_id:datasource_id -> adapter) + adapter_cache: dict[str, BaseAdapter] = {} + app.state.adapter_cache = adapter_cache + investigations_store: dict[str, dict[str, Any]] = {} app.state.investigations = investigations_store # Demo mode: seed demo data - if os.getenv("DATADR_DEMO_MODE", "").lower() == "true": - logger.info("Running in DEMO MODE - seeding demo data") + demo_mode = os.getenv("DATADR_DEMO_MODE", "").lower() + print(f"[DEBUG] DATADR_DEMO_MODE={demo_mode}", flush=True) + enc_key = app.state.encryption_key + enc_preview = enc_key[:15] if enc_key else "None" + print(f"[DEBUG] Initial encryption_key: {enc_preview}...", flush=True) + if demo_mode == "true": + print("[DEBUG] Running in DEMO MODE - seeding demo data", flush=True) await _seed_demo_data(app_db) + # Re-read encryption key in case _seed_demo_data generated one + app.state.encryption_key = os.getenv("DATADR_ENCRYPTION_KEY") or os.getenv("ENCRYPTION_KEY") + + enc_key = app.state.encryption_key + enc_preview = enc_key[:15] if enc_key else "None" + print(f"[DEBUG] Final encryption_key prefix: {enc_preview}...", flush=True) yield - # Teardown - await database_context.close_all() # Close cached adapters - await db.close() + # Teardown - close all cached adapters + for cache_key, adapter in app.state.adapter_cache.items(): + try: + await adapter.disconnect() + logger.debug(f"adapter_closed: {cache_key}") + except Exception as e: + logger.warning(f"adapter_close_failed: {cache_key}, error={e}") + await app_db.close() @@ -169,12 +188,14 @@ async def _seed_demo_data(app_db: AppDatabase) -> None: # Create demo data source (DuckDB pointing to fixtures) fixture_path = os.getenv("DATADR_FIXTURE_PATH", "./demo/fixtures/null_spike") - encryption_key = os.getenv("ENCRYPTION_KEY") + # Check DATADR_ENCRYPTION_KEY first (used by demo), then ENCRYPTION_KEY + encryption_key = os.getenv("DATADR_ENCRYPTION_KEY") or os.getenv("ENCRYPTION_KEY") if not encryption_key: encryption_key = Fernet.generate_key().decode() - os.environ["ENCRYPTION_KEY"] = encryption_key + os.environ["DATADR_ENCRYPTION_KEY"] = encryption_key connection_config = { + "source_type": "directory", "path": fixture_path, "read_only": True, } @@ -213,18 +234,6 @@ def get_orchestrator(request: Request) -> InvestigationOrchestrator: return request.app.state.orchestrator -def get_db(request: Request) -> PostgresAdapter: - """Get the database adapter from app state. - - Args: - request: The current request. - - Returns: - The configured PostgresAdapter. - """ - return request.app.state.db - - def get_investigations(request: Request) -> dict[str, dict[str, Any]]: """Get the investigations store from app state. @@ -250,16 +259,111 @@ def get_app_db(request: Request) -> AppDatabase: return request.app.state.app_db -def get_database_context(request: Request) -> DatabaseContext: - """Get the database context from app state. +async def get_tenant_adapter( + request: Request, + tenant_id: UUID, + data_source_id: UUID | None = None, +) -> BaseAdapter: + """Get or create a data source adapter for a tenant. 
+ + This function replaces DatabaseContext, using the AdapterRegistry + pattern instead. It caches adapters for reuse within the app lifecycle. + + Args: + request: The current request (for accessing app state). + tenant_id: The tenant's UUID. + data_source_id: Optional specific data source ID. If not provided, + uses the tenant's default data source. + + Returns: + A connected BaseAdapter for the data source. + + Raises: + ValueError: If data source not found or type not supported. + RuntimeError: If decryption or connection fails. + """ + app_db: AppDatabase = request.app.state.app_db + adapter_cache: dict[str, BaseAdapter] = request.app.state.adapter_cache + encryption_key: str | None = request.app.state.encryption_key + + # Get data source configuration + if data_source_id: + ds = await app_db.get_data_source(data_source_id, tenant_id) + if not ds: + raise ValueError(f"Data source {data_source_id} not found for tenant {tenant_id}") + else: + # Get default data source + data_sources = await app_db.list_data_sources(tenant_id) + active_sources = [d for d in data_sources if d.get("is_active", True)] + if not active_sources: + raise ValueError(f"No active data sources found for tenant {tenant_id}") + ds = active_sources[0] + data_source_id = ds["id"] + + # Check cache + cache_key = f"{tenant_id}:{data_source_id}" + if cache_key in adapter_cache: + logger.debug(f"adapter_cache_hit: {cache_key}") + return adapter_cache[cache_key] + + # Decrypt connection config + if not encryption_key: + raise RuntimeError( + "ENCRYPTION_KEY not set - check DATADR_ENCRYPTION_KEY or ENCRYPTION_KEY env vars" + ) + + encrypted_config = ds.get("connection_config_encrypted", "") + key_preview = encryption_key[:10] if encryption_key else "None" + print(f"[DECRYPT DEBUG] encryption_key type: {type(encryption_key)}", flush=True) + print(f"[DECRYPT DEBUG] encryption_key full: {encryption_key}", flush=True) + print( + f"[DECRYPT DEBUG] encryption_key length: {len(encryption_key) if encryption_key else 0}", + flush=True, + ) + print(f"[DECRYPT DEBUG] encrypted_config length: {len(encrypted_config)}", flush=True) + print(f"[DECRYPT DEBUG] encrypted_config start: {encrypted_config[:50]}", flush=True) + try: + f = Fernet(encryption_key.encode()) + decrypted = f.decrypt(encrypted_config.encode()).decode() + config: dict[str, Any] = json.loads(decrypted) + print(f"[DECRYPT DEBUG] SUCCESS: {decrypted}", flush=True) + except Exception as e: + print(f"[DECRYPT DEBUG] FAILED: {e}", flush=True) + import traceback + + traceback.print_exc() + raise RuntimeError( + f"Failed to decrypt connection config (key_prefix={key_preview}): {e}" + ) from e + + # Create adapter using registry + registry = get_registry() + ds_type = ds["type"] + + try: + adapter = registry.create(ds_type, config) + await adapter.connect() + except Exception as e: + raise RuntimeError(f"Failed to create/connect adapter for {ds_type}: {e}") from e + + # Cache for reuse + adapter_cache[cache_key] = adapter + logger.info(f"adapter_created: type={ds_type}, name={ds.get('name')}, key={cache_key}") + + return adapter + + +async def get_default_tenant_adapter(request: Request, tenant_id: UUID) -> BaseAdapter: + """Get the default data source adapter for a tenant. - The database context resolves tenant data source adapters - for running investigations against tenant data. + Convenience wrapper around get_tenant_adapter that uses the default + data source. Args: request: The current request. + tenant_id: The tenant's UUID. Returns: - The configured DatabaseContext. 
+ A connected BaseAdapter for the tenant's default data source. """ - return request.app.state.database_context + return await get_tenant_adapter(request, tenant_id) diff --git a/backend/src/dataing/entrypoints/api/routes/__init__.py b/backend/src/dataing/entrypoints/api/routes/__init__.py index c768dcb7c..f841cbbd7 100644 --- a/backend/src/dataing/entrypoints/api/routes/__init__.py +++ b/backend/src/dataing/entrypoints/api/routes/__init__.py @@ -5,6 +5,7 @@ from dataing.entrypoints.api.routes.approvals import router as approvals_router from dataing.entrypoints.api.routes.dashboard import router as dashboard_router from dataing.entrypoints.api.routes.datasources import router as datasources_router +from dataing.entrypoints.api.routes.datasources import router as datasources_v2_router from dataing.entrypoints.api.routes.investigations import router as investigations_router from dataing.entrypoints.api.routes.settings import router as settings_router from dataing.entrypoints.api.routes.users import router as users_router @@ -15,6 +16,7 @@ # Include all route modules api_router.include_router(investigations_router) api_router.include_router(datasources_router) +api_router.include_router(datasources_v2_router, prefix="/v2") # New unified adapter API api_router.include_router(approvals_router) api_router.include_router(settings_router) api_router.include_router(users_router) diff --git a/backend/src/dataing/entrypoints/api/routes/datasources.py b/backend/src/dataing/entrypoints/api/routes/datasources.py index 0bc8c0ee1..dfb5ab155 100644 --- a/backend/src/dataing/entrypoints/api/routes/datasources.py +++ b/backend/src/dataing/entrypoints/api/routes/datasources.py @@ -1,7 +1,12 @@ -"""Data source management routes.""" +"""Data source management routes using the new unified adapter architecture. + +This module provides API endpoints for managing data sources using the +pluggable adapter architecture defined in the data_context specification. +""" from __future__ import annotations +import json import os from datetime import datetime from typing import Annotated, Any @@ -11,13 +16,18 @@ from fastapi import APIRouter, Depends, HTTPException, Response from pydantic import BaseModel, Field +from dataing.adapters.datasource import ( + SchemaFilter, + SourceType, + get_registry, +) from dataing.adapters.db.app_db import AppDatabase -from dataing.adapters.db.duckdb import DuckDBAdapter -from dataing.adapters.db.postgres import PostgresAdapter -from dataing.adapters.db.trino import TrinoAdapter from dataing.entrypoints.api.deps import get_app_db -from dataing.entrypoints.api.middleware.auth import ApiKeyContext, require_scope, verify_api_key -from dataing.models.data_source import DataSourceType +from dataing.entrypoints.api.middleware.auth import ( + ApiKeyContext, + require_scope, + verify_api_key, +) router = APIRouter(prefix="/datasources", tags=["datasources"]) @@ -30,37 +40,24 @@ def get_encryption_key() -> bytes: """Get the encryption key for data source configs. - Returns: - Encryption key as bytes. - - Raises: - RuntimeError: If ENCRYPTION_KEY is not set. + Checks DATADR_ENCRYPTION_KEY first (used by demo), then ENCRYPTION_KEY. 
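Aside (not part of the patch, placed here only for reference): a sketch of the Fernet round trip that _encrypt_config and _decrypt_config below perform on adapter configs; the example config mirrors the demo seed earlier in this diff.

import json

from cryptography.fernet import Fernet

# The key normally comes from DATADR_ENCRYPTION_KEY or ENCRYPTION_KEY, as get_encryption_key() describes.
key = Fernet.generate_key()

config = {"source_type": "directory", "path": "./demo/fixtures/null_spike", "read_only": True}

# What _encrypt_config stores in data_sources.connection_config_encrypted.
token = Fernet(key).encrypt(json.dumps(config).encode()).decode()

# What _decrypt_config (and get_tenant_adapter in deps.py) recover before building an adapter.
restored: dict = json.loads(Fernet(key).decrypt(token.encode()).decode())
assert restored == config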
""" - key = os.getenv("ENCRYPTION_KEY") + key = os.getenv("DATADR_ENCRYPTION_KEY") or os.getenv("ENCRYPTION_KEY") if not key: - # For development, use a default key (NOT FOR PRODUCTION) key = Fernet.generate_key().decode() os.environ["ENCRYPTION_KEY"] = key return key.encode() if isinstance(key, str) else key -class ConnectionConfig(BaseModel): - """Database connection configuration.""" - - host: str - port: int = 5432 - database: str - username: str - password: str - ssl_mode: str = "prefer" +# Request/Response Models class CreateDataSourceRequest(BaseModel): """Request to create a new data source.""" name: str = Field(..., min_length=1, max_length=100) - type: DataSourceType - connection_config: ConnectionConfig + type: str = Field(..., description="Source type (e.g., 'postgresql', 'mongodb')") + config: dict[str, Any] = Field(..., description="Configuration for the adapter") is_default: bool = False @@ -68,7 +65,7 @@ class UpdateDataSourceRequest(BaseModel): """Request to update a data source.""" name: str | None = Field(None, min_length=1, max_length=100) - connection_config: ConnectionConfig | None = None + config: dict[str, Any] | None = None is_default: bool | None = None @@ -78,10 +75,11 @@ class DataSourceResponse(BaseModel): id: str name: str type: str + category: str is_default: bool is_active: bool + status: str last_health_check_at: datetime | None = None - last_health_check_status: str | None = None created_at: datetime @@ -92,97 +90,176 @@ class DataSourceListResponse(BaseModel): total: int +class TestConnectionRequest(BaseModel): + """Request to test a connection.""" + + type: str + config: dict[str, Any] + + class TestConnectionResponse(BaseModel): """Response for testing a connection.""" success: bool message: str - tables_found: int | None = None + latency_ms: int | None = None + server_version: str | None = None + + +class SourceTypeResponse(BaseModel): + """Response for a source type definition.""" + + type: str + display_name: str + category: str + icon: str + description: str + capabilities: dict[str, Any] + config_schema: dict[str, Any] + + +class SourceTypesResponse(BaseModel): + """Response for listing source types.""" + types: list[SourceTypeResponse] -class SchemaResponse(BaseModel): + +class SchemaTableResponse(BaseModel): + """Response for a table in the schema.""" + + name: str + table_type: str + native_type: str + native_path: str + columns: list[dict[str, Any]] + row_count: int | None = None + size_bytes: int | None = None + + +class SchemaResponseModel(BaseModel): """Response for schema discovery.""" - tables: list[dict[str, Any]] + source_id: str + source_type: str + source_category: str + fetched_at: datetime + catalogs: list[dict[str, Any]] -def _build_connection_string(config: ConnectionConfig, ds_type: DataSourceType) -> str: - """Build a connection string from config.""" - if ds_type == DataSourceType.POSTGRES: - ssl_suffix = f"?sslmode={config.ssl_mode}" if config.ssl_mode else "" - return f"postgresql://{config.username}:{config.password}@{config.host}:{config.port}/{config.database}{ssl_suffix}" - else: - raise HTTPException( - status_code=400, - detail=f"Data source type '{ds_type}' is not yet supported for connection strings", - ) +class QueryRequest(BaseModel): + """Request to execute a query.""" + query: str + timeout_seconds: int = 30 -def _create_adapter( - config: ConnectionConfig, ds_type: DataSourceType -) -> PostgresAdapter | TrinoAdapter: - """Create a database adapter from config.""" - if ds_type == DataSourceType.POSTGRES: - 
connection_string = _build_connection_string(config, ds_type) - return PostgresAdapter(connection_string) - elif ds_type == DataSourceType.TRINO: - # Parse database as catalog.schema for Trino - parts = config.database.split(".") - if len(parts) == 2: - catalog, schema = parts - else: - catalog = config.database - schema = "default" - return TrinoAdapter( - host=config.host, - port=config.port, - catalog=catalog, - schema=schema, - user=config.username, - ) - else: - raise ValueError(f"Data source type '{ds_type}' is not yet supported") +class QueryResponse(BaseModel): + """Response for query execution.""" -async def _test_connection( - config: ConnectionConfig, ds_type: DataSourceType -) -> tuple[bool, str, int]: - """Test a database connection. + columns: list[dict[str, Any]] + rows: list[dict[str, Any]] + row_count: int + truncated: bool = False + execution_time_ms: int | None = None - Returns: - Tuple of (success, message, table_count) - """ - try: - if ds_type in (DataSourceType.POSTGRES, DataSourceType.TRINO): - adapter = _create_adapter(config, ds_type) - await adapter.connect() - try: - schema = await adapter.get_schema() - return True, "Connection successful", len(schema.tables) - finally: - await adapter.close() - else: - return False, f"Data source type '{ds_type}' is not yet supported", 0 - except Exception as e: - return False, f"Connection failed: {str(e)}", 0 +class StatsRequest(BaseModel): + """Request for column statistics.""" + + table: str + columns: list[str] + + +class StatsResponse(BaseModel): + """Response for column statistics.""" + + table: str + row_count: int | None = None + columns: dict[str, dict[str, Any]] -def _encrypt_config(config: ConnectionConfig, key: bytes) -> str: - """Encrypt connection configuration.""" - import json +def _encrypt_config(config: dict[str, Any], key: bytes) -> str: + """Encrypt configuration.""" f = Fernet(key) - encrypted = f.encrypt(json.dumps(config.model_dump()).encode()) + encrypted = f.encrypt(json.dumps(config).encode()) return encrypted.decode() -def _decrypt_config(encrypted: str, key: bytes) -> ConnectionConfig: - """Decrypt connection configuration.""" - import json - +def _decrypt_config(encrypted: str, key: bytes) -> dict[str, Any]: + """Decrypt configuration.""" f = Fernet(key) decrypted = f.decrypt(encrypted.encode()) - return ConnectionConfig(**json.loads(decrypted.decode())) + result: dict[str, Any] = json.loads(decrypted.decode()) + return result + + +@router.get("/types", response_model=SourceTypesResponse) +async def list_source_types() -> SourceTypesResponse: + """List all supported data source types. + + Returns the configuration schema for each type, which can be used + to dynamically generate connection forms in the frontend. + """ + registry = get_registry() + types_list = [] + + for type_def in registry.list_types(): + types_list.append( + SourceTypeResponse( + type=type_def.type.value, + display_name=type_def.display_name, + category=type_def.category.value, + icon=type_def.icon, + description=type_def.description, + capabilities=type_def.capabilities.model_dump(), + config_schema=type_def.config_schema.model_dump(), + ) + ) + + return SourceTypesResponse(types=types_list) + + +@router.post("/test", response_model=TestConnectionResponse) +async def test_connection( + request: TestConnectionRequest, +) -> TestConnectionResponse: + """Test a connection without saving it. + + Use this endpoint to validate connection settings before creating + a data source. 
+ """ + registry = get_registry() + + try: + source_type = SourceType(request.type) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"Unsupported source type: {request.type}", + ) from None + + if not registry.is_registered(source_type): + raise HTTPException( + status_code=400, + detail=f"Source type not available: {request.type}", + ) + + try: + adapter = registry.create(source_type, request.config) + async with adapter: + result = await adapter.test_connection() + + return TestConnectionResponse( + success=result.success, + message=result.message, + latency_ms=result.latency_ms, + server_version=result.server_version, + ) + except Exception as e: + return TestConnectionResponse( + success=False, + message=str(e), + ) @router.post("/", response_model=DataSourceResponse, status_code=201) @@ -191,40 +268,68 @@ async def create_datasource( auth: WriteScopeDep, app_db: AppDbDep, ) -> DataSourceResponse: - """Create a new data source connection. + """Create a new data source. Tests the connection before saving. Returns 400 if connection test fails. """ + registry = get_registry() + + try: + source_type = SourceType(request.type) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"Unsupported source type: {request.type}", + ) from None + + if not registry.is_registered(source_type): + raise HTTPException( + status_code=400, + detail=f"Source type not available: {request.type}", + ) + # Test connection first - success, message, tables = await _test_connection(request.connection_config, request.type) - if not success: - raise HTTPException(status_code=400, detail=message) + try: + adapter = registry.create(source_type, request.config) + async with adapter: + result = await adapter.test_connection() + if not result.success: + raise HTTPException(status_code=400, detail=result.message) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=400, detail=f"Connection failed: {str(e)}") from e - # Encrypt connection config + # Get type definition for category + type_def = registry.get_definition(source_type) + category = type_def.category.value if type_def else "database" + + # Encrypt config encryption_key = get_encryption_key() - encrypted_config = _encrypt_config(request.connection_config, encryption_key) + encrypted_config = _encrypt_config(request.config, encryption_key) # Save to database - result = await app_db.create_data_source( + db_result = await app_db.create_data_source( tenant_id=auth.tenant_id, name=request.name, - type=request.type.value, + type=request.type, connection_config_encrypted=encrypted_config, is_default=request.is_default, ) # Update health check status - await app_db.update_data_source_health(result["id"], "healthy") + await app_db.update_data_source_health(db_result["id"], "healthy") return DataSourceResponse( - id=str(result["id"]), - name=result["name"], - type=result["type"], - is_default=result["is_default"], - is_active=result["is_active"], + id=str(db_result["id"]), + name=db_result["name"], + type=db_result["type"], + category=category, + is_default=db_result["is_default"], + is_active=db_result["is_active"], + status="connected", last_health_check_at=datetime.now(), - last_health_check_status="healthy", - created_at=result["created_at"], + created_at=db_result["created_at"], ) @@ -235,22 +340,43 @@ async def list_datasources( ) -> DataSourceListResponse: """List all data sources for the current tenant.""" data_sources = await app_db.list_data_sources(auth.tenant_id) + registry = 
get_registry() + + responses = [] + for ds in data_sources: + # Get category from registry + try: + source_type = SourceType(ds["type"]) + type_def = registry.get_definition(source_type) + category = type_def.category.value if type_def else "database" + except ValueError: + category = "database" + + status = ds.get("last_health_check_status", "unknown") + if status == "healthy": + status = "connected" + elif status == "unhealthy": + status = "error" + else: + status = "disconnected" - return DataSourceListResponse( - data_sources=[ + responses.append( DataSourceResponse( id=str(ds["id"]), name=ds["name"], type=ds["type"], + category=category, is_default=ds["is_default"], is_active=ds["is_active"], + status=status, last_health_check_at=ds.get("last_health_check_at"), - last_health_check_status=ds.get("last_health_check_status"), created_at=ds["created_at"], ) - for ds in data_sources - ], - total=len(data_sources), + ) + + return DataSourceListResponse( + data_sources=responses, + total=len(responses), ) @@ -266,14 +392,31 @@ async def get_datasource( if not ds: raise HTTPException(status_code=404, detail="Data source not found") + registry = get_registry() + try: + source_type = SourceType(ds["type"]) + type_def = registry.get_definition(source_type) + category = type_def.category.value if type_def else "database" + except ValueError: + category = "database" + + status = ds.get("last_health_check_status", "unknown") + if status == "healthy": + status = "connected" + elif status == "unhealthy": + status = "error" + else: + status = "disconnected" + return DataSourceResponse( id=str(ds["id"]), name=ds["name"], type=ds["type"], + category=category, is_default=ds["is_default"], is_active=ds["is_active"], + status=status, last_health_check_at=ds.get("last_health_check_at"), - last_health_check_status=ds.get("last_health_check_status"), created_at=ds["created_at"], ) @@ -294,123 +437,290 @@ async def delete_datasource( @router.post("/{datasource_id}/test", response_model=TestConnectionResponse) -async def test_datasource( +async def test_datasource_connection( datasource_id: UUID, auth: AuthDep, app_db: AppDbDep, ) -> TestConnectionResponse: - """Test data source connectivity.""" + """Test connectivity for an existing data source.""" ds = await app_db.get_data_source(datasource_id, auth.tenant_id) if not ds: raise HTTPException(status_code=404, detail="Data source not found") - # Decrypt connection config + registry = get_registry() + + try: + source_type = SourceType(ds["type"]) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"Unsupported source type: {ds['type']}", + ) from None + + if not registry.is_registered(source_type): + raise HTTPException( + status_code=400, + detail=f"Source type not available: {ds['type']}", + ) + + # Decrypt config encryption_key = get_encryption_key() try: config = _decrypt_config(ds["connection_config_encrypted"], encryption_key) except Exception as e: return TestConnectionResponse( success=False, - message=f"Failed to decrypt connection config: {str(e)}", + message=f"Failed to decrypt configuration: {str(e)}", ) # Test connection - ds_type = DataSourceType(ds["type"]) - success, message, tables = await _test_connection(config, ds_type) + try: + adapter = registry.create(source_type, config) + async with adapter: + result = await adapter.test_connection() - # Update health check status - status = "healthy" if success else "unhealthy" - await app_db.update_data_source_health(datasource_id, status) + # Update health check status + status = 
"healthy" if result.success else "unhealthy" + await app_db.update_data_source_health(datasource_id, status) - return TestConnectionResponse( - success=success, - message=message, - tables_found=tables if success else None, - ) + return TestConnectionResponse( + success=result.success, + message=result.message, + latency_ms=result.latency_ms, + server_version=result.server_version, + ) + except Exception as e: + await app_db.update_data_source_health(datasource_id, "unhealthy") + return TestConnectionResponse( + success=False, + message=str(e), + ) -@router.get("/{datasource_id}/schema", response_model=SchemaResponse) -async def get_schema( +@router.get("/{datasource_id}/schema", response_model=SchemaResponseModel) +async def get_datasource_schema( datasource_id: UUID, auth: AuthDep, app_db: AppDbDep, table_pattern: str | None = None, -) -> SchemaResponse: - """Get schema from data source. - - Args: - datasource_id: The data source ID. - auth: Authentication context (injected). - app_db: Application database (injected). - table_pattern: Optional pattern to filter tables. + include_views: bool = True, + max_tables: int = 1000, +) -> SchemaResponseModel: + """Get schema from a data source. + + Returns unified schema with catalogs, schemas, and tables. """ ds = await app_db.get_data_source(datasource_id, auth.tenant_id) if not ds: raise HTTPException(status_code=404, detail="Data source not found") - ds_type = DataSourceType(ds["type"]) + registry = get_registry() + + try: + source_type = SourceType(ds["type"]) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"Unsupported source type: {ds['type']}", + ) from None + + if not registry.is_registered(source_type): + raise HTTPException( + status_code=400, + detail=f"Source type not available: {ds['type']}", + ) + + # Decrypt config encryption_key = get_encryption_key() + try: + config = _decrypt_config(ds["connection_config_encrypted"], encryption_key) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to decrypt configuration: {str(e)}", + ) from e + + # Build filter + schema_filter = SchemaFilter( + table_pattern=table_pattern, + include_views=include_views, + max_tables=max_tables, + ) + # Get schema try: - if ds_type == DataSourceType.DUCKDB: - # DuckDB uses raw config dict, not ConnectionConfig - import json + adapter = registry.create(source_type, config) + async with adapter: + schema = await adapter.get_schema(schema_filter) + + return SchemaResponseModel( + source_id=str(datasource_id), + source_type=schema.source_type.value, + source_category=schema.source_category.value, + fetched_at=schema.fetched_at, + catalogs=[cat.model_dump() for cat in schema.catalogs], + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to fetch schema: {str(e)}", + ) from e - raw_config = json.loads( - Fernet(encryption_key).decrypt(ds["connection_config_encrypted"].encode()).decode() - ) - adapter = DuckDBAdapter(raw_config["path"], raw_config.get("read_only", True)) - await adapter.connect() - try: - schema = await adapter.get_schema(table_pattern) - return SchemaResponse( - tables=[ - { - "table_name": t.table_name, - "columns": list(t.columns), - "column_types": t.column_types, - } - for t in schema.tables - ] - ) - finally: - await adapter.close() - elif ds_type in (DataSourceType.POSTGRES, DataSourceType.TRINO): - # Decrypt connection config for traditional databases - try: - config = _decrypt_config(ds["connection_config_encrypted"], encryption_key) - except 
Exception as e: + +@router.post("/{datasource_id}/query", response_model=QueryResponse) +async def execute_query( + datasource_id: UUID, + request: QueryRequest, + auth: AuthDep, + app_db: AppDbDep, +) -> QueryResponse: + """Execute a query against a data source. + + Only works for sources that support SQL or similar query languages. + """ + ds = await app_db.get_data_source(datasource_id, auth.tenant_id) + + if not ds: + raise HTTPException(status_code=404, detail="Data source not found") + + registry = get_registry() + + try: + source_type = SourceType(ds["type"]) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"Unsupported source type: {ds['type']}", + ) from None + + type_def = registry.get_definition(source_type) + if not type_def or not type_def.capabilities.supports_sql: + raise HTTPException( + status_code=400, + detail=f"Source type {ds['type']} does not support SQL queries", + ) + + # Decrypt config + encryption_key = get_encryption_key() + try: + config = _decrypt_config(ds["connection_config_encrypted"], encryption_key) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to decrypt configuration: {str(e)}", + ) from e + + # Execute query + try: + adapter = registry.create(source_type, config) + async with adapter: + # Check if adapter has execute_query method + if not hasattr(adapter, "execute_query"): raise HTTPException( - status_code=500, - detail=f"Failed to decrypt connection config: {str(e)}", - ) from e - adapter = _create_adapter(config, ds_type) - await adapter.connect() - try: - schema = await adapter.get_schema(table_pattern) - return SchemaResponse( - tables=[ - { - "table_name": t.table_name, - "columns": list(t.columns), - "column_types": t.column_types, - } - for t in schema.tables - ] + status_code=400, + detail=f"Source type {ds['type']} does not support query execution", ) - finally: - await adapter.close() - else: - raise HTTPException( - status_code=400, - detail=f"Schema discovery not supported for '{ds_type}'", + result = await adapter.execute_query( + request.query, + timeout_seconds=request.timeout_seconds, ) + + return QueryResponse( + columns=result.columns, + rows=result.rows, + row_count=result.row_count, + truncated=result.truncated, + execution_time_ms=result.execution_time_ms, + ) except HTTPException: raise except Exception as e: raise HTTPException( status_code=500, - detail=f"Failed to fetch schema: {str(e)}", + detail=f"Query execution failed: {str(e)}", + ) from e + + +@router.post("/{datasource_id}/stats", response_model=StatsResponse) +async def get_column_stats( + datasource_id: UUID, + request: StatsRequest, + auth: AuthDep, + app_db: AppDbDep, +) -> StatsResponse: + """Get statistics for columns in a table. + + Only works for sources that support column statistics. 
+ """ + ds = await app_db.get_data_source(datasource_id, auth.tenant_id) + + if not ds: + raise HTTPException(status_code=404, detail="Data source not found") + + registry = get_registry() + + try: + source_type = SourceType(ds["type"]) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"Unsupported source type: {ds['type']}", + ) from None + + type_def = registry.get_definition(source_type) + if not type_def or not type_def.capabilities.supports_column_stats: + raise HTTPException( + status_code=400, + detail=f"Source type {ds['type']} does not support column statistics", + ) + + # Decrypt config + encryption_key = get_encryption_key() + try: + config = _decrypt_config(ds["connection_config_encrypted"], encryption_key) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to decrypt configuration: {str(e)}", + ) from e + + # Get stats + try: + adapter = registry.create(source_type, config) + async with adapter: + # Check if adapter has get_column_stats method + if not hasattr(adapter, "get_column_stats"): + raise HTTPException( + status_code=400, + detail=f"Source type {ds['type']} does not support column statistics", + ) + + # Parse table name + parts = request.table.split(".") + if len(parts) == 2: + schema, table = parts + else: + schema = None + table = request.table + + stats = await adapter.get_column_stats(table, request.columns, schema) + + # Try to get row count + row_count = None + if hasattr(adapter, "count_rows"): + row_count = await adapter.count_rows(table, schema) + + return StatsResponse( + table=request.table, + row_count=row_count, + columns=stats, + ) + except HTTPException: + raise + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to get column statistics: {str(e)}", ) from e diff --git a/backend/src/dataing/entrypoints/api/routes/investigations.py b/backend/src/dataing/entrypoints/api/routes/investigations.py index e3029dcea..0278a2fb3 100644 --- a/backend/src/dataing/entrypoints/api/routes/investigations.py +++ b/backend/src/dataing/entrypoints/api/routes/investigations.py @@ -9,15 +9,18 @@ from typing import Annotated, Any import structlog -from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException +from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request from fastapi.responses import StreamingResponse from pydantic import BaseModel -from dataing.adapters.context import DatabaseContext from dataing.core.domain_types import AnomalyAlert from dataing.core.orchestrator import InvestigationOrchestrator from dataing.core.state import InvestigationState -from dataing.entrypoints.api.deps import get_database_context, get_investigations, get_orchestrator +from dataing.entrypoints.api.deps import ( + get_default_tenant_adapter, + get_investigations, + get_orchestrator, +) from dataing.entrypoints.api.middleware.auth import ApiKeyContext, verify_api_key router = APIRouter(prefix="/investigations", tags=["investigations"]) @@ -27,7 +30,6 @@ # Annotated types for dependency injection AuthDep = Annotated[ApiKeyContext, Depends(verify_api_key)] OrchestratorDep = Annotated[InvestigationOrchestrator, Depends(get_orchestrator)] -DatabaseContextDep = Annotated[DatabaseContext, Depends(get_database_context)] InvestigationsDep = Annotated[dict[str, dict[str, Any]], Depends(get_investigations)] @@ -59,15 +61,16 @@ class InvestigationStatusResponse(BaseModel): status: str events: list[dict[str, Any]] finding: dict[str, Any] | None = None + error: str | None = None 
@router.post("/", response_model=InvestigationResponse) async def create_investigation( + http_request: Request, request: CreateInvestigationRequest, background_tasks: BackgroundTasks, auth: AuthDep, orchestrator: OrchestratorDep, - database_context: DatabaseContextDep, investigations: InvestigationsDep, ) -> InvestigationResponse: """Start a new investigation. @@ -105,8 +108,8 @@ async def create_investigation( # Run investigation in background with tenant's data source async def run_investigation() -> None: try: - # Resolve tenant's data source adapter - data_adapter = await database_context.get_default_adapter(auth.tenant_id) + # Resolve tenant's data source adapter using AdapterRegistry + data_adapter = await get_default_tenant_adapter(http_request, auth.tenant_id) # Run investigation against tenant's actual data finding = await orchestrator.run_investigation(state, data_adapter) @@ -158,6 +161,7 @@ async def get_investigation( for e in state.events ], finding=inv.get("finding"), + error=inv.get("error"), ) diff --git a/backend/src/dataing/entrypoints/mcp/server.py b/backend/src/dataing/entrypoints/mcp/server.py index 12b272952..ae675ba16 100644 --- a/backend/src/dataing/entrypoints/mcp/server.py +++ b/backend/src/dataing/entrypoints/mcp/server.py @@ -19,7 +19,7 @@ from mcp.types import TextContent, Tool from dataing.adapters.context.engine import DefaultContextEngine -from dataing.adapters.db.postgres import PostgresAdapter +from dataing.adapters.datasource import BaseAdapter, get_registry from dataing.adapters.llm.client import AnthropicClient from dataing.core.domain_types import AnomalyAlert from dataing.core.orchestrator import InvestigationOrchestrator, OrchestratorConfig @@ -29,7 +29,7 @@ def create_server( - db: PostgresAdapter, + db: BaseAdapter, llm: AnthropicClient, ) -> Server: """Create and configure the MCP server. @@ -43,7 +43,7 @@ def create_server( """ server = Server("dataing") - context_engine = DefaultContextEngine(db=db) + context_engine = DefaultContextEngine() circuit_breaker = CircuitBreaker(CircuitBreakerConfig()) orchestrator = InvestigationOrchestrator( @@ -196,7 +196,7 @@ async def _investigate_anomaly( async def _query_dataset( - db: PostgresAdapter, + db: BaseAdapter, args: dict[str, Any], ) -> list[TextContent]: """Execute a read-only query. @@ -214,7 +214,7 @@ async def _query_dataset( # Validate query for safety validate_query(sql) - result = await db.execute_query(sql) + result = await db.execute(sql) # Format results if not result.rows: @@ -238,7 +238,7 @@ async def _query_dataset( async def _get_table_schema( - db: PostgresAdapter, + db: BaseAdapter, args: dict[str, Any], ) -> list[TextContent]: """Get schema for a table. @@ -251,19 +251,34 @@ async def _get_table_schema( List of TextContent with schema information. 
""" table_name = args["table_name"] + table_name_lower = table_name.lower() try: - schema = await db.get_schema(table_pattern=table_name) - table = schema.get_table(table_name) - - if not table: + schema = await db.get_schema() + + # Find the table in the nested structure + found_table = None + for catalog in schema.catalogs: + for db_schema in catalog.schemas: + for table in db_schema.tables: + if ( + table.native_path.lower() == table_name_lower + or table.name.lower() == table_name_lower + ): + found_table = table + break + if found_table: + break + if found_table: + break + + if not found_table: return [TextContent(type="text", text=f"Table not found: {table_name}")] - lines = [f"Table: {table.table_name}", ""] + lines = [f"Table: {found_table.native_path}", ""] lines.append("Columns:") - for col in table.columns: - col_type = table.column_types.get(col, "unknown") - lines.append(f" - {col}: {col_type}") + for col in found_table.columns: + lines.append(f" - {col.name}: {col.data_type.value}") return [TextContent(type="text", text="\n".join(lines))] @@ -278,7 +293,8 @@ async def run_server(database_url: str, anthropic_api_key: str) -> None: database_url: PostgreSQL connection URL. anthropic_api_key: Anthropic API key. """ - db = PostgresAdapter(database_url) + registry = get_registry() + db = registry.create("postgres", {"dsn": database_url}) await db.connect() llm = AnthropicClient(api_key=anthropic_api_key) @@ -288,4 +304,4 @@ async def run_server(database_url: str, anthropic_api_key: str) -> None: async with stdio_server() as (read_stream, write_stream): await server.run(read_stream, write_stream, server.create_initialization_options()) - await db.close() + await db.disconnect() diff --git a/dashboard/src/app/(auth)/callback/page.tsx b/dashboard/src/app/(auth)/callback/page.tsx new file mode 100644 index 000000000..f74cdeb26 --- /dev/null +++ b/dashboard/src/app/(auth)/callback/page.tsx @@ -0,0 +1,10 @@ +export default function CallbackPage() { + return ( +
+
+

Signing you in...

+

Hang tight while we finalize authentication.

+
+
+ ); +} diff --git a/dashboard/src/app/(auth)/login/page.tsx b/dashboard/src/app/(auth)/login/page.tsx new file mode 100644 index 000000000..950dabb71 --- /dev/null +++ b/dashboard/src/app/(auth)/login/page.tsx @@ -0,0 +1,23 @@ +import Link from "next/link"; +import { Button } from "@/components/ui/Button"; + +export default function LoginPage() { + return ( +
+
+

Welcome back

+

Sign in with your SSO provider to access the dashboard.

+
+ + +
+

+ By signing in you agree to the DataDr usage policy. +

+ + Continue to dashboard + +
+
+ ); +} diff --git a/dashboard/src/app/(auth)/logout/page.tsx b/dashboard/src/app/(auth)/logout/page.tsx new file mode 100644 index 000000000..1fb59d2d9 --- /dev/null +++ b/dashboard/src/app/(auth)/logout/page.tsx @@ -0,0 +1,18 @@ +import Link from "next/link"; +import { Button } from "@/components/ui/Button"; + +export default function LogoutPage() { + return ( +
+
+

You are signed out

+

Thanks for keeping your workspace secure.

+
+ + + +
+
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/analytics/costs/page.tsx b/dashboard/src/app/(dashboard)/analytics/costs/page.tsx new file mode 100644 index 000000000..aa3b6f823 --- /dev/null +++ b/dashboard/src/app/(dashboard)/analytics/costs/page.tsx @@ -0,0 +1,18 @@ +import { CostBreakdown } from "@/components/analytics/CostBreakdown"; +import { Card } from "@/components/ui/Card"; +import { getCostBreakdown } from "@/lib/api/analytics"; + +export default async function CostsPage() { + const costs = await getCostBreakdown(); + return ( +
+
+

Cost Analysis

+

Understand where investigation spend is concentrated.

+
+ + + +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/analytics/mttr/page.tsx b/dashboard/src/app/(dashboard)/analytics/mttr/page.tsx new file mode 100644 index 000000000..0a6e0b1b0 --- /dev/null +++ b/dashboard/src/app/(dashboard)/analytics/mttr/page.tsx @@ -0,0 +1,19 @@ +import { TrendChart } from "@/components/analytics/TrendChart"; +import { Card } from "@/components/ui/Card"; +import { getTrendSeries } from "@/lib/api/analytics"; + +export default async function MttrPage() { + const trend = await getTrendSeries(); + + return ( +
+
+

MTTR Deep Dive

+

Track resolution time changes week over week.

+
+ + + +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/analytics/page.tsx b/dashboard/src/app/(dashboard)/analytics/page.tsx new file mode 100644 index 000000000..d4bd1a44f --- /dev/null +++ b/dashboard/src/app/(dashboard)/analytics/page.tsx @@ -0,0 +1,58 @@ +import Link from "next/link"; +import { CostBreakdown } from "@/components/analytics/CostBreakdown"; +import { DistributionChart } from "@/components/analytics/DistributionChart"; +import { MetricCard } from "@/components/analytics/MetricCard"; +import { TrendChart } from "@/components/analytics/TrendChart"; +import { ScheduledReports } from "@/components/analytics/ScheduledReports"; +import { Card } from "@/components/ui/Card"; +import { getCostBreakdown, getOrgStats, getTrendSeries, getUsageMetrics } from "@/lib/api/analytics"; +import { formatCurrency } from "@/lib/utils/formatters"; + +export const dynamic = 'force-dynamic'; +export const revalidate = 60; + +export default async function AnalyticsPage() { + const [stats, trends, costs, usage] = await Promise.all([ + getOrgStats(), + getTrendSeries(), + getCostBreakdown(), + getUsageMetrics(), + ]); + + return ( +
+
+

Executive Analytics

+

Signals across reliability, cost, and velocity.

+
+ +
+ + + + +
+ +
+ View details}> + + + View details}> + + +
+ + + ({ label: metric.label, value: metric.value }))} + /> + + + +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/analytics/trends/page.tsx b/dashboard/src/app/(dashboard)/analytics/trends/page.tsx new file mode 100644 index 000000000..6940e3001 --- /dev/null +++ b/dashboard/src/app/(dashboard)/analytics/trends/page.tsx @@ -0,0 +1,18 @@ +import { TrendChart } from "@/components/analytics/TrendChart"; +import { Card } from "@/components/ui/Card"; +import { getTrendSeries } from "@/lib/api/analytics"; + +export default async function TrendsPage() { + const trends = await getTrendSeries(); + return ( +
+
+

Anomaly Trends

+

Seasonal changes in anomaly volume.

+
+ + + +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/datasets/[datasetId]/anomalies/page.tsx b/dashboard/src/app/(dashboard)/datasets/[datasetId]/anomalies/page.tsx new file mode 100644 index 000000000..3a717b3cc --- /dev/null +++ b/dashboard/src/app/(dashboard)/datasets/[datasetId]/anomalies/page.tsx @@ -0,0 +1,26 @@ +import { Card } from "@/components/ui/Card"; +import { getDatasetAnomalies } from "@/lib/api/datasets"; + +export default async function DatasetAnomaliesPage({ + params, +}: { + params: Promise<{ datasetId: string }>; +}) { + const { datasetId } = await params; + const { anomalies } = await getDatasetAnomalies(datasetId); + return ( +
+

Anomaly History

+
+ {anomalies.map((anomaly) => ( + +

+ {new Date(anomaly.detected_at).toLocaleString()} +

+

Severity: {anomaly.severity}

+
+ ))} +
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/datasets/[datasetId]/lineage/page.tsx b/dashboard/src/app/(dashboard)/datasets/[datasetId]/lineage/page.tsx new file mode 100644 index 000000000..644582df3 --- /dev/null +++ b/dashboard/src/app/(dashboard)/datasets/[datasetId]/lineage/page.tsx @@ -0,0 +1,16 @@ +import { LineageGraph } from "@/components/datasets/LineageGraph"; +import { getDataset, getDatasetLineage } from "@/lib/api/datasets"; + +export default async function DatasetLineagePage({ params }: { params: { datasetId: string } }) { + const [dataset, lineage] = await Promise.all([ + getDataset(params.datasetId), + getDatasetLineage(params.datasetId), + ]); + + return ( +
+

Lineage

+ +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/datasets/[datasetId]/page.tsx b/dashboard/src/app/(dashboard)/datasets/[datasetId]/page.tsx new file mode 100644 index 000000000..3945a9ac5 --- /dev/null +++ b/dashboard/src/app/(dashboard)/datasets/[datasetId]/page.tsx @@ -0,0 +1,107 @@ +import Link from "next/link"; +import { DatasetTable } from "@/components/datasets/DatasetTable"; +import { LineageGraph } from "@/components/datasets/LineageGraph"; +import { SchemaViewer } from "@/components/datasets/SchemaViewer"; +import { MetricCard } from "@/components/analytics/MetricCard"; +import { InvestigationTable } from "@/components/investigations/InvestigationTable"; +import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/Tabs"; +import { Card } from "@/components/ui/Card"; +import { + getDataset, + getDatasets, + getDatasetAnomalies, + getDatasetInvestigations, + getDatasetLineage, + getDatasetSchema, +} from "@/lib/api/datasets"; + +export const dynamic = "force-dynamic"; +export const revalidate = 60; + +export default async function DatasetDetailPage({ + params, +}: { + params: Promise<{ datasetId: string }>; +}) { + const { datasetId } = await params; + const dataset = await getDataset(datasetId); + const [investigations, anomalyResult, lineage, schema, allDatasets] = await Promise.all([ + getDatasetInvestigations(datasetId, { limit: 5 }), + getDatasetAnomalies(datasetId), + getDatasetLineage(datasetId), + getDatasetSchema(datasetId), + getDatasets(), + ]); + const anomalies = anomalyResult.anomalies; + + // Create a lookup map from dataset identifier/name to UUID + const datasetLookup = new Map(); + for (const ds of allDatasets) { + datasetLookup.set(ds.name, ds.id); + if (ds.identifier) { + datasetLookup.set(ds.identifier, ds.id); + } + } + + return ( +
+
+

{dataset.name}

+

{dataset.description}

+
+ +
+ + + + +
+ + + + Overview + Schema + Lineage + Investigations + Anomaly History + + + + +

Owner team: {dataset.owner_team_id}

+ +
+
+ + + + + + + + + + + + + View all {dataset.investigation_count} investigations + + + + +
+ {anomalies.map((anomaly) => ( +
+

{anomaly.description}

+

{new Date(anomaly.detected_at).toLocaleString()}

+
+ ))} +
+
+
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/datasets/[datasetId]/schema/page.tsx b/dashboard/src/app/(dashboard)/datasets/[datasetId]/schema/page.tsx new file mode 100644 index 000000000..c63b69640 --- /dev/null +++ b/dashboard/src/app/(dashboard)/datasets/[datasetId]/schema/page.tsx @@ -0,0 +1,18 @@ +import { SchemaViewer } from "@/components/datasets/SchemaViewer"; +import { getDatasetSchema } from "@/lib/api/datasets"; + +export default async function DatasetSchemaPage({ + params, +}: { + params: Promise<{ datasetId: string }>; +}) { + const { datasetId } = await params; + const schema = await getDatasetSchema(datasetId); + + return ( +
+

Schema

+ +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/datasets/page.tsx b/dashboard/src/app/(dashboard)/datasets/page.tsx new file mode 100644 index 000000000..53b180391 --- /dev/null +++ b/dashboard/src/app/(dashboard)/datasets/page.tsx @@ -0,0 +1,18 @@ +import { DatasetTable } from "@/components/datasets/DatasetTable"; +import { getDatasets } from "@/lib/api/datasets"; + +export const dynamic = 'force-dynamic'; +export const revalidate = 60; + +export default async function DatasetsPage() { + const datasets = await getDatasets(); + return ( +
+
+

Dataset Catalog

+

Search and inspect dataset health and lineage.

+
+ +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/home/page.tsx b/dashboard/src/app/(dashboard)/home/page.tsx new file mode 100644 index 000000000..89add4854 --- /dev/null +++ b/dashboard/src/app/(dashboard)/home/page.tsx @@ -0,0 +1,72 @@ +export const dynamic = 'force-dynamic'; +export const revalidate = 60; + +import { Card } from "@/components/ui/Card"; +import { OnboardingChecklist } from "@/components/common/OnboardingChecklist"; +import { HeatmapCalendar } from "@/components/analytics/HeatmapCalendar"; +import { MetricCard } from "@/components/analytics/MetricCard"; +import { LiveInvestigationFeed } from "@/components/realtime/LiveInvestigationFeed"; +import { getActiveInvestigations, getOrgStats, getRecentAnomalies } from "@/lib/api/analytics"; +import { formatCurrency } from "@/lib/utils/formatters"; + +export default async function HomePage() { + const [stats, activeInvestigations, recentAnomalies] = await Promise.all([ + getOrgStats(), + getActiveInvestigations(), + getRecentAnomalies({ limit: 5 }), + ]); + + return ( +
+
+

Executive Overview

+

Live pulse across investigations, SLAs, and spend.

+
+ +
+ + + + +
+ +
+ + + + +
+ {recentAnomalies.map((anomaly) => ( +
+

{anomaly.title}

+

Detected {new Date(anomaly.detected_at).toLocaleString()}

+
+ ))} +
+
+
+ + + item.count)} /> + + + +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/integrations/anomaly-sources/page.tsx b/dashboard/src/app/(dashboard)/integrations/anomaly-sources/page.tsx new file mode 100644 index 000000000..551261d07 --- /dev/null +++ b/dashboard/src/app/(dashboard)/integrations/anomaly-sources/page.tsx @@ -0,0 +1,29 @@ +import { Card } from "@/components/ui/Card"; +import { IntegrationCard } from "@/components/integrations/IntegrationCard"; +import { WebhookTester } from "@/components/integrations/WebhookTester"; + +export default function AnomalySourcesPage() { + return ( +
+
+

Anomaly Sources

+

Connect detection tools to trigger investigations.

+
+
+ + +
+ + + +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/integrations/lineage/page.tsx b/dashboard/src/app/(dashboard)/integrations/lineage/page.tsx new file mode 100644 index 000000000..2e5b63998 --- /dev/null +++ b/dashboard/src/app/(dashboard)/integrations/lineage/page.tsx @@ -0,0 +1,29 @@ +import { Card } from "@/components/ui/Card"; +import { IntegrationCard } from "@/components/integrations/IntegrationCard"; +import { WebhookTester } from "@/components/integrations/WebhookTester"; + +export default function LineageIntegrationsPage() { + return ( +
+
+

Lineage Providers

+

Configure upstream lineage metadata sources.

+
+
+ + +
+ + + +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/integrations/notifications/page.tsx b/dashboard/src/app/(dashboard)/integrations/notifications/page.tsx new file mode 100644 index 000000000..41a780cd0 --- /dev/null +++ b/dashboard/src/app/(dashboard)/integrations/notifications/page.tsx @@ -0,0 +1,33 @@ +import { Card } from "@/components/ui/Card"; +import { IntegrationCard } from "@/components/integrations/IntegrationCard"; +import { Select } from "@/components/ui/Select"; + +export default function NotificationIntegrationsPage() { + return ( +
+
+

Notifications

+

Route investigation updates to the right channels.

+
+
+ + +
+ + + +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/integrations/page.tsx b/dashboard/src/app/(dashboard)/integrations/page.tsx new file mode 100644 index 000000000..401491d4c --- /dev/null +++ b/dashboard/src/app/(dashboard)/integrations/page.tsx @@ -0,0 +1,33 @@ +import Link from "next/link"; +import { IntegrationCard } from "@/components/integrations/IntegrationCard"; + +export default function IntegrationsPage() { + return ( +
+
+

Integration Hub

+

Connect lineage, anomaly sources, and notifications.

+
+
+ Manage} + /> + Manage} + /> + Manage} + /> +
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/knowledge/page.tsx b/dashboard/src/app/(dashboard)/knowledge/page.tsx new file mode 100644 index 000000000..9f4cd475c --- /dev/null +++ b/dashboard/src/app/(dashboard)/knowledge/page.tsx @@ -0,0 +1,40 @@ +import Link from "next/link"; +import { Card } from "@/components/ui/Card"; + +export default function KnowledgePage() { + const entries = [ + { id: "kb-001", title: "Investigating volume drops", category: "Playbook" }, + { id: "kb-002", title: "Root cause: delayed ingestion", category: "Pattern" }, + ]; + + return ( +
+
+

Knowledge Store

+

Team playbooks and learned patterns.

+
+
+ + + Browse tribal knowledge + + + + + Browse learned patterns + + +
+ +
+ {entries.map((entry) => ( +
+

{entry.title}

+

{entry.category}

+
+ ))} +
+
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/knowledge/patterns/page.tsx b/dashboard/src/app/(dashboard)/knowledge/patterns/page.tsx new file mode 100644 index 000000000..bca0315a1 --- /dev/null +++ b/dashboard/src/app/(dashboard)/knowledge/patterns/page.tsx @@ -0,0 +1,21 @@ +import { Card } from "@/components/ui/Card"; + +export default function PatternKnowledgePage() { + const patterns = [ + { id: "pattern-001", title: "Volume drop investigation", uses: 12 }, + { id: "pattern-002", title: "Latency regression", uses: 8 }, + ]; + + return ( +
+

Learned Patterns

+
+ {patterns.map((pattern) => ( + +

Used {pattern.uses} times

+
+ ))} +
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/knowledge/tribal/page.tsx b/dashboard/src/app/(dashboard)/knowledge/tribal/page.tsx new file mode 100644 index 000000000..0f98eaaf5 --- /dev/null +++ b/dashboard/src/app/(dashboard)/knowledge/tribal/page.tsx @@ -0,0 +1,21 @@ +import { Card } from "@/components/ui/Card"; + +export default function TribalKnowledgePage() { + const entries = [ + { id: "tribal-001", title: "Payment feed retry strategy", owner: "Revenue Ops" }, + { id: "tribal-002", title: "Backfill workflow for churn metrics", owner: "Customer Insights" }, + ]; + + return ( +
+

Tribal Knowledge

+
+ {entries.map((entry) => ( + +

Owner: {entry.owner}

+
+ ))} +
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/layout.tsx b/dashboard/src/app/(dashboard)/layout.tsx new file mode 100644 index 000000000..82d99e775 --- /dev/null +++ b/dashboard/src/app/(dashboard)/layout.tsx @@ -0,0 +1,33 @@ +import { Breadcrumbs } from "@/components/layout/Breadcrumbs"; +import { Header } from "@/components/layout/Header"; +import { Sidebar } from "@/components/layout/Sidebar"; +import { KeyboardShortcuts } from "@/components/layout/KeyboardShortcuts"; +import { getTeams } from "@/lib/api/teams"; +import { getCurrentUser } from "@/lib/api/users"; + +export default async function DashboardLayout({ + children, +}: { + children: React.ReactNode; +}) { + const user = await getCurrentUser(); + const teams = await getTeams(); + + return ( +
+ +
+
+
+ +
+
+
+ {children} +
+
+ +
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/org/audit-log/page.tsx b/dashboard/src/app/(dashboard)/org/audit-log/page.tsx new file mode 100644 index 000000000..00651c76e --- /dev/null +++ b/dashboard/src/app/(dashboard)/org/audit-log/page.tsx @@ -0,0 +1,18 @@ +import { AuditLogTable } from "@/components/admin/AuditLogTable"; +import { getAuditLog } from "@/lib/api/admin"; + +export const dynamic = 'force-dynamic'; +export const revalidate = 60; + +export default async function AuditLogPage() { + const events = await getAuditLog(); + return ( +
+
+

Audit Log

+

Every administrative change across the org.

+
+ +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/org/page.tsx b/dashboard/src/app/(dashboard)/org/page.tsx new file mode 100644 index 000000000..32718022c --- /dev/null +++ b/dashboard/src/app/(dashboard)/org/page.tsx @@ -0,0 +1,51 @@ +import Link from "next/link"; +import { Button } from "@/components/ui/Button"; +import { Card } from "@/components/ui/Card"; +import { MetricCard } from "@/components/analytics/MetricCard"; +import { TeamCard } from "@/components/teams/TeamCard"; +import { requirePermission } from "@/lib/auth/permissions"; +import { Permission } from "@/lib/auth/roles"; +import { getOrgUsage } from "@/lib/api/admin"; +import { getOrganization } from "@/lib/api/org"; +import { getTeams } from "@/lib/api/teams"; + +export const dynamic = 'force-dynamic'; +export const revalidate = 60; + +export default async function OrgPage() { + await requirePermission(Permission.ORG_ADMIN); + const [org, teams, usage] = await Promise.all([ + getOrganization(), + getTeams(), + getOrgUsage(), + ]); + + return ( +
+
+
+

{org.name}

+

Plan: {org.plan}

+
+ + + +
+ +
+ + + + +
+ + +
+ {teams.map((team) => ( + + ))} +
+
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/org/settings/page.tsx b/dashboard/src/app/(dashboard)/org/settings/page.tsx new file mode 100644 index 000000000..e6db5aca9 --- /dev/null +++ b/dashboard/src/app/(dashboard)/org/settings/page.tsx @@ -0,0 +1,51 @@ +import { Button } from "@/components/ui/Button"; +import { Card } from "@/components/ui/Card"; +import { Input } from "@/components/ui/Input"; +import { Select } from "@/components/ui/Select"; + +export default function OrgSettingsPage() { + return ( +
+

Organization Settings

+ + +
+ + +
+
+ +
+
+ + +
+ + +
+
+ +
+
+ + +
+ + +
+
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/org/usage/page.tsx b/dashboard/src/app/(dashboard)/org/usage/page.tsx new file mode 100644 index 000000000..0607538d4 --- /dev/null +++ b/dashboard/src/app/(dashboard)/org/usage/page.tsx @@ -0,0 +1,35 @@ +import { UsageChart } from "@/components/admin/UsageChart"; +import { Card } from "@/components/ui/Card"; +import { getUsageSeries } from "@/lib/api/admin"; +import { getUsageMetrics } from "@/lib/api/analytics"; + +export const dynamic = 'force-dynamic'; +export const revalidate = 60; + +export default async function UsagePage() { + const [series, metrics] = await Promise.all([getUsageSeries(), getUsageMetrics()]); + + return ( +
+
+

Usage & Billing

+

Consumption trends and capacity planning.

+
+ + + + + +
+ {metrics.map((metric) => ( + +

{metric.value.toLocaleString()}

+ {typeof metric.delta === "number" && ( +

Delta {metric.delta}%

+ )} +
+ ))} +
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/profile/activity/page.tsx b/dashboard/src/app/(dashboard)/profile/activity/page.tsx new file mode 100644 index 000000000..aa523668a --- /dev/null +++ b/dashboard/src/app/(dashboard)/profile/activity/page.tsx @@ -0,0 +1,26 @@ +import { Card } from "@/components/ui/Card"; +import { getCurrentUser, getUserActivity } from "@/lib/api/users"; +import { formatRelative } from "@/lib/utils/formatters"; + +export default async function ProfileActivityPage() { + const user = await getCurrentUser(); + const { activity } = await getUserActivity(user.id); + + return ( +
+

Activity Log

+ +
+ {activity.map((entry) => ( +
+

{entry.description}

+

+ {formatRelative(entry.timestamp || entry.created_at || "")} +

+
+ ))} +
+
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/profile/api-keys/page.tsx b/dashboard/src/app/(dashboard)/profile/api-keys/page.tsx new file mode 100644 index 000000000..892526a2f --- /dev/null +++ b/dashboard/src/app/(dashboard)/profile/api-keys/page.tsx @@ -0,0 +1,32 @@ +import { Button } from "@/components/ui/Button"; +import { Card } from "@/components/ui/Card"; + +export default function ApiKeysPage() { + const keys = [ + { id: "key-001", name: "Automation", last_used: "2d ago" }, + { id: "key-002", name: "CLI", last_used: "6h ago" }, + ]; + + return ( +
+
+

API Keys

+ +
+ + +
+ {keys.map((key) => ( +
+
+

{key.name}

+

Last used {key.last_used}

+
+ +
+ ))} +
+
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/profile/page.tsx b/dashboard/src/app/(dashboard)/profile/page.tsx new file mode 100644 index 000000000..69640b07d --- /dev/null +++ b/dashboard/src/app/(dashboard)/profile/page.tsx @@ -0,0 +1,81 @@ +import Link from "next/link"; +import { Avatar } from "@/components/ui/Avatar"; +import { Badge } from "@/components/ui/Badge"; +import { Card } from "@/components/ui/Card"; +import { getCurrentUser, getUserActivity, getUserTeams } from "@/lib/api/users"; +import { formatRelative } from "@/lib/utils/formatters"; + +export default async function ProfilePage() { + const user = await getCurrentUser(); + const [activityResult, teams] = await Promise.all([ + getUserActivity(user.id), + getUserTeams(user.id), + ]); + const activity = activityResult.activity; + + return ( +
+
+ +
+

{user.name}

+

{user.email}

+
+ {user.roles.map((role) => ( + + {role} + + ))} +
+
+
+ +
+ +
+ {teams.map((team) => ( + + {team.name} + + ))} +
+
+ + +
+

Investigations triggered: {user.stats.investigations_triggered}

+

Approvals given: {user.stats.approvals_given}

+

Knowledge entries: {user.stats.knowledge_entries}

+
+
+ + +
+ + Notifications & Preferences + + + API Keys + + + Activity Log + +
+
+
+ + +
+ {activity.map((entry) => ( +
+

{entry.description}

+

+ {formatRelative(entry.timestamp || entry.created_at || "")} +

+
+ ))} +
+
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/profile/preferences/page.tsx b/dashboard/src/app/(dashboard)/profile/preferences/page.tsx new file mode 100644 index 000000000..de8f9e99b --- /dev/null +++ b/dashboard/src/app/(dashboard)/profile/preferences/page.tsx @@ -0,0 +1,48 @@ +"use client"; + +import { Card } from "@/components/ui/Card"; +import { Select } from "@/components/ui/Select"; +import { usePreferencesStore } from "@/lib/stores/preferences-store"; +import { useTheme } from "@/lib/theme"; + +export default function PreferencesPage() { + const density = usePreferencesStore((state) => state.density); + const notifications = usePreferencesStore((state) => state.notifications); + const setDensity = usePreferencesStore((state) => state.setDensity); + const setNotifications = usePreferencesStore((state) => state.setNotifications); + const { theme, setTheme } = useTheme(); + + return ( +
+

Preferences

+ +
+ + +
+
+ + + + +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/teams/[teamId]/datasets/page.tsx b/dashboard/src/app/(dashboard)/teams/[teamId]/datasets/page.tsx new file mode 100644 index 000000000..779ce43f3 --- /dev/null +++ b/dashboard/src/app/(dashboard)/teams/[teamId]/datasets/page.tsx @@ -0,0 +1,12 @@ +import { DatasetTable } from "@/components/datasets/DatasetTable"; +import { getTeamDatasets } from "@/lib/api/teams"; + +export default async function TeamDatasetsPage({ params }: { params: { teamId: string } }) { + const datasets = await getTeamDatasets(params.teamId); + return ( +
+

Team Datasets

+ +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/teams/[teamId]/members/page.tsx b/dashboard/src/app/(dashboard)/teams/[teamId]/members/page.tsx new file mode 100644 index 000000000..5f73ffe68 --- /dev/null +++ b/dashboard/src/app/(dashboard)/teams/[teamId]/members/page.tsx @@ -0,0 +1,15 @@ +import { Card } from "@/components/ui/Card"; +import { MemberList } from "@/components/teams/MemberList"; +import { getTeamMembers } from "@/lib/api/teams"; + +export default async function TeamMembersPage({ params }: { params: { teamId: string } }) { + const members = await getTeamMembers(params.teamId); + return ( +
+

Team Members

+ + + +
+ ); +} diff --git a/dashboard/src/app/(dashboard)/teams/[teamId]/page.tsx b/dashboard/src/app/(dashboard)/teams/[teamId]/page.tsx new file mode 100644 index 000000000..a2f7c4bec --- /dev/null +++ b/dashboard/src/app/(dashboard)/teams/[teamId]/page.tsx @@ -0,0 +1,69 @@ +import { Card } from "@/components/ui/Card"; +import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/Tabs"; +import { MetricCard } from "@/components/analytics/MetricCard"; +import { InvestigationTable } from "@/components/investigations/InvestigationTable"; +import { DatasetTable } from "@/components/datasets/DatasetTable"; +import { MemberList } from "@/components/teams/MemberList"; +import { getTeam, getTeamDatasets, getTeamInvestigations, getTeamMembers, getTeamStats } from "@/lib/api/teams"; + +export default async function TeamPage({ params }: { params: { teamId: string } }) { + const team = await getTeam(params.teamId); + const [stats, investigations, datasets, members] = await Promise.all([ + getTeamStats(params.teamId), + getTeamInvestigations(params.teamId, { limit: 10 }), + getTeamDatasets(params.teamId), + getTeamMembers(params.teamId), + ]); + + return ( +
+
+

{team.name}

+

{team.description}

+
+ +
+ + + + +
+ + + + Overview + Investigations + Datasets ({datasets.length}) + Members ({team.member_count}) + + + +
+ + + + + + +
+
+ + + + + + + + + + + + +
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/teams/[teamId]/settings/page.tsx b/dashboard/src/app/(dashboard)/teams/[teamId]/settings/page.tsx new file mode 100644 index 000000000..925ce2bf8 --- /dev/null +++ b/dashboard/src/app/(dashboard)/teams/[teamId]/settings/page.tsx @@ -0,0 +1,26 @@ +import { Button } from "@/components/ui/Button"; +import { Card } from "@/components/ui/Card"; +import { Input } from "@/components/ui/Input"; +import { Select } from "@/components/ui/Select"; +import { getTeam } from "@/lib/api/teams"; + +export default async function TeamSettingsPage({ params }: { params: { teamId: string } }) { + const team = await getTeam(params.teamId); + return ( +
+

{team.name} Settings

+ +
+ + +
+
+ +
+
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/teams/page.tsx b/dashboard/src/app/(dashboard)/teams/page.tsx new file mode 100644 index 000000000..551d5e522 --- /dev/null +++ b/dashboard/src/app/(dashboard)/teams/page.tsx @@ -0,0 +1,23 @@ +import { TeamCard } from "@/components/teams/TeamCard"; +import { getTeams } from "@/lib/api/teams"; + +export const dynamic = 'force-dynamic'; +export const revalidate = 60; + +export default async function TeamsPage() { + const teams = await getTeams(); + + return ( +
+
+

Teams

+

Navigate and manage team workspaces.

+
+
+ {teams.map((team) => ( + + ))} +
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/users/[userId]/page.tsx b/dashboard/src/app/(dashboard)/users/[userId]/page.tsx new file mode 100644 index 000000000..6585c3cf4 --- /dev/null +++ b/dashboard/src/app/(dashboard)/users/[userId]/page.tsx @@ -0,0 +1,62 @@ +import { Avatar } from "@/components/ui/Avatar"; +import { Badge } from "@/components/ui/Badge"; +import { Card } from "@/components/ui/Card"; +import { getUser, getUserActivity, getUserTeams } from "@/lib/api/users"; +import { formatRelative } from "@/lib/utils/formatters"; + +export default async function UserDetailPage({ + params, +}: { + params: Promise<{ userId: string }>; +}) { + const { userId } = await params; + const user = await getUser(userId); + const [activityResult, teams] = await Promise.all([ + getUserActivity(userId), + getUserTeams(userId), + ]); + const activity = activityResult.activity; + + return ( +
+
+ +
+

{user.name}

+

{user.email}

+
+ {user.roles.map((role) => ( + + {role} + + ))} +
+
+
+ +
+ +
+ {teams.map((team) => ( +

+ {team.name} +

+ ))} +
+
+ +
+ {activity.map((entry) => ( +
+

{entry.description}

+

+ {formatRelative(entry.timestamp || entry.created_at || "")} +

+
+ ))} +
+
+
+
+ ); +} diff --git a/dashboard/src/app/(dashboard)/users/page.tsx b/dashboard/src/app/(dashboard)/users/page.tsx new file mode 100644 index 000000000..521564819 --- /dev/null +++ b/dashboard/src/app/(dashboard)/users/page.tsx @@ -0,0 +1,23 @@ +import { Button } from "@/components/ui/Button"; +import { UserTable } from "@/components/admin/UserTable"; +import { requirePermission } from "@/lib/auth/permissions"; +import { Permission } from "@/lib/auth/roles"; +import { getUsers } from "@/lib/api/users"; + +export const dynamic = 'force-dynamic'; +export const revalidate = 60; + +export default async function UsersPage() { + await requirePermission(Permission.USER_MANAGE); + const users = await getUsers(); + + return ( +
+
+

Users

+ +
+ +
+ ); +} diff --git a/dashboard/src/app/api/auth/[...nextauth]/route.ts b/dashboard/src/app/api/auth/[...nextauth]/route.ts new file mode 100644 index 000000000..d7ce0d650 --- /dev/null +++ b/dashboard/src/app/api/auth/[...nextauth]/route.ts @@ -0,0 +1,3 @@ +import { GET, POST } from "@/auth"; + +export { GET, POST }; diff --git a/dashboard/src/app/api/proxy/[...path]/route.ts b/dashboard/src/app/api/proxy/[...path]/route.ts new file mode 100644 index 000000000..d5ceed982 --- /dev/null +++ b/dashboard/src/app/api/proxy/[...path]/route.ts @@ -0,0 +1,82 @@ +import { NextResponse } from "next/server"; +import { auth } from "@/auth"; + +const API_BASE_URL = process.env.NEXT_PUBLIC_API_BASE_URL ?? ""; + +async function secureProxy( + request: Request, + path: string[], + session: any +) { + const target = `${API_BASE_URL}/${path.join("/")}${new URL(request.url).search}`; + const body = request.method === "GET" || request.method === "HEAD" ? undefined : await request.text(); + + // Forward authentication headers to backend + const headers = new Headers(); + headers.set("Content-Type", request.headers.get("content-type") ?? "application/json"); + + if (session?.accessToken) { + headers.set("Authorization", `Bearer ${session.accessToken}`); + } + + if (session?.user?.id) { + headers.set("X-User-ID", session.user.id); + } + + if (session?.user?.role) { + headers.set("X-User-Role", session.user.role); + } + + try { + const response = await fetch(target, { + method: request.method, + headers, + body, + signal: AbortSignal.timeout(30000), // 30 second timeout + }); + + const responseBody = await response.text(); + return new NextResponse(responseBody, { + status: response.status, + headers: { "Content-Type": response.headers.get("content-type") ?? "application/json" }, + }); + } catch (error) { + console.error("Proxy request failed:", error); + return NextResponse.json( + { error: "Proxy request failed" }, + { status: 502 } + ); + } +} + +export async function GET(request: Request, context: { params: { path: string[] } }) { + const session = await auth(); + if (!session?.user) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + return secureProxy(request, context.params.path, session); +} + +export async function POST(request: Request, context: { params: { path: string[] } }) { + const session = await auth(); + if (!session?.user) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + return secureProxy(request, context.params.path, session); +} + +export async function PUT(request: Request, context: { params: { path: string[] } }) { + const session = await auth(); + if (!session?.user) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + return secureProxy(request, context.params.path, session); +} + +export async function DELETE(request: Request, context: { params: { path: string[] } }) { + const session = await auth(); + if (!session?.user) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + return secureProxy(request, context.params.path, session); +} diff --git a/dashboard/src/app/share/[token]/page.tsx b/dashboard/src/app/share/[token]/page.tsx new file mode 100644 index 000000000..68ceaec86 --- /dev/null +++ b/dashboard/src/app/share/[token]/page.tsx @@ -0,0 +1,208 @@ +import { Card } from "@/components/ui/Card"; +import { getSharedInvestigation } from "@/lib/api/share"; + +interface PageProps { + params: Promise<{ token: string }>; +} + +export default async function 
SharedInvestigationPage({ params }: PageProps) { + const { token } = await params; + + let investigation; + let error: string | null = null; + + try { + investigation = await getSharedInvestigation(token); + } catch (err) { + error = err instanceof Error ? err.message : "Failed to load shared investigation"; + } + + if (error || !investigation) { + return ( +
+ +
+

+ {error || "This share link may have expired or does not exist."} +

+ + Learn more about DataDr → + +
+
+
+ ); + } + + // Parse JSON fields + let inputContext: any = {}; + let result: any = {}; + + try { + inputContext = investigation.input_context ? JSON.parse(investigation.input_context) : {}; + } catch (e) { + console.error("Failed to parse input_context:", e); + } + + try { + result = investigation.result ? JSON.parse(investigation.result) : {}; + } catch (e) { + console.error("Failed to parse result:", e); + } + + const isComplete = investigation.status === "completed"; + const statusColor = isComplete ? "text-green-600" : investigation.status === "failed" ? "text-red-600" : "text-yellow-600"; + const datasetList = Array.isArray(inputContext.datasets) + ? inputContext.datasets + : Array.isArray(inputContext.all_datasets) + ? inputContext.all_datasets + : null; + const datasetDisplay = datasetList + ? datasetList.map((entry: any) => entry?.identifier).filter(Boolean).join(", ") + : inputContext.table_name || inputContext.dataset_name || "Unknown"; + + return ( +
+ {/* Header */} +
+
+
+
+

+ Investigation Shared via DataDr +

+

+ Shared by {investigation.user_name || "Unknown"} +

+
+ + Get DataDr Free + +
+
+
+ + {/* Content */} +
+ {/* Status Card */} + +
+
+

Status

+

+ {investigation.status.charAt(0).toUpperCase() + investigation.status.slice(1)} +

+
+
+

Anomaly Type

+

+ {investigation.anomaly_type || "Unknown"} +

+
+
+

Dataset

+

+ {datasetDisplay} +

+
+
+

Created

+

+ {investigation.created_at + ? new Date(investigation.created_at).toLocaleDateString() + : "Unknown"} +

+
+
+
+ + {/* Diagnosis Card */} + {isComplete && result.root_cause && ( + +
+
+

Root Cause

+

{result.root_cause}

+
+ {result.summary && ( +
+

Summary

+

{result.summary}

+
+ )} + {result.confidence !== undefined && ( +
+

Confidence

+
+
+
+
+ + {Math.round(result.confidence * 100)}% + +
+
+ )} +
+ + )} + + {/* In Progress Message */} + {!isComplete && ( + +

+ This investigation is still running. The full diagnosis will be available once complete. +

+
+ )} + + {/* CTA Card */} + +
+

+ DataDr automatically investigates data quality issues in your pipelines, + saving your team hours of manual debugging. +

+ +
+
+
+ + {/* Footer */} +
+

+ Powered by{" "} + + DataDr + + {" "}— AI-powered data quality investigations +

+
+
+ ); +} diff --git a/demo/README.md b/demo/README.md index 422362248..c9094aaed 100644 --- a/demo/README.md +++ b/demo/README.md @@ -1,6 +1,42 @@ -# DataDr Demo Fixtures - -Realistic e-commerce data with pre-baked anomalies for demonstrating DataDr's detection capabilities. +# Dataing Demo Fixtures + +Realistic e-commerce data with pre-baked anomalies for demonstrating Dataing's detection capabilities. + +## Happy Path Demo (NewInvestigation Form) + +After running `just demo`, navigate to http://localhost:3000 and click "New Investigation". Use these values for a working demo: + +### Scenario: NULL Spike in Orders + +| Field | Value | +|-------|-------| +| **Dataset** | Select "E-Commerce Demo" → search for `orders` → select `public.orders` | +| **Anomaly Date** | `2024-01-10` (middle of the anomaly window) | +| **Metric Name** | `null_count` | +| **Expected Value** | `5` | +| **Actual Value** | `200` | +| **Deviation %** | `3900` | +| **Severity** | High | +| **Description** | "Spike in NULL user_id values in the orders table. Started around Jan 9th. Possibly related to mobile app deployment." | + +### Scenario: Volume Drop in Events + +| Field | Value | +|-------|-------| +| **Dataset** | Select "E-Commerce Demo" → search for `events` → select `public.events` | +| **Anomaly Date** | `2024-01-12` | +| **Metric Name** | `row_count` | +| **Expected Value** | `70000` | +| **Actual Value** | `14000` | +| **Deviation %** | `-80` | +| **Severity** | Critical | +| **Description** | "Significant drop in EU event volume. Non-EU traffic appears normal." | + +### Tips for Demo +- The AI will query the database, discover the anomaly pattern, and generate hypotheses +- For NULL spike: it should find that ~40% of orders on days 3-5 have NULL user_id +- For volume drop: it should find that EU events dropped 80% on days 5-6 +- The investigation typically takes 30-60 seconds to complete ## Quick Start diff --git a/demo/docker-compose.demo.yml b/demo/docker-compose.demo.yml index 1f42e5512..cba5a2154 100644 --- a/demo/docker-compose.demo.yml +++ b/demo/docker-compose.demo.yml @@ -4,16 +4,16 @@ services: postgres: image: postgres:16-alpine environment: - POSTGRES_DB: datadr_demo - POSTGRES_USER: datadr - POSTGRES_PASSWORD: datadr + POSTGRES_DB: dataing_demo + POSTGRES_USER: dataing + POSTGRES_PASSWORD: dataing volumes: - demo-pgdata:/var/lib/postgresql/data - ./backend/migrations:/docker-entrypoint-initdb.d:ro ports: - "5432:5432" healthcheck: - test: ["CMD-SHELL", "pg_isready -U datadr -d datadr_demo"] + test: ["CMD-SHELL", "pg_isready -U dataing -d dataing_demo"] interval: 5s timeout: 5s retries: 5 @@ -24,10 +24,10 @@ services: dockerfile: Dockerfile environment: DATADR_DEMO_MODE: "true" - DATABASE_URL: postgresql://datadr:datadr@postgres:5432/datadr_demo - APP_DATABASE_URL: postgresql://datadr:datadr@postgres:5432/datadr_demo + DATABASE_URL: postgresql://dataing:dataing@postgres:5432/dataing_demo + APP_DATABASE_URL: postgresql://dataing:dataing@postgres:5432/dataing_demo DATADR_FIXTURE_PATH: /app/fixtures/null_spike - DATADR_ENCRYPTION_KEY: ${DATADR_ENCRYPTION_KEY:-demo-encryption-key-32-bytes!!} + DATADR_ENCRYPTION_KEY: ${DATADR_ENCRYPTION_KEY:-fj_HDi4E6H02W11I5cJtMB_u7HoUrc8AMN6_L0rqGcg=} ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} volumes: - ./demo/fixtures:/app/fixtures:ro diff --git a/demo/generate.py b/demo/generate.py index 9a64e6616..2417b4288 100755 --- a/demo/generate.py +++ b/demo/generate.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -DataDr Demo Fixtures Generator. 
+Dataing Demo Fixtures Generator. Generates realistic e-commerce data with pre-baked anomalies for demos. Run with: uv run python demo/generate.py @@ -30,8 +30,8 @@ # ============================================================================== # Simulation period: 7 days -SIMULATION_START = date(2024, 1, 8) -SIMULATION_END = date(2024, 1, 14) +SIMULATION_START = date(2026, 1, 8) +SIMULATION_END = date(2026, 1, 14) # Volume targets USER_COUNT = 10_000 @@ -1118,7 +1118,7 @@ def create_manifest( def generate_all_fixtures(): """Generate all fixture sets.""" print("=" * 60) - print("DataDr Demo Fixtures Generator") + print("Dataing Demo Fixtures Generator") print("=" * 60) # Generate baseline data diff --git a/demo/load_duckdb.sql b/demo/load_duckdb.sql index 7ca3b7b4d..d96ca865e 100644 --- a/demo/load_duckdb.sql +++ b/demo/load_duckdb.sql @@ -1,4 +1,4 @@ --- DataDr Demo Fixtures: DuckDB Loading Script +-- Dataing Demo Fixtures: DuckDB Loading Script -- Usage: duckdb demo.db < load_duckdb.sql -- Or: duckdb -c ".read load_duckdb.sql" diff --git a/demo/test_demo.sh b/demo/test_demo.sh index 03e72f432..1bff01bc4 100755 --- a/demo/test_demo.sh +++ b/demo/test_demo.sh @@ -1,10 +1,10 @@ #!/bin/bash -# DataDr Demo Acceptance Test +# Dataing Demo Acceptance Test # Run this script to verify the demo is working correctly set -e -echo "=== DataDr Demo Acceptance Test ===" +echo "=== Dataing Demo Acceptance Test ===" echo "" # Colors for output diff --git a/demo/validate.sql b/demo/validate.sql index ca45a6247..d7ad7069a 100644 --- a/demo/validate.sql +++ b/demo/validate.sql @@ -1,4 +1,4 @@ --- DataDr Demo Fixtures: Validation Queries +-- Dataing Demo Fixtures: Validation Queries -- Run these queries to verify fixtures are correct before demos. -- ============================================================================== diff --git a/demo_working/.gitignore b/demo_working/.gitignore new file mode 100644 index 000000000..1549c0aca --- /dev/null +++ b/demo_working/.gitignore @@ -0,0 +1,24 @@ +# Environment files +.env + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python + +# dbt +dbt/target/ +dbt/dbt_packages/ +dbt/logs/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db diff --git a/docs/prompts/backend/architecture/adapter_pattern.md b/docs/prompts/backend/architecture/adapter_pattern.md new file mode 100644 index 000000000..566c0f81a --- /dev/null +++ b/docs/prompts/backend/architecture/adapter_pattern.md @@ -0,0 +1,329 @@ +Good question. Let me think through your codebase architecture and where adapter patterns would add value. + +## Candidates for Adapter Pattern + +### 1. **LLM Provider Adapter** ✅ High Value + +You're likely hardcoded to Anthropic. But customers will want: +- OpenAI (GPT-4) +- Azure OpenAI (enterprise compliance) +- AWS Bedrock (stay in AWS) +- Local models (Ollama, vLLM for sensitive data) + +``` +┌─────────────────────────────────────────────────────────────┐ +│ LLM ADAPTER LAYER │ +│ │ +│ LLMAdapter (Protocol) │ +│ ├── complete(messages, tools?) -> Response │ +│ ├── stream(messages) -> AsyncIterator[Chunk] │ +│ └── capabilities: supports_tools, supports_vision, etc. 
│ +│ │ +│ Implementations: │ +│ ├── AnthropicAdapter (Claude) │ +│ ├── OpenAIAdapter (GPT-4) │ +│ ├── AzureOpenAIAdapter (Enterprise GPT) │ +│ ├── BedrockAdapter (AWS-hosted Claude/Titan) │ +│ └── OllamaAdapter (Local/self-hosted) │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Why it matters:** Enterprise customers often mandate specific providers for compliance. + +--- + +### 2. **Notification/Alerting Adapter** ✅ High Value + +When DataDr finds an anomaly, where does the alert go? + +``` +┌─────────────────────────────────────────────────────────────┐ +│ NOTIFICATION ADAPTER LAYER │ +│ │ +│ NotificationAdapter (Protocol) │ +│ ├── send_alert(alert: Alert) -> None │ +│ ├── send_report(report: Report) -> None │ +│ └── test_connection() -> bool │ +│ │ +│ Implementations: │ +│ ├── SlackAdapter (Webhook + Bot) │ +│ ├── EmailAdapter (SMTP, SendGrid, SES) │ +│ ├── PagerDutyAdapter (Incidents) │ +│ ├── OpsgenieAdapter (Alerts) │ +│ ├── TeamsAdapter (Microsoft) │ +│ ├── WebhookAdapter (Generic HTTP) │ +│ └── SNSAdapter (AWS) │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Why it matters:** Every company has different alerting infrastructure. + +--- + +### 3. **Secret Store Adapter** ✅ High Value + +You're using Fernet encryption with env var key. Enterprise wants: +- HashiCorp Vault +- AWS Secrets Manager +- GCP Secret Manager +- Azure Key Vault + +``` +┌─────────────────────────────────────────────────────────────┐ +│ SECRET STORE ADAPTER LAYER │ +│ │ +│ SecretStoreAdapter (Protocol) │ +│ ├── get_secret(key: str) -> str │ +│ ├── set_secret(key: str, value: str) -> None │ +│ ├── delete_secret(key: str) -> None │ +│ └── rotate_secret(key: str) -> str │ +│ │ +│ Implementations: │ +│ ├── EnvVarSecretStore (Current - dev/simple) │ +│ ├── VaultAdapter (HashiCorp Vault) │ +│ ├── AWSSecretsAdapter (AWS Secrets Manager) │ +│ ├── GCPSecretsAdapter (GCP Secret Manager) │ +│ └── AzureKeyVaultAdapter (Azure Key Vault) │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Why it matters:** Credential management is a security audit checkbox. + +--- + +### 4. **Storage/Artifact Adapter** ✅ Medium-High Value + +Where do investigation results, reports, exports go? + +``` +┌─────────────────────────────────────────────────────────────┐ +│ STORAGE ADAPTER LAYER │ +│ │ +│ StorageAdapter (Protocol) │ +│ ├── put(key: str, data: bytes) -> str │ +│ ├── get(key: str) -> bytes │ +│ ├── delete(key: str) -> None │ +│ ├── list(prefix: str) -> list[str] │ +│ └── get_signed_url(key: str, expires: int) -> str │ +│ │ +│ Implementations: │ +│ ├── LocalStorageAdapter (Filesystem - dev) │ +│ ├── S3Adapter (AWS) │ +│ ├── GCSAdapter (GCP) │ +│ ├── AzureBlobAdapter (Azure) │ +│ └── MinioAdapter (Self-hosted S3-compatible) │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Why it matters:** Cloud-agnostic deployment, air-gapped environments. + +--- + +### 5. 
**Orchestrator/Scheduler Adapter** ⚠️ Medium Value + +If DataDr needs to trigger or read from existing pipelines: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ ORCHESTRATOR ADAPTER LAYER │ +│ │ +│ OrchestratorAdapter (Protocol) │ +│ ├── list_jobs() -> list[Job] │ +│ ├── get_job_runs(job_id) -> list[Run] │ +│ ├── get_lineage(dataset) -> LineageGraph │ +│ └── trigger_job(job_id) -> Run │ +│ │ +│ Implementations: │ +│ ├── AirflowAdapter (REST API) │ +│ ├── DagsterAdapter (GraphQL) │ +│ ├── PrefectAdapter (REST API) │ +│ ├── DbtCloudAdapter (REST API) │ +│ └── TemporalAdapter (gRPC) │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Why it matters:** Links DataDr to existing data pipelines. Enables "who broke this table?" answers. + +--- + +### 6. **Lineage/Catalog Adapter** ⚠️ Medium Value + +Read lineage from existing catalogs rather than inferring: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ CATALOG ADAPTER LAYER │ +│ │ +│ CatalogAdapter (Protocol) │ +│ ├── get_dataset(urn: str) -> Dataset │ +│ ├── get_lineage(urn: str) -> LineageGraph │ +│ ├── get_owners(urn: str) -> list[Owner] │ +│ ├── search(query: str) -> list[Dataset] │ +│ └── get_quality_rules(urn: str) -> list[Rule] │ +│ │ +│ Implementations: │ +│ ├── DataHubAdapter (GraphQL) │ +│ ├── OpenMetadataAdapter (REST) │ +│ ├── AtlanAdapter (REST) │ +│ ├── AlationAdapter (REST) │ +│ └── OpenLineageAdapter (Marquez backend) │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Why it matters:** Don't reinvent lineage - read from existing investments. + +--- + +### 7. **Auth Provider Adapter** ⚠️ Medium Value + +You probably have auth, but enterprise wants: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ AUTH ADAPTER LAYER │ +│ │ +│ AuthAdapter (Protocol) │ +│ ├── authenticate(credentials) -> User │ +│ ├── validate_token(token) -> User │ +│ ├── get_user_groups(user_id) -> list[Group] │ +│ └── refresh_token(token) -> Token │ +│ │ +│ Implementations: │ +│ ├── LocalAuthAdapter (Username/password - dev) │ +│ ├── OktaAdapter (SAML/OIDC) │ +│ ├── Auth0Adapter (OIDC) │ +│ ├── AzureADAdapter (Microsoft SSO) │ +│ ├── GoogleWorkspaceAdapter (Google SSO) │ +│ └── LDAPAdapter (Enterprise directory) │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Why it matters:** SSO is mandatory for enterprise sales. + +--- + +### 8. **Audit Log Adapter** ⚠️ Lower Value (For Now) + +Where do audit logs go? 
+ +``` +┌─────────────────────────────────────────────────────────────┐ +│ AUDIT LOG ADAPTER LAYER │ +│ │ +│ AuditLogAdapter (Protocol) │ +│ ├── log_event(event: AuditEvent) -> None │ +│ ├── query_events(filter: Filter) -> list[AuditEvent] │ +│ └── export_events(filter: Filter, format: str) -> bytes │ +│ │ +│ Implementations: │ +│ ├── PostgresAuditAdapter (Same DB - simple) │ +│ ├── ElasticsearchAdapter (Searchable logs) │ +│ ├── SplunkAdapter (Enterprise SIEM) │ +│ ├── DatadogAdapter (Observability platform) │ +│ └── S3AuditAdapter (Cheap archival) │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## Priority Ranking + +| Adapter | Priority | Reason | +|---------|----------|--------| +| **Data Source** | ✅ Already planned | Core functionality | +| **LLM Provider** | 🔴 High | Enterprise compliance, cost optimization | +| **Notification** | 🔴 High | Everyone needs alerts somewhere | +| **Secret Store** | 🔴 High | Security audit requirement | +| **Storage** | 🟡 Medium | Cloud flexibility | +| **Auth Provider** | 🟡 Medium | SSO for enterprise | +| **Orchestrator** | 🟡 Medium | Pipeline integration | +| **Catalog** | 🟡 Medium | Lineage integration | +| **Audit Log** | 🟢 Lower | Can start simple | + +--- + +## Suggested Implementation Order + +**Phase 1: Core (Now)** +- Data Source Adapter ← You're doing this + +**Phase 2: Enterprise Essentials (Next)** +- LLM Provider Adapter +- Secret Store Adapter +- Notification Adapter + +**Phase 3: Enterprise Nice-to-Have** +- Auth Provider Adapter (SSO) +- Storage Adapter + +**Phase 4: Integration Layer** +- Orchestrator Adapter +- Catalog Adapter + +--- + +## Unified Adapter Registry Pattern + +You could have a single registry pattern across all adapter types: + +```python +# backend/src/datadr/adapters/registry.py + +class AdapterRegistry: + """Universal adapter registry for all pluggable components.""" + + _registries: dict[str, dict[str, type]] = { + "datasource": {}, + "llm": {}, + "notification": {}, + "secret_store": {}, + "storage": {}, + "auth": {}, + } + + @classmethod + def register( + cls, + category: str, + type_name: str, + adapter_cls: type, + config_schema: dict, + ) -> None: + """Register an adapter implementation.""" + cls._registries[category][type_name] = adapter_cls + + @classmethod + def create(cls, category: str, type_name: str, config: dict) -> Any: + """Create an adapter instance.""" + adapter_cls = cls._registries[category].get(type_name) + if not adapter_cls: + raise ValueError(f"Unknown {category} adapter: {type_name}") + return adapter_cls(**config) + + @classmethod + def list_types(cls, category: str) -> list[str]: + """List registered types for a category.""" + return list(cls._registries[category].keys()) + + +# Usage +AdapterRegistry.register("datasource", "postgresql", PostgresAdapter, PG_CONFIG_SCHEMA) +AdapterRegistry.register("llm", "anthropic", AnthropicAdapter, ANTHROPIC_CONFIG_SCHEMA) +AdapterRegistry.register("notification", "slack", SlackAdapter, SLACK_CONFIG_SCHEMA) +``` + +This gives you a consistent pattern across the entire codebase. Adding a new integration is always: + +1. Create adapter class implementing the protocol +2. Register it with config schema +3. Frontend automatically gets the new option + +Want me to spec out any of these in detail? 
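To make the three-step "add a new integration" flow concrete, here is a minimal sketch of what one `notification` adapter could look like end to end: a `Protocol`, one implementation, and its registration. This is illustrative only, not the actual codebase — the `Alert` type, `SlackAdapter`, the webhook payload shape, and the `httpx` dependency are assumptions, and the registry calls assume the `AdapterRegistry` sketched above.

```python
# Illustrative sketch only. Alert, SlackAdapter, the payload format and the
# httpx dependency are assumptions; AdapterRegistry refers to the registry
# sketched earlier in this document.
from dataclasses import dataclass
from typing import Protocol

import httpx  # assumed async HTTP client; any client with a similar API works


@dataclass(frozen=True)
class Alert:
    title: str
    severity: str
    url: str


class NotificationAdapter(Protocol):
    """The per-category protocol that every implementation must satisfy."""

    async def send_alert(self, alert: Alert) -> None: ...
    async def test_connection(self) -> bool: ...


SLACK_CONFIG_SCHEMA = {
    "type": "object",
    "properties": {"webhook_url": {"type": "string"}},
    "required": ["webhook_url"],
}


class SlackAdapter:
    """Step 1: a concrete class whose __init__ matches its config schema."""

    def __init__(self, webhook_url: str) -> None:
        self._webhook_url = webhook_url

    async def send_alert(self, alert: Alert) -> None:
        # Slack incoming webhooks accept a simple {"text": ...} payload.
        payload = {"text": f"[{alert.severity.upper()}] {alert.title}\n{alert.url}"}
        async with httpx.AsyncClient(timeout=10) as client:
            response = await client.post(self._webhook_url, json=payload)
            response.raise_for_status()

    async def test_connection(self) -> bool:
        try:
            await self.send_alert(Alert("Test alert", "info", "https://example.invalid"))
            return True
        except httpx.HTTPError:
            return False


# Step 2: register it (assumes the AdapterRegistry sketched above).
# AdapterRegistry.register("notification", "slack", SlackAdapter, SLACK_CONFIG_SCHEMA)

# Step 3: callers only need (category, type_name, config) to build one.
# slack = AdapterRegistry.create(
#     "notification", "slack", {"webhook_url": "https://hooks.slack.com/services/..."}
# )
# await slack.send_alert(Alert("NULL spike in orders.user_id", "high", "https://..."))
```

The same shape repeats for every category, which is what keeps the configuration UI purely schema-driven.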
diff --git a/docs/prompts/backend_prompt.md b/docs/prompts/backend/backend_prompt.md similarity index 98% rename from docs/prompts/backend_prompt.md rename to docs/prompts/backend/backend_prompt.md index d792c1d2f..e92c0d538 100644 --- a/docs/prompts/backend_prompt.md +++ b/docs/prompts/backend/backend_prompt.md @@ -1,4 +1,4 @@ -# DataDr v2 Gap Analysis & Implementation Prompt +# Dataing v2 Gap Analysis & Implementation Prompt ## Executive Summary @@ -57,7 +57,7 @@ The current codebase implements a **solid MVP** with clean architecture. However ## Implementation Prompt for LLM -You are implementing features for DataDr v2, an autonomous data quality investigation system. The codebase follows hexagonal architecture with Python/FastAPI backend and React/TypeScript frontend. +You are implementing features for Dataing v2, an autonomous data quality investigation system. The codebase follows hexagonal architecture with Python/FastAPI backend and React/TypeScript frontend. ### Context diff --git a/docs/prompts/backend/context/database_schema_plan.md b/docs/prompts/backend/context/database_schema_plan.md new file mode 100644 index 000000000..b84a75a19 --- /dev/null +++ b/docs/prompts/backend/context/database_schema_plan.md @@ -0,0 +1,190 @@ +# Context Module Cleanup Plan (Pre-Launch) + +## Overview + +Since we're pre-launch, we can do a clean replacement of the old adapter layer with the new unified system. No backward compatibility needed. + +## What to DELETE + +### Files to Remove Completely + +``` +backend/src/dataing/adapters/ +├── db/ +│ ├── duckdb.py # DELETE - replaced by datasource/sql/duckdb.py +│ ├── postgres.py # DELETE - replaced by datasource/sql/postgres.py +│ ├── trino.py # DELETE - replaced by datasource/sql/trino.py +│ └── mock.py # DELETE - use registry-based mocking instead +│ +└── context/ + └── database_context.py # DELETE - replaced by AdapterRegistry +``` + +### Files to KEEP + +``` +backend/src/dataing/adapters/ +├── db/ +│ └── app_db.py # KEEP - this is the app's own metadata DB, not a data source adapter +│ +└── context/ + ├── schema_context.py # KEEP + UPDATE - LLM formatting logic + ├── engine.py # KEEP + UPDATE - investigation orchestration + ├── anomaly_context.py # KEEP - unique functionality + ├── correlation_context.py # KEEP - unique functionality + ├── lineage.py # KEEP - OpenLineage integration + └── query_context.py # KEEP - query tracking +``` + +## Required Updates + +### 1. Update `schema_context.py` + +Replace `SchemaContext` usage with `SchemaResponse`: + +```python +# Before +from dataing.core.domain_types import SchemaContext, TableSchema + +class SchemaContextBuilder: + def build(self, adapter) -> SchemaContext: + ... + +# After +from dataing.adapters.datasource.types import SchemaResponse + +class SchemaContextBuilder: + def build(self, adapter) -> SchemaResponse: + return await adapter.get_schema() + + def format_for_llm(self, schema: SchemaResponse) -> str: + """Format schema as markdown for LLM prompts.""" + lines = [] + for catalog in schema.catalogs: + for db_schema in catalog.schemas: + for table in db_schema.tables: + lines.append(f"## {table.native_path}") + lines.append("") + lines.append("| Column | Type | Nullable |") + lines.append("|--------|------|----------|") + for col in table.columns: + nullable = "Yes" if col.nullable else "No" + lines.append(f"| {col.name} | {col.data_type.value} | {nullable} |") + lines.append("") + return "\n".join(lines) +``` + +### 2. 
Update `engine.py` (ContextEngine) + +Replace `DatabaseContext` with `AdapterRegistry`: + +```python +# Before +from dataing.adapters.context.database_context import DatabaseContext + +class ContextEngine: + def __init__(self, database_context: DatabaseContext): + self._db_context = database_context + + async def get_adapter(self, tenant_id: str, ds_id: str): + return await self._db_context.resolve_adapter(tenant_id, ds_id) + +# After +from dataing.adapters.datasource import AdapterRegistry +from dataing.adapters.db.app_db import AppDatabase + +class ContextEngine: + def __init__(self, app_db: AppDatabase): + self._app_db = app_db + + async def get_adapter(self, tenant_id: str, ds_id: str): + # Get data source config from app_db + ds_config = await self._app_db.get_datasource(tenant_id, ds_id) + + # Create adapter from registry + adapter = AdapterRegistry.create(ds_config["type"], ds_config["config"]) + await adapter.connect() + return adapter +``` + +### 3. Update API Dependencies + +Update `entrypoints/api/deps.py`: + +```python +# Before +from dataing.adapters.context.database_context import DatabaseContext + +def get_database_context() -> DatabaseContext: + return DatabaseContext(app_db=get_app_db()) + +# After +from dataing.adapters.datasource import AdapterRegistry +from dataing.adapters.datasource.base import BaseAdapter + +async def get_adapter(source_type: str, config: dict) -> BaseAdapter: + adapter = AdapterRegistry.create(source_type, config) + await adapter.connect() + return adapter +``` + +## Implementation Steps + +### Step 1: Delete Old Adapters +```bash +rm backend/src/dataing/adapters/db/duckdb.py +rm backend/src/dataing/adapters/db/postgres.py +rm backend/src/dataing/adapters/db/trino.py +rm backend/src/dataing/adapters/db/mock.py +rm backend/src/dataing/adapters/context/database_context.py +``` + +### Step 2: Update schema_context.py +- Change imports to use `SchemaResponse` +- Update `format_for_llm()` to handle nested catalog/schema/table structure +- Remove any `SchemaContext` references + +### Step 3: Update engine.py +- Change imports to use `AdapterRegistry` +- Update adapter creation to use registry pattern +- Remove `DatabaseContext` dependency + +### Step 4: Update API Routes +- Update any routes that use `DatabaseContext` +- Use `AdapterRegistry` directly for adapter creation + +### Step 5: Update Tests +- Remove tests for deleted files +- Update tests for modified files +- All adapter tests should use `AdapterRegistry` + +### Step 6: Clean Up Imports +- Remove unused imports from `__init__.py` files +- Update type hints in `core/interfaces.py` if needed + +## File-by-File Checklist + +| File | Action | Notes | +|------|--------|-------| +| `db/duckdb.py` | DELETE | Replaced by `datasource/sql/duckdb.py` | +| `db/postgres.py` | DELETE | Replaced by `datasource/sql/postgres.py` | +| `db/trino.py` | DELETE | Replaced by `datasource/sql/trino.py` | +| `db/mock.py` | DELETE | Use mock adapters in tests | +| `db/app_db.py` | KEEP | App metadata DB | +| `context/database_context.py` | DELETE | Use `AdapterRegistry` | +| `context/schema_context.py` | UPDATE | Use `SchemaResponse` | +| `context/engine.py` | UPDATE | Use `AdapterRegistry` | +| `context/anomaly_context.py` | KEEP | No changes needed | +| `context/correlation_context.py` | KEEP | No changes needed | +| `context/lineage.py` | KEEP | No changes needed | +| `context/query_context.py` | KEEP | No changes needed | + +## Summary + +Since we're pre-launch: +1. 
**Delete** the old `adapters/db/` layer (except `app_db.py`) +2. **Delete** `database_context.py` - replaced by `AdapterRegistry` +3. **Update** `schema_context.py` and `engine.py` to use new types +4. **Keep** all other context modules (anomaly, correlation, lineage, query) + +The new `adapters/datasource/` layer is the single source of truth for all data source operations. diff --git a/docs/prompts/backend/lineage/github_unification.md b/docs/prompts/backend/lineage/github_unification.md new file mode 100644 index 000000000..33949650f --- /dev/null +++ b/docs/prompts/backend/lineage/github_unification.md @@ -0,0 +1,309 @@ +You've identified a real gap. The data ecosystem has fragmented solutions but no unified standard. + +## Current State of the Art + +| Tool | What It Links | Limitation | +|------|---------------|------------| +| **dbt** | SQL models → lineage → docs | Only covers dbt-managed transforms | +| **OpenLineage** | Job runs → datasets → inputs/outputs | No code linkage, just execution metadata | +| **DataHub** | Schemas + lineage + ownership | Code links are manual annotations | +| **Amundsen** | Discovery + ownership | No native code integration | +| **Marquez** | OpenLineage backend | Same limitations | +| **Great Expectations** | Data quality → docs | No lineage or code | + +**The gap:** None of them treat "the code that produced this table" as a first-class citizen. + +## Why This Is Hard + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ THE MISSING LINK PROBLEM │ +│ │ +│ GitHub ??? Warehouse │ +│ ┌─────────────┐ ┌─────────────┐ │ +│ │ │ │ │ │ +│ │ transforms/ │ │ orders │ │ +│ │ orders.sql │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─▶ │ (table) │ │ +│ │ │ How do we know │ │ │ +│ │ pipelines/ │ this relationship? │ users │ │ +│ │ etl.py │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─▶ │ (table) │ │ +│ │ │ │ │ │ +│ └─────────────┘ └─────────────┘ │ +│ │ +│ Problems: │ +│ 1. Table names in code don't match actual table names (aliases, envs) │ +│ 2. Dynamic SQL - table names constructed at runtime │ +│ 3. Multiple repos can write to same table │ +│ 4. Orchestrators (Airflow) add indirection │ +│ 5. No standard metadata format │ +│ │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +## Existing Standards (Partial Solutions) + +### 1. OpenLineage + +The closest thing to a standard. Captures lineage at **runtime**: + +```json +{ + "eventType": "COMPLETE", + "job": { + "namespace": "my-airflow", + "name": "etl_orders" + }, + "inputs": [{"namespace": "postgres", "name": "raw.events"}], + "outputs": [{"namespace": "snowflake", "name": "analytics.orders"}], + "run": { + "facets": { + "sourceCode": { + "sourceCodeLocation": "https://github.com/org/repo/blob/main/dags/etl.py" + } + } + } +} +``` + +**Limitation:** Only captures what ran, not what *could* run. And `sourceCodeLocation` is a facet, not a core field. + +### 2. dbt Manifest + +dbt's `manifest.json` has the richest code ↔ data linkage: + +```json +{ + "nodes": { + "model.project.orders": { + "database": "analytics", + "schema": "public", + "name": "orders", + "original_file_path": "models/orders.sql", + "depends_on": {"nodes": ["source.project.raw.events"]} + } + } +} +``` + +**Limitation:** Only works for dbt models. Doesn't cover Python, Spark, etc. + +### 3. 
DataHub URNs + +DataHub uses URNs to identify everything: + +``` +urn:li:dataset:(urn:li:dataPlatform:snowflake,analytics.orders,PROD) +urn:li:dataJob:(urn:li:dataFlow:(airflow,etl_pipeline,PROD),orders_task) +``` + +You can link a dataset to a GitHub file via custom properties, but it's not standardized. + +## What a Real Standard Would Look Like + +```yaml +# .datalink.yaml (proposed standard - doesn't exist yet) +version: "1.0" + +datasets: + - urn: "snowflake://analytics.public.orders" + + # Code that produces this dataset + producers: + - repo: "github.com/company/data-pipelines" + path: "transforms/orders.sql" + ref: "main" # or commit SHA + type: "dbt_model" + + # Code that consumes this dataset + consumers: + - repo: "github.com/company/analytics-api" + path: "src/queries/orders.py" + ref: "main" + type: "python" + + # Lineage + upstream: + - "snowflake://raw.public.events" + - "snowflake://raw.public.users" + + downstream: + - "snowflake://analytics.public.order_metrics" + + # Quality + quality: + - repo: "github.com/company/data-tests" + path: "tests/orders_test.yaml" + type: "great_expectations" +``` + +## Practical Approaches Today + +### Option A: Convention-Based (Simplest) + +Enforce naming conventions that make linkage obvious: + +``` +repo: data-warehouse/ +├── models/ +│ └── analytics/ +│ └── orders.sql → analytics.orders (table) +│ └── users.sql → analytics.users (table) +``` + +Table name = file path. No metadata needed. + +**dbt does this.** That's why it works. + +### Option B: Annotations in Code + +Embed metadata in SQL comments or Python docstrings: + +```sql +-- @dataset: snowflake://analytics.public.orders +-- @upstream: snowflake://raw.public.events, snowflake://raw.public.users +-- @owner: data-team@company.com +-- @repo: github.com/company/pipelines/blob/main/transforms/orders.sql + +SELECT ... +``` + +Parse these annotations at build/deploy time. + +### Option C: Sidecar Metadata Files + +Each SQL/Python file has a companion `.meta.yaml`: + +``` +transforms/ +├── orders.sql +├── orders.meta.yaml ← Metadata sidecar +├── users.sql +└── users.meta.yaml +``` + +```yaml +# orders.meta.yaml +dataset: snowflake://analytics.public.orders +upstream: + - snowflake://raw.public.events +repo_path: transforms/orders.sql +tests: tests/orders_test.yaml +``` + +### Option D: Centralized Registry + +Single `catalog.yaml` at repo root: + +```yaml +# catalog.yaml +datasets: + analytics.orders: + source: transforms/orders.sql + upstream: [raw.events, raw.users] + tests: tests/orders.yaml + + analytics.users: + source: transforms/users.sql + upstream: [raw.users] +``` + +**OpenMetadata and DataHub** support importing from files like this. + +## What I'd Recommend for DataDr + +Since you're building an investigation tool, you could **become the linker**: + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ DATADR AS THE LINK │ +│ │ +│ When user investigates a table: │ +│ │ +│ 1. Query schema from warehouse (you already do this) │ +│ │ +│ 2. Search for code references: │ +│ - GitHub API: search for table name in connected repos │ +│ - Parse dbt manifest.json if available │ +│ - Parse Airflow DAGs if available │ +│ │ +│ 3. Query lineage: │ +│ - OpenLineage/Marquez if available │ +│ - dbt manifest if available │ +│ - Static analysis of SQL if nothing else │ +│ │ +│ 4. 
Present unified view: │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ orders (Snowflake) │ │ +│ │ │ │ +│ │ Code: github.com/co/pipelines/transforms/orders.sql │ │ +│ │ Lineage: raw.events → orders → order_metrics │ │ +│ │ Tests: github.com/co/pipelines/tests/orders_test.yaml │ │ +│ │ Last Run: 2024-01-15 10:30 (Airflow: etl_orders) │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ +│ │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +### Implementation Sketch + +```python +class CodeLinker: + """Links datasets to their source code.""" + + async def find_code_references( + self, + dataset: str, # e.g., "analytics.orders" + ) -> list[CodeReference]: + """ + Search for code that references this dataset. + + Strategies (in order of reliability): + 1. dbt manifest - authoritative for dbt models + 2. Explicit annotations - if you define a standard + 3. GitHub code search - fuzzy but catches everything + 4. Static SQL analysis - parse CREATE TABLE statements + """ + references = [] + + # Strategy 1: dbt manifest + if self._dbt_manifest: + ref = self._search_dbt_manifest(dataset) + if ref: + references.append(ref) + + # Strategy 2: GitHub search + github_refs = await self._search_github(dataset) + references.extend(github_refs) + + return self._deduplicate_and_rank(references) + + async def _search_github(self, dataset: str) -> list[CodeReference]: + """Search GitHub for references to this dataset.""" + # GitHub code search API + query = f'"{dataset}" extension:sql extension:py' + results = await self._github.search_code(query) + + return [ + CodeReference( + repo=r.repository.full_name, + path=r.path, + url=r.html_url, + snippet=r.text_matches[0].fragment if r.text_matches else None, + confidence=0.7, # Lower than dbt manifest + ) + for r in results + ] +``` + +## The Opportunity + +There's no "ODBC for data lineage + code." The company that builds this becomes essential infrastructure. + +**What would need to exist:** + +1. **Standard URN format** for datasets (OpenLineage is close) +2. **Standard annotation format** for embedding in code +3. **Standard API** for querying lineage + code links +4. **Connectors** for dbt, Airflow, Dagster, Spark, etc. + +DataHub and OpenLineage are trying, but neither has won. If DataDr could automatically surface "here's the code that built this broken table" during an investigation, that's a killer feature nobody else has. diff --git a/docs/prompts/demo_prompt.md b/docs/prompts/demo_prompt.md index 19ef1bbf1..10b1cd93a 100644 --- a/docs/prompts/demo_prompt.md +++ b/docs/prompts/demo_prompt.md @@ -1,10 +1,10 @@ -# DataDr Demo Fixtures: Technical Specification +# Dataing Demo Fixtures: Technical Specification ## Implementation Directive **TIMELINE:** 2-3 days maximum. This is a demo, not a product. Resist the urge to overengineer. -**GOAL:** Generate realistic e-commerce data with pre-baked anomalies that DataDr can detect, producing compelling "aha moments" in demos. +**GOAL:** Generate realistic e-commerce data with pre-baked anomalies that Dataing can detect, producing compelling "aha moments" in demos. **OUTPUT:** Parquet files loadable into DuckDB/PostgreSQL/Trino that look like real production data. @@ -221,7 +221,7 @@ Hourly Distribution (UTC): ## 3. Anomaly Scenarios -Each scenario is a separate fixture set. DataDr should detect each anomaly type. +Each scenario is a separate fixture set. Dataing should detect each anomaly type. 
### 3.1 Scenario: NULL Spike @@ -752,21 +752,21 @@ ORDER BY 1; ```bash # Create schema -psql -d datadr_demo -f schema.sql +psql -d dataing_demo -f schema.sql # Load data (requires parquet_fdw or convert to CSV first) # Option 1: Use DuckDB to convert duckdb -c "COPY (SELECT * FROM 'fixtures/null_spike/orders.parquet') TO 'orders.csv' (HEADER, DELIMITER ',');" # Option 2: Use pgloader -pgloader parquet://fixtures/null_spike/orders.parquet postgresql:///datadr_demo +pgloader parquet://fixtures/null_spike/orders.parquet postgresql:///dataing_demo ``` ### 6.3 Trino (For Production-Like Demo) ```sql -- Create external tables pointing to parquet files -CREATE SCHEMA IF NOT EXISTS demo WITH (location = 's3://datadr-demo-fixtures/'); +CREATE SCHEMA IF NOT EXISTS demo WITH (location = 's3://dataing-demo-fixtures/'); CREATE TABLE demo.orders ( order_id VARCHAR, @@ -776,7 +776,7 @@ CREATE TABLE demo.orders ( -- ... ) WITH ( - external_location = 's3://datadr-demo-fixtures/null_spike/orders/', + external_location = 's3://dataing-demo-fixtures/null_spike/orders/', format = 'PARQUET' ); ``` @@ -884,8 +884,8 @@ ORDER BY 1, 2; - [ ] Create manifest.json generator - [ ] Write DuckDB loading script - [ ] Write PostgreSQL loading script (if needed) -- [ ] Run DataDr against each fixture, verify detection -- [ ] Document demo script ("At this point, DataDr will detect...") +- [ ] Run Dataing against each fixture, verify detection +- [ ] Document demo script ("At this point, Dataing will detect...") - [ ] Create demo video script/storyboard --- @@ -898,21 +898,21 @@ ORDER BY 1, 2; ### Slide 2: "Live Demo" -> "Let me show you DataDr detecting a real issue in this e-commerce dataset." +> "Let me show you Dataing detecting a real issue in this e-commerce dataset." -**Action:** Load `null_spike` fixture, run DataDr investigation. +**Action:** Load `null_spike` fixture, run Dataing investigation. ### Slide 3: "Detection" -> "DataDr identified a NULL spike in `orders.user_id` that started on January 10th. 41% of orders are affected." +> "Dataing identified a NULL spike in `orders.user_id` that started on January 10th. 41% of orders are affected." -**Show:** DataDr UI with detection result. +**Show:** Dataing UI with detection result. ### Slide 4: "Root Cause" > "It correlated this with mobile traffic and identified the likely cause: the mobile app checkout flow isn't passing user context." -**Show:** DataDr root cause analysis. +**Show:** Dataing root cause analysis. ### Slide 5: "Impact" @@ -922,7 +922,7 @@ ORDER BY 1, 2; ### Slide 6: "Call to Action" -> "DataDr would have caught this in minutes, not days. Want to see it on your data?" +> "Dataing would have caught this in minutes, not days. Want to see it on your data?" --- @@ -931,7 +931,7 @@ ORDER BY 1, 2; ```toml # pyproject.toml [project] -name = "datadr-fixtures" +name = "dataing-fixtures" version = "0.1.0" dependencies = [ "polars>=0.20.0", # Fast DataFrame operations @@ -950,7 +950,7 @@ dev = [ ## Final Notes -**Remember:** This is a demo, not a product. The goal is to show DataDr detecting problems in realistic-looking data. Don't spend time on: +**Remember:** This is a demo, not a product. The goal is to show Dataing detecting problems in realistic-looking data. Don't spend time on: - Perfect statistical distributions - Edge cases that won't appear in demos @@ -966,7 +966,7 @@ dev = [ Yes, `./demo` at the root is perfect. Clean and obvious. 
``` -datadr/ +dataing/ ├── backend/ ├── frontend/ ├── demo/ @@ -1007,7 +1007,7 @@ cd demo && python generate.py && cd ../backend && make run # Makefile (root) demo: cd demo && uv run generate.py - cd backend && uv run python -m datadr.cli investigate --source duckdb://demo/fixtures/null_spike + cd backend && uv run python -m dataing.cli investigate --source duckdb://demo/fixtures/null_spike ``` This keeps the demo self-contained without polluting your core codebase. When you're ready to ship, you can even exclude `demo/` from the production Docker image. diff --git a/docs/prompts/demo_prompt_2.md b/docs/prompts/demo_prompt_2.md index aa0d8beb0..2849d27c4 100644 --- a/docs/prompts/demo_prompt_2.md +++ b/docs/prompts/demo_prompt_2.md @@ -1,4 +1,4 @@ -# DataDr Demo Integration: Technical Specification +# Dataing Demo Integration: Technical Specification ## Acceptance Criteria @@ -150,10 +150,10 @@ DuckDBConfig: ### 2.4 Implementation Skeleton ```python -# Location: backend/src/datadr/adapters/connectors/duckdb_connector.py +# Location: backend/src/dataing/adapters/connectors/duckdb_connector.py """ -DuckDB Connector for DataDr. +DuckDB Connector for Dataing. Supports two modes: 1. Parquet directory: Auto-registers all .parquet files as views @@ -211,7 +211,7 @@ Always read-only for safety. Add DuckDB to the connector registry: ```python -# Location: backend/src/datadr/adapters/connectors/__init__.py +# Location: backend/src/dataing/adapters/connectors/__init__.py CONNECTOR_REGISTRY = { "postgresql": PostgresConnector, @@ -238,12 +238,12 @@ On demo startup, we need to: ### 3.2 Seed Data Structure ```python -# Location: backend/src/datadr/demo/seed.py +# Location: backend/src/dataing/demo/seed.py """ Demo seed data. -Run with: python -m datadr.demo.seed +Run with: python -m dataing.demo.seed Or automatically on startup when DATADR_DEMO_MODE=true """ @@ -380,8 +380,8 @@ demo-fixtures: # Start backend in demo mode demo-backend: DATADR_DEMO_MODE=true \ - DATADR_DB_URL=postgresql://localhost:5432/datadr_demo \ - cd backend && uv run python -m datadr.main & + DATADR_DB_URL=postgresql://localhost:5432/dataing_demo \ + cd backend && uv run python -m dataing.main & # Start frontend demo-frontend: @@ -390,7 +390,7 @@ demo-frontend: # Clean demo data demo-clean: rm -rf demo/fixtures/*/ - dropdb datadr_demo --if-exists + dropdb dataing_demo --if-exists ``` ### 4.2 Docker Compose (Alternative) @@ -404,15 +404,15 @@ services: postgres: image: postgres:16-alpine environment: - POSTGRES_DB: datadr_demo - POSTGRES_USER: datadr - POSTGRES_PASSWORD: datadr + POSTGRES_DB: dataing_demo + POSTGRES_USER: dataing + POSTGRES_PASSWORD: dataing volumes: - demo-pgdata:/var/lib/postgresql/data ports: - "5432:5432" healthcheck: - test: ["CMD-SHELL", "pg_isready -U datadr -d datadr_demo"] + test: ["CMD-SHELL", "pg_isready -U dataing -d dataing_demo"] interval: 5s timeout: 5s retries: 5 @@ -423,7 +423,7 @@ services: dockerfile: Dockerfile environment: DATADR_DEMO_MODE: "true" - DATADR_DB_URL: postgresql://datadr:datadr@postgres:5432/datadr_demo + DATADR_DB_URL: postgresql://dataing:dataing@postgres:5432/dataing_demo DATADR_FIXTURE_PATH: /app/fixtures volumes: - ./demo/fixtures:/app/fixtures:ro @@ -461,10 +461,10 @@ volumes: ### 4.3 Backend Demo Mode ```python -# Location: backend/src/datadr/main.py +# Location: backend/src/dataing/main.py import os -from datadr.demo.seed import seed_demo_data +from dataing.demo.seed import seed_demo_data async def lifespan(app: FastAPI): """Application lifespan handler.""" @@ -568,7 +568,7 
@@ make demo **[0:00] Introduction** -> "Let me show you how DataDr works. I've got a sample e-commerce dataset here - 7 days of orders, events, user data." +> "Let me show you how Dataing works. I've got a sample e-commerce dataset here - 7 days of orders, events, user data." **[0:30] Show Data Source** @@ -584,13 +584,13 @@ make demo **[1:30] While Running** -> "DataDr is now analyzing the orders table. It's looking at schema, data distributions, temporal patterns, checking for anomalies." +> "Dataing is now analyzing the orders table. It's looking at schema, data distributions, temporal patterns, checking for anomalies." *Show the investigation progress indicator* **[2:00] Results** -> "Here we go. DataDr found a NULL spike in the user_id column. 41% of orders from January 10th to 12th have no user ID." +> "Here we go. Dataing found a NULL spike in the user_id column. 41% of orders from January 10th to 12th have no user ID." *Show results page with:* - *Pattern type: NULL_SPIKE* @@ -605,7 +605,7 @@ make demo **[3:00] Impact** -> "Without DataDr, you'd find this when your marketing attribution dashboard shows weird numbers in 3 days. With DataDr, you'd catch it in minutes." +> "Without Dataing, you'd find this when your marketing attribution dashboard shows weird numbers in 3 days. With Dataing, you'd catch it in minutes." **[3:30] Close** @@ -616,10 +616,10 @@ make demo ## 7. Directory Structure ``` -datadr/ +dataing/ ├── backend/ │ └── src/ -│ └── datadr/ +│ └── dataing/ │ ├── adapters/ │ │ └── connectors/ │ │ ├── __init__.py # Registry @@ -714,10 +714,10 @@ datadr/ ```bash # Check if seed ran -psql -d datadr_demo -c "SELECT * FROM datasources WHERE id LIKE 'demo%';" +psql -d dataing_demo -c "SELECT * FROM datasources WHERE id LIKE 'demo%';" # If empty, re-run seed -DATADR_DEMO_MODE=true python -m datadr.demo.seed +DATADR_DEMO_MODE=true python -m dataing.demo.seed ``` ### "Can't connect to DuckDB" @@ -787,7 +787,7 @@ Run this after implementation to verify demo works: #!/bin/bash set -e -echo "=== DataDr Demo Acceptance Test ===" +echo "=== Dataing Demo Acceptance Test ===" # 1. Clean slate echo "[1/7] Cleaning previous state..." 
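For reference, a minimal sketch of how the demo-mode seeding described in section 4.3 could be wired into the FastAPI lifespan. The `DATADR_DEMO_MODE` flag and the `dataing.demo.seed.seed_demo_data` import come from the spec above; the `asynccontextmanager` wiring and the assumption that `seed_demo_data()` is async, takes no arguments, and is idempotent are mine, not the implemented code.

```python
# Sketch only: seed demo data on startup when DATADR_DEMO_MODE=true.
# Assumes seed_demo_data() is async, argument-free, and safe to re-run
# (it should no-op if the demo datasources already exist).
import os
from contextlib import asynccontextmanager

from fastapi import FastAPI

from dataing.demo.seed import seed_demo_data


@asynccontextmanager
async def lifespan(app: FastAPI):
    if os.getenv("DATADR_DEMO_MODE", "").lower() == "true":
        await seed_demo_data()
    yield


app = FastAPI(lifespan=lifespan)
```

Running the acceptance test script after startup should then find the seeded `demo*` datasources via the troubleshooting query shown above.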
diff --git a/docs/prompts/ui/dark_mode_prompt.md b/docs/prompts/ui/dark_mode_prompt.md index e6c9e53ad..f1a443bc5 100644 --- a/docs/prompts/ui/dark_mode_prompt.md +++ b/docs/prompts/ui/dark_mode_prompt.md @@ -1,4 +1,4 @@ -Based on the shadcn/ui documentation, here's how to add dark mode to DataDr's Vite + React frontend: +Based on the shadcn/ui documentation, here's how to add dark mode to Dataing's Vite + React frontend: Dark Mode Implementation for Vite Step 1: Create Theme Provider File: frontend/src/components/theme-provider.tsx @@ -27,7 +27,7 @@ const ThemeProviderContext = createContext(initialState) export function ThemeProvider({ children, defaultTheme = "system", - storageKey = "datadr-ui-theme", + storageKey = "dataing-ui-theme", ...props }: ThemeProviderProps) { const [theme, setTheme] = useState( @@ -133,7 +133,7 @@ ReactDOM.createRoot(document.getElementById('root')!).render( - + @@ -144,7 +144,7 @@ ReactDOM.createRoot(document.getElementById('root')!).render( , ) Step 4: Add Toggle to Header/Sidebar -Option A: In the header (recommended for DataDr) +Option A: In the header (recommended for Dataing) File: frontend/src/App.tsx (update the header section) typescriptimport { ModeToggle } from '@/components/mode-toggle' import { Separator } from '@/components/ui/separator' diff --git a/docs/prompts/ui/frontend_prompt.md b/docs/prompts/ui/frontend_prompt.md index a9a2a2ea9..27f04d79c 100644 --- a/docs/prompts/ui/frontend_prompt.md +++ b/docs/prompts/ui/frontend_prompt.md @@ -1,9 +1,9 @@ ## Frontend Implementation Details -# DataDr v2 Frontend Implementation Guide +# Dataing v2 Frontend Implementation Guide ## Using shadcn/ui (Manual Installation) -This document provides a complete implementation prompt for building the DataDr v2 frontend using shadcn/ui components with manual installation, following best practices for React + Vite + TypeScript. +This document provides a complete implementation prompt for building the Dataing v2 frontend using shadcn/ui components with manual installation, following best practices for React + Vite + TypeScript. --- @@ -774,7 +774,7 @@ export function AppSidebar() {
- DataDr + Dataing {tenant?.name ?? 'Data Quality'} @@ -2084,7 +2084,7 @@ export function DataSourceForm({ open, onOpenChange }: DataSourceFormProps) { onChange={(e) => setFormData({ ...formData, username: e.target.value }) } - placeholder="datadr" + placeholder="dataing" required />
diff --git a/docs/roadmap/communal-learning.md b/docs/roadmap/communal-learning.md index 359b457a6..78d9dbc43 100644 --- a/docs/roadmap/communal-learning.md +++ b/docs/roadmap/communal-learning.md @@ -1,4 +1,4 @@ -DataDr Privacy Layer: Technical SpecificationImplementation Directive for Engineering TeamCRITICAL INSTRUCTION: This specification must be implemented in its entirety. Every component, interface, and security measure described herein is required for production deployment. Do not skip sections. Do not implement partial solutions. Do not defer security measures to "later." The privacy guarantees we advertise to customers depend on complete implementation of this architecture.This document covers: +Dataing Privacy Layer: Technical SpecificationImplementation Directive for Engineering TeamCRITICAL INSTRUCTION: This specification must be implemented in its entirety. Every component, interface, and security measure described herein is required for production deployment. Do not skip sections. Do not implement partial solutions. Do not defer security measures to "later." The privacy guarantees we advertise to customers depend on complete implementation of this architecture.This document covers: Customer-side data collection agent Privacy-preserving transformation layer @@ -24,7 +24,7 @@ Deployment Configuration │ │ │ ┌──────────────┐ ┌──────────────────┐ ┌────────────────────────────┐ │ │ │ │ │ │ │ │ │ -│ │ Customer's │───▶│ DataDr Agent │───▶│ Local Privacy Transform │ │ +│ │ Customer's │───▶│ Dataing Agent │───▶│ Local Privacy Transform │ │ │ │ Data │ │ (Collector) │ │ (DP + ZKP Generation) │ │ │ │ Warehouse │ │ │ │ │ │ │ └──────────────┘ └──────────────────┘ └─────────────┬──────────────┘ │ @@ -73,7 +73,7 @@ Deployment Configuration │ │ │ │ │ └───────────────────────────────────────────────────────────────────────┘ │ │ │ -└──────────────────────────────────────────────────────────────────────────────┘1.2 Data Flow SummaryStageLocationWhat ExistsPrivacy Mechanism1. CollectionCustomerRaw query logs, schemas, metricsNone yet (customer's data)2. Local TransformCustomerAggregated patterns, statisticsLocal DP applied3. ZKP GenerationCustomerValidity proofsCryptographic hiding4. TransmissionNetworkEncrypted packagemTLS, certificate pinning5. StorageDataDrEncrypted contributionsAES-256-GCM, tenant isolation6. AggregationDataDrCross-tenant aggregatesSecure aggregation protocol7. ComputationDataDrGlobal patternsDP with formal (ε,δ) guarantees1.3 Privacy GuaranteesThis system provides the following formally provable guarantees: +└──────────────────────────────────────────────────────────────────────────────┘1.2 Data Flow SummaryStageLocationWhat ExistsPrivacy Mechanism1. CollectionCustomerRaw query logs, schemas, metricsNone yet (customer's data)2. Local TransformCustomerAggregated patterns, statisticsLocal DP applied3. ZKP GenerationCustomerValidity proofsCryptographic hiding4. TransmissionNetworkEncrypted packagemTLS, certificate pinning5. StorageDataingEncrypted contributionsAES-256-GCM, tenant isolation6. AggregationDataingCross-tenant aggregatesSecure aggregation protocol7. ComputationDataingGlobal patternsDP with formal (ε,δ) guarantees1.3 Privacy GuaranteesThis system provides the following formally provable guarantees: Differential Privacy (ε,δ)-guarantee: For ε=1.0, δ=1e-8, the probability of any inference about a single record changes by at most e^ε ≈ 2.718x whether that record is included or not. 
Zero-Knowledge: The ZKP system reveals nothing about the underlying data beyond the validity of the contribution. @@ -81,8 +81,8 @@ Zero-Knowledge: The ZKP system reveals nothing about the underlying data beyond Forward Secrecy: Compromise of current keys does not compromise historical data. Tenant Isolation: One customer's data cannot leak to another customer even with server compromise. -2. Customer-Side Agent2.1 Agent ArchitectureThe DataDr Agent runs within the customer's infrastructure. It MUST be deployed as a containerized service with minimal privileges.┌─────────────────────────────────────────────────────────────┐ -│ DataDr Agent Container │ +2. Customer-Side Agent2.1 Agent ArchitectureThe Dataing Agent runs within the customer's infrastructure. It MUST be deployed as a containerized service with minimal privileges.┌─────────────────────────────────────────────────────────────┐ +│ Dataing Agent Container │ │ │ │ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────┐ │ │ │ │ │ │ │ │ │ @@ -108,7 +108,7 @@ Snowflake BigQuery Redshift Databricks -CRITICAL: Connectors MUST be read-only. The agent MUST NOT have write access to customer data.2.2.1 Connector Interfacepython# datadr_agent/connectors/base.py +CRITICAL: Connectors MUST be read-only. The agent MUST NOT have write access to customer data.2.2.1 Connector Interfacepython# dataing_agent/connectors/base.py from abc import ABC, abstractmethod from dataclasses import dataclass @@ -207,7 +207,7 @@ class BaseConnector(ABC): This is the PRIMARY data collection mechanism. Query logs contain metadata about queries, not the data itself. """ - ...2.2.2 PostgreSQL Connector Implementationpython# datadr_agent/connectors/postgres.py + ...2.2.2 PostgreSQL Connector Implementationpython# dataing_agent/connectors/postgres.py import asyncpg from datetime import datetime @@ -260,12 +260,12 @@ class PostgresConnector(BaseConnector): # This query should fail if we have write access (good!) try: await conn.execute( - "CREATE TEMP TABLE _datadr_write_test (id int)" + "CREATE TEMP TABLE _dataing_write_test (id int)" ) - await conn.execute("DROP TABLE _datadr_write_test") + await conn.execute("DROP TABLE _dataing_write_test") # If we get here, we have write access - this is a configuration error raise SecurityError( - "DataDr agent has write access to database. " + "Dataing agent has write access to database. " "Please configure read-only credentials." ) except asyncpg.InsufficientPrivilegeError: @@ -342,7 +342,7 @@ class PostgresConnector(BaseConnector): return tuple(sorted(tables)) except Exception: # If parsing fails, return empty (don't crash) - return ()2.3 Pattern ExtractorThe Pattern Extractor processes query logs and schema metadata to identify data quality patterns.python# datadr_agent/extraction/pattern_extractor.py + return ()2.3 Pattern ExtractorThe Pattern Extractor processes query logs and schema metadata to identify data quality patterns.python# dataing_agent/extraction/pattern_extractor.py from dataclasses import dataclass from enum import Enum @@ -552,7 +552,7 @@ class PatternExtractor: elif count < 1000000000: return "large" else: - return "huge"3. Local Privacy Transformation3.1 Differential Privacy EngineThe DP Engine applies differential privacy to extracted patterns BEFORE they leave the customer's infrastructure.python# datadr_agent/privacy/dp_engine.py + return "huge"3. 
Local Privacy Transformation3.1 Differential Privacy EngineThe DP Engine applies differential privacy to extracted patterns BEFORE they leave the customer's infrastructure.python# dataing_agent/privacy/dp_engine.py from dataclasses import dataclass from typing import TypeVar, Generic @@ -584,7 +584,7 @@ class LocalDPEngine: "Local" means noise is added on the customer's machine, before data is transmitted. This provides the strongest - privacy guarantee - even DataDr cannot see true values. + privacy guarantee - even Dataing cannot see true values. We use the Laplace mechanism for numeric values and randomized response for categorical values. @@ -695,7 +695,7 @@ class PrivatizedPattern: """ A pattern after differential privacy has been applied. - This is safe to transmit to DataDr servers. + This is safe to transmit to Dataing servers. """ pattern_type: PatternType schema_fingerprint: str @@ -706,7 +706,7 @@ class PrivatizedPattern: # Privacy accounting epsilon_spent: float - delta: float3.2 Privacy Budget ManagerTracks privacy budget consumption to prevent exceeding guarantees.python# datadr_agent/privacy/budget_manager.py + delta: float3.2 Privacy Budget ManagerTracks privacy budget consumption to prevent exceeding guarantees.python# dataing_agent/privacy/budget_manager.py from dataclasses import dataclass, field from datetime import datetime, timedelta @@ -741,7 +741,7 @@ class PrivacyBudgetManager: epsilon_per_period: float = 1.0, delta_per_period: float = 1e-8, period_duration: timedelta = timedelta(days=7), - state_path: Path = Path("/var/lib/datadr/privacy_budget.json"), + state_path: Path = Path("/var/lib/dataing/privacy_budget.json"), ): self._epsilon_per_period = epsilon_per_period self._delta_per_period = delta_per_period @@ -874,7 +874,7 @@ class PrivacyBudgetManager: } with open(self._state_path, "w") as f: - json.dump(data, f, indent=2)4. Secure Transmission Layer4.1 Transport SecurityAll communication between customer agents and DataDr servers uses mTLS with certificate pinning.python# datadr_agent/transport/secure_client.py + json.dump(data, f, indent=2)4. Secure Transmission Layer4.1 Transport SecurityAll communication between customer agents and Dataing servers uses mTLS with certificate pinning.python# dataing_agent/transport/secure_client.py import ssl import aiohttp @@ -890,15 +890,15 @@ class TransportConfig: All certificates should be rotated at least annually. """ - # DataDr server endpoint - server_url: str = "https://ingest.datadr.io" + # Dataing server endpoint + server_url: str = "https://ingest.dataing.io" # Client certificate (for mTLS) - client_cert_path: Path = Path("/etc/datadr/client.crt") - client_key_path: Path = Path("/etc/datadr/client.key") + client_cert_path: Path = Path("/etc/dataing/client.crt") + client_key_path: Path = Path("/etc/dataing/client.key") # Server certificate pinning - # This is the SHA-256 fingerprint of DataDr's server certificate + # This is the SHA-256 fingerprint of Dataing's server certificate # If this doesn't match, connection is refused (prevents MITM) server_cert_fingerprint: str = "sha256//AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" @@ -912,7 +912,7 @@ class TransportConfig: class SecureTransportClient: """ - Secure HTTP client for transmitting privatized data to DataDr. + Secure HTTP client for transmitting privatized data to Dataing. 
Security features: - mTLS (mutual TLS) - both client and server authenticate @@ -968,7 +968,7 @@ class SecureTransportClient: contribution: "ContributionPackage", ) -> "SubmissionReceipt": """ - Submit a privatized contribution to DataDr. + Submit a privatized contribution to Dataing. The contribution is encrypted before transmission (defense in depth - TLS + application-layer encryption). @@ -981,9 +981,9 @@ class SecureTransportClient: headers = { "Content-Type": "application/octet-stream", - "X-DataDr-Tenant": self._tenant_id, - "X-DataDr-Signature": signature, - "X-DataDr-Algorithm": "Ed25519", + "X-Dataing-Tenant": self._tenant_id, + "X-Dataing-Signature": signature, + "X-Dataing-Algorithm": "Ed25519", } # Submit with retries @@ -1023,11 +1023,11 @@ class SecureTransportClient: contribution: "ContributionPackage" ) -> bytes: """ - Encrypt contribution using DataDr's public key. + Encrypt contribution using Dataing's public key. Uses hybrid encryption: - Generate ephemeral X25519 key pair - - Derive shared secret with DataDr's public key + - Derive shared secret with Dataing's public key - Encrypt payload with AES-256-GCM using derived key - Send ephemeral public key + ciphertext """ @@ -1037,7 +1037,7 @@ class SecureTransportClient: from cryptography.hazmat.primitives.kdf.hkdf import HKDF import os - # Load DataDr's public key (embedded in agent) + # Load Dataing's public key (embedded in agent) server_public_key = self._load_server_public_key() # Generate ephemeral key pair @@ -1052,7 +1052,7 @@ class SecureTransportClient: algorithm=hashes.SHA256(), length=32, salt=None, - info=b"datadr-contribution-encryption", + info=b"dataing-contribution-encryption", ) encryption_key = kdf.derive(shared_secret) @@ -1084,7 +1084,7 @@ class SecureTransportClient: # Sign the payload signature = signing_key.sign(payload) - return base64.b64encode(signature).decode()4.2 Contribution Package Formatpython# datadr_agent/transport/contribution.py + return base64.b64encode(signature).decode()4.2 Contribution Package Formatpython# dataing_agent/transport/contribution.py from dataclasses import dataclass from datetime import datetime @@ -1149,7 +1149,7 @@ class ContributionPackage: "upstream_correlations": pattern.upstream_correlations, "epsilon_spent": pattern.epsilon_spent, "delta": pattern.delta, - }5. Server-Side Secure Storage5.1 Storage ArchitectureDataDr stores contributions in an encrypted, tenant-isolated storage layer.┌─────────────────────────────────────────────────────────────────────────────┐ + }5. 
Server-Side Secure Storage5.1 Storage ArchitectureDataing stores contributions in an encrypted, tenant-isolated storage layer.┌─────────────────────────────────────────────────────────────────────────────┐ │ Storage Architecture │ │ │ │ ┌───────────────────────────────────────────────────────────────────────┐ │ @@ -1194,7 +1194,7 @@ class ContributionPackage: │ │ └─────────────────────────────────────────────────────────────────┘ │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ -└──────────────────────────────────────────────────────────────────────────────┘5.2 Storage Service Implementationpython# datadr_server/storage/contribution_store.py +└──────────────────────────────────────────────────────────────────────────────┘5.2 Storage Service Implementationpython# dataing_server/storage/contribution_store.py from dataclasses import dataclass from datetime import datetime, date @@ -1207,9 +1207,9 @@ import os @dataclass(frozen=True) class StorageConfig: """Storage configuration.""" - bucket_prefix: str = "datadr-contributions" + bucket_prefix: str = "dataing-contributions" region: str = "us-east-1" - kms_key_alias: str = "alias/datadr-master" + kms_key_alias: str = "alias/dataing-master" class ContributionStore: """ @@ -1261,10 +1261,10 @@ class ContributionStore: Key=s3_key, Body=nonce + ciphertext, Metadata={ - "x-datadr-tenant": tenant_id, - "x-datadr-contribution-id": contribution_id, - "x-datadr-created-at": contribution.created_at.isoformat(), - "x-datadr-epsilon": str(contribution.total_epsilon_spent), + "x-dataing-tenant": tenant_id, + "x-dataing-contribution-id": contribution_id, + "x-dataing-created-at": contribution.created_at.isoformat(), + "x-dataing-epsilon": str(contribution.total_epsilon_spent), }, ServerSideEncryption="aws:kms", SSEKMSKeyId=self._config.kms_key_alias, @@ -1420,7 +1420,7 @@ class ContributionStore: ) -> None: """Log access for audit trail.""" # Implementation depends on audit system (CloudWatch, Splunk, etc.) - pass5.3 Tenant Isolation Verificationpython# datadr_server/storage/isolation_verifier.py + pass5.3 Tenant Isolation Verificationpython# dataing_server/storage/isolation_verifier.py class TenantIsolationVerifier: """ @@ -1448,7 +1448,7 @@ class TenantIsolationVerifier: results = [] for tenant_id in await self._list_tenants(): - bucket_name = f"datadr-contributions-{tenant_id}" + bucket_name = f"dataing-contributions-{tenant_id}" # Check bucket policy policy = await self._get_bucket_policy(bucket_name) @@ -1485,7 +1485,7 @@ class TenantIsolationVerifier: - Privacy budget is tracked """ # Implementation - pass6. Differential Privacy Computation Engine6.1 Server-Side DP AggregatorThe server aggregates contributions from multiple tenants while maintaining DP guarantees.python# datadr_server/dp/aggregator.py + pass6. 
Differential Privacy Computation Engine6.1 Server-Side DP AggregatorThe server aggregates contributions from multiple tenants while maintaining DP guarantees.python# dataing_server/dp/aggregator.py from dataclasses import dataclass from typing import Sequence, Dict @@ -1685,7 +1685,7 @@ class AggregatedPatternStats: contributor_count: int mean_severity: float metric_means: Dict[str, float] - epsilon_spent: float6.2 Global Privacy Budget Accountingpython# datadr_server/dp/global_budget.py + epsilon_spent: float6.2 Global Privacy Budget Accountingpython# dataing_server/dp/global_budget.py from dataclasses import dataclass from datetime import datetime @@ -1775,10 +1775,10 @@ class GlobalPrivacyBudgetManager: return ( state.total_epsilon_budget - state.total_epsilon_spent, self._delta_per_period, # Delta doesn't compose the same way - )7. Zero-Knowledge Proof System7.1 ZKP Circuit DesignWe use zero-knowledge proofs to verify contribution validity without revealing the underlying data.python# datadr_agent/zkp/circuits.py + )7. Zero-Knowledge Proof System7.1 ZKP Circuit DesignWe use zero-knowledge proofs to verify contribution validity without revealing the underlying data.python# dataing_agent/zkp/circuits.py """ -Zero-Knowledge Proof Circuits for DataDr +Zero-Knowledge Proof Circuits for Dataing We use RISC Zero (https://risczero.com) for ZK proof generation. RISC Zero allows writing circuits in Rust that compile to a ZK-provable VM. @@ -1792,7 +1792,7 @@ We chose RISC Zero because: 1. Write circuits in standard Rust (no DSL) 2. Good performance for our proof sizes 3. Active development and support -"""7.1.1 Contribution Validity Circuit (Rust)This circuit proves that a contribution is valid without revealing the underlying data.rust// datadr_agent/zkp/circuits/contribution_validity/src/main.rs +"""7.1.1 Contribution Validity Circuit (Rust)This circuit proves that a contribution is valid without revealing the underlying data.rust// dataing_agent/zkp/circuits/contribution_validity/src/main.rs //! Zero-Knowledge Proof Circuit for Contribution Validity //! @@ -1807,7 +1807,7 @@ We chose RISC Zero because: //! - Private inputs: raw query logs, schema metadata //! - Public inputs: contribution hash, tenant_id //! -//! The verifier (DataDr server) learns only: +//! The verifier (Dataing server) learns only: //! - The contribution is valid (all checks pass) //! 
- Nothing about the underlying data @@ -2043,7 +2043,7 @@ fn compute_contribution_hash(patterns: &[PrivatizedPattern]) -> [u8; 32] { hasher.update(&pattern.severity.to_le_bytes()); } hasher.finalize().into() -}7.2 Proof Generation (Agent Side)python# datadr_agent/zkp/prover.py +}7.2 Proof Generation (Agent Side)python# dataing_agent/zkp/prover.py from dataclasses import dataclass from pathlib import Path @@ -2054,8 +2054,8 @@ import tempfile @dataclass(frozen=True) class ZKProofConfig: """Configuration for ZK proof generation.""" - circuit_path: Path = Path("/opt/datadr/circuits/contribution_validity") - risc0_path: Path = Path("/opt/datadr/risc0") + circuit_path: Path = Path("/opt/dataing/circuits/contribution_validity") + risc0_path: Path = Path("/opt/dataing/risc0") proof_timeout_seconds: int = 300 class ContributionProver: @@ -2164,7 +2164,7 @@ class ContributionProver: hasher.update(bytes.fromhex(pattern.schema_fingerprint)) hasher.update(pattern.severity.to_bytes(8, 'little')) - return hasher.digest()7.3 Proof Verification (Server Side)python# datadr_server/zkp/verifier.py + return hasher.digest()7.3 Proof Verification (Server Side)python# dataing_server/zkp/verifier.py from dataclasses import dataclass from pathlib import Path @@ -2176,7 +2176,7 @@ import tempfile class ZKVerifyConfig: """Configuration for ZK proof verification.""" circuit_image_id: str # The expected RISC Zero image ID - risc0_path: Path = Path("/opt/datadr/risc0") + risc0_path: Path = Path("/opt/dataing/risc0") verify_timeout_seconds: int = 30 class ContributionVerifier: @@ -2261,7 +2261,7 @@ class VerificationResult: """Result of ZK proof verification.""" valid: bool public_inputs: dict - error: str | None8. Privacy Budget Management8.1 Multi-Level Budget Trackingpython# datadr_server/privacy/budget_tracker.py + error: str | None8. Privacy Budget Management8.1 Multi-Level Budget Trackingpython# dataing_server/privacy/budget_tracker.py from dataclasses import dataclass from datetime import datetime, timedelta @@ -2357,7 +2357,7 @@ class MultiLevelBudgetTracker: await self._spen -# DataDr Privacy Layer: Technical Specification (Part 2) +# Dataing Privacy Layer: Technical Specification (Part 2) ## Continuation from Part 1 @@ -2457,7 +2457,7 @@ When budget is exhausted: ### 9.1 Protocol Overview -DataDr uses federated learning to build models from distributed data without centralizing raw data. +Dataing uses federated learning to build models from distributed data without centralizing raw data. ``` ┌──────────────────────────────────────────────────────────────────────────────┐ @@ -2811,12 +2811,12 @@ This test verifies that aggregates don't reveal individual records. 
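The hunk above only carries the context line for that membership-inference test, so the test body itself is not visible in this diff. As a rough illustration of what such a check can look like — using the same Laplace mechanism the spec's DP engine describes — here is a minimal, self-contained sketch. All names here (`privatize_count`, `attack_accuracy`, the ε = 1.0 budget) are assumptions made for the example, not the spec's actual test code.

```python
# membership_inference_check.py -- illustrative sketch only, not part of this diff.
# privatize_count / attack_accuracy and the epsilon value are hypothetical names
# chosen for the example.
import math
import random

EPSILON = 1.0        # assumed per-release privacy budget for this sketch
SENSITIVITY = 1.0    # one record changes a count query by at most 1
TRIALS = 2000

random.seed(0)  # keep the check deterministic when run as a unit test


def laplace_noise(scale: float) -> float:
    # The difference of two iid Exponential(1/scale) draws is Laplace(0, scale).
    return random.expovariate(1.0 / scale) - random.expovariate(1.0 / scale)


def privatize_count(true_count: int) -> float:
    # Laplace mechanism: noise scale = sensitivity / epsilon.
    return true_count + laplace_noise(SENSITIVITY / EPSILON)


def attack_accuracy(with_target: int, without_target: int) -> float:
    """Accuracy of a threshold attacker guessing whether one target record is present."""
    midpoint = (with_target + without_target) / 2.0
    correct = 0
    for _ in range(TRIALS):
        if privatize_count(with_target) > midpoint:      # world A: record present
            correct += 1
        if privatize_count(without_target) <= midpoint:  # world B: record absent
            correct += 1
    return correct / (2 * TRIALS)


def test_aggregate_resists_membership_inference() -> None:
    acc = attack_accuracy(with_target=1000, without_target=999)
    # epsilon-DP bounds any membership test at e^eps / (1 + e^eps) (~0.73 for eps = 1);
    # allow a little slack for sampling noise in the simulation.
    assert acc <= math.exp(EPSILON) / (1.0 + math.exp(EPSILON)) + 0.05


if __name__ == "__main__":
    test_aggregate_resists_membership_inference()
    print("membership-inference sketch passed")
```

With ε = 1 the simulated attacker's advantage over coin-flipping stays small, which is the property the line above asserts; the spec's real test presumably exercises the server-side aggregator end to end rather than this toy count query.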
**Docker Compose (Customer Side):** ```yaml -# docker-compose.yml for DataDr Agent +# docker-compose.yml for Dataing Agent version: '3.8' services: - datadr-agent: - image: datadr/agent:${VERSION} + dataing-agent: + image: dataing/agent:${VERSION} restart: unless-stopped # Security: Run as non-root @@ -2835,13 +2835,13 @@ services: volumes: # Configuration (read-only) - - ./config:/etc/datadr:ro + - ./config:/etc/dataing:ro # Certificates (read-only) - - ./certs:/etc/datadr/certs:ro + - ./certs:/etc/dataing/certs:ro # State directory (read-write, for budget tracking) - - datadr-state:/var/lib/datadr + - dataing-state:/var/lib/dataing # Temp directory for ZKP computation - type: tmpfs @@ -2856,9 +2856,9 @@ services: - DATADR_DP_DELTA=1e-8 - DATADR_MIN_SAMPLE_SIZE=100 - # Network: Only outbound to DataDr servers + # Network: Only outbound to Dataing servers networks: - - datadr-net + - dataing-net # Resource limits deploy: @@ -2871,17 +2871,17 @@ services: memory: 1G volumes: - datadr-state: + dataing-state: driver: local networks: - datadr-net: + dataing-net: driver: bridge ``` ### 12.2 Server Infrastructure -**Kubernetes Deployment (DataDr Side):** +**Kubernetes Deployment (Dataing Side):** ``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -3002,7 +3002,7 @@ networks: │ │ AGENT CERTIFICATES (mTLS) ││ │ │ ││ │ │ • One certificate per tenant ││ -│ │ • Issued by DataDr CA ││ +│ │ • Issued by Dataing CA ││ │ │ • Rotated annually ││ │ │ • Revocable via CRL/OCSP ││ │ └─────────────────────────────────────────────────────────┘│ @@ -3013,7 +3013,7 @@ networks: │ │ ││ │ │ • Ed25519 key pair per tenant ││ │ │ • Used to sign contribution proofs ││ -│ │ • Public key registered with DataDr ││ +│ │ • Public key registered with Dataing ││ │ └─────────────────────────────────────────────────────────┘│ │ │ └─────────────────────────────────────────────────────────────┘ diff --git a/frontend/README.md b/frontend/README.md index 058b01c12..05ea7b586 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -1,6 +1,6 @@ -# DataDr Frontend +# Dataing Frontend -Web application for DataDr - an AI-powered data quality investigation platform. +Web application for Dataing - an AI-powered data quality investigation platform. ## Setup diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index a1d4ca4b4..223ea5d4a 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -27,7 +27,7 @@ function AppLayout({ children }: { children: React.ReactNode }) {
- DataDr + Dataing
diff --git a/frontend/src/components/layout/app-sidebar.tsx b/frontend/src/components/layout/app-sidebar.tsx index 084c0006b..88ddca40b 100644 --- a/frontend/src/components/layout/app-sidebar.tsx +++ b/frontend/src/components/layout/app-sidebar.tsx @@ -86,7 +86,7 @@ export function AppSidebar() {
- DataDr + Dataing {tenant?.name ?? 'Data Quality'} diff --git a/frontend/src/components/theme-provider.tsx b/frontend/src/components/theme-provider.tsx index 0d5ef75c5..c09c3807f 100644 --- a/frontend/src/components/theme-provider.tsx +++ b/frontend/src/components/theme-provider.tsx @@ -23,7 +23,7 @@ const ThemeProviderContext = createContext(initialState) export function ThemeProvider({ children, defaultTheme = "system", - storageKey = "datadr-ui-theme", + storageKey = "dataing-ui-theme", ...props }: ThemeProviderProps) { const [theme, setTheme] = useState( diff --git a/frontend/src/components/ui/DatePicker.tsx b/frontend/src/components/ui/DatePicker.tsx new file mode 100644 index 000000000..0d5e16b1e --- /dev/null +++ b/frontend/src/components/ui/DatePicker.tsx @@ -0,0 +1,376 @@ +import { useState, useRef, useEffect, useCallback } from 'react' +import { Calendar, ChevronLeft, ChevronRight } from 'lucide-react' +import { cn } from '@/lib/utils' + +export type DatePickerMode = 'single' | 'range' + +export interface DatePickerValue { + mode: DatePickerMode + start: Date | null + end: Date | null +} + +interface QuickSelect { + label: string + getValue: () => DatePickerValue +} + +interface DatePickerProps { + label?: string + value: DatePickerValue + onChange: (value: DatePickerValue) => void + disabled?: boolean + className?: string + quickSelects?: QuickSelect[] + placeholder?: string + required?: boolean + error?: string + hint?: string +} + +const DEFAULT_QUICK_SELECTS: QuickSelect[] = [ + { + label: 'Today', + getValue: () => { + const today = new Date() + today.setHours(0, 0, 0, 0) + return { mode: 'single', start: today, end: today } + }, + }, + { + label: 'Yesterday', + getValue: () => { + const yesterday = new Date() + yesterday.setDate(yesterday.getDate() - 1) + yesterday.setHours(0, 0, 0, 0) + return { mode: 'single', start: yesterday, end: yesterday } + }, + }, + { + label: 'Last 7 Days', + getValue: () => { + const end = new Date() + end.setHours(0, 0, 0, 0) + const start = new Date() + start.setDate(start.getDate() - 7) + start.setHours(0, 0, 0, 0) + return { mode: 'range', start, end } + }, + }, + { + label: 'Last 30 Days', + getValue: () => { + const end = new Date() + end.setHours(0, 0, 0, 0) + const start = new Date() + start.setDate(start.getDate() - 30) + start.setHours(0, 0, 0, 0) + return { mode: 'range', start, end } + }, + }, +] + +const WEEKDAYS = ['Su', 'Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa'] + +function isSameDay(a: Date | null, b: Date | null): boolean { + if (!a || !b) return false + return ( + a.getFullYear() === b.getFullYear() && + a.getMonth() === b.getMonth() && + a.getDate() === b.getDate() + ) +} + +function isInRange(date: Date, start: Date | null, end: Date | null): boolean { + if (!start || !end) return false + const d = date.getTime() + return d >= start.getTime() && d <= end.getTime() +} + +export function DatePicker({ + label, + value, + onChange, + disabled = false, + className, + quickSelects = DEFAULT_QUICK_SELECTS, + placeholder = 'Select date...', + required = false, + error, + hint, +}: DatePickerProps) { + const [isOpen, setIsOpen] = useState(false) + const [viewMonth, setViewMonth] = useState(() => value.start || new Date()) + const containerRef = useRef(null) + + const formatDate = useCallback((date: Date | null): string => { + if (!date) return '' + return date.toLocaleDateString('en-US', { + year: 'numeric', + month: 'short', + day: 'numeric', + }) + }, []) + + const displayValue = useCallback((): string => { + if (!value.start) return 
placeholder + if (value.mode === 'single' || !value.end || isSameDay(value.start, value.end)) { + return formatDate(value.start) + } + return `${formatDate(value.start)} - ${formatDate(value.end)}` + }, [value, placeholder, formatDate]) + + useEffect(() => { + const handleClickOutside = (e: MouseEvent) => { + if (containerRef.current && !containerRef.current.contains(e.target as Node)) { + setIsOpen(false) + } + } + document.addEventListener('mousedown', handleClickOutside) + return () => document.removeEventListener('mousedown', handleClickOutside) + }, []) + + useEffect(() => { + if (value.start) { + setViewMonth(value.start) + } + }, [value.start]) + + const generateCalendarDays = useCallback((): Array<{ date: Date; isCurrentMonth: boolean }> => { + const year = viewMonth.getFullYear() + const month = viewMonth.getMonth() + const firstDay = new Date(year, month, 1) + const lastDay = new Date(year, month + 1, 0) + const startOffset = firstDay.getDay() + + const days: Array<{ date: Date; isCurrentMonth: boolean }> = [] + + for (let i = startOffset - 1; i >= 0; i--) { + const date = new Date(year, month, -i) + date.setHours(0, 0, 0, 0) + days.push({ date, isCurrentMonth: false }) + } + + for (let d = 1; d <= lastDay.getDate(); d++) { + const date = new Date(year, month, d) + date.setHours(0, 0, 0, 0) + days.push({ date, isCurrentMonth: true }) + } + + const remaining = 42 - days.length + for (let i = 1; i <= remaining; i++) { + const date = new Date(year, month + 1, i) + date.setHours(0, 0, 0, 0) + days.push({ date, isCurrentMonth: false }) + } + + return days + }, [viewMonth]) + + const handleDayClick = (date: Date) => { + if (value.mode === 'single') { + onChange({ mode: 'single', start: date, end: date }) + setIsOpen(false) + } else { + if (!value.start || (value.start && value.end)) { + onChange({ mode: 'range', start: date, end: null }) + } else { + const [start, end] = + date < value.start ? [date, value.start] : [value.start, date] + onChange({ mode: 'range', start, end }) + setIsOpen(false) + } + } + } + + const handleModeChange = (newMode: DatePickerMode) => { + if (newMode === 'single') { + onChange({ mode: 'single', start: value.start, end: value.start }) + } else { + onChange({ mode: 'range', start: value.start, end: null }) + } + } + + const navigateMonth = (delta: number) => { + setViewMonth((prev) => new Date(prev.getFullYear(), prev.getMonth() + delta, 1)) + } + + const isSelected = (date: Date): boolean => { + if (value.mode === 'single') { + return isSameDay(date, value.start) + } + return isSameDay(date, value.start) || isSameDay(date, value.end) + } + + const isRangeMiddle = (date: Date): boolean => { + if (value.mode !== 'range' || !value.start || !value.end) return false + return ( + isInRange(date, value.start, value.end) && + !isSameDay(date, value.start) && + !isSameDay(date, value.end) + ) + } + + const isToday = (date: Date): boolean => { + const today = new Date() + return isSameDay(date, today) + } + + return ( +
+ {label && ( + + )} + + + + {(error || hint) && ( +

+ {error || hint} +

+ )} + + {isOpen && ( +
+
+ + +
+ +
+ {quickSelects.map((qs) => ( + + ))} +
+ +
+ + + {viewMonth.toLocaleDateString('en-US', { month: 'long', year: 'numeric' })} + + +
+ +
+ {WEEKDAYS.map((day) => ( +
+ {day} +
+ ))} + + {generateCalendarDays().map(({ date, isCurrentMonth }, i) => { + const selected = isSelected(date) + const rangeMiddle = isRangeMiddle(date) + const today = isToday(date) + + return ( + + ) + })} +
+ + {value.mode === 'range' && value.start && !value.end && ( +

+ Select end date to complete range +

+ )} +
+ )} +
+ ) +} + +export function datePickerValueToString(value: DatePickerValue): string | null { + if (!value.start) return null + const year = value.start.getFullYear() + const month = String(value.start.getMonth() + 1).padStart(2, '0') + const day = String(value.start.getDate()).padStart(2, '0') + return `${year}-${month}-${day}` +} + +export function stringToDatePickerValue(dateStr: string): DatePickerValue { + const date = new Date(dateStr + 'T00:00:00') + return { mode: 'single', start: date, end: date } +} + +export function createEmptyDatePickerValue(): DatePickerValue { + return { mode: 'single', start: null, end: null } +} diff --git a/frontend/src/features/auth/login-page.tsx b/frontend/src/features/auth/login-page.tsx index 3aa6ff8a2..0933bc932 100644 --- a/frontend/src/features/auth/login-page.tsx +++ b/frontend/src/features/auth/login-page.tsx @@ -49,7 +49,7 @@ export function LoginPage() {
- Welcome to DataDr + Welcome to Dataing Enter your API key to access the platform diff --git a/frontend/src/features/datasources/datasource-form.tsx b/frontend/src/features/datasources/datasource-form.tsx index 77f0ee6ad..8a1d018b4 100644 --- a/frontend/src/features/datasources/datasource-form.tsx +++ b/frontend/src/features/datasources/datasource-form.tsx @@ -205,7 +205,7 @@ export function DataSourceForm({ open, onOpenChange }: DataSourceFormProps) { onChange={(e) => setFormData({ ...formData, username: e.target.value }) } - placeholder="datadr" + placeholder="dataing" required /> diff --git a/frontend/src/features/datasources/datasource-page.tsx b/frontend/src/features/datasources/datasource-page.tsx index cf89a0821..76382f56d 100644 --- a/frontend/src/features/datasources/datasource-page.tsx +++ b/frontend/src/features/datasources/datasource-page.tsx @@ -1,5 +1,6 @@ import * as React from 'react' -import { Plus, Database } from 'lucide-react' +import { Plus, Database, AlertCircle, RefreshCw } from 'lucide-react' +import { Link } from 'react-router-dom' import { Button } from '@/components/ui/Button' import { PageHeader } from '@/components/shared/page-header' @@ -12,7 +13,7 @@ import { EmptyState } from '@/components/shared/empty-state' export function DataSourcePage() { const [formOpen, setFormOpen] = React.useState(false) - const { data: datasources, isLoading, error } = useDataSources() + const { data: datasources, isLoading, error, refetch } = useDataSources() if (isLoading) { return ( @@ -24,8 +25,25 @@ export function DataSourcePage() { if (error) { return ( -
- Failed to load data sources +
+
+ +
+

Failed to load data sources

+

+ {error.message || 'Please check your API key and try again.'} +

+
+
+
+ + + + +
) } diff --git a/frontend/src/features/investigation/NewInvestigation.tsx b/frontend/src/features/investigation/NewInvestigation.tsx index f6d0ecf70..3ffede944 100644 --- a/frontend/src/features/investigation/NewInvestigation.tsx +++ b/frontend/src/features/investigation/NewInvestigation.tsx @@ -1,38 +1,440 @@ -import { useState } from 'react' -import { useNavigate } from 'react-router-dom' +import { useState, useEffect, useRef, useMemo, useCallback } from 'react' +import { useNavigate, Link } from 'react-router-dom' import { useCreateInvestigation } from '@/lib/api/investigations' +import { + useDataSources, + useDataSourceSchema, + useTableSearch, + SchemaTable, + SchemaColumn, +} from '@/lib/api/datasources' import { Card, CardHeader, CardTitle, CardContent } from '@/components/ui/Card' import { Button } from '@/components/ui/Button' import { Input } from '@/components/ui/Input' +import { + DatePicker, + DatePickerValue, + datePickerValueToString, + stringToDatePickerValue, +} from '@/components/ui/DatePicker' +import { + ArrowLeft, + Database, + Server, + HardDrive, + Zap, + Table as TableIcon, + Search, + ArrowUpRight, + ArrowDownRight, + Key, + Loader2, + X, + Plus, + AlertCircle, +} from 'lucide-react' + +// Source type icons +const SOURCE_ICONS: Record = { + postgresql: Database, + postgres: Database, + mysql: Server, + trino: Database, + snowflake: Database, + bigquery: Database, + redshift: Database, + duckdb: Database, + mongodb: Database, + dynamodb: Zap, + cassandra: Database, + s3: HardDrive, + gcs: HardDrive, + hdfs: HardDrive, + salesforce: Database, + hubspot: Database, + stripe: Database, +} + +// Schema Viewer Component +function SchemaViewer({ + table, + isLoading, +}: { + table: SchemaTable | null + isLoading: boolean +}) { + if (isLoading) { + return ( +
+ +
+ ) + } + + if (!table) { + return ( +
+

+ Enter a dataset identifier to preview its schema +

+
+ ) + } + + return ( +
+
+ +

{table.name}

+
+ + {table.row_count && ( +
+ + ~{table.row_count.toLocaleString()} rows + +
+ )} + +
+
+ Columns ({table.columns.length}) +
+
+ {table.columns.map((column: SchemaColumn) => ( +
+
+
+ {column.is_primary_key && ( + + )} + {column.name} + {column.nullable && ( + + nullable + + )} +
+
+ + {column.native_type || column.data_type} + +
+ ))} +
+
+
+ ) +} + +// Lineage Panel Component +function LineagePanel({ + tableName, + isLoading, +}: { + tableName: string | null + isLoading: boolean +}) { + const mockLineage = useMemo(() => { + if (!tableName) return { upstream: [], downstream: [] } + const name = tableName.toLowerCase() + const upstream: string[] = [] + const downstream: string[] = [] + + if (name.includes('orders') || name.includes('order')) { + upstream.push('raw.customers', 'raw.products') + downstream.push('analytics.daily_sales', 'reporting.order_summary') + } else if (name.includes('users') || name.includes('customer')) { + upstream.push('raw.signups', 'external.crm_data') + downstream.push('analytics.user_cohorts') + } else if (name.includes('events')) { + upstream.push('raw.clickstream') + downstream.push('analytics.funnels') + } else { + upstream.push(`raw.${name}_source`) + downstream.push(`analytics.${name}_agg`) + } + return { upstream, downstream } + }, [tableName]) + + if (isLoading) { + return ( +
+ +
+ ) + } + + if (!tableName) { + return ( +
+

Select a dataset to view lineage

+
+ ) + } + + return ( +
+
+
+
+ + Upstream +
+
+ {mockLineage.upstream.map((dep) => ( +
+ + + {dep} + +
+ ))} +
+
+ +
+
+ + {tableName} +
+
+ +
+
+ + Downstream +
+
+ {mockLineage.downstream.map((dep) => ( +
+ + + {dep} + +
+ ))} +
+
+
+
+ ) +} + +// Dataset Entry Component +function DatasetEntry({ + datasourceId, + datasourceType, + identifier, + onDatasourceChange, + onIdentifierChange, + onRemove, + canRemove, + disabled, + autoFocus, + dataSources, + onTableSelect, +}: { + datasourceId: string + datasourceType: string + identifier: string + onDatasourceChange: (id: string) => void + onIdentifierChange: (value: string) => void + onRemove: () => void + canRemove: boolean + disabled?: boolean + autoFocus?: boolean + dataSources: Array<{ id: string; name: string; type: string }> + onTableSelect: (table: SchemaTable) => void +}) { + const [isOpen, setIsOpen] = useState(false) + const [searchTerm, setSearchTerm] = useState('') + const inputRef = useRef(null) + const dropdownRef = useRef(null) + + const { data: tables, isLoading } = useTableSearch(datasourceId, searchTerm) + + useEffect(() => { + const timer = setTimeout(() => setSearchTerm(identifier), 300) + return () => clearTimeout(timer) + }, [identifier]) + + useEffect(() => { + function handleClickOutside(event: MouseEvent) { + if ( + dropdownRef.current && + !dropdownRef.current.contains(event.target as Node) && + inputRef.current && + !inputRef.current.contains(event.target as Node) + ) { + setIsOpen(false) + } + } + document.addEventListener('mousedown', handleClickOutside) + return () => document.removeEventListener('mousedown', handleClickOutside) + }, []) + + const handleSelect = (table: SchemaTable) => { + onIdentifierChange(table.native_path) + onTableSelect(table) + setIsOpen(false) + } + + const Icon = SOURCE_ICONS[datasourceType] || Database + + return ( +
+
+ + +
+ +
+ { + onIdentifierChange(e.target.value) + setIsOpen(true) + }} + onFocus={() => setIsOpen(true)} + disabled={disabled || !datasourceId} + autoFocus={autoFocus} + placeholder={datasourceId ? 'Search for table...' : 'Select a data source first'} + className="pr-8" + /> + + + {isOpen && datasourceId && ( +
+ {isLoading ? ( +
+ +
+ ) : tables && tables.length > 0 ? ( +
+ {tables.slice(0, 10).map((table) => ( + + ))} + {tables.length > 10 && ( +
+ +{tables.length - 10} more... +
+ )} +
+ ) : identifier.length >= 2 ? ( +
No tables found
+ ) : ( +
+ Type at least 2 characters to search... +
+ )} +
+ )} +
+ + +
+ ) +} export function NewInvestigation() { const navigate = useNavigate() const createInvestigation = useCreateInvestigation() + const [selectedTable, setSelectedTable] = useState(null) + const [datasets, setDatasets] = useState([ + { id: crypto.randomUUID(), datasourceId: '', identifier: '' }, + ]) + const [anomalyDate, setAnomalyDate] = useState(() => + stringToDatePickerValue(new Date().toISOString().split('T')[0]) + ) const [formData, setFormData] = useState({ - dataset_id: '', metric_name: 'row_count', expected_value: '', actual_value: '', deviation_pct: '', - anomaly_date: new Date().toISOString().split('T')[0], severity: 'medium', + description: '', }) + const { data: dataSources, isLoading: isLoadingDataSources, error: dataSourcesError } = useDataSources() + const { isLoading: isLoadingSchema } = useDataSourceSchema(datasets[0]?.datasourceId || null) + + // Auto-select first datasource + useEffect(() => { + if (dataSources && dataSources.length > 0 && !datasets[0].datasourceId) { + setDatasets((prev) => + prev.map((ds, i) => (i === 0 ? { ...ds, datasourceId: dataSources[0].id } : ds)) + ) + } + }, [dataSources, datasets]) + const handleSubmit = async (e: React.FormEvent) => { e.preventDefault() + const primaryDataset = datasets[0] + if (!primaryDataset.identifier.trim()) return + + const dateStr = datePickerValueToString(anomalyDate) + if (!dateStr) return try { const result = await createInvestigation.mutateAsync({ - dataset_id: formData.dataset_id, + dataset_id: primaryDataset.identifier, metric_name: formData.metric_name, expected_value: parseFloat(formData.expected_value), actual_value: parseFloat(formData.actual_value), deviation_pct: parseFloat(formData.deviation_pct), - anomaly_date: formData.anomaly_date, + anomaly_date: dateStr, severity: formData.severity, }) - navigate(`/investigations/${result.investigation_id}`) } catch (error) { console.error('Failed to create investigation:', error) @@ -40,150 +442,262 @@ export function NewInvestigation() { } const handleChange = ( - e: React.ChangeEvent + e: React.ChangeEvent ) => { - setFormData((prev) => ({ + setFormData((prev) => ({ ...prev, [e.target.name]: e.target.value })) + } + + const updateDataset = useCallback( + (id: string, updates: Partial<{ datasourceId: string; identifier: string }>) => { + setDatasets((prev) => prev.map((ds) => (ds.id === id ? { ...ds, ...updates } : ds))) + if (updates.identifier === '' || updates.datasourceId) { + setSelectedTable(null) + } + }, + [] + ) + + const addDataset = useCallback(() => { + const defaultDsId = dataSources?.[0]?.id || '' + setDatasets((prev) => [ ...prev, - [e.target.name]: e.target.value, - })) + { id: crypto.randomUUID(), datasourceId: defaultDsId, identifier: '' }, + ]) + }, [dataSources]) + + const removeDataset = useCallback((id: string) => { + setDatasets((prev) => { + if (prev.length <= 1) return prev + return prev.filter((ds) => ds.id !== id) + }) + }, []) + + const primaryDataset = datasets[0] + const hasEmptyDataset = datasets.some((ds) => !ds.identifier.trim()) + const isSubmitDisabled = createInvestigation.isPending || hasEmptyDataset || !anomalyDate.start + + if (isLoadingDataSources) { + return ( +
+ +
+ ) } return ( -
-

New Investigation

- - - - Anomaly Details - - -
-
- - -
+
+
+ + + +

Start Investigation

+
-
- - -
+ {/* Error Banner */} + {dataSourcesError && ( +
+ +
+

Failed to load data sources

+

+ {dataSourcesError.message}. Please check your API key and try again. +

+
+ + + +
+ )} -
-
- - -
+
+ {/* Main Form */} +
+ + + Investigation Details +

+ Configure the investigation parameters and target datasets. +

+
+ + + {/* Datasets */} +
+ +
+ {datasets.map((dataset, index) => { + const ds = dataSources?.find((d) => d.id === dataset.datasourceId) + return ( + updateDataset(dataset.id, { datasourceId: id })} + onIdentifierChange={(val) => updateDataset(dataset.id, { identifier: val })} + onRemove={() => removeDataset(dataset.id)} + canRemove={datasets.length > 1} + disabled={createInvestigation.isPending} + autoFocus={index === datasets.length - 1 && !dataset.identifier} + dataSources={dataSources || []} + onTableSelect={setSelectedTable} + /> + ) + })} +
+ +
-
- - -
-
-
-
- - -
+ {/* Metric and Values */} +
+
+ + +
+
+ + +
+
-
- - -
-
+
+
+ + +
+
+ + +
+
+ + +
+
-
- - -
+ {/* Description */} +
+ +