diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 000000000..0a7473bbb --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,9 @@ +_site/ +.sass-cache/ +.jekyll-cache/ +.jekyll-metadata/ +vendor/ + +# These are not needed on GH Pages, but are used for local doc development +Gemfile +Gemfile.lock diff --git a/docs/_config.yml b/docs/_config.yml index d9ab79894..ea048a272 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -1,2 +1,3 @@ remote_theme: pmarsceill/just-the-docs include: ['CONTRIBUTING.md'] +color_scheme: carbon diff --git a/docs/_sass/color_schemes/carbon.scss b/docs/_sass/color_schemes/carbon.scss new file mode 100644 index 000000000..571ed1118 --- /dev/null +++ b/docs/_sass/color_schemes/carbon.scss @@ -0,0 +1,41 @@ +// @import './color_schemes/dark'; + +$carbon-blue-30: #a6c8ff; +$carbon-blue-40: #78a9ff; +$carbon-blue-60: #0f62fe; +$carbon-gray-0: #ffffff; +$carbon-gray-10: #f4f4f4; +$carbon-gray-50: #8d8d8d; +$carbon-gray-90: #262626; +$carbon-gray-100: #161616; + +$body-background-color: $carbon-gray-0; +// $body-heading-color: $grey-dk-300 !default; +$body-text-color: $carbon-gray-100; +$code-background-color: $carbon-gray-10; +$sidebar-color: $carbon-gray-10; +// $nav-child-link-color: $carbon-blue-30; + +$body-font-family: 'IBM Plex Sans', sans-serif; +$mono-font-family: 'IBM Plex Mono', monospace; + +$link-color: $carbon-blue-60; + +pre { + line-height: 1.2; +} + +h4 code { + font-size: 0.75rem; + letter-spacing: normal; +} + +blockquote p { + color: $carbon-gray-50; +} + +// $base-button-color: #f7f7f7 !default; +// $btn-primary-color: $purple-100 !default; +// $table-background-color: $white !default; +// $search-background-color: $white !default; +// $search-result-preview-color: $grey-dk-000 !default; diff --git a/docs/index.md b/docs/index.md index a6e3a3d67..84cf0298e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,5 +1,6 @@ --- -title: _Home +title: Home +nav_order: 1 --- # Welcome to the Carbon Platform 
developer docs! diff --git a/docs/packages-api.md b/docs/packages/api.md similarity index 99% rename from docs/packages-api.md rename to docs/packages/api.md index 534aa0b8a..8b3e3934f 100644 --- a/docs/packages-api.md +++ b/docs/packages/api.md @@ -1,3 +1,7 @@ +--- +parent: Packages +--- + # API package A package that exports common API utilities used across various other packages and services. These diff --git a/docs/packages-icons.md b/docs/packages/icons.md similarity index 98% rename from docs/packages-icons.md rename to docs/packages/icons.md index 813273f34..7a8e36583 100644 --- a/docs/packages-icons.md +++ b/docs/packages/icons.md @@ -1,3 +1,7 @@ +--- +parent: Packages +--- + # Icons package This package bundles all project-specific SVG icons in a node module. diff --git a/docs/packages/index.md b/docs/packages/index.md new file mode 100644 index 000000000..697f4158d --- /dev/null +++ b/docs/packages/index.md @@ -0,0 +1,5 @@ +--- +title: Packages +nav_order: 2 +has_children: true +--- diff --git a/docs/packages-mdx-components.md b/docs/packages/mdx-components.md similarity index 99% rename from docs/packages-mdx-components.md rename to docs/packages/mdx-components.md index 49c61f067..f0ca4dff0 100644 --- a/docs/packages-mdx-components.md +++ b/docs/packages/mdx-components.md @@ -1,3 +1,7 @@ +--- +parent: Packages +--- + # MDX Components package The set of components renderable in MDX files on the Carbon Platform. 
diff --git a/docs/packages-micromanage-cli.md b/docs/packages/micromanage-cli.md similarity index 83% rename from docs/packages-micromanage-cli.md rename to docs/packages/micromanage-cli.md index 5086dade1..d187258d7 100644 --- a/docs/packages-micromanage-cli.md +++ b/docs/packages/micromanage-cli.md @@ -1,3 +1,7 @@ +--- +parent: Packages +--- + # Micromanage CLI package Docs for micromanage are located in the package's [README.md](../packages/micromanage-cli/README.md) diff --git a/docs/services-data-graph.md b/docs/services/data-graph.md similarity index 99% rename from docs/services-data-graph.md rename to docs/services/data-graph.md index 378575adf..2e183f0d4 100644 --- a/docs/services-data-graph.md +++ b/docs/services/data-graph.md @@ -1,3 +1,7 @@ +--- +parent: Services +--- + # Data-Graph service ### Service diff --git a/docs/services/index.md b/docs/services/index.md new file mode 100644 index 000000000..f2dfa795a --- /dev/null +++ b/docs/services/index.md @@ -0,0 +1,5 @@ +--- +title: Services +nav_order: 3 +has_children: true +--- diff --git a/docs/services-logging.md b/docs/services/logging.md similarity index 99% rename from docs/services-logging.md rename to docs/services/logging.md index 270c7f78c..ddaed5277 100644 --- a/docs/services-logging.md +++ b/docs/services/logging.md @@ -1,3 +1,7 @@ +--- +parent: Services +--- + # Logging service ### Service diff --git a/docs/services-web-app.md b/docs/services/web-app.md similarity index 99% rename from docs/services-web-app.md rename to docs/services/web-app.md index be3c0e399..397a63fac 100644 --- a/docs/services-web-app.md +++ b/docs/services/web-app.md @@ -1,3 +1,7 @@ +--- +parent: Services +--- + # Web-App service The Web App service is a Next.js application that serves as the origin server for the diff --git a/tech-designs/README.md b/tech-designs/README.md new file mode 100644 index 000000000..5d65adb41 --- /dev/null +++ b/tech-designs/README.md @@ -0,0 +1,6 @@ +# Tech designs + +This is the home of all 
technical designs for the Carbon Platform. + +Each tech design is named as `td-##-title-of-doc.md`, where `##` is a sequentially incrementing +number and `title-of-doc` is a short title/description of the doc. diff --git a/tech-designs/_TEMPLATE.md b/tech-designs/_TEMPLATE.md new file mode 100644 index 000000000..8b9537ab7 --- /dev/null +++ b/tech-designs/_TEMPLATE.md @@ -0,0 +1,122 @@ +# td-[nn] - [Tech design title] + +**Status:** Draft 📝 + + + +## Summary + +> Describe the new feature from a technical perspective. + +> Describe the problem solved by this feature. + +> Describe how it integrates/relates/communicates with existing features/packages/services. + +> What needs to be in place prior to this feature being developed? + +> What assumptions are being made about those dependencies or about the feature itself? + +> What noteworthy things are considered "out of scope" for this feature? + +## Research + +- [ ] Approved? + +**Unanswered questions** + +> List any unknowns and unanswered questions that need answers prior to beginning development. Note +> that **all** of these questions should be answered prior to approving the Research section. + +**New technologies** + +> List new technologies that need to be explored in detail before beginning development. + +**Proofs of concept** + +> List proofs of concept that need to be completed before beginning development. + +## UI/UX design + +- [ ] Approved? + +> Does this feature have any associated UI/UX? If so, describe any design that needs to be +> completed/red-lined prior to development. + +## APIs + +- [ ] Approved? + +**Programmatic APIs** + +> List any APIs that will be developed and made available in the `@carbon-platform/api` package, +> including function/class/method names, parameters, and return values. If this tech design +> describes a new monorepo package, detail the APIs and exports of that package. 
+ +**Data graph** + +> List any new query resolvers and/or data models being included in the data-graph and/or data-graph +> API package. + +**Messages** + +> List any new/changed RabbitMQ messages introduced by this feature, the message payload structure +> associated with them, whether they are queries or emits, and their expected return values (for +> queries). + +## Security + +- [ ] Approved? + +> What new data is created/stored/collected/transmitted by this feature? How is that data secured? +> Who is allowed to access it? How is that access controlled? Think like a hacker. How might someone +> attempt to break or abuse this feature? + +## Error handling + +- [ ] Approved? + +> Ignore the happy path. What can go wrong with this feature? How will the error conditions manifest +> through the APIs? How will users be informed about these errors? + +## Test strategy + +- [ ] Approved? + +> How will the new feature be tested? (e.g. unit tests, manual verification, automated e2e testing, +> etc.) What interesting edge cases should be considered and tested? + +## Logging + +- [ ] Approved? + +> Detail any FFDC data (info, warning, error, debug logs) to be captured by this feature. Pretend +> you're on-call for supporting the Platform. In the event something breaks and all you have to go +> by is a list of log entries (i.e. you can't reproduce the failure yourself, but users are +> reporting problems), what information would you need to be able to pinpoint the source of a +> production-site failure? Additional info: [Logging service](/docs/services/logging.md) + +## File and code layout + +- [ ] Approved? + +> Describe how the files and code for this feature will fit into the rest of the mono repo. Will +> there be a new package/service? Are there existing files/directories in which the new logic should +> live? + +## Issue and work breakdown + +- [ ] Approved? + +> List any issues that should be created prior to starting work on this feature. 
+ +**Epics** + +- [ ] + +**Issues** + +- [ ] diff --git a/tech-designs/td-01-replace-fs-cache-with-redis.md b/tech-designs/td-01-replace-fs-cache-with-redis.md new file mode 100644 index 000000000..cfc02635c --- /dev/null +++ b/tech-designs/td-01-replace-fs-cache-with-redis.md @@ -0,0 +1,130 @@ +# td-01 - Replace fs-cache with redis + +**Status:** Canceled 🚫 + + + +## Summary + +The web app uses a [Node.js cache-manager](https://www.npmjs.com/package/cache-manager) to cache all +GitHub responses to prevent the web app from getting GitHub API rate limited. These GitHub requests +vary from fetching repository information, traversing GitHub trees, to primarily fetching GitHub +file content to read carbon.yml files and MDX files for web page content. + +We're currently using a file system store engine +(https://www.npmjs.com/package/cache-manager-fs-hash), which was chosen during v1 preview +prototyping as a quick and easy way to prevent GitHub API rate limits. + +As we index more content (asset documentation pages), each GitHub request per MDX file adds to the +amount of cached responses. Our current build pipeline does not persist cached files written to disk +between builds. + +As the amount of indexed content grows, so does the risk of our production GitHub access tokens +getting rate limited which could leave us with a production environment void of all content. + +**Potential solution** + +To mitigate that risk, until we can create proper back-end services for this, what if we added Redis +stores for staging and production? It looks like we could use a Redis store listed here: +https://github.com/BryanDonovan/node-cache-manager#store-engines + +Those Redis store engines _should_ share the same API as `cache-manager-fs-hash`, so that _should_ +require minimal web app changes. 
+ +We could keep the +[1 hour TTL](https://github.com/carbon-design-system/carbon-platform/blob/main/services/web-app/lib/file-cache.js#L24) +for cached responses, so for Next.js ISR pages, content from GitHub would be at most 1 hour stale. + +With a Redis cache, our production builds should be faster because there's a greater chance of a +cache hit. + +We could use a Redis cache for local development too, or for simplicity, keep that with the current +file system cache. + +**TL;DR:** + +In the event that our production environment gets rate limited by GitHub, because the Redis cache +would be persisted, we wouldn't lose web app content between deploys. Also, it would be neat to see +how many keys would get cached in Redis, to give us a better idea of to-be infrastructure needs to +manage GitHub content. + +TODO: vv Everything below this vv + +## Research + +- [ ] Approved? + +- **Unanswered questions** + - List any unknowns and unanswered questions that need answers prior to beginning development +- **New technologies** + - List new technologies that need to be explored in detail before beginning development +- **Proofs of concept** + - List proofs of concept that need to be completed before beginning development + +## UI/UX design + +Does this feature have any associated UI/UX? If so, describe any design that needs to be +completed/red-lined prior to development. + +## APIs + +- [ ] Approved? + +- **Programmatic APIs** + - List any APIs that will be developed and made available in the `@carbon-platform/api` package, + including function/class/method names, parameters, and return values. +- **Messages** + - List any new/changed RabbitMQ messages introduced by this feature, the message payload structure + associated with them, whether they are queries or emits, and their expected return values (for + queries). + +## Test strategy + +- [ ] Approved? + +How will the new feature be tested? (e.g. unit tests, manual verification, automated e2e testing, +etc.) 
+ +What interesting edge cases should be considered and tested? + +## Security + +- [ ] Approved? + +What new data is created/stored/collected/transmitted by this feature? How is that data secured? Who +is allowed to access it? How is that access controlled? + +Think like a hacker. How might someone attempt to break or abuse this feature? + +## Error handling + +Ignore the happy path. What can go wrong with this feature? How will the error conditions manifest +through the APIs? How will users be informed about these errors? + +## Logging + +- [ ] Approved? + +Detail any FFDC data (info, warning, error, debug logs) to be captured by this feature. + +## File and code layout + +- [ ] Approved? + +Describe how the files and code for this feature will fit into the rest of the mono repo. Will there +be a new package/service? Are there existing files/directories in which the new logic should live? + +## Issue and work breakdown + +- [ ] Approved? + +List any issues that should be created prior to starting work on this feature + +- **Epics** + - ... +- **Issues** + - ... diff --git a/tech-designs/td-02-blog-microservice.md b/tech-designs/td-02-blog-microservice.md new file mode 100644 index 000000000..a3c2cb685 --- /dev/null +++ b/tech-designs/td-02-blog-microservice.md @@ -0,0 +1,276 @@ +# td-02 - Blog microservice + +**Status:** Approved ✅ + + + +## Summary + +Related #540 + +Create a microservice to handle the logic to retrieve a publication's latest (10) mediumPosts and +serve those when a data-graph query is made for them. + +For a given mediumPost we want: + +- title +- author +- link +- date: we eventually want it in this format ('MMMM Do, YYYY'), but let's send it as epoch +- thumbnail + +How? 
+ +- A user's medium feed can be retrieved by https://medium.com/feed/[publication], which will return + an rss feed with latest post information +- Use a parser to convert rss feed into a JSON object +- Format items to fit {new} model +- Order items by pubDate (orderBy param, default descending) +- Use fs-cache to hold onto results for a period of time to speed up results from messaging or the + data-graph + +## Research + +see https://help.medium.com/hc/en-us/articles/214874118 - publication's medium feed + +- [x] Approved? + +- **Unanswered questions** + - ~~Can we use a xml parser to parse a post's content to retrieve (for now) the image; if so, can + we use the same library both to parse the xml and the rss?~~ + - We can use an XML parser, but we can't also use that to parse the HTML, so we will instead use + `rss-parser` for the RSS feed parsing and `node-html-parser` for the HTML parsing. + - ~~How long should the cached entries live for?~~ + - 1 hour! + - ~~If one data graph resolver throws an exception, what happens to the other ones and the query + as a whole? Does it still return some data from the other resolvers?~~ + - Opting for a partial response and then include information on the errors (if any) + - ~~can we throw errors across the messaging package?~~ + - At the very least, we can just return an error and it will auto-serialize. We want to + eventually be "aware" of this and reject the messaging query promise instead of resolving it. 
+- **New technologies** + - RSS parser, https://www.npmjs.com/package/rss-parser potentially + - node-html-parser, https://www.npmjs.com/package/node-html-parser +- **Proofs of concept** + - successfully retrieved posts by querying https://medium.com/feed/carbondesign and using a rss + parser + - Server-side: Image only worked if user-agent is modified, got it working by loading on + client-side instead + - in POC, using a string matcher to find the first image in the post's content (the thumbnail), + we'll likely want to use an xml parser for this instead if possible + (`imgSrcMatchString = /<img[^>]+src="?([^"\s]+)"?\s*\/>/g`) (Options: xml2js) + - [x] Validate that we can successfully parse xml post's content -- Francine + - We can't use an XML parser to parse the HTML, but we can use an HTML parser, which we will go + with + - [x] Throwing errors across messaging package -- Joe + - see "unanswered questions" + +## UI/UX design + +- [x] Approved? + +Info is used in mediumPosts component, which is being migrated from gatsby theme +https://github.com/carbon-design-system/gatsby-theme-carbon/tree/main/packages/gatsby-theme-carbon/src/components/MediumPosts, +no further red lines necessary since no visual changes are being made. + +## APIs + +- [x] Approved? 
+ +- **Programmatic APIs** + + - New Data Graph Model: + + ```ts + // file: packages/api/data-graph/models/medium-post.model.ts + + @ObjectType({ description: 'Example user object' }) + class MediumPost { + @Field(() => ID) + id: string + + @Field(() => String) + publicationName: string + + @Field(() => String) + title: string + + @Field(() => String) + author: string + + @Field(() => String) + thumbnail: string + + @Field(() => String) + link: string + + @Field(() => GraphQLTimestamp) + date: number + + constructor( + id: string, + publicationName: string, + title: string, + author: string, + thumbnail: string, + link: string, + date: number + ) { + this.id = id + this.publicationName = publicationName + this.title = title + this.author = author + this.thumbnail = thumbnail + this.link = link + this.date = date + } + } + ``` + +- **Data graph** + + ```graphql + query { + mediumPosts(publicationName: carbondesign, limit: 3, order: descending) { + id + publicationName + title + date + author + thumbnail + link + } + } + ``` + +- **Messages** + + - New message: Query - + ```ts + medium_feed({ publicationName: string, limit: number = 3, order: 'ascending' | 'descending' = 'descending' }) + ``` + Expected return: + ```ts + ;[ + { + title: string, + link: string, + thumbnail: string, + author: string, + date: number + } + ] + ``` + A new queue will be used for the medium microservice to listen on: `q_medium_v1` + +## Security + +- [x] Approved? + +- Set the service up to handle an allow list for the users that can be queried +- enforce upper limit to 10 +- strip out weird characters from publication (possibly encodeURIComponent()) (we're not returning + content so this might be a non-issue) + +## Error handling + +- [x] Approved? 
+ +### Messaging failures + +- fetching rss feed fails: + + - publicationName could be not found (404) + - return empty array + - Could get an http 500-series error + - we want to somehow return an error object + - throw an exception (if that'll work) or return empty array with error indicator + - `MediumWebsiteUnavailableException` + - Query could take too long to return results + - use promise.race() to abort fetch promise on timeout, return same as above ^^ + +- Error parsing RSS + - we want to somehow return an error object + - throw an exception (if that'll work) or return empty array with error indicator + - `MediumXmlParsingException` +- Error parsing XML + - we want to somehow return an error object + - throw an exception (if that'll work) or return empty array with error indicator + - `MediumXmlParsingException` + +### Data Graph failures + +- Propagate the errors from the messaging layer + +## Test strategy + +- [x] Approved? + +How will the new feature be tested? (e.g. unit tests, manual verification, automated e2e testing, +etc.) + +What interesting edge cases should be considered and tested? + +- Add dev dataSet +- Add Test data in the service package +- Unit tests: + - MicroService: + - Validate incoming query (`InvalidInputException`) + - Successfully get medium posts + - `MediumXmlParsingException` + - `MediumWebsiteUnavailableException` + - `Not Found` + - api/messaging package: + - New message works + +## Logging + +- [x] Approved? + +Detail any FFDC data (info, warning, error, debug logs) to be captured by this feature. + +Data Graph + +- Info: add some sort of log to announce a request is coming through + +Medium Service + +- logging by default on incoming/outgoing requests +- Info: cache miss, going out to get the mediumPosts +- Warning: 404 +- Warning:`MediumXmlParsingException` +- Warning: `MediumWebsiteUnavailableException` +- Warning: `InvalidInputException` + +## File and code layout + +- [x] Approved? 
+ +Describe how the files and code for this feature will fit into the rest of the mono repo. Will there +be a new package/service? Are there existing files/directories in which the new logic should live? + +- New dev dataSet in: packages/api/src/dev/data-graph +- New data-graph model in: packages/api/data-graph/models/medium-post.model.ts +- New microservice in: services/medium +- New mediumPosts resolver, service and module in: services/data-graph/src/main/medium-posts + +## Issue and work breakdown + +- [x] Approved? + +List any issues that should be created prior to starting work on this feature + +- **Epics** + + - MediumPosts Microservice + +- **Issues** + - MediumPosts - Data Graph: - New data graph Model - Resolver - Add dev dataset - Info log: add + some sort of log to announce a request is coming through + - MediumPosts Microservice + - Create and Unit test + - Integrate GraphQl MediumPosts request into Web App diff --git a/tech-designs/td-03-add-refs-to-schema-resources.md b/tech-designs/td-03-add-refs-to-schema-resources.md new file mode 100644 index 000000000..222c9815d --- /dev/null +++ b/tech-designs/td-03-add-refs-to-schema-resources.md @@ -0,0 +1,261 @@ +# td-03 - Add $refs to schema resources + +**Status:** Approved ✅ + + + +## Summary + +> Describe the new feature from a technical perspective. + +Carbon Platform has a +[resource schema](https://github.com/carbon-design-system/carbon-platform/blob/main/docs/resource-schemas.md). +There are scenarios where we need to establish relationships among resources through the schema. +Right now, we're relying on +[library inheritance](https://github.com/carbon-design-system/carbon-platform/blob/main/docs/resource-schemas.md#library-inheritance) +keys like `inherits: carbon-styles` or `inherits: carbon-styles@0.1.23` and then asset `id` +matching. Next, we need to establish relationships between libraries and design kits, and there +might be a better way to do this than cross-referencing resource `id`s. 
+ +If we add `$ref` support to our schema, our `carbon.yml` in its full form could look like: + +```yml +libraries: + test: + name: Test Library + description: This is a test library. + assets: + nested: + name: Nested asset + description: This asset is defined in the library. + status: stable + type: component + platform: web + reference-local: + $ref: '#/assets/reference-local' + reference-local-subdirectory: + $ref: 'components/my-component/carbon.yml#/assets/reference-local-subdirectory' + reference-absolute: + $ref: 'https://gist.githubusercontent.com/mattrosno/da6e10a986139512f372ed81694b17ce/raw/8312abd7f322ec009cd4ddeb9f2714ebf5cae9b2/carbon.yml#/assets/reference-absolute' + designKits: + carbon-white-sketch: + $ref: '#/designKits/carbon-white-sketch' +assets: + reference-local: + name: Reference local asset + description: This asset is defined in the assets array and referenced in the library. + status: stable + type: component + platform: web +designKits: + carbon-white-sketch: + name: Sketch white theme + tool: sketch + type: ui + status: stable + url: sketch://add-library/cloud/557b75ff-67d3-41ab-ada5-fa25447218c1 + action: link + sponsor: carbon +``` + +With this: + +- A single Carbon config file could describe one resource, or multiple resources +- With the top-level keys being `libraries`, `assets`, `designKits`, we can still validate the + schema +- There's no need to use GitHub APIs to crawl GitHub repository subdirectories to find all assets in + a library; the Carbon configs are more explicit thus more understandable of what they do +- The `assets` object can be top-level, or nested in a library, to prevent unnecessary `$ref`s +- `$ref`s can be local to the YML file, relative to its repository, or absolute +- There's no need for the `carbon-styles@0.1.23` syntax for inheritance; when using an absolute + `$ref` URL, simply use the GitHub URL that includes the desired version + 
`https://raw.githubusercontent.com/carbon-design-system/carbon/v11.6.0/packages/styles/carbon.yml` +- When indexing your resources, there's less ambiguity because you don't have to guess what assets + are in `carbon-styles@0.1.23`, you can CMD-click that URL above in VSCode to view the reference + schema in full + +> Describe the problem solved by this feature. + +The idea of using `$ref`s came up while determining what to do with design kits. We need to index +design kits, and then reference compatible design kits on a per-library basis. We can index design +kits within the platform repository, and then use that as the single source of truth when indexing +libraries in their remote repositories... but that doesn't really scale. What happens when we want +to index design kits elsewhere and let other maintaining library teams reference those design kits? + +A more deterministic indexing approach both simplifies platform logic and makes indexing more +understandable for library, asset, and design kit contributors. + +> Describe how it integrates/relates/communicates with existing features/packages/services. + +This would likely result in v2 of our schema as a breaking change, so we'd need to consider the +added complexity of that. + +## Research + +- [x] Approved? + +> Unanswered questions + +- Would this require a breaking change and a schema v2? + - We could do this in two phases where the first phase in a normal feature release adding + `designKits` with references in libraries. The second phase could be the `library` to + `libraries` change as well as any `assets` changes. +- What are our options to handle a breaking change? Do we need to support a v1? I'm guessing yes + because even though we could synchronize updating all currently indexed resources, prior + resource versions will still be using the v1 schema. + - We'll need to support v1 schema still, when we introduce breaking changes, in phase 2. +- What does this mean for cached GitHub responses? 
+ - Because `@apidevtools/json-schema-ref-parser` + [resolves its own HTTP responses](https://github.com/APIDevTools/json-schema-ref-parser/blob/main/lib/resolvers/http.js#L141) + when fetching schemas via absolute URLs, that means we'd likely want to cache the `carbon.yml` + data after it's been run through the JSON $Ref Parser. +- What about when we're indexing a `carbon.yml` from GitHub Enterprise and we're resolving remote + URLs on GitHub Enterprise through `@apidevtools/json-schema-ref-parser` - will we have to add + authentication to those requests? + - Yes, it appears there are options to pass authentication (via headers?) when dereferencing. + +> New technologies + +- `@apidevtools/json-schema-ref-parser` requires a newer version of npm than 8.5.5. I had to update + node from 16.15.0 to 16.15.1 to get npm 8.11.0. Is that okay? + +> Proofs of concept + +https://github.com/carbon-design-system/carbon-platform/pull/934 + +## UI/UX design + +- [x] Approved? + +> Does this feature have any associated UI/UX? If so, describe any design that needs to be +> completed/red-lined prior to development. + +No + +## APIs + +- [x] Approved? + +Adding Carbon config files (via the schema) to the data graph needs to happen regardless of the +schema supporting refs or not, so any API changes are out of scope for this issue. + +**Programmatic APIs** + +> List any APIs that will be developed and made available in the `@carbon-platform/api` package, +> including function/class/method names, parameters, and return values. + +None + +**Data graph** + +> List any new query resolvers and/or data models being included in the data-graph and/or data-graph +> API package. + +None + +**Messages** + +> List any new/changed RabbitMQ messages introduced by this feature, the message payload structure +> associated with them, whether they are queries or emits, and their expected return values (for +> queries). + +None + +## Security + +- [x] Approved? 
+ +> What new data is created/stored/collected/transmitted by this feature? How is that data secured? +> Who is allowed to access it? How is that access controlled? + +No change + +> Think like a hacker. How might someone attempt to break or abuse this feature? + +Consider using our `urlsMatch` utility to prevent malicious reference paths like `../`, etc. This +can be done in the "Resolve schema references with validation and error handling" issue. + +## Error handling + +- [x] Approved? + +> Ignore the happy path. What can go wrong with this feature? How will the error conditions manifest +> through the APIs? How will users be informed about these errors? + +`$ref`s can be invalid due to improper syntax, paths, and URLs. Like our current schema validation, +we can discard invalid resources to ensure high data integrity. + +## Test strategy + +- [x] Approved? + +> How will the new feature be tested? (e.g. unit tests, manual verification, automated e2e testing, +> etc.) + +We don't currently have a testing strategy for the web app. For phase 1, we should consider just +manual verification, and in phase 2 that incorporates the breaking changes, add tests to ensure both +schema v1 and v2 work as expected. + +> What interesting edge cases should be considered and tested? + +If phase 1 is just design kits, let's test for: + +- All four forms of reference (inline [no reference], local, local different file, absolute URL) +- Broken references (invalid reference syntax, invalid reference paths, invalid reference YML) + +Phase 2 testing would primarily be regression testing. + +## Logging + +- [x] Approved? + +> Detail any FFDC data (info, warning, error, debug logs) to be captured by this feature. + +- Log errors/warnings if YAML fails to parse (see error handling) +- Ensure validation correctly logs skipped items, validation errors + +## File and code layout + +- [x] Approved? + +> Describe how the files and code for this feature will fit into the rest of the mono repo. 
Will +> there be a new package/service? Are there existing files/directories in which the new logic should +> live? + +`github.js` is still a good source code destination for this in phase 1, and we can consider moving some +of this to a different web app library file (`/lib` directory), or package in phase 2. + +## Issue and work breakdown + +- [x] Approved? + +> List any issues that should be created prior to starting work on this feature. + +This could be broken into two phases, where the first phase is enough to unblock design kits for v1, +and the second phase could be after v1 as a breaking change. + +**Phase I** + +- [x] Add `designKits` as a top-level and nested schema key, with libraries having `designKits` + references, and add to the JSON schema +- [x] #970 +- [x] #971 +- [x] #972 + +**Phase II** + +- [ ] Resolve schema references for GitHub Enterprise URLs +- [ ] Refactor library inheritance to use a reference instead of our `library-id@version` identifier +- [ ] Allow `assets` to be both top-level and also nested in a library +- [ ] Replace crawling GitHub trees to find assets in a library with assets living in a library + (direct or via reference) +- [ ] Update `library` to `libraries` top-level schema key +- [ ] Retain schema v1 deprecated functionality +- [ ] Publish a v2 schema and update documentation +- [ ] Get inherited library version by fetching the `package.json` from that inherited library +- [ ] Add tests for schema parsing and dereferencing +- [ ] Update docs to reference v2 instead of v1, and link to v1 docs for backwards compatibility diff --git a/tech-designs/td-04-mdx-sanitizer.md b/tech-designs/td-04-mdx-sanitizer.md new file mode 100644 index 000000000..53a0f6f84 --- /dev/null +++ b/tech-designs/td-04-mdx-sanitizer.md @@ -0,0 +1,378 @@ +# td-04 - MDX sanitizer + +**Status:** Approved ✅ + + + +## Summary + +> Describe the new feature from a technical perspective. + +> Describe the problem solved by this feature. 
+ +> Describe how it integrates/relates/communicates with existing features/packages/services. + +Currently, remotely loaded MDX pages might error out or break the application for a number of +reasons: + +- Component used that is not recognized by the platform (regardless of whether or not it has a + corresponding import) +- An import (or export) statement being used (we don't allow any) +- A usage of an imported value someplace in the mdx +- Inline styles that are strings instead of objects +- Inclusion of HTML comments +- General Markdown parsing errors + +The idea is to catch these occurrences and handle them before they get to the top-level and error +out. + +### Expected Solution Behavior: + +1 - Unknown Components have to be swapped out with an inline error: +image + +2 - Imports and Exports statements will replace the entire content with a Full Page Error: +image + +3 - MDX files with HTML comments have to be able to render 4 - Script tags cannot be executed. +Instead, an inline error notification will be rendered to display information on it along with a +code snippet containing the script code.
The rest of the mdx content should be able to render +image + +5 - If an error occurs when trying to serialize MDX content other than the ones outlined above, +display an "Error compiling MDX" inline error: +image + +6 - If a React error occurs when trying to render parsed MDX content, display a "Content not +rendering" inline error: +image + +7 - If there's an error attempting to retrieve the MDX content from github, display a full page "The +page you're looking for cannot be found" information +image + +8 - Provide troubleshooting documentation for all errors that can be experienced by library +maintainers when trying to render their content on platform + +### Solution + +1 - Create a [PUBLIC] "MdxSanitizer" package that exports a unified attacher that can be injected +into the mdx processor. This attacher will be thorough enough to cover all of platform's use cases +but be general enough that it can still be valuable for projects outside of the carbon ecosystem to +use. + +The attacher will: + +- Receive a config param object including: + - allowedComponents: array of allowed tags and custom component keys, defaults to [] + - fallbackComponent: component that will replace the content when a component that isn't allowed + is identified, defaults to undefined + - allowImports: whether import statements should be allowed in parsed mdx content, defaults to true + - allowExports: whether export statements should be allowed in parsed mdx content, defaults to + true + - stripHTMLComments: whether HTML comments should be removed from parsed mdx content, defaults to + true + - tagReplacements: object containing entries of html or component tags that should be replaced, + accompanied by a replace function that returns the content these tags should be replaced with, + defaults to {} + - onError: function to be called every time an operation is performed on the AST as a result of the + supplied configuration +- if config.stripHTMLComments, rewrite the processor "parse" function
to remove comments from the + string mdx source, using the + [html-comment-regex lib](https://www.npmjs.com/package/html-comment-regex) +- Return a transformer function that will: + - Find any component that isn't listed in the supplied allowedComponents array and replace + its content with fallbackComponent + - if !config.allowImports throw an ImportFoundException; this exception should include info on the + content and line position of the import statement found + - if !config.allowExports throw an ExportFoundException; this exception should include info on the + content and line position of the export statement found + - if config.tagReplacements, for each entry: + - Find all (if any) components whose tag matches the entry key name + - Replace content with output of calling the function supplied in the entry value and parsing it + to an mdast tree + +OFF THE PLUGIN: + +2 - Catch serialize errors on the web-app, display inline error with parser error data 3 - If +there’s a JSX error, display inline error with error as well. 4 - If content cannot be retrieved, +display a full page “The page you’re looking for cannot be found” info page 5 - Create a "Common +Errors" page inside platform with each of the possible MDX errors as an entry and troubleshooting +explanations + +In order to do this, we'll create an mdx-processor package that will receive the jsx content and +render it to HTML server side to confirm there are no runtime errors in the code; this is necessary +because there is no other way to catch the errors before they break the app otherwise. + +The web-app will retrieve the github content, then send that content off to the +mdx-processor.process() function, which will return content that can be rendered inside a +`next-mdx-remote`'s `MDXRemote` tag (or throw an error if one occurs) + +## Research + +- [ ] Approved? + +**Unanswered questions** + +> List any unknowns and unanswered questions that need answers prior to beginning development.
+ +- Will we actually be able to catch the rendering error and swap it? With the current remote mdx + technology we're using (`next-mdx-remote/serialize`) we will not be able to catch jsx errors + before render; however, if we switch to a lower level library (`@mdx-js/mdx`) and render the mdx + source as HTML server-side, we'll be able to catch the render errors server-side (this will all be + handled by the mdx-processor). + +```js + import { evaluate } from '@mdx-js/mdx' + import * as runtime from 'react/jsx-runtime.js' + import * as ReactDOMServer from 'react-dom/server' + const mdxSourceString = await getRemoteMdxSrc(...) + + const mdxSourceComponent = await evaluate(mdxSourceString, { + ...runtime, + remarkPlugins: [...], + rehypePlugins: [...] + }).default + + try { + htmlContent = ReactDOMServer.renderToString(new mdxSourceComponent({ components })) + } catch (err) { + // render error, load something else + console.log('error', err) + } +``` + +- List of components to link out to: link out to storybook? in-web-app page? .md docs? - Link to + Storybook +- What should the functions return in terms of content so that we can inject it into the tree? maybe + an object with componentName and props? - mdx-like content (a string) Ex: + +```js +const getHeyReplacementSrc = (node) => { + const buttonKind = node.attributes?.find((attr) => attr.name === 'kind') + const buttonText = node.attributes?.find((attr) => attr.name === 'text') + return `` +} +``` + +- Do the "inline notifications" go directly replacing the content or are they the first thing to + pop up on top of the page? They go inline directly replacing the content; design will make a rollup + notification to display at the top indicating the current errors on the page + +**New technologies** + +> List new technologies that need to be explored in detail before beginning development. + +- html-comment-regex: regex to test string mdx source against to find html comments.
+- For MDX parsing and tapping into the AST tree: + - @mdx-js/mdx: + - exports `remarkMarkAndUnravel` plugin that we'll need to create a processor to correctly + convert stringified content into MDAST when testing: + ```js + import { remarkMarkAndUnravel } from '@mdx-js/mdx/lib/plugin/remark-mark-and-unravel' + const processor = unified().use(remarkParse).use(remarkMdx).use(remarkMarkAndUnravel) + processor.run(processor.parse(mdxData), mdxData, (_, tree) => { + expect(sanitizer(tree)).toEqual(outputTree) + }) + ``` + - mdast-util-mdx-jsx: types + - mdast-util-mdxjs-esm: types + - remark-mdx: needed to parse strings into MDAST, will be needed for testing + - remark-parse: needed to parse strings into MDAST, will be needed for testing + - unified: used to power remark (remark-mdx, remark-parse) + - unist-util-visit: Find nodes in tree that meet certain criteria (specific components, specific + attributes...) + +**Proofs of concept** + +> List proofs of concept that need to be completed before beginning development.
+ +- First run of mdx-sanitizer: https://github.com/carbon-design-system/carbon-platform/pull/740 +- [Attacher to remove html comments POC](https://github.com/carbon-design-system/carbon-platform/commit/0d0770b4de5d5057262cc67938810dd8ada1ab14) +- [Catch JSX errors and swap out for Full Page Error POC](https://github.com/carbon-design-system/carbon-platform/compare/main...flucca/poc/mdx-jsx-error-handling) +- [Swap out a component in the tree for another one returned from a function with different props POC](https://github.com/carbon-design-system/carbon-platform/compare/main...flucca/poc/mdx-content-replacement) +- [Find a component in AST and retrieve MDX content corresponding to it (I.E.: find script tags, retrieve the content in between the script tags so we can send it off to the function that will replace that component)](https://github.com/carbon-design-system/carbon-platform/compare/main...flucca/poc/mdx-content-replacement) + +## UI/UX design + +- [ ] Approved? + +> Does this feature have any associated UI/UX? If so, describe any design that needs to be +> completed/red-lined prior to development. + +Yes, UIs have been completed for all the replacement components, figma link: +https://www.figma.com/file/WyfqQh9R1VXYZErfmsNDVH/MDX-errors?node-id=9%3A4035 + +MISSING: + +- [Rollup notification at top of page UI](https://github.com/carbon-design-system/carbon-platform/issues/1087) +- [Common MDX Errors Design Considerations](https://github.com/carbon-design-system/carbon-platform/issues/1123) + +## APIs + +- [ ] Approved? + +**Programmatic APIs** + +> List any APIs that will be developed and made available in the `@carbon-platform/api` package, +> including function/class/method names, parameters, and return values. 
+ +- @carbon-platform/mdx-sanitizer Types: + ```ts + interface ComponentReplaceFn { + (node: Unist Node): string (mdx-like source) + } + ``` + ```ts + interface TagReplacementMap { + [tag: string]: ComponentReplaceFn + } + ``` + ```ts + interface Config { + allowedComponents?: string[] = [] + fallbackComponent: ComponentReplaceFn = () => '' + allowImports?: boolean = true + allowExports?: boolean = true + stripHTMLComments?: boolean = true + tagReplacements?: TagReplacementMap = {} + onError?: (Error) => undefined + } + ``` + Exports: `mdxSanitizerPlugin(config: Config): Function (mdxPlugin type)` + +**Data graph** + +> List any new query resolvers and/or data models being included in the data-graph and/or data-graph +> API package. + +- N/A, eventually the github function that retrieves the mdx source probably moves onto a data-graph + microservice but that is out of scope for this issue. + +**Messages** + +> List any new/changed RabbitMQ messages introduced by this feature, the message payload structure +> associated with them, whether they are queries or emits, and their expected return values (for +> queries). + +- N/A, No new MQ messages introduced or changed by this feature. + +## Security + +- [ ] Approved? + +> What new data is created/stored/collected/transmitted by this feature? How is that data secured? +> Who is allowed to access it? How is that access controlled? + +> Think like a hacker. How might someone attempt to break or abuse this feature? + +- Imports and exports are an mdx security concern but we're already covering for that by not + allowing and stopping MDX parsing full out when we find them +- Inline scripts: Confirmed with https://github.com/francinelucca/mdx-testing/blob/main/test-sec.mdx + that scripts can be passed to the remote MDX file and will be triggered in the app, as per + requirement #4, we will not allow mdx scripts to be executed. 
+ ![](https://user-images.githubusercontent.com/40550942/180517816-e91b8803-57c4-422b-9f7d-e4a949f02fc0.png) + ![](https://user-images.githubusercontent.com/40550942/180517834-d15981b1-dd13-470d-8989-9d5872d44870.png) + +## Error handling + +- [ ] Approved? + +> Ignore the happy path. What can go wrong with this feature? How will the error conditions manifest +> through the APIs? How will users be informed about these errors? + +This entire tech design is about error handling 😅 : + +- UnknownComponents render error: handle instructions described in solution explanation +- HTML comments break MDX parsing error: handle instructions described in solution explanation +- Parsing error - handle instructions described in solution explanation +- Full page error - handle instructions described in solution explanation +- Failure to retrieve remote mdx contents: handle instructions described in solution explanation + +## Test strategy + +- [ ] Approved? + +> How will the new feature be tested? (e.g. unit tests, manual verification, automated e2e testing, +> etc.) + +> What interesting edge cases should be considered and tested? + +Will use monorepo's standard unit testing library, 'ava'.
Will have mdx local files inside package +to test against various use-cases: + +- Mdx with no errors: should stay exactly the same +- Mdx with Custom (supported) component: should stay exactly the same +- Mdx with Unknown component: Unknown component should be replaced by supplied FallbackComponent +- Mdx with Exports: + - should throw `ExportFoundException` if `!config.allowExports` + - should stay exactly the same if `config.allowExports` +- Mdx with Imports: + - should throw `ImportFoundException` if `!config.allowImports` + - should stay exactly the same if `config.allowImports` +- Mdx with HTML comments: + - if `config.stripHTMLComments` calling processor.parse should result in comments being removed + from string + - if `!config.stripHTMLComments` calling processor.parse should result in no changes +- Mdx with replacement tag: + - component that matches tag should be replaced by supplied replacement function + +Parse remote files into MDAST, use mdxSanitizer to "sanitize" content, then manually "visit" the +resulting tree to confirm what was expected to be added/removed happened successfully + +## Logging + +- [ ] Approved? + +> Detail any FFDC data (info, warning, error, debug logs) to be captured by this feature. + +- These "error" cases are considered to be fairly common and can mostly be solved by the user so + we'd want to steer away from filling our logs with them. +- For FullPageError when parsed mdx can't be rendered due to an error, log a "warning" message in + case we need to track special cases. + +## File and code layout + +- [ ] Approved? + +> Describe how the files and code for this feature will fit into the rest of the mono repo. Will +> there be a new package/service? Are there existing files/directories in which the new logic should +> live? 
+ +- packages/mdx-sanitizer + + - /src/ + - main/ + - mdx-sanitizer-plugin.ts + - index.ts + - interfaces.ts + - exceptions/\* + - test/ + - mdx-sanitizer-plugin.test.js + - test-files/\* + +- services/web-app/utils/mdx.js <- will be used to create a function that will generate the + mdx-processor that injects and consumes the plugin (this logic needs to be taken out of github.js + in the web-app) + +## Issue and work breakdown + +- [ ] Approved? + +> List any issues that should be created prior to starting work on this feature. + +**Epics** + +- #994 + +**Issues** + +- #752 +- #995 +- #996 +- #1087 +- #1123 diff --git a/tech-designs/td-05-mdx-media-mapper.md b/tech-designs/td-05-mdx-media-mapper.md new file mode 100644 index 000000000..b6998ae63 --- /dev/null +++ b/tech-designs/td-05-mdx-media-mapper.md @@ -0,0 +1,196 @@ +# td-05 - MDX media mapper + +**Status:** Draft πŸ“ + + + +## Summary + +> Describe the new feature from a technical perspective. + +> Describe the problem solved by this feature. + +> Describe how it integrates/relates/communicates with existing features/packages/services. 
+ +Problem: 1 - Relative URL images/videos/svgs will not render because we don't have them locally 2 - +We want to use Next/Image because it interacts better with the web app and we get optimization for +free 3 - SVGs straight-up do not work ATM + +Solution: Create a [PRIVATE] "mdx-media-mapper" package that exports a plugin that we can inject +into the MDX parser to find and "correct" images & videos, that means: For all images and videos: + +- Convert relative urls into absolute URLs (mind security, can't be another repo) - Will need to + receive the basePath as a parameter to do this - Will need to add `'?raw=true'` to get media + resource instead of github html contents +- Security: ensure when creating absolute URLs from relative URLs that the base + (github/[org]/[repo]) stays the same (do not allow changing repos) + +For all images: + +- Run each image through the placeholder library to generate placeholder base 64-encoded images and + get image dimensions +- Modify at the AST level so the returned markdown uses Next.js Image component with the placeholder + props merged in. Modify markdown image to accept extra props +- Investigate/Fix if needed: + [https://github.com/carbon-design-system/carbon-platform/issues/767](https://github.com/carbon-design-system/carbon-platform/issues/767) + +This package will be used from the web-app service at the time of string->mdast conversion (in the +mdx parser). + +## Assumptions + +- [x] Approved? +- We'll build this to work on open-source only (no auth needed) and we can scale it to include + authorization (maybe inside url with access tokens) in later versions. + +## Research + +- [x] Approved? + +- **Unanswered questions** - Will we be able to render SVGs (Re: issue #767)? Assuming yes, just + need to take a closer look into why it's not rendering and if it's fixed by just the image mapper + at all. - Design: how do we show "this image/video is considered dangerous due to its url....." + type thing?
- Will onError, onLoad definitely be cleared out? If not, how should we handle it? +- **New technologies** + - unist-util-remove: \*\*Only if we need to remove tree nodes (e.g.: dangerous images) + - plaiceholder: gives us image blurs + - unist-util-visit: Find nodes in tree that meet certain criteria (i.e.: images, videos.) +- **Proofs of concept** - Working POC: + [https://github.com/carbon-design-system/carbon-platform/pull/739](https://github.com/carbon-design-system/carbon-platform/pull/739/files) + - Plaiceholder implementation with remote images POC: + [https://github.com/carbon-design-system/carbon-platform/pull/551](https://github.com/carbon-design-system/carbon-platform/pull/551) + +## UI/UX design + +- [x] Approved? + +Does this feature have any associated UI/UX? If so, describe any design that needs to be +completed/red-lined prior to development. -- TBD: + +- unsupported url error. + +## APIs + +- [x] Approved? + +- **Programmatic APIs** - List any APIs that will be developed and made available in the + `@carbon-platform/api` package, including function/class/method names, parameters, and return + values. -- + - @carbon-platform/api/mdx-media-mapper Exports: - mdxMediaMapperPlugin(basePath): Function + (mdxPlugin type) +- **Data graph** - N/A +- **Messages** - N/A + +## Security + +- [x] Approved? + +What new data is created/stored/collected/transmitted by this feature? How is that data secured? Who +is allowed to access it? How is that access controlled? + +## Think like a hacker. How might someone attempt to break or abuse this feature? + +- Potentially dangerous image relative URLs: we can use the `urlsMatch` function in + services/web-app/utils/url to compare the baseUrl vs the resulting url to discriminate between a + good and a bad url and then handle it appropriately.
+- embedding malicious code on image/image metadata: - Tried image with onerror, onload function that + would trigger alerts + ([https://github.com/francinelucca/mdx-testing/blob/main/test-img.mdx](https://github.com/francinelucca/mdx-testing/blob/main/test-img.mdx)) + but look like next-mdx-remote is clearing those out: - (these could also be a result of Next/Image + clearing those events, see comment on + [https://github.com/carbon-design-system/carbon-platform/blob/1a35534bdba33f160d40bc6941538d9824881433/services/web-app/components/asset-catalog-item/asset-catalog-item.js#L28](https://github.com/carbon-design-system/carbon-platform/blob/1a35534bdba33f160d40bc6941538d9824881433/services/web-app/components/asset-catalog-item/asset-catalog-item.js#L28)) + image + +- Really large files can slow down the website. - We're going to assume this risk at the moment. + This is not an issue that we're seeing currently with any media in platform and a really large + image/video would only slow down the particular page (route) where that image is being displayed + and shouldn't affect any other parts of the web application since we're not serving those files + ourselves, they're remote files. Currently, all content loaded into platform comes from a library + that is explicitly declared in our allow-list so there's a certain sense of trust. We can address + this in the future if it ever becomes a problem. For example: + + - Could use something like + [https://bitexperts.com/Question/Detail/3316/determine-file-size-in-javascript-without-downloading-a-file](https://bitexperts.com/Question/Detail/3316/determine-file-size-in-javascript-without-downloading-a-file) + obtain file size and have a size cap to the media? 
(this would add an extra request for every + single image/video…) + +- The remote url could link to a resource with malicious code. I don’t think there’s anything we can + do about that, see first answer: + [https://stackoverflow.com/questions/3114301/can-something-bad-happen-via-img-src](https://stackoverflow.com/questions/3114301/can-something-bad-happen-via-img-src) + +## Error handling + +- [x] Approved? + +Ignore the happy path. What can go wrong with this feature? How will the error conditions manifest +through the APIs? How will users be informed about these errors? -- + +- Image not found: we can get by without handling this; it's a user error, just make sure the plugin + doesn't break + +## Test strategy + +- [x] Approved? + +How will the new feature be tested? (e.g. unit tests, manual verification, automated e2e testing, +etc.) + +## What interesting edge cases should be considered and tested? + +Will use monorepo's standard testing library, 'ava' to test with a remote test repository (will need +to construct) with test images/videos/files + +- Relative src images/videos +- Absolute src images/videos +- SVG, JPEG/JPG, PNG, GIF +- Image Not Found (just expect the plugin not to break) +- Malicious URL + +Parse remote files into MDAST, use mdxMediaMapper to "map" content, then manually "visit" the +resulting tree to confirm what was expected to be added/removed happened successfully. + +## Logging + +- [x] Approved? + +> Detail any FFDC data (info, warning, error, debug logs) to be captured by this feature. We're +> thinking not much info generated worth logging here, get the team's perspective on this when we +> play it back. + +## File and code layout + +- [x] Approved? + +> Describe how the files and code for this feature will fit into the rest of the mono repo. Will +> there be a new package/service? Are there existing files/directories in which the new logic should +> live?
+ +--- + +- packages/api/mdx-sanitizer + - /src/ + - main/ + - mdx-media-mapper-plugin.js (entry point) + - test/ + - mdx-media-mapper-plugin.test.js + - test-files/\* + +## Issue and work breakdown + +- [x] Approved? + +> List any issues that should be created prior to starting work on this feature + +- **Epics** + - MdxMediaMapper +- **Issues** + - Design: malicious url UI + - Create MdxMediaMapper package + - Test repo + Data + - Test MdxMediaMapper plugin + - SVGs not working: + [https://github.com/carbon-design-system/carbon-platform/issues/767](https://github.com/carbon-design-system/carbon-platform/issues/767) diff --git a/tech-designs/td-06-mdx-to-html-microservice.md b/tech-designs/td-06-mdx-to-html-microservice.md new file mode 100644 index 000000000..b2eb503dc --- /dev/null +++ b/tech-designs/td-06-mdx-to-html-microservice.md @@ -0,0 +1,300 @@ +# td-06 - MDX-to-HTML microservice + +**Status:** Approved ✅ + + + +## Summary + +> Describe the new feature from a technical perspective. + +This feature is for a microservice whose responsibility is to accept MDX as input and generate +sanitized HTML as output. It will make use of the `@carbon-platform/mdx-sanitizer` plugin to +accomplish this. + +The microservice will be called `mdx-converter`. + +**Optional goal:** Cache conversions by hashing the input string and mapping it to the generated +output string. + +Future enhancement: Convert to other output formats besides HTML (if it ever makes sense to do so). + +A package called `@carbon-platform/mdx-components` will be created for use by the microservice +itself. This package will have all of the mdx components currently housed in the web-app in a +standalone package that can be used in both places. This package will represent a comprehensive set +of all JSX components that can be used in MDX. + +> Describe the problem solved by this feature. + +This allows MDX to be processed into HTML in a secure, controlled, and isolated manner outside of +the web-app.
The primary motivation of this is to allow top-level runtime JSX errors to be caught +and handled gracefully. + +> Describe how it integrates/relates/communicates with existing features/packages/services. + +It will provide a messaging interface to be used by other services. It will not communicate to any +external services. + +> What needs to be in place prior to this feature being developed? + +- `mdx-sanitizer` plugin package needs to be integrated into `main` since the microservice depends + on it. + +> What assumptions are being made about those dependencies or about the feature itself? + +- The maximum input size of MDX will be 1 MB. Output size may end up larger than this, but will + remain under the RabbitMQ message threshold of 128 MB. + +> What noteworthy things are considered "out of scope" for this feature? + +- The development of the actual mdx sanitizer plugin is not in scope for this microservice. + +## Research + +- [x] Approved? + +**Unanswered questions** + +> List any unknowns and unanswered questions that need answers prior to beginning development. Note +> that **all** of these questions should be answered prior to approving the Research section. + +- [x] How do we get the usable React components over to the service to use as input? + - There are two ways to accomplish this, but they both involve the service itself importing the + components for use. + - First, Carbon react components (`@carbon/react`) can be used by importing them, provided the + ultimate displayer of the output HTML has imported carbon styles in the web-app. + - Second, A standalone component library can be used to house "custom" components for the platform + and import them in the same way. +- [x] What about the fallback component? + - This should be either internal to the microservice and provided as-is, or manifested to the + web-app as an error, which then means that the web-app has to handle this error and convert to + UI element. 
+- [x] What about the mapping of disallowed components to replacement functions? Do we delegate that + to the caller and return a list of errors to them? + - No, this mapping should be internal to the microservice and not configurable. The sanitizer + _package_ can provide this level of configuration, but the microservice (as a user of this + package) should not allow further configuration by users. + - The error components themselves could live in the `mdx-components` package, since they're + technically MDX-rendered components. +- [x] Can we still use global overrides to the base carbon components/classes since we're not having + things in modules anymore? + - Yes, this just turns into business-as-usual css selectors based off of the carbon component + class names. +- [x] How many components are outside of the mdx folder? What is the lift to package-ify them? Are + they all using self-contained scss modules currently? + - Will assess this during development. + +**New technologies** + +> List new technologies that need to be explored in detail before beginning development. + +- `@mdx-js/mdx` used as the MDX compilation and evaluation engine. Takes MDX text as input and + produces JS as output (as a React component) +- `ReactDOMServer` - used to convert a react component to HTML in a server-side environment + +**Proofs of concept** + +> List proofs of concept that need to be completed before beginning development. + +- [x] Remote MDX exploration using `@mdx-js/mdx` directly and funneling that to + `ReactDOMServer.renderToStaticMarkup` + - We were able to prove that this is doable and even accepts the same plugins we were using + previously for remark and rehype. +- [x] Feasibility of using components which currently exist in + `services/web-app/components/mdx/components.js` in a standalone package used by the + mdx-converter microservice. + + - I was able to prove that it is 100% possible to offload these components to their own package.
+ That package can then be imported into the microservice for use during mdx processing and react + SSR. Here's some example code: + + ```ts + import { Button } from '@carbon/react' + import { ColorBlock } from '@carbon-platform/mdx-components' + import { evaluate } from '@mdx-js/mdx' + import * as runtime from 'react/jsx-runtime.js' + import { renderToString } from 'react-dom/server.js' + + const MdxContent = ( + await evaluate('<>#ff0000', { ...runtime }) + ).default + + const output = renderToString(MdxContent({ components: { Button, ColorBlock } })) + console.log(output) + ``` + + yields: + + ```html + +
+ +
+ ``` + +## UI/UX design + +- [x] Approved? + +> Does this feature have any associated UI/UX? If so, describe any design that needs to be +> completed/red-lined prior to development. + +None. + +## APIs + +- [x] Approved? + +**Programmatic APIs** + +> List any APIs that will be developed and made available in the `@carbon-platform/api` package, +> including function/class/method names, parameters, and return values. If this tech design +> describes a new monorepo package, detail the APIs and exports of that package. + +- `@carbon-platform/api/mdx-converter` + - `MdxConverter` class containing: + - `toHtml(inputMdx: string): string` which takes an MDX string as input, sanitizes it, and + converts it to an HTML string as output. + +**Data graph** + +> List any new query resolvers and/or data models being included in the data-graph and/or data-graph +> API package. + +None. + +**Messages** + +> List any new/changed RabbitMQ messages introduced by this feature, the message payload structure +> associated with them, whether they are queries or emits, and their expected return values (for +> queries). + +- New message: `mdx_to_html` + - Takes a `string` as its payload and responds with a response object containing: + - `html: string` - the actual output upon successful conversion + - `errors: Array` - a list of errors that were encountered during the conversion + +## Security + +- [x] Approved? + +> What new data is created/stored/collected/transmitted by this feature? How is that data secured? +> Who is allowed to access it? How is that access controlled? Think like a hacker. How might someone +> attempt to break or abuse this feature? + +The biggest security concern with this feature is the use of +[`evaluate()`](https://mdxjs.com/packages/mdx/#evaluatefile-options). Its docs state: + +> [Compile](https://mdxjs.com/packages/mdx/#compilefile-options) and +> [run](https://mdxjs.com/packages/mdx/#runfunctionbody-options) MDX.
When possible, please use +> compile, write to a file, and then run with Node, or use one of the +> [§ Integrations](https://mdxjs.com/getting-started/#integrations). But if you trust your content, +> evaluate can work. + +We are running under the assumption that we can do enough pre-processing of the input MDX to make it +"safe" to pass through the `evaluate` function without adverse results. This is accomplished mainly +by the use of the `mdx-sanitizer` plugin, which will remove things like script tags, +imports/exports, and other unsupported markup. + +In the worst case, this evaluation is confined to a single microservice, so there is low risk of any +unexpected behavior extending beyond the service to the rest of the platform. + +MDX data (which is potentially confidential or internal) will be sent encrypted across the message +broker infrastructure. The mdx-converter service may cache conversions, which contain html +variations of the input mdx. Any cached conversions should be stored either: + +1. in encrypted files in ephemeral storage that is destroyed when the service exits; or +2. in an external, encrypted storage bucket or cache with controlled access. + +If neither of these can be accomplished within the allotted time, no caching of responses should be +performed. + +## Error handling + +- [x] Approved? + +> Ignore the happy path. What can go wrong with this feature? How will the error conditions manifest +> through the APIs? How will users be informed about these errors? + +There are many error conditions that can be encountered during parsing and evaluation. These are +outlined in #952. Beyond this, the following additional errors will be propagated: + +- "runtime" JSX evaluation errors (from React SSR) will also be propagated as thrown exceptions by + the API package utility method(s) outlined above. +- input validation error: input too large (cap the input before sending to messaging to prevent DoS + attacks).
+ +## Test strategy + +- [x] Approved? + +> How will the new feature be tested? (e.g. unit tests, manual verification, automated e2e testing, +> etc.) What interesting edge cases should be considered and tested? + +Normal unit test coverage will be accomplished. + +**Interesting test cases** + +- malformed input +- input that is too large (> 1 MB) +- input that results in JSX runtime errors +- no timely response from service when invoking API method + - (this is left up to the caller of the API to decide and implement) + +## Logging + +- [x] Approved? + +> Detail any FFDC data (info, warning, error, debug logs) to be captured by this feature. + +The request log interceptor will automatically log data about requests sent to the microservice. + +Parsing errors and warnings will be logged by the microservice. + +Details about caching will be info/debug logged, where appropriate. + +## File and code layout + +- [x] Approved? + +> Describe how the files and code for this feature will fit into the rest of the mono repo. Will +> there be a new package/service? Are there existing files/directories in which the new logic should +> live? + +New service: `services/mdx-converter` (`@carbon-platform/mdx-converter`). This will be a standard +Carbon Platform NestJS microservice. + +Package update: `packages/api`: New subdirectory will be added at +`packages/api/src/main/mdx-converter`. This will contain the programmatic APIs detailed above. This +will also provide the message output type to be provided to the messaging `interfaces.ts` file. + +Package update: `packages/api/src/main/messaging/interfaces.ts`: New message type will be added. + +Out of scope, but still needed: New package: `packages/mdx-components` with all allowable MDX +components in it. + +## Issue and work breakdown + +- [x] Approved? + +> List any issues that should be created prior to starting work on this feature.
+ +**Epics** + +- #994 + +**Issues** + +- #1098 +- #1099 +- #1100 +- #1101 +- #1102 +- #1103 +- #1104 +- #1105 +- #1106 diff --git a/tech-designs/td-07-github-microservice.md b/tech-designs/td-07-github-microservice.md new file mode 100644 index 000000000..a06988906 --- /dev/null +++ b/tech-designs/td-07-github-microservice.md @@ -0,0 +1,122 @@ +# td-07 - GitHub microservice + +**Status:** Draft 📝 + + + +## Summary + +> Describe the new feature from a technical perspective. + +> Describe the problem solved by this feature. + +> Describe how it integrates/relates/communicates with existing features/packages/services. + +> What needs to be in place prior to this feature being developed? + +> What assumptions are being made about those dependencies or about the feature itself? + +> What noteworthy things are considered "out of scope" for this feature? + +## Research + +- [ ] Approved? + +**Unanswered questions** + +> List any unknowns and unanswered questions that need answers prior to beginning development. Note +> that **all** of these questions should be answered prior to approving the Research section. + +**New technologies** + +> List new technologies that need to be explored in detail before beginning development. + +**Proofs of concept** + +> List proofs of concept that need to be completed before beginning development. + +## UI/UX design + +- [ ] Approved? + +> Does this feature have any associated UI/UX? If so, describe any design that needs to be +> completed/red-lined prior to development. + +## APIs + +- [ ] Approved? + +**Programmatic APIs** + +> List any APIs that will be developed and made available in the `@carbon-platform/api` package, +> including function/class/method names, parameters, and return values. If this tech design +> describes a new monorepo package, detail the APIs and exports of that package. + +**Data graph** + +> List any new query resolvers and/or data models being included in the data-graph and/or data-graph +> API package.
+ +**Messages** + +> List any new/changed RabbitMQ messages introduced by this feature, the message payload structure +> associated with them, whether they are queries or emits, and their expected return values (for +> queries). + +## Security + +- [ ] Approved? + +> What new data is created/stored/collected/transmitted by this feature? How is that data secured? +> Who is allowed to access it? How is that access controlled? Think like a hacker. How might someone +> attempt to break or abuse this feature? + +## Error handling + +- [ ] Approved? + +> Ignore the happy path. What can go wrong with this feature? How will the error conditions manifest +> through the APIs? How will users be informed about these errors? + +## Test strategy + +- [ ] Approved? + +> How will the new feature be tested? (e.g. unit tests, manual verification, automated e2e testing, +> etc.) What interesting edge cases should be considered and tested? + +## Logging + +- [ ] Approved? + +> Detail any FFDC data (info, warning, error, debug logs) to be captured by this feature. Pretend +> you're on-call for supporting the Platform. In the event something breaks and all you have to go +> by is a list of log entries (i.e. you can't reproduce the failure yourself, but users are +> reporting problems), what information would you need to be able to pinpoint the source of a +> production-site failure? Additional info: [Logging service](/docs/services-logging.md) + +## File and code layout + +- [ ] Approved? + +> Describe how the files and code for this feature will fit into the rest of the mono repo. Will +> there be a new package/service? Are there existing files/directories in which the new logic should +> live? + +## Issue and work breakdown + +- [ ] Approved? + +> List any issues that should be created prior to starting work on this feature. 
+ +**Epics** + +- [ ] + +**Issues** + +- [ ] diff --git a/tech-designs/td-08-rmdx-remote-mdx.md b/tech-designs/td-08-rmdx-remote-mdx.md new file mode 100644 index 000000000..830280d23 --- /dev/null +++ b/tech-designs/td-08-rmdx-remote-mdx.md @@ -0,0 +1,208 @@ +# td-08 - RMDX (Remote MDX) + +**Status:** Approved ✅ + + + +## Summary + +This feature describes the usage of remote MDX in a secure way that is not vulnerable to code +injection and arbitrary code execution (ACE) attacks. This will supersede existing MDX processing +and provide stricter parsing and rendering. This means less overall customization, but a +significantly more secure implementation. + +RMDX will be used in two places: + +1. A microservice (called `rmdx-processing`) which can translate source MDX into a sanitized + abstract syntax tree (AST), similar to that which would be retrieved from a CMS API such as + Contentful. +2. A set of utility React components and functions that can be used to render a sanitized AST as a + set of React components. + +The set of components rendered via the RMDX utilities is not defined as part of this tech design, +and is instead expected to be provided as a "map" to the utility (additional details below). Having +the interface act as a mapping will allow any arbitrary set of components to be used during +translation. + +The goal is to have the RMDX utilities generate an AST that is as close to the Contentful data model +as possible. This will make migration between the two as easy as possible. + +The maximum input size of MDX will be 1 MB. Output size may end up larger than this, but will remain +under the RabbitMQ message threshold of 128 MB. + +## Research + +- [x] Approved?
+ +https://app.mural.co/t/ibm14/m/ibm14/1667230506318/3c007d2b56bfc0b1c820e15d7d946285da5ae4a2?sender=jdharvey8136 + +https://github.com/contentful/rich-text/tree/master/packages/rich-text-types + +#1073 + +**Unanswered questions** + +None + +**New technologies** + +None + +**Proofs of concept** + +- [x] Go from mdx -> mdast -> JSON -> react components + - This works as expected (see mural for details) + +## UI/UX design + +- [x] Approved? + +None + +## APIs + +- [x] Approved? + +**Programmatic APIs** + +New package: `rmdx` + +This will export the utilities for converting to and working with the MDX-based AST. + +`process(srcMdx: string): AST` - Returns an RMDX AST given an input string + +`RmdxNode` - React component which takes an RMDX AST as input along +with a `components` map, which maps AST node types to React components for rendering. The mapped +components are given `children` to render as well as any relevant scalar props from the source MDX. + +**Data graph** + +There will eventually be an `rmdx` resolver for asset doc pages, however, since there is not yet an +asset resolver, this will probably be deferred until later. + +**Messages** + +**query**: `rmdx` A request/response based message to get a processed RMDX result, given an input +string of raw MDX source + +```ts +// query message +interface RmdxMessage { + srcMdx: string // Max size = 1 MB +} + +interface RmdxResponse { + ast: Node // Either a unist tree or a custom AST similar to Contentful's model + errors: Array // List of errors encountered during processing +} +``` + +**Future**: Should eventually respond to an `asset_discovered` message by pre-caching processed RMDX +in an LRU cache. + +## Security + +- [x] Approved? + +**MDX things that will not work under RMDX:** + +- Inline JSX blocks (outside of components) +- Imports/exports/variable assignments +- Properties on JSX elements which are not a number, boolean, or string (i.e.
no functions, arrays, + or objects) + +## Error handling + +- [x] Approved? + +Error handling should have feature parity with existing mdx processing. + +TODO: Need to figure out the best approach for transmitting errors back to the caller. Tentative +approach: Errors in the returned list of errors are numbered, and there are AST nodes in the +returned RMDX which call out particular error numbers (and types), so knowing what to render is +accomplished via a "lookup map". + +example: + +```json +{ + "ast": [ + { + "nodeType": "h1", + "value": "this is a header" + }, + { + "nodeType": "Error", + "errorIndex": 0 + } + ], + "errors": [ + { + "exception": "ImportFoundException", + "line": 123, + "text": "import thing from 'thing'" + } + ] +} +``` + +`const error = theErrorrmdx.errors[0]` + +## Test strategy + +- [x] Approved? + +> How will the new feature be tested? (e.g. unit tests, manual verification, automated e2e testing, +> etc.) What interesting edge cases should be considered and tested? + +76+% unit test coverage of all new code. + +Test existing known MDX exploits to ensure they can't be performed against RMDX + +## Logging + +- [x] Approved? + +- Log incoming requests to process MDX +- Log processing failures +- Warn log when encountering portions of MDX that need to be removed for security reasons + +## File and code layout + +- [x] Approved? + +Rough file layout: + +- packages + - api + - rmdx-processing + - RmdxMessage + - RmdxResponse + - query_rmdx + - rmdx + - `process` + - `RmdxNode` +- services + - rmdx-processing + - rmdx-controller + - rmdx-service + +## Issue and work breakdown + +- [x] Approved? + +**Epics** + +- #1491 + +**Issues** + +- #1492 +- #1493 +- #1494 +- #1495 +- #1496