From 06579a9c4c8e3c5d658eb52158cb3fdaefc27b77 Mon Sep 17 00:00:00 2001 From: Steven Maillet Date: Wed, 19 Nov 2025 08:22:49 -0800 Subject: [PATCH] Preparations for final release * Updates/Corrections and clarifications to docs * Updates to NuGet dependencies to non-preview version * Updates to repo readme --- .editorconfig | 27 +++++---- Directory.Packages.props | 23 +++++--- IgnoredWords.dic | 9 +++ .../Initialize-CommonBuildEnvironment.ps1 | 6 +- .../CommonBuild/Public/Show-FullBuildInfo.ps1 | 2 +- README.md | 55 +++++++++--------- docfx/ReadMe.md | 8 +-- docfx/index.md | 10 ++-- docfx/llvm/articles/InternalDetails/index.md | 18 +++--- .../articles/InternalDetails/llvm-handles.md | 23 +++++--- .../InternalDetails/marshal-LLVMBool.md | 12 ++-- .../InternalDetails/marshal-string.md | 7 +++ docfx/llvm/articles/Samples/index.md | 2 +- docfx/llvm/index.md | 56 +++++++++++-------- .../namespaces/Ubiquity.NET.Llvm.DebugInfo.md | 24 ++++---- .../Ubiquity.NET.Llvm.Transforms.Legacy.md | 2 +- docfx/templates/Ubiquity/public/main.css | 2 +- src/Analyzers/ReadMe.md | 4 +- src/Interop/LlvmBindingsGenerator/ReadMe.md | 45 ++++++++------- src/Interop/readme-first.md | 19 +++---- .../CodeGenWithDebugInfo/codegeneration.md | 34 +++++------ .../Kaleidoscope/Chapter2/Kaleidoscope-ch2.md | 36 +++++++----- .../Kaleidoscope/Chapter3/Kaleidoscope-ch3.md | 2 +- .../Kaleidoscope/Chapter4/Kaleidoscope-ch4.md | 3 +- .../Kaleidoscope/Chapter5/Kaleidoscope-ch5.md | 6 +- .../Kaleidoscope/Chapter6/Kaleidoscope-ch6.md | 17 +++--- .../Chapter7.1/Kaleidoscope-ch7.1.md | 18 +++--- .../Kaleidoscope/Chapter7/Kaleidoscope-ch7.md | 16 +++--- .../Kaleidoscope/Chapter8/Kaleidoscope-ch8.md | 4 +- src/Samples/Kaleidoscope/IgnoredWords.dic | 7 +++ .../Kaleidoscope/Kaleidoscope-Overview.md | 5 +- .../Kaleidoscope-Runtime.md | 13 +++-- src/Ubiquity.NET.Llvm/ReadMe.md | 2 + 33 files changed, 289 insertions(+), 228 deletions(-) diff --git a/.editorconfig b/.editorconfig index e55c230dc..c48655959 100644 --- 
a/.editorconfig +++ b/.editorconfig @@ -28,9 +28,12 @@ indent_size = 2 tab_width = 2 [*.md] -# mark left margion for split screen preview of markdown files +# mark left margin for split screen preview of markdown files # requires: https://marketplace.visualstudio.com/items?itemName=PaulHarrington.EditorGuidelinesPreview guidelines = 92 +# VSSPELL: Markdown Files +vsspell_section_id = 3fa02c9f36bb41a5ac6206ceb9c564dc +vsspell_exclusion_expressions_3fa02c9f36bb41a5ac6206ceb9c564dc = ```[\s\S]*?$[\s\S]*?```$(?@@PND@@/Options/Multiline) # match ISO standard requirement for C/C++ [*.c,*.h,*.cpp] @@ -133,31 +136,31 @@ dotnet_naming_rule.non_field_members_should_be_pascal_case.style = pascal_case dotnet_naming_symbols.interface.applicable_kinds = interface dotnet_naming_symbols.interface.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected -dotnet_naming_symbols.interface.required_modifiers = +dotnet_naming_symbols.interface.required_modifiers = dotnet_naming_symbols.types.applicable_kinds = class, struct, interface, enum dotnet_naming_symbols.types.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected -dotnet_naming_symbols.types.required_modifiers = +dotnet_naming_symbols.types.required_modifiers = dotnet_naming_symbols.non_field_members.applicable_kinds = property, event, method dotnet_naming_symbols.non_field_members.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected -dotnet_naming_symbols.non_field_members.required_modifiers = +dotnet_naming_symbols.non_field_members.required_modifiers = # Naming styles dotnet_naming_style.begins_with_i.required_prefix = I -dotnet_naming_style.begins_with_i.required_suffix = -dotnet_naming_style.begins_with_i.word_separator = +dotnet_naming_style.begins_with_i.required_suffix = +dotnet_naming_style.begins_with_i.word_separator = dotnet_naming_style.begins_with_i.capitalization = 
pascal_case -dotnet_naming_style.pascal_case.required_prefix = -dotnet_naming_style.pascal_case.required_suffix = -dotnet_naming_style.pascal_case.word_separator = +dotnet_naming_style.pascal_case.required_prefix = +dotnet_naming_style.pascal_case.required_suffix = +dotnet_naming_style.pascal_case.word_separator = dotnet_naming_style.pascal_case.capitalization = pascal_case -dotnet_naming_style.pascal_case.required_prefix = -dotnet_naming_style.pascal_case.required_suffix = -dotnet_naming_style.pascal_case.word_separator = +dotnet_naming_style.pascal_case.required_prefix = +dotnet_naming_style.pascal_case.required_suffix = +dotnet_naming_style.pascal_case.word_separator = dotnet_naming_style.pascal_case.capitalization = pascal_case dotnet_style_coalesce_expression = true:warning dotnet_style_null_propagation = true:warning diff --git a/Directory.Packages.props b/Directory.Packages.props index a70281288..e8a728cc8 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -39,9 +39,15 @@ - + - + @@ -51,14 +57,13 @@ - - - - - - + + + + + - + diff --git a/IgnoredWords.dic b/IgnoredWords.dic index 7f248282a..bb182622d 100644 --- a/IgnoredWords.dic +++ b/IgnoredWords.dic @@ -77,6 +77,7 @@ getelementptr getters gh github +gitignore Globalization Hashtable Identifier @@ -88,12 +89,15 @@ inlined inlining Interop ints +Invokable jit len Lexer +libclang LibLLVM Llilum llvm +llvmroot llvmversion lookups LValue @@ -110,6 +114,8 @@ minimalistic Mips msbuild msg +namespace +namespaces nav nint noinline @@ -125,6 +131,7 @@ outdent pages paren perf +performant pointee polyfills pragma @@ -156,12 +163,14 @@ telliam templated templating tl +tocyml trx typdef Typedef typedefs typelib typeof +Uber uid uint unaryop diff --git a/PsModules/CommonBuild/Public/Initialize-CommonBuildEnvironment.ps1 b/PsModules/CommonBuild/Public/Initialize-CommonBuildEnvironment.ps1 index 9227e661a..3a0bd3537 100644 --- a/PsModules/CommonBuild/Public/Initialize-CommonBuildEnvironment.ps1 +++ 
b/PsModules/CommonBuild/Public/Initialize-CommonBuildEnvironment.ps1 @@ -52,6 +52,7 @@ function Initialize-CommonBuildEnvironment [string]$repoRoot, [switch]$FullInit ) + try { # Script code should ALWAYS use the global CurrentBuildKind @@ -101,9 +102,12 @@ function Initialize-CommonBuildEnvironment # "profile" and the actual command is exposed. if($null -eq (Find-OnPath vswhere)) { - # NOTE: automated builds in Github do NOT include WinGet (for reasons unknown) + # NOTE: automated builds in Github do NOT include winget (for reasons unknown) # However, they do contain VSWHERE so should not hit this. winget install Microsoft.VisualStudio.Locator | Out-Null + + # location is fixed and the same no matter what version! + $env:PATH +=';%ProgramFiles(x86)%\Microsoft Visual Studio\Installer' } $vsShellModulePath = vswhere -latest -find **\Microsoft.VisualStudio.DevShell.dll diff --git a/PsModules/CommonBuild/Public/Show-FullBuildInfo.ps1 b/PsModules/CommonBuild/Public/Show-FullBuildInfo.ps1 index 68961ab21..6c71c76fb 100644 --- a/PsModules/CommonBuild/Public/Show-FullBuildInfo.ps1 +++ b/PsModules/CommonBuild/Public/Show-FullBuildInfo.ps1 @@ -9,7 +9,7 @@ function Show-FullBuildInfo properties so that the full details are available in logs. .DESCRIPTION - This function displays all the properties of the buildinfo to the information stream. Additionally, + This function displays all the properties of the build info to the information stream. Additionally, details of the current PATH, the .NET SDKs and runtimes installed is logged to the Verbose stream. #> Param($buildInfo) diff --git a/README.md b/README.md index 16ecfd898..108ac307c 100644 --- a/README.md +++ b/README.md @@ -15,14 +15,15 @@ For details of releases, see the [release notes](https://github.com/UbiquityDotN Ubiquity.NET.Llvm provides LLVM language and runtime bindings for .NET based applications. 
Ubiquity.NET.Llvm's goal is to provide a robust Class library that accurately reflects the underlying LLVM C++ model. This is done through an extended LLVM-C API bundled as a native -library (LibLLVM). Ubiquity.NET.Llvm uses the support of LibLLVM to gain access to the LLVM -class library and project it into a .NET managed library that reflects the original class -library design as best as possible. +library (`Ubiquity.NET.LibLLVM`). Ubiquity.NET.Llvm uses the support of LibLLVM to gain +access to the LLVM class library and project it into a .NET managed library that reflects +the original class library design as best as possible. The goal is to match the original class model as closely as possible, while providing an object model to .NET applications that feels familiar and consistent with common styles and patterns in .NET Framework applications. Thus, while class, method and enumeration names are -similar to their counterparts in LLVM, they are not always identical. +similar to their counterparts in LLVM, they are not always identical (especially casing and +use of `_`). ### Brief History Ubiquity.NET.Llvm was initially developed as a means to leverage LLVM as the back-end for an @@ -36,24 +37,25 @@ code generator that was tied to the ARMv4 Instruction set. ([Llilum](https://www Ubiquity.NET.Llvm has continued to evolve and improve and remains a distinct project as it has no dependencies on Llilum or any of its components. Ubiquity.NET.Llvm is viable for any .NET applications wishing to leverage the functionality of the LLVM libraries from .NET -applications. +applications. In fact, its most common use now is for supporting JIT execution of Domain +Specific Languages (DSL) though it is not limited to that as the [Kaleidoscope Tutorials](#kaleidoscope-tutorial) +show an interactive JIT implementation along with full AOT compilation.
Ubiquity.NET.Llvm began with LLVM 3.4 as a C++/CLI wrapper which enabled a closer binding to the original C++ object model then the official LLVM-C API supported. As Ubiquity.NET.Llvm progressed so to did LLVM. Eventually, the LLVM code base migrated to requiring C++/11 support in the language to build. This became an issue for the C++/CLI wrapper as the -Microsoft C++/CLI compiler didn't support the C++11 syntax. Thus a change was made to -Ubiquity.NET.Llvm to move to an extended C API with a C# adapter layer to provide the full -experience .NET developers expect. While the transition was a tedious one very little -application code required changes. LLVM and Ubiquity.NET.Llvm have continued to progress and -Ubiquity.NET.Llvm is currently based on LLVM 20.1.x. +Microsoft C++/CLI compiler didn't support the `C++11` syntax. Thus, a change was made to +`Ubiquity.NET.Llvm` to move to an extended C API with a C# adapter layer to provide the full +experience .NET developers expect. While the transition was a tedious one, very little +application code required changes. LLVM and `Ubiquity.NET.Llvm` have continued to progress +and `Ubiquity.NET.Llvm` is currently based on LLVM 20.1.x. There are a few major goals of the current release that required breaking changes: 1) AOT compilation of applications leveraging this library - - While this goal is not yet fully realized many steps were taken to aid in getting - there. 2) Platform independence - - Again, not fully realized yet but many steps were taken to aid in getting there. + - Not fully realized in this release yet but many steps were taken to aid in getting + there. * The largest impediment is the massive resource requirements of building the native LLVM libraries for a given runtime. Building them runs afoul of the limitations of every available OSS not to mention exceeding the size of a NUGET package to host @@ -72,16 +74,16 @@ package is built for the "AnyCPU" platform and references the Ubiquity.NET.Llvm. 
package to bring in the native binary support. Ubiquity.NET.Llvm.Interop contains code to dynamically detect the platform it is running on and load the appropriate native library. This allows applications to build for AnyCPU without creating multiple build configurations -and release vehicles for applications. (Any new platforms would need to update the dynamic -loading support and include the appropriate P/Invokable binaries - consuming apps would not -need to change except to pick up a new package version.) +and release vehicles for applications. Any new platforms would need to update the dynamic +loading support and include the appropriate P/Invokable binaries. Consuming apps would not +need to change except to pick up a new package version. ### CI Build NuGet Packages -The CI Builds of the NuGet package built from the latest source in the master branch are -available as build artifacts from the build. Unfortunately with an all GitHub build via -GitHub Actions we don't have a good story for accessing the packages from unreleased -automated CI builds. While GitHub does support a package registry (GPR), it really doesn't -meet the needs of CI builds. In particular: +The CI Builds of the NuGet package built from the latest source in the `develop` branch are +available as build artifacts. Unfortunately with an all GitHub build via GitHub Actions we +don't have a good story for accessing the packages from unreleased automated CI builds. +While GitHub does support a package registry (GPR), it really doesn't meet the needs of CI +builds. In particular: * GPR Doesn't support deletion of older CI build packages (Cluttering the feed) * GPR requires complex login/Tokens just to get packages from the feed, despite being a public repository... @@ -92,7 +94,8 @@ used for publishing releases. (Official NuGet will serve that role for releases) and PR build packages are available as artifacts from the GitHub actions that build them. 
### API Documentation -The full API documentation on using Ubiquity.NET.Llvm is available on the [Ubiquity.NET.Llvm documentation site](https://ubiquitydotnet.github.io/Llvm.NET/). +The full API documentation on using Ubiquity.NET.Llvm is available on the +[Ubiquity.NET.Llvm documentation site](https://ubiquitydotnet.github.io/Llvm.NET/). ### Sample Applications #### Code Generation With Debug Information @@ -123,13 +126,13 @@ the use of the library. #### Using Visual Studio The repository contains Visual Studio solution files that allow building the components -individually for modifying Ubiquity.NET.Llvm, as well as running the available unit tests -and samples. This is the primary mode of working with the Ubiquity.NET.Llvm source code +individually for modifying `Ubiquity.NET.Llvm`, as well as running the available unit tests +and samples. This is the primary mode of working with the `Ubiquity.NET.Llvm` source code during development. ### Replicating the automated build -The Automated build support for Ubiquity.NET.Llvm uses Build-All.ps1 PowerShell script to -build all the binaries and generate a NuGet package. To build the full package simply run +The Automated build support for `Ubiquity.NET.Llvm` uses `Build-All.ps1` PowerShell script +to build all the binaries and generate a NuGet package. To build the full package simply run `Build-All.ps1 -ForceClean` from a PowerShell command prompt with MSBuild tools on the system search path. diff --git a/docfx/ReadMe.md b/docfx/ReadMe.md index 4a5dfcdfa..27df94914 100644 --- a/docfx/ReadMe.md +++ b/docfx/ReadMe.md @@ -18,7 +18,7 @@ constructed a complete custom template to deal with it... Sigh, what a waste of ## Changes Over Time DocFX has obsoleted the `docfxconsole` NuGet package that was used to run docfx for a project via MSBUILD. Instead it focused on a .NET tool to do it all via the command line. 
-Ultimately the docfx.json serves as the corellary to a "project" file for the different site +Ultimately the docfx.json serves as the corollary to a "project" file for the different site builds. The PowerShell script `Build-Docs.ps1` was updated to use the new tool directly. Using that script should have little or no impact on the overall flow. There is a "no-targets" project in the solution to enable easier access to the input files but does not @@ -78,9 +78,9 @@ Since this is generated it is listed in the [.gitignore](#gitignore) file. These folders (named after the `*` portion of the [api-*](#api-*) folder names contains manually written additional files, articles, samples etc... related to a given library. -## Guide to wrting XML DOC comments +## Guide to writing XML DOC comments When dealing with doc comments the XML can sometimes get in the way of general readability -of the source code. There is an inherent tension beween how a particular editor renders the +of the source code. There is an inherent tension between how a particular editor renders the docs for a symbol/method (VS calls this "Quick Info") and how it is rendered in the final documentation by a tool like docfx. This guides general use to simplify things as much as possible. @@ -144,7 +144,7 @@ render properly in final docs. everything is trimmed it is at least a distinct pattern that is readable. 5) ***DO NOT*** put lists in any place other than inside a `remarks` region a) Usually, the remarks comments are not even rendered as the most useful part is the - API signaure and parameter info. Different editors may allow control of that. + API signature and parameter info. Different editors may allow control of that. 
i) In VS [2019|2022] for C# it is controlled by `Text Editor > C# > Advanced > Editor Help: "Show remarks in Quick Info."` 1) Turning this off can greatly reduce the noise AND reduce the problems of diff --git a/docfx/index.md b/docfx/index.md index 9b8d79b74..b220db86d 100644 --- a/docfx/index.md +++ b/docfx/index.md @@ -3,7 +3,7 @@ Ubiquity.NET family of libraries provides support for a number of scenarios but focus is AOT code generation of .NET for Embedded systems. We aren't quite there yet, but are rather close. In the mean time this set of libraries provides the building blocks needed for creating a Domain Specific Language (DSL) implementation or custom language compiler, -including JIT execution. Several useful generalized libraries are also included. +including JIT execution. ## The Libraries[1](#footnote_1) in this repository @@ -12,7 +12,7 @@ including JIT execution. Several useful generalized libraries are also included. | [Ubiquity.NET.Llvm](llvm/index.md) | This library contains The core of the LLVM projection to .NET | --- -1 The Ubiquity.NET.Llvm.Interop is intentionally NOT documented. It is an internal -implementation detail subject to change in the future. There are plans to merge it with the -OO wrapper library. Therefore, applications should NOT depend on it as it is likely to cease -existing in the future. +1 The Ubiquity.NET.Llvm.Interop is intentionally NOT +documented. It is an internal implementation detail subject to change in the future. There +are plans to merge it with the OO wrapper library. Therefore, applications should NOT depend +on it as it is likely to cease existing in the future. 
diff --git a/docfx/llvm/articles/InternalDetails/index.md b/docfx/llvm/articles/InternalDetails/index.md index e461584d8..1d66384f1 100644 --- a/docfx/llvm/articles/InternalDetails/index.md +++ b/docfx/llvm/articles/InternalDetails/index.md @@ -1,13 +1,13 @@ # Internal details This section is focused on providing internal details of the Ubiquity.NET.Llvm -implementation for developers contributing to the contents of the Ubiquity.NET.Llvm library -itself. If you are only interested in using the `Ubiquity.NET.Llvm` APIs you don't need this -information, though it may satisfy curiosity 8^). +implementation for developers contributing to the contents of the `Ubiquity.NET.Llvm` +library itself. If you are only interested in using the `Ubiquity.NET.Llvm` APIs you don't +need this information, though it may satisfy curiosity :nerd_face:. ## Generate Handles -The source for the handles is generated from the headers by the LibLLVM repository build. -They are created by the `LLvmBindingsGenerator` from the headers contained in the -`Ubiquity.NET.LibLLvm` package. The LibLLVM package is a bundle of the RID neutral headers -along with any RID specific headers. It is ultimately a "Uber" package that references the -RID specific native libraries. This keeps the size of each package down to meet NuGet -standards. +The source for the handles is generated by the `LlvmBindingsGenerator` from +the headers contained in the `Ubiquity.NET.LibLLVM` package. The LibLLVM package is a bundle +of the RID neutral headers along with any RID specific headers. It is ultimately a "meta" +package that references the RID specific native libraries. This keeps the size of each +package down to meet NuGet standards while allowing easy addition of new runtimes in the +future.
diff --git a/docfx/llvm/articles/InternalDetails/llvm-handles.md b/docfx/llvm/articles/InternalDetails/llvm-handles.md index 7a71fa3a2..eb17cd601 100644 --- a/docfx/llvm/articles/InternalDetails/llvm-handles.md +++ b/docfx/llvm/articles/InternalDetails/llvm-handles.md @@ -15,20 +15,24 @@ title: LLVM-C Handle Wrappers This occurs when a child of a resource contains a reference to the parent. In such a case the handle should be considered like an alias and not disposed. -The Handle implementations in Ubiquity.NET.Llvm follow consistent patterns for implementing -each form of handle. All handle types are generated from the native C++ headers contained in -the `Ubiquity.NET.LibLLVM` package. Ultimately, the handles are reduced to two forms: +The Handle implementations in `Ubiquity.NET.Llvm` follow consistent patterns for +implementing each form of handle. All handle types are generated from the native C++ headers +contained in the `Ubiquity.NET.LibLLVM` package. Ultimately, the handles are reduced to two +forms: 1) Requires caller to release them + - Case 1 & 2 [previously discussed](#llvm-c-handle-wrappers). - Lifetime of the thing the handle refers to is controlled by the caller - Release is implemented by standard .NET pattern with [IDisposable](xref:System.IDisposable) 2) Does NOT require any dispose + - Case 3 [previously discussed](#llvm-c-handle-wrappers). - Lifetime of the thing the handle refers to is controlled by the container >[!NOTE] > The generated sources are not useful outside of the `Ubiquity.NET.Llvm.Interop` as they > use classes within that as a base class. These are generated manually via the -> `Generate-HandleWrappers.ps1` script. This is done once for any updates to the LibLLVM -> package to ensure the handles are kept up to date with the underlying native library. +> `Generate-HandleWrappers.ps1` script, with the sources checked in to the repository.
This +> is done once for any updates to the LibLLVM package to ensure the handles are kept up to +> date with the underlying native library. ### Contextual handles and Aliases These handles are never manually released or disposed, though releasing their containers @@ -49,7 +53,7 @@ replace the wrapped handle with a default value on `Dispose()` or when "moved" ( native code) IFF, the wrapper supports "move" semantics then the `Dispose()` call is idempotent. Calling Dispose() may be a NOP. This ensures that applications need not worry about move semantics and just call `Dispose()` [Usually implicitly via a `using` expression] -Thus, even if an exception occured and the move didn't complete, the resource is properly +Thus, even if an exception occurred and the move didn't complete, the resource is properly disposed of. All resource handles in `Ubiquity.NET.Llvm,Interop` requiring explicit release are handled @@ -64,7 +68,8 @@ in ownership control/release. These are commonly used when a child of a global c exposes a property that references the parent container. In such cases the reference retrieved from the child shouldn't be used to destroy the parent when no longer used. -In Ubiquity.NET.Llvm.Interop this is represented as an unowned context handle, that is alias -handles are the same as a context handle. There is no way to convert from an unowned alias -to an owned global handle (The other way around is allowed and supported) +In `Ubiquity.NET.Llvm.Interop` this is represented as an unowned context handle, that is +alias handles are the same as a context handle. 
There is no way to convert from an unowned +alias to an owned global handle (Though the other way around is allowed and supported +implicitly). diff --git a/docfx/llvm/articles/InternalDetails/marshal-LLVMBool.md b/docfx/llvm/articles/InternalDetails/marshal-LLVMBool.md index 267692a7a..2183b126d 100644 --- a/docfx/llvm/articles/InternalDetails/marshal-LLVMBool.md +++ b/docfx/llvm/articles/InternalDetails/marshal-LLVMBool.md @@ -2,19 +2,19 @@ LLVMBool is a typdef in the LLVM-C API that is both simple and problematic. In it's simplest sense an LLVMBool is a representation of a bi-modal value. However, the problematic part is -that the semantics for the value are different depending on any given API. That is, in some -cases LLVMBool != 0 is a failure case, and others it is a success! The confusion stems from -LLVMBool serving a dual role: +that the semantics for what a value means are different depending on any given API. That is, +in some cases LLVMBool != 0 is a failure case, and others it is a success! The confusion +stems from LLVMBool serving a dual role: 1. A real boolean true/false -2. A status code where 0 == success and non-zero indicates an error +2. A status code where 0 == success and non-zero indicates an error of some sort. This duality is confusing and can lead to subtle errors in usage of APIs if translated directly into language projections. This makes hands-off automatic generation of P/Invoke -calls to LLVM either impossible or error prone. Thus, Ubiquity.NET.Llvm uses manually +calls to LLVM either impossible or error prone. Thus, `Ubiquity.NET.Llvm` uses manually updated P/Invoke calls that were initially auto generated to get things started but not maintained via any generation tools. In the case of `LLVMBool` `Ubiquity.NET.Llvm.Interop` uses distinct types for the different semantics and declares the interop signatures with the -form appropriate to the function being called.
The two types are LLVMStatus and standard +form appropriate to the function being called. The two types are `LLVMStatus` and standard `System.Boolean` or `bool` in C#. ## LLVMStatus diff --git a/docfx/llvm/articles/InternalDetails/marshal-string.md b/docfx/llvm/articles/InternalDetails/marshal-string.md index 29e356b54..a2b3069cc 100644 --- a/docfx/llvm/articles/InternalDetails/marshal-string.md +++ b/docfx/llvm/articles/InternalDetails/marshal-string.md @@ -11,3 +11,10 @@ To resolve these issues and make the requirements explicitly clear and consisten directly on the P/Invoke signature so it is both clear and easy to use for the upper layers (In most cases this is a `LazyEncodedString` but for a few it's just a `System.String`) +## LazyEncodedString +The `Ubiquity.NET.Llvm.Interop` library makes extensive use of the +[LazyEncodedString](https://ubiquitydotnet.github.io/Ubiquity.NET.Utils/interop-helpers/api/Ubiquity.NET.InteropHelpers.LazyEncodedString.html) +type. This allows for minimal overhead marshaling of strings to and from native code. In +particular, it lazily encodes into either form depending on what it started with. It does +this exactly once, so that the overhead of encode/decode is realized only the first time it +is needed. diff --git a/docfx/llvm/articles/Samples/index.md b/docfx/llvm/articles/Samples/index.md index ec5d84884..8f1318f02 100644 --- a/docfx/llvm/articles/Samples/index.md +++ b/docfx/llvm/articles/Samples/index.md @@ -4,5 +4,5 @@ Ubiquity.NET.Llvm provides multiple samples to aid in understanding how to use t aspect(s) of using Ubiquity.NET.Llvm itself and are not generally considered production quality. They serve to illustrate usage of some aspect with as little extraneous overhead as possible. The Kaleidoscope examples all use a common runtime, which is not necessary to -understand usage of the Ubiquity.NET.Llvm library. Though they can serve as an example of +understand usage of the `Ubiquity.NET.Llvm` library.
Though they can serve as an example of how to implement something similar. diff --git a/docfx/llvm/index.md b/docfx/llvm/index.md index 7454dd76e..eb99a25e5 100644 --- a/docfx/llvm/index.md +++ b/docfx/llvm/index.md @@ -1,14 +1,15 @@ # Ubiquity.NET.Llvm -Ubiquity.NET.Llvm is a managed wrapper around an extended LLVM-C API including an Object -Oriented model that closely matches the underlying LLVM internal object model. This allows -for building code generation, JIT and other utilities leveraging LLVM from .NET applications. +Ubiquity.NET.Llvm is a managed wrapper around an extended LLVM-C API providing an Object +Oriented (OO) model that closely matches the underlying LLVM internal object model. This +allows for building code generation, JIT and other utilities leveraging LLVM from .NET +applications. ## Guiding principles 1. Mirror the underlying LLVM model as much as possible while providing a well behaved .NET projection including: 1. Class names and hierarchies - 2. Object identity and reference equality + 2. Object identity and equality 3. [Fluent](https://en.wikipedia.org/wiki/Fluent_interface) APIs when plausible and appropriate. 2. Hide low-level interop details and the raw LLVM-C API. @@ -22,20 +23,22 @@ for building code generation, JIT and other utilities leveraging LLVM from .NET ## Features * LLVM Cross target code generation from .NET code -* JIT engine support for creating dynamic domain specific language runtimes with JIT support. -* Ahead of time compilation with support for Link time optimization and debug information +* JIT engine support for creating dynamic Domain Specific Language (DSL) runtimes with JIT + support. +* Ahead of Time (AOT) compilation with support for Link time optimization and debug + information. * Object model that reflects the underlying LLVM classes >[!Important] > It is important to point out that the `Ubiquity.NET.Llvm` documentation is not a > substitute for the official LLVM documentation itself. 
That is, the content here is > focused on using `Ubiquity.NET.Llvm` and how it maps to the underlying LLVM. The LLVM -> documentation is, generally speaking, required reading to understand Ubiquity.NET.Llvm. +> documentation is, generally speaking, required reading to understand `Ubiquity.NET.Llvm`. > The topics here often contain links to the official LLVM documentation to help in > further understanding the functionality of the library. ## Breaking changes from prior versions -In Version 20.1.0 a number of issues were resolved using newer .NET as well as in the LLVM +In Version 20.1.x a number of issues were resolved using newer .NET as well as in the LLVM design itself that allows for a fundamentally new implementation. While there isn't a LOT of code that consumers have to change (See the samples and compare against older versions) there are important factors to consider in the new library: @@ -45,10 +48,10 @@ there are important factors to consider in the new library: did not (Alias). This caused problems for the interning of projected types as the behavior of the first instance interned was used. (Usually leading to leaks or strange crashes at obscure unrelated times that made testing extremely difficult [Worst case - scenario, it works fine in all in-house testing but breaks in the field!). + scenario, it works fine in all in-house testing but breaks in the field!]). 3) No Interning of projected types - Projected types are no longer interned, this dramatically increases performance and - reduces the complexity of maintenance of this library. Generally it should have little + reduces the complexity to maintain this library. Generally it should have little impact as anything that produces an alias where the type might in other cases require the owner to dispose it should now produce an interface that is not disposable. Anything the caller owns IS an `IDisposable`. 
@@ -56,28 +59,33 @@ there are important factors to consider in the new library: but the Dispose remains a safe NOP. This helps prevent leaks or confusion when transfer is unable to complete due to an exception. The caller still owns the resource. Either way, `Dispose()` is called to clean it up, which is either a - safe NOP, or an actual release of the native resource. + safe NOP, or an actual release of the native resource if transfer didn't complete. 2) Assumption of Reference Equality 1) In the new library there is NO guarantee of reference equality for reference types. - Such types MAY be value equal if they refer to the same underlying native instance. + 2) Reference equality only considers the MANAGED wrapper instances and NOT the LLVM + handles or the contents of the object they refer to. ### Ownership and IDisposable When dealing with native interop the concept of ownership is of critical importance. The -underlying resources are NOT controlled by a Garbage collector, and therefore require care -to avoid access violations and other app crash scenarios. This library aims to make that -much easier by using IDisposable for these scenarios. It is ***HIGHLY*** recommended to use -the [IDisposableAnalyzers](https://www.nuget.org/packages/IDisposableAnalyzers/) in ANY +underlying resources are NOT controlled by a Garbage Collector (GC), and therefore require +care to avoid access violations and other app crash scenarios. This library aims to make +that much easier by using IDisposable for these scenarios. It is ***HIGHLY*** recommended to +use the [IDisposableAnalyzers](https://www.nuget.org/packages/IDisposableAnalyzers/) in ANY project that consumes this library. (It was/is used internally to find and fix issues across -the library that were tedious to identify otherwise). +the library that were tedious to identify otherwise).
The down side of this is that there is +no standard pattern for move semantics (e.g., when there is a transfer of ownership +responsibility). #### Ownership transfer (move semantics) Sometimes an API will transfer ownership to a containing type or native code in general. In C++ terminology that is known as 'move semantics' and typically handled with `std::move()` but .NET and C# have no such concept. To make life easier and keep usage of disposable types -consistent, when a method follows the move semantics it should be documented as such and, -more importantly, it will set the value provided as invalid BUT calling `Dispose()` is still -a NOP. This keeps usage consistent even if ownership is transferred. Attempting to use an -instance after it is transferred will result in an `ObjectDisposedException`. +consistent, when a method follows the move semantics it should be documented as such. +Furthermore, and more importantly, it will set the value provided as invalid BUT calling +`Dispose()` is still a NOP. This keeps usage consistent even if ownership is transferred. +Attempting to use an instance after it is transferred will result in an +`ObjectDisposedException`. Example from [OrcV2VeryLazy](xref:orcjitv2-very-lazy) sample application ``` C# @@ -113,7 +121,7 @@ of recommended best practice for IDisposable). Thus, this version of the library eliminates the confusion and complexity by use of objects that are disposable, interfaces and a usage pattern that ensures `Dispose()` is idempotent -and a NOP when already disposed. In the current release no interning is performed, and -instead wrapping types implement [`IEquatable`](xref:System.IEquatable`1) to allow value -equality to compare the underlying native handle and resolve them as the same underlying -instance or not. +and a NOP when already disposed or transferred. 
In the current release no interning is +performed, and instead wrapping types implement [`IEquatable`](xref:System.IEquatable`1) +to allow value equality to compare the underlying native handle and resolve them as the same +underlying instance or not. diff --git a/docfx/llvm/namespaces/Ubiquity.NET.Llvm.DebugInfo.md b/docfx/llvm/namespaces/Ubiquity.NET.Llvm.DebugInfo.md index d30bb3c26..12a9b2b8d 100644 --- a/docfx/llvm/namespaces/Ubiquity.NET.Llvm.DebugInfo.md +++ b/docfx/llvm/namespaces/Ubiquity.NET.Llvm.DebugInfo.md @@ -5,15 +5,15 @@ remarks: *content This namespace contains all the support for the LLVM representation of debugging information. ## Differences from previous release -A critical difference is that a [Module](xref:Ubiquity.NET.Llvm.Module) does NOT own a -[DIBuilder](xref:Ubiquity.NET.Llvm.DebugInfo.DIBuilder). That idea in previous releases was -a customized extension that was more accidental as a result of the former releases using -object interning. However, once that was removed it was found that Module instances were -attempting to hold fields or properties of things that were NOT part of the underlying -native object. So, the pattern of use was changed to better match how the underlying LLVM -API worked. In particular a `DIBuilder` is intended for short term use. It can (and does) -own a [DICompileUnit](xref:Ubiquity.NET.Llvm.DebugInfo.DICompileUnit) and it can reference -the module it was created from. ([Kaleidoscope Chapter 9](xref:Kaleidoscope-ch9) provides a -sample of use in a visitor pattern where the instance is provided as a parameter to -functions. This ability of providing a "context" was added to the visitor pattern -specifically for this case. +A critical difference from previous releases is that a [Module](xref:Ubiquity.NET.Llvm.Module) +does NOT own a [DIBuilder](xref:Ubiquity.NET.Llvm.DebugInfo.DIBuilder). 
That idea in +previous releases was a customized extension that was more accidental as a result of the +former releases using object interning. However, once that was removed it was found that +Module instances were attempting to hold fields or properties of things that were NOT part +of the underlying native object. So, the pattern of use was changed to better match how the +underlying LLVM API worked. In particular a `DIBuilder` is intended for short term use. It +can (and does) own a [DICompileUnit](xref:Ubiquity.NET.Llvm.DebugInfo.DICompileUnit) and it +can reference the module it was created from but is not owned by it. +([Kaleidoscope Chapter 9](xref:Kaleidoscope-ch9) provides a sample of use in a visitor +pattern where the instance is provided as a parameter to functions. This ability of +providing a "context" was added to the visitor pattern specifically for this case.) diff --git a/docfx/llvm/namespaces/Ubiquity.NET.Llvm.Transforms.Legacy.md b/docfx/llvm/namespaces/Ubiquity.NET.Llvm.Transforms.Legacy.md index f020be7e6..431c64363 100644 --- a/docfx/llvm/namespaces/Ubiquity.NET.Llvm.Transforms.Legacy.md +++ b/docfx/llvm/namespaces/Ubiquity.NET.Llvm.Transforms.Legacy.md @@ -3,7 +3,7 @@ uid: Ubiquity.NET.Llvm.Transforms.Legacy remarks: *content --- This namespace contains the wrappers for supporting the "legacy" pass management. This is -NOT normally used by applications as the new pass manager support is built into +***NOT*** normally used by applications as the new pass manager support is built into [Module](xref:Ubiquity.NET.Llvm.Module) and [Function](xref:Ubiquity.NET.Llvm.Values.Function) via one of the overloads of `TryRunPasses(...)`.
Generally the legacy pass manager support is only used for final target code generation and not exposed for LLVM-C consumption and diff --git a/docfx/templates/Ubiquity/public/main.css b/docfx/templates/Ubiquity/public/main.css index 1e1e87327..4e2e35ef3 100644 --- a/docfx/templates/Ubiquity/public/main.css +++ b/docfx/templates/Ubiquity/public/main.css @@ -10,7 +10,7 @@ final render of such things. } #logo { - background-color: white + background-color: transparent } /* diff --git a/src/Analyzers/ReadMe.md b/src/Analyzers/ReadMe.md index 038f89c37..c16abcaca 100644 --- a/src/Analyzers/ReadMe.md +++ b/src/Analyzers/ReadMe.md @@ -7,7 +7,7 @@ This repository does NOT use the new C# 14 extension syntax due to several reaso 1) Code lens does not work https://github.com/dotnet/roslyn/issues/79006 1. Sadly marked as "not planned" - e.g., dead-end 1. [New issue created](https://developercommunity.visualstudio.com/t/VS2026-Codelens-does-not-appearwork-f/10988233) -2) MANY analyzers get things wrong and need to be supressed +2) MANY analyzers get things wrong and need to be suppressed 1. (CA1000, CA1034, and many others [SAxxxx]) 3) Many tools (like docfx) don't support the new syntax yet. 4) No clear support for Caller* attributes on the extended symbol @@ -26,7 +26,7 @@ out. # Reference Equality Analyzer Reference equality is usually the wrong behavior for comparing wrapped LLVM types. This, is a significant breaking change from older releases of this library. However, due to issues -with cacheing (and more importantly, resolving) the correct thing (disposable or just an +with caching (and more importantly, resolving) the correct thing (disposable or just an alias?) - the behavior had to change and reference equality is broken. This analyzer reports issues if the code contains a reference equality (operator == on a ref type) when the type implements `IEquatable`. 
This eliminates source use assumptions in this library making diff --git a/src/Interop/LlvmBindingsGenerator/ReadMe.md b/src/Interop/LlvmBindingsGenerator/ReadMe.md index d08a9b1a3..cdd87e9eb 100644 --- a/src/Interop/LlvmBindingsGenerator/ReadMe.md +++ b/src/Interop/LlvmBindingsGenerator/ReadMe.md @@ -18,16 +18,16 @@ repositories: 1) This version is the one in this repository this document discusses ### Common implementation -While there is a common implementation between the implementations (They started as simply -the same code and commenting out the functionality not desired) they have and will diverge -over time. Though, anything in the core parsing of headers and general code generation from -templates is likely to remain. (It may be viable to support a common library for this -scenario but this is ONLY necessary when the native side of the interop library changes) +While there is a common implementation between the two (They started as simply the same code +and commenting out the functionality not desired) they have and will diverge over time. +Though, anything in the core parsing of headers and general code generation from templates +is likely to remain. (It may be viable to support a common library for this scenario but +this is ONLY necessary when the native side of the interop library changes) ## Usage > [!IMPORTANT] > This project has a dependency on the `CppSharp` library which ONLY supports the `X64` -> architecture but the generated wrappers are NOT dependent on a particular architecture. +> architecture but the generated wrappers are NOT dependent on any particular architecture. > This limits the environments that can be used to generate the sources. To simplify that, > the generated sources are placed into source control but generated off-line by a > developer. A developer machine doing this ***MUST*** be X64 or this tool can't run. 
This @@ -42,12 +42,12 @@ scenario but this is ONLY necessary when the native side of the interop library | HandleOutputPath | [Optional] Path to the root folder where the handle files are generated | | Diagnostics | Diagnostics output level for the app | -This tool is generally only required once per Major LLVM release. (Though a Minor release -that adds new APIs would also warrant a new run) However, to ensure the code generation tool -itself isn't altered with a breaking change, the PowerShell script takes care of running the -generator to update the Generated code base on each run, even if nothing changes in the end. -This is run on every automated build before building the res of the project so that -the generator is tested on every full automated build. +This tool is generally only required once per Major `Ubiquity.NET.LibLLVM` release. (Though +a Minor release that adds new APIs would also warrant a new run) However, to ensure the code +generation tool itself isn't altered with a breaking change, the PowerShell script takes +care of running the generator to update the Generated code base on each run, even if nothing +changes in the end. This is run on every automated build before building the rest of the +project so that the generator is tested on every full automated build. ### Generated code This library will generate the handle file directly. Therefore ROSLYN source generators are @@ -71,7 +71,7 @@ for this particular code base. of problems for the binary runtime dependencies of source generators, which don't flow with them as project references... -Specifically, in this code, the built-in generator that otherwise knows noting about the +Specifically, in this code, the built-in generator that otherwise knows nothing about the handle generation, needs to see and use the **OUTPUT** of the handle source generation. (It's not just a run ordering problem as ALL generators see the same input text!) 
[See: [Discussion on ordering and what a generator "sees"](https://github.com/dotnet/roslyn/discussions/57912#discussioncomment-1682779) @@ -83,10 +83,10 @@ code used, Specifically, it must have access to the `NativeMarshalling` attribut the handle types. Otherwise, it doesn't know how to marshal the type and bails out. It is possible to "overcome" this with an explicit `MarshalUsingAttribute` on every parameter or return type but that's tedious. Tedious, typing is what source generators and templates are -supposed to remove. Thus, this library will host the source generator (like a unit test -would) and generates the handle sources **BEFORE** they are compiled in the project. Thus, -the generated source files will contain the marshaling attributes so that the interop source -generator knows how to generate the correct code. +supposed to remove. Thus, this library is used to generate the handle sources **BEFORE** +they are compiled in the project. Thus, the generated source files will contain the +marshaling attributes so that the interop source generator knows how to generate the correct +code. >To be crystal clear - The problem is **NOT** one of generator run ordering, but on the > ***dependency of outputs***. By design, Roslyn source generators can only see the original @@ -102,7 +102,7 @@ generator knows how to generate the correct code. #### Alternate solutions considered and rejected 1) Running the source generator directly in the project 1) This is where the problem on non-deterministic ordering and visibility of the - generated code was discovered. Obviously (now anyway!) this won't work. + generated code was discovered. Obviously, (now anyway!) this won't work. 2) Use a source generator in a separate assembly 1) This solves the generator output dependency problem but introduces a new problem of how the build infrastructure for these types manage NuGet versions. @@ -114,7 +114,7 @@ generator knows how to generate the correct code. 
the custom generator runs before the built-in one. 2) However, this runs afoul of the binary dependency problem... Not 100% insurmountable but the number of caveats on the Roslyn Source Generator side of things grows to a - significant factor. + significant factor when an existing working solution already exists. #### The final choice Keep using this app as a generator for the handle types. This used to work, and still does. @@ -125,12 +125,11 @@ this sort of one off specialized code generation. Solving the problem of expressing P/Invokes is simply to just manage that directly. It seemed like a good idea to automate the tedium of generating those. Sadly, there are so many -subtleties of "special cases" that involve reading the docs (or source code) before you can -correctly implement it. In the end, there's no value in expressing all that subtlety in -anything other than C#. +subtleties of "special cases" that involve reading the docs (or, often, source code) before +you can correctly implement it. In the end, there's no value in expressing all that subtlety +in anything other than the implementation language (C#). This also keeps the door open to use the native AST from within the source generator or an analyzer to perform additional checks and ensure the hand written code matches the actual native code... (Though this would involve more direct use of the Roslyn parser/analyzer and may be best to generate an input to a proper analyzer) - diff --git a/src/Interop/readme-first.md b/src/Interop/readme-first.md index 1be5ddca0..2124d49b1 100644 --- a/src/Interop/readme-first.md +++ b/src/Interop/readme-first.md @@ -17,15 +17,15 @@ following scenarios: - They are still immutable types and the use of IDisposable is simply to unify the disposal and leverage built-in patterns for ownership. Since these are immutable types the `Dispose()` is NOT idempotent. - - This is normally not an issue as this is NOT inteded for exposire to end users. 
- It is expected that the handles are wrapped in an OO type that can replace the - handle held with a default value on Dispose() to allow for an idempotent - dispose. + - This is normally not an issue as this is NOT intended for exposure to end + users. It is expected that the handles are wrapped in an OO type that can + replace the handle held with a default value on Dispose() to allow for an + idempotent dispose. - Idempotent dispose is useful for APIs that use `move` semantics where the native API takes over ownership, but only on success. Thus, a caller might still own the resource on error/exception. By allowing idempotent `Dispose` - the caller need not care about such subtlties and ALWAYS calls `Dispose` - which is normally a NOP, but if an error occured actually releases the + the caller need not care about such subtleties and ALWAYS calls `Dispose` + which is normally a NOP, but if an error occurred actually releases the resource. ## Roslyn Source Generators - 'There be dragons there!' @@ -90,7 +90,7 @@ correct code. of needing a source generator. 1) This can work, however it means a great deal of tedious attribution that the compilation should do for us. - 1) Elimination of tedious typing and repetive code is WHY templates/generics exist. + 1) Elimination of tedious typing and repetitive code is WHY templates/generics exist. ### The final choice Keep using `LlvmBindingsGenerator` as a generator for the handle types. This used to work, @@ -167,7 +167,4 @@ as it may not exist in the future :warning: You have been warned! :warning: ^2^ Currently ONLY win-x64 is supported but the foundational work is present to allow -building for other platorms. - - - +building for other platforms. 
diff --git a/src/Samples/CodeGenWithDebugInfo/codegeneration.md b/src/Samples/CodeGenWithDebugInfo/codegeneration.md index 4fbf3892b..da7178a87 100644 --- a/src/Samples/CodeGenWithDebugInfo/codegeneration.md +++ b/src/Samples/CodeGenWithDebugInfo/codegeneration.md @@ -86,10 +86,10 @@ is required. > another thread's context. This is a fundamental design of LLVM and reduces the complexity > of attempting to manage collections of objects and interning them in a thread safe manner. > Applications instead just create a context per thread if needed. This constraint does not -> prevent use with asyncornous operations as long as such operations NEVER touch the context -> from multiple threads. That is, each leg of an asynchonous operation may use the same -> `Context` but no other threads or operations may use the same one. This ensoures that the -> `Context` is only used by one thread at a time even if it is passed arround between +> prevent use with asynchronous operations as long as such operations NEVER touch the context +> from multiple threads. That is, each leg of an asynchronous operation may use the same +> `Context` but no other threads or operations may use the same one. This ensures that the +> `Context` is only used by one thread at a time even if it is passed around between > threads. To generate code for a particular target the application initializes the module to include @@ -111,9 +111,8 @@ so it creates a [DIFile](xref:Ubiquity.NET.Llvm.DebugInfo.DIFile) for the source time. The sample code creates the `DICompileUnit` when creating the bit code module. This is the -normal pattern for creating the compile unit when generating debugging information. Though -it is possible to create it independently but there usually isn't any real benefit to doing -so. +normal pattern for creating the compile unit when generating debugging information. While +it is possible to create it independently there usually isn't any real benefit to doing so. 
## Creating basic types with debug information In LLVM types are fairly minimalistic and only contain the basic structural information for @@ -121,7 +120,7 @@ generating the final machine code. Debug information, as metadata in LLVM, provi source level debugging information. In LLVM this requires creating and tracking both the native type and the Debug information metadata as independent object instances. In `Ubiquity.NET.Llvm` this is handled by a unified debug and type information system. That is, -in Ubiquity.NET.Llvm a single class is used to represent types and it acts as a binder +in `Ubiquity.NET.Llvm`, a single class is used to represent types and it acts as a binder between the full debugging description of the type and the native LLVM minimal description. These types all implement a common interface [ITypeRef](xref:Ubiquity.NET.Llvm.Types.ITypeRef). This interface is used throughout Ubiquity.NET.Llvm to expose types in a consistent fashion. @@ -214,8 +213,8 @@ is visible only locally. This is indicated by the [Linkage.Internal](xref:Ubiquity.NET.Llvm.Values.Linkage.Internal) linkage value. >[!NOTE] -> The use of fluent style extension methods in the Ubiquity.NET.Llvm API helps make it easy -> to add to or modify the attributes and linkage etc... +> The use of fluent style extension methods in the `Ubiquity.NET.Llvm` API helps make it +> easy to add to or modify the attributes and linkage etc... `DeclareCopyFunc()` is a bit special in that it handles some target specific support in a generalized way. In particular the calling convention for the struct to use the `byval` form @@ -254,13 +253,14 @@ intrinsic function that is used to declare the debug information for a variable. method. ### Calling LLVM Intrinsics -The generated code needs to copy some data, rather than directly doing a copy in a loop, the -code uses the LLVM intrinsic memcopy function. 
This function is lowered to an optimized copy -for the target so that applications need not worry about building optimal versions of IR for -this common functionality. Furthermore, the LLVM intrinsic supports a variety of signatures -for various data types all of which are hidden in the `Ubiquity.NET.Llvm` method. Rather -than require callers to create a declaration of the correct signature the `Ubiquity.NET.Llvm` -wrapper automatically figures out the correct signature from the parameters provided. +The generated code needs to copy some data. Rather than directly doing a copy in a loop, the +code uses the LLVM intrinsic memcopy function. This function is lowered to an optimized +variant for the target so that applications need not worry about building optimal versions +of IR for this common functionality. Furthermore, the LLVM intrinsic supports a variety of +signatures for various data types all of which are hidden in the `Ubiquity.NET.Llvm` method. +Rather than require callers to create a declaration of the correct signature the +`Ubiquity.NET.Llvm` wrapper automatically figures out the correct signature from the +parameters provided. ## Final LLVM IR ```llvm diff --git a/src/Samples/Kaleidoscope/Chapter2/Kaleidoscope-ch2.md b/src/Samples/Kaleidoscope/Chapter2/Kaleidoscope-ch2.md index 9172e0565..1fa2adcc9 100644 --- a/src/Samples/Kaleidoscope/Chapter2/Kaleidoscope-ch2.md +++ b/src/Samples/Kaleidoscope/Chapter2/Kaleidoscope-ch2.md @@ -121,8 +121,17 @@ BINARY: {FeatureUserOperators}? 'binary'; > operators [The official LLVM implementation only allows a single character as the operator > lexeme]) > -> Additionally the Ubiquity.NET.Llvm implementation adds the built-in '^' operator for -> exponentiation. +> +> The `Ubiquity.NET.Llvm` implementation adds the following: +> +>| Syntax | Description | +>|--------|-------------| +>| '^' | operator for exponentiation. | +>| '=' | operator for assignment. 
| +>| '==' | operator for equality checks | +>| '++' | operator for incrementing a value | +>| '--' | operator for decrementing a value | + ### Parser The parser, like the lexer, uses Semantic Predicates, which allows for dynamic adaptation of @@ -143,7 +152,7 @@ In order to support the parser detecting attempts to overload built-in operators handle the fact that some operators don't make any sense as unary operators (e.g. you can't create a user defined unary '=' operator. Technically, you could implement that but it would make for some confusing code. If you really like hard to read and comprehend code there are -[other languages](https://en.wikipedia.org/wiki/Brainfuck) better suited to that end 8^) ) +[other languages](https://en.wikipedia.org/wiki/Brainfuck) better suited to that end :nerd_face: ) To manage detection of appropriate operator tokens the grammar uses a set of parser rules that group the operator tokens by their allowed kinds. This allows subsequent rules to @@ -235,7 +244,7 @@ binaryop ``` ### Initializers -The Initializers rule provides a way to handle a common sequence in the language in multiple +The initializers rule provides a way to handle a common sequence in the language in multiple different contexts (sort of like a function in most programming languages, in fact, ANTLR rules are implemented in the generated parser as methods). @@ -347,7 +356,7 @@ printstar(100); For loops with mutable values support in the language may provide a result that isn't always 0.0, for example: -```Kaleidoscope +``` Kaleidoscope # Define ':' for sequencing: as a low-precedence operator that ignores operands # and just returns the RHS. def binary : 1 (x y) y; @@ -383,7 +392,7 @@ will generate a parser based on the grammar description input file. This generat generates these types as partial classes so they are extensible from the parser assembly without needing to derive a new type or use virtual methods etc. 
Thus, the `Kaleidoscope.Grammar` assembly contains partial class extensions that provide simpler -property accessors and support methods to aid is generating the AST. +property accessor and support methods to aid in generating the AST. See [Kaleidoscope Parse Tree Examples](xref:Kaleidoscope-Parsetree-examples) for more information and example diagrams of the parse tree for various language constructs. @@ -482,13 +491,14 @@ Of particular use is the ability to generate DGML and [blockdiag](http://blockdi representations of the parse tree for a given parse. >[!NOTE] ->All of the diagrams in these tutorials were created by generating the blockdiag files and -> then producingthe SVG files from that. Having a nice visual representation of a parse tree -> result is helpful to understanding the parsing and various parse tree node types. +> All of the diagrams in these tutorials were created by generating the `blockdiag` files +> and then producing the SVG files from that. Having a nice visual representation of a parse +> tree result is helpful to understanding the parsing and various parse tree node types. The visual graph is also immensely valuable when making changes to the grammar so you can see the results of a parse and more readily understand why something isn't right. In fact, -this feature was created to help track down bugs in the parsing for user defined operator -precedence that was difficult to figure out. Once the visualization was available it became -quite easy to see the problems. Thus, Chapter 2 is both a simple introductory example and a -tool for use when doing more advanced language tweaking or extension. +this diagram generation feature was created to help track down bugs in the parsing for user +defined operator precedence that was difficult to figure out. Once the visualization was +available it became quite easy to see the problems.
Thus, Chapter 2 is both a simple +introductory example and a tool for use when doing more advanced language tweaking or +extension. diff --git a/src/Samples/Kaleidoscope/Chapter3/Kaleidoscope-ch3.md b/src/Samples/Kaleidoscope/Chapter3/Kaleidoscope-ch3.md index 6d32e3e3e..611b303ed 100644 --- a/src/Samples/Kaleidoscope/Chapter3/Kaleidoscope-ch3.md +++ b/src/Samples/Kaleidoscope/Chapter3/Kaleidoscope-ch3.md @@ -226,7 +226,7 @@ exponentiation a efficiently as the back-end generator can. ## Examples -```Console +``` Shell Ubiquity.NET.Llvm Kaleidoscope Interpreter - SimpleExpressions Ready># simple top level expression >4+5; diff --git a/src/Samples/Kaleidoscope/Chapter4/Kaleidoscope-ch4.md b/src/Samples/Kaleidoscope/Chapter4/Kaleidoscope-ch4.md index 46713cce6..52e0d9623 100644 --- a/src/Samples/Kaleidoscope/Chapter4/Kaleidoscope-ch4.md +++ b/src/Samples/Kaleidoscope/Chapter4/Kaleidoscope-ch4.md @@ -76,11 +76,10 @@ console output support. > [!WARNING] > All such methods implemented in .NET must block any exception from bubbling out of the -> call as the JIT engine doesn't know anything about them and neither does the Kaleidoscope +> call as the LLVM JIT engine doesn't know anything about them and neither does the Kaleidoscope > language. Exceptions thrown in these functions would produce undefined results, at best - > probably crashing the application. - #### Generator Dispose Since the JIT engine is disposable, the code generators Dispose() method must now call the Dispose() method on the JIT engine. diff --git a/src/Samples/Kaleidoscope/Chapter5/Kaleidoscope-ch5.md b/src/Samples/Kaleidoscope/Chapter5/Kaleidoscope-ch5.md index 63f57d07d..d7f119da5 100644 --- a/src/Samples/Kaleidoscope/Chapter5/Kaleidoscope-ch5.md +++ b/src/Samples/Kaleidoscope/Chapter5/Kaleidoscope-ch5.md @@ -16,14 +16,14 @@ understanding the language functionality to implement. 
The ultimate goal of the changes to support code generation for control flow constructs is to transform Kaleidoscope code such as: -```Kaleidoscope +``` Kaleidoscope extern foo(); extern bar(); def baz(x) if x then foo() else bar(); ``` and generate LLVM like this (unoptimized): -```llvm +``` llvm declare double @foo() declare double @bar() @@ -115,7 +115,7 @@ concepts to implement the for loop constructs for the language. The general idea is to transform the loops in Kaleidoscope such as this: -```Kaleidoscope +``` Kaleidoscope extern putchard(char); def printstar(n) for i = 1, i < n, 1.0 in diff --git a/src/Samples/Kaleidoscope/Chapter6/Kaleidoscope-ch6.md b/src/Samples/Kaleidoscope/Chapter6/Kaleidoscope-ch6.md index 86a8627fe..9d67c3dbe 100644 --- a/src/Samples/Kaleidoscope/Chapter6/Kaleidoscope-ch6.md +++ b/src/Samples/Kaleidoscope/Chapter6/Kaleidoscope-ch6.md @@ -11,20 +11,21 @@ break that and get down and dirty with the parser a bit to make the operators wo particular implementing user defined precedence. > [!TIP] -> The actual value of user defined operator precedence in a language is a bit debatable, and -> the initial plan for the Ubiquity.NET.Llvm tutorials was to skip this chapter as it -> doesn't involve any new LLVM IR or code generation. After the code was done to get the -> other chapters working - this one was still nagging, begging really, for a solution. The +> The actual value of user defined operator precedence in a language is a bit debatable. The +> initial plan for the `Ubiquity.NET.Llvm` tutorials was to skip this chapter as it doesn't +> involve any new LLVM IR or code generation. After the code was done to get the other +> chapters working - this one was still nagging, begging really, for a solution. The > challenge to come up with a good solution was ultimately too tempting to resist, and we > now have a full implementation with a few useful extensions on top! 
(Exponent operator '^', -'=' vs '==', '++', and '--') +> (assignment) '=' vs (equality) '==', '++', and '--') ## General idea of user defined operators User defined operators in Kaleidoscope are a bit unique. Unlike C++ and other similar languages, the precedence of the user defined operators in Kaleidoscope are not fixed. -Though, the built-in operators all use a fixed precedence. That poses some interesting -challenges for a parser as it must dynamically adapt to the state of the language runtime as -it is parsing so that it can correctly evaluate the operator expressions. +Though, the built-in operators all use a fixed precedence. That poses some "interesting" +challenges for a parser (let alone a user! :nerd_face:) as it must dynamically adapt to the state +of the language runtime as it is parsing so that it can correctly evaluate the operator +expressions. Making that work while using ANTLR requires looking under the hood to how ANTLR4 ordinarily handles precedence. A full treatise on the subject is outside the scope of this tutorial, diff --git a/src/Samples/Kaleidoscope/Chapter7.1/Kaleidoscope-ch7.1.md b/src/Samples/Kaleidoscope/Chapter7.1/Kaleidoscope-ch7.1.md index 4f4fb925c..72e90f8bd 100644 --- a/src/Samples/Kaleidoscope/Chapter7.1/Kaleidoscope-ch7.1.md +++ b/src/Samples/Kaleidoscope/Chapter7.1/Kaleidoscope-ch7.1.md @@ -3,14 +3,13 @@ uid: Kaleidoscope-ch7.1 --- >[!WARNING] -> There is a fatal flaw in the current design of this support for an interactive runtime +> There is an issue in the current design of this support for an interactive runtime > like Kaleidoscope thus far. It does NOT allow for re-defining a function. Once it is > defined, you cannot define it again or an exception or application crash will occur. > This is handled in Kaleidoscope by setting an option in the `DynamicRuntimeState` to > indicate that redefinition is not supported. This is then processed in the AST conversion > to produce an error node. 
This reports the redefinition as an error in the input rather -> then an exception at runtime. Hopefully a future variant of this sample will address -> tracking and removing that. See [Special notes for interactive run-times](#special-notes-for-interactive-run-times) +> than an exception at runtime. See [Special notes for interactive run-times](#special-notes-for-interactive-run-times) > for more details. # 7. Kaleidoscope: Extreme Lazy JIT @@ -153,23 +152,24 @@ outside of the driving application code control so it can't specify a resource t Additionally, there is no resource tracker for a materialization unit that can remove the unit BEFORE it is run. -There are at least three states of a function definition to deal with: +To help clarify the problem there are at least three states of a function definition to deal +with: 1) Not defined anywhere yet (First occurrence) 2) Materializer Created, but not yet materialized 3) Already materialized. -Tracking of each is different and thus handling removal will require different +Tracking of each is different and thus handling removal for each would require different implementations. All of which requires thread synchronization as the JIT could materialize -the function at ANY point along the way! So it is possible that while trying to remove a +the function at ANY point along the way! So it is possible that while trying to replace a definition it transitions from #2 to #3. Even if code for removal looked at the state first it's a classic [TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use) problem. There is no mechanism in the standard OrcJIT v2 for this scenario. It is arguable what the -validity of such a thing is for an interactive language/runtime. For any sufficiently -complex thing there's at least two high level default questions to ask: +validity of such a thing is for a non-interactive language/runtime.
+For any sufficiently complex thing there's at least two high level default questions to ask: 1) Do we even know HOW to do it yet? 2) Is it worth the cost of implementation? For an interactive language/runtime like Kaleidoscope, the answer to both thus far is a hard 'NO'. This sort of support is best for non-interactive run-times like .NET or Java -where redefinition isn't legal syntax and caught in the parser/AST transforms. +where redefinition isn't legal syntax and caught before even generating the IR. diff --git a/src/Samples/Kaleidoscope/Chapter7/Kaleidoscope-ch7.md b/src/Samples/Kaleidoscope/Chapter7/Kaleidoscope-ch7.md index 5f7a03f98..1aee248f4 100644 --- a/src/Samples/Kaleidoscope/Chapter7/Kaleidoscope-ch7.md +++ b/src/Samples/Kaleidoscope/Chapter7/Kaleidoscope-ch7.md @@ -17,15 +17,15 @@ convert to SSA form directly. >[!IMPORTANT] >***In LLVM There is no need for a language front-end to convert to SSA form directly!*** -> In fact, ***manually*** converting to SSA form is strongly discouraged! LLVM already has -> very efficient, and more importantly, well tested, support for converting to SSA form +> In fact, ***manually*** converting to SSA form is ***strongly discouraged!*** LLVM already +> has very efficient, and more importantly, well tested, support for converting to SSA form > (though how that works might be a bit surprising - read on!). The use of this support is > the focus of this chapter. ## Mutable Variables in LLVM ### Mutable Variables vs. SSA, What's the big deal? Consider the following simple "C" code: -```C +``` C int G, H; int test(_Bool Condition) @@ -43,7 +43,7 @@ The general idea of how to handle this in LLVM SSA form was already covered in [ Since there are two possible values for X when the function returns, a PHI node is inserted to merge the values. 
The LLVM IR for this would look like this: -```llvm +``` llvm @G = weak global i32 0 ; type of @G is i32* @H = weak global i32 0 ; type of @H is i32* @@ -99,7 +99,7 @@ address for that space (e.g. it's a pointer). Stack variables work the same way, instead of static allocation via a global declaration they are declared with the [LLVM alloca instruction](xref:Ubiquity.NET.Llvm.Instructions.Alloca). -```llvm +``` llvm define i32 @example() { entry: %X = alloca i32 ; type of %X is i32*. @@ -115,7 +115,7 @@ memory allocated with alloca is completely generalized. you can pass the address slot to a function, store it in a variable, etc... Using alloca, the previous example could be re-written using alloca without the PHI node as follows: -```llvm +``` llvm @G = weak global i32 0 ; type of @G is i32* @H = weak global i32 0 ; type of @H is i32* @@ -185,7 +185,7 @@ The mem2reg pass is an integral part of the full solution to mutable variables. mem2reg is highly recommended. There are a few conditions for using mem2reg correctly. 1. mem2reg is based on alloca: it looks for and promotes alloca. It does not apply to - globals or heap allocations. + global variables or heap allocations. 1. mem2reg only looks for alloca instructions in the **entry block** of the function. 1. Placing Alloca instructions for all variables, in all scopes, in the entry block ensures they are executed only once, which makes the conversion simpler. @@ -228,7 +228,7 @@ variables. Defining variables is just a generally useful concept that can serve purposes, including self documentation. The following is an example on how these features are used: -```Kaleidoscope +``` Kaleidoscope # Define ':' for sequencing: as a low-precedence operator that ignores operands # and just returns the RHS. 
def binary : 1 (x y) y; diff --git a/src/Samples/Kaleidoscope/Chapter8/Kaleidoscope-ch8.md b/src/Samples/Kaleidoscope/Chapter8/Kaleidoscope-ch8.md index 418676601..e5a08e430 100644 --- a/src/Samples/Kaleidoscope/Chapter8/Kaleidoscope-ch8.md +++ b/src/Samples/Kaleidoscope/Chapter8/Kaleidoscope-ch8.md @@ -26,7 +26,7 @@ distinct scenarios the grammar has different rules. For the interactive scenario previously mentioned "repl" rule is used. When parsing a full source file the "fullsrc" rule is used as the start. -```antlr +``` antlr // Full source parse accepts a series of definitions or prototypes, all top level expressions // are generated into a single function called Main() fullsrc @@ -58,7 +58,7 @@ typed in an interactive console. A trick used in the code generation is to mark anonymous functions as private and always inline so that a simple optimization pass can eliminate the anonymous functions after inlining them all into the main() function. -```C# +``` C# // mark anonymous functions as always-inline and private so they can be removed if(definition.IsAnonymous) { diff --git a/src/Samples/Kaleidoscope/IgnoredWords.dic b/src/Samples/Kaleidoscope/IgnoredWords.dic index de6ed0a76..0b1a8ce9a 100644 --- a/src/Samples/Kaleidoscope/IgnoredWords.dic +++ b/src/Samples/Kaleidoscope/IgnoredWords.dic @@ -6,6 +6,7 @@ alltmp antlr arity ascii +baz binaryop binop blockdiag @@ -32,10 +33,14 @@ ifresult imag impl initializer +initializers inline inlined +inliner +inlining lexer literation +llvm lookups loopcond mandel @@ -54,6 +59,7 @@ printdensity printstar putchard readonly +repl ret serializer structs @@ -61,5 +67,6 @@ subtmp uid unaryop uniqued +unoptimized userdefinedop xref diff --git a/src/Samples/Kaleidoscope/Kaleidoscope-Overview.md b/src/Samples/Kaleidoscope/Kaleidoscope-Overview.md index fa22031a8..67988e5b2 100644 --- a/src/Samples/Kaleidoscope/Kaleidoscope-Overview.md +++ b/src/Samples/Kaleidoscope/Kaleidoscope-Overview.md @@ -2,7 +2,6 @@ uid: Kaleidoscope-ch1 --- 
- # 1. Kaleidoscope: Language Introduction The general flow of this tutorial follows that of the official [LLVM tutorial](xref:llvm_kaleidoscope_tutorial) @@ -27,7 +26,7 @@ use cases for Ubiquity.NET.Llvm for code generation and JIT execution. It is worth pointing out that this example is not intended as a treatise on compiler design nor on language parsing. While it contains many aspects of those topics the tutorial is, mostly, focused on the use of Ubiquity.NET.Llvm for code generation. Furthermore it isn't -a trans-literation of the LLVM C++ sample as that would defeat one of the major points of +a trans-literation of the LLVM `C++` sample as that would defeat one of the major points of `Ubiquity.NET.Llvm` - to provide a familiar API and use patterns familiar to C# developers. ## General layout @@ -45,6 +44,8 @@ language features in a single assembly. Ultimately the parsing produces an [AST](xref:Kaleidoscope-AST) so that the actual technology used for the parse is hidden as an implementation detail. This helps in isolating the parsing from the use of `Ubiquity.NET.Llvm` for code generation and JIT compilation for interactive languages. +Additionally, a number of [utilities](https://github.com/UbiquityDotNET/Ubiquity.NET.Utils) +are used to further enable use by additional DSL implementations or any other use. ## The Kaleidoscope Language ### General Concepts diff --git a/src/Samples/Kaleidoscope/Kaleidoscope.Runtime/Kaleidoscope-Runtime.md b/src/Samples/Kaleidoscope/Kaleidoscope.Runtime/Kaleidoscope-Runtime.md index 6a3205d6f..28c8c4fb7 100644 --- a/src/Samples/Kaleidoscope/Kaleidoscope.Runtime/Kaleidoscope-Runtime.md +++ b/src/Samples/Kaleidoscope/Kaleidoscope.Runtime/Kaleidoscope-Runtime.md @@ -16,10 +16,10 @@ run-times. This uses an asynchronous pattern and allows cancellation via a stand cancellation token. This supports a clean shutdown via a CTRl-C handler etc... 
## Kaleidoscope JIT engine -The JIT engine used for Kaleidoscope is based on the Ubiquity.NET.Llvm OrcJIT v2, which, -unsurprisingly, uses the LLVM OrcJit functionality to provide On Request Compilation (ORC). -For most of the chapters, the JIT uses a moderately lazy compilation technique where the -source language is parsed, converted to LLVM IR and submitted to the JIT engine. The JIT +The JIT engine used for Kaleidoscope is based on the `Ubiquity.NET.Llvm` OrcJIT v2, which, +unsurprisingly, uses the LLVM OrcJit v2 functionality to provide On Request Compilation +(ORC). For most of the chapters, the JIT uses a moderately lazy compilation technique where +the source language is parsed, converted to LLVM IR and submitted to the JIT engine. The JIT engine does not immediately generate native code from the module, however. Instead it stores the module, and whenever compiled code calls to a symbol exported by the IR module, it will then generate the native code for the function "on the fly". This has the advantage of not @@ -30,8 +30,9 @@ execute. ### Really lazy compilation While the basic lazy compilation of IR to native code has performance benefits over a pure interpreter, it still has the potential for wasted overhead converting the parsed language -to LLVM IR. Fortunately, the LLVM and Ubiquity.NET.Llvm.OrcJitv2 supports truly lazy +to LLVM IR. Fortunately, the LLVM and `Ubiquity.NET.Llvm.OrcJitv2` supports truly lazy compilation. This is done by asking the JIT to create a stub for a named symbol and then, whenever code calls that symbol the stub calls back to the JIT which then calls back the application to 'materialize' the IR, add the module to the JIT and trigger compilation to -native. Thus, achieving true Just-In-Time compilation. +native. That is, the code generation stores the AST for a function and only converts it to +IR, and then native code when needed. Thus, achieving true Just-In-Time compilation. 
diff --git a/src/Ubiquity.NET.Llvm/ReadMe.md b/src/Ubiquity.NET.Llvm/ReadMe.md index e3d969df1..b0d9d214e 100644 --- a/src/Ubiquity.NET.Llvm/ReadMe.md +++ b/src/Ubiquity.NET.Llvm/ReadMe.md @@ -11,6 +11,8 @@ extensions required generally decreases with each release of LLVM). * Just In Time (JIT) compilation support - Including fully lazy compilation * Generation of detailed debug information +* Use for AOT code generation from managed code (usually C#) + * Including .NET AOT generation of the application itself. ### Full documentation [Full documentation](https://ubiquitydotnet.github.io/Llvm.NET/) is available online.