diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index e6bff3b0a..7869c5030 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,13 @@ # F2 Developer Kit Release Notes +## v2.1.1 + +* Added global register offset for the SDE IP. See [CL_SDE software examples](./hdk/cl/examples/cl_sde/software/src/README.md). +* Added [CL_SDE software example](./hdk/cl/examples/cl_sde/software/src/sde_c2h_user_buffers.c) for a user-allocated DMA buffer. +* [Documentation](./hdk/docs/List_AFI_on_Marketplace.md) to assist F2 customers with releasing AFIs and AMIs on the AWS Marketplace. +* [Documentation](./developer_resources/Amazon_DCV_Setup_Guide.md) to assist in creating a virtual desktop based on the FPGA Developer AMI running graphics-intensive applications remotely on Amazon EC2 instances. +* Fixed the BW calculation and tolerance calculation in the test_hbm_perf_random in the [cl_mem_perf](./hdk/cl/examples/cl_mem_perf/verif/README.md#test_hbm_perf_randomsv). + ## v2.1.0 * Support for Vivado and Vitis 2024.2 tools. 
diff --git a/developer_resources/Amazon_DCV_Setup_Guide.md b/developer_resources/Amazon_DCV_Setup_Guide.md new file mode 100644 index 000000000..9a22bc155 --- /dev/null +++ b/developer_resources/Amazon_DCV_Setup_Guide.md @@ -0,0 +1,112 @@ +# Using EC2 Instances with a GUI + +## Table of Contents + +- [What is Amazon DCV?](#what-is-amazon-dcv) +- [Installing the Amazon DCV Server on an Amazon EC2 Instance](#installing-the-amazon-dcv-server-on-an-amazon-ec2-instance) + - [Prerequisites](#prerequisites) + - [Amazon DCV Server Installation](#amazon-dcv-server-installation) + - [Post-Installation Checks](#post-installation-checks) + - [Setting a Password for Your Sessions](#setting-a-password-for-your-sessions) + - [Setting Security Rules to Allow Traffic on Port 8443](#setting-security-rules-to-allow-traffic-on-port-8443) +- [Amazon DCV Client Installation](#amazon-dcv-client-installation) +- [Basic Session Management](#basic-session-management) + - [Quick Session Startup](#quick-session-startup) + +## What is Amazon DCV? + +[Amazon DCV](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html) is a high-performance remote +display protocol that provides customers with a secure way to deliver remote desktops and application streaming +from any cloud or data center to any device, over varying network conditions. With Amazon DCV and Amazon EC2, +customers can run graphics-intensive applications remotely on EC2 instances and stream the results to simpler +client machines, eliminating the need for expensive dedicated workstations. + +This guide helps customers developing for AWS F2 instances create a virtual desktop running on EC2 instances +based on the [FPGA Developer AMI](../User_Guide_AWS_EC2_FPGA_Development_Kit.md#fpga-developer-ami). +The FPGA Developer AMI has pre-installed tools which are license free. 
Combined with DCV, this enables +development using Vivado or Vitis' graphical Integrated Design Environment (IDE), which provides an intuitive +graphical user interface (GUI) to visualize FPGA development in the cloud. + +## Installing the Amazon DCV Server on an Amazon EC2 Instance + +### Prerequisites + +1. [Dependency Installation](https://docs.aws.amazon.com/dcv/latest/adminguide/setting-up-installing-linux-prereq.html#linux-prereq-gui) + * :warning: DO NOT PERFORM STEP 3! Upgrading may impact the stability of development kit software! +2. [Protocol Setup](https://docs.aws.amazon.com/dcv/latest/adminguide/setting-up-installing-linux-prereq.html#linux-prereq-wayland) +3. [Driver Installation](https://docs.aws.amazon.com/dcv/latest/adminguide/setting-up-installing-linux-prereq.html#linux-prereq-nongpu) + +### Amazon DCV Server Installation + +In the [install procedure described here](https://docs.aws.amazon.com/dcv/latest/adminguide/setting-up-installing-linux-server.html#linux-server-install), follow steps 1 through 5, 7, and 8. +When you get to step 9, do the following: + +``` bash + sudo apt --fix-broken install + sudo apt install -y mesa-utils + sudo dpkg -i nice-dcv-gl_2024.0.1096-1_amd64.ubuntu<2404 or 2004>.deb +``` + +### Post-Installation Checks + +[This section of the post-installation check](https://docs.aws.amazon.com/dcv/latest/adminguide/setting-up-installing-linux-checks.html#checks-xserver) should be run to ensure that all aspects of the setup are working as expected. + +### Setting a Password for Your Sessions + +In order to connect to an Amazon DCV session, you must have a password set for your user on the EC2 instance. +This can be done with this command: + +``` bash +sudo passwd $USER +``` + +### Setting Security Rules to Allow Traffic on Port 8443 + +In order for Amazon DCV to communicate with your EC2 instance, TCP and UDP traffic must be allowed on port 8443. 
+This can be accomplished by updating the security group you used to launch your instance. + +## Amazon DCV Client Installation + +The [Amazon DCV client](https://www.amazondcv.com/) should be installed on your local machine and is used to view your virtual desktop on your EC2 instance. + +## Basic Session Management + +To begin, run the following commands (the first one is needed on Ubuntu 20.04 only): + +``` bash +sudo systemctl isolate multi-user.target # Ubuntu 20.04 only +sudo systemctl restart dcvserver.service +sudo systemctl restart dcvsessionlauncher.service +``` + +Next, refer to the [session management user guide here](https://docs.aws.amazon.com/dcv/latest/adminguide/managing-sessions.html). This guide will provide you with all of the information you need to customize and manage your Amazon DCV sessions. + +### Quick Session Startup + +To start a session, use the following command: + +``` bash +dcv create-session $your_session_number +dcv list-sessions +Session: '1' (owner:ubuntu type:virtual) +``` + +You may give your session any number you like, but no two sessions may have the same number. + +From this point, you can access your session using the Amazon DCV client on your local machine or via the [DCV console in your web browser](https://docs.aws.amazon.com/dcv/latest/userguide/using-connecting-browser-connect.html). + +Enter `https://user@ec2_instance_ip_address:8443` into the `Hostname or IP Address` box and click `Connect`. Next, click "Trust and Connect". + +Enter the password you set in [Setting a Password for Your Sessions](#setting-a-password-for-your-sessions) in the `Password` box and click `Login`. + +At this point, you should see your session begin and a virtual desktop displayed after a brief delay. + +Any popups about not having a license may be safely ignored. This is a known issue with DCV. + +Now, open a terminal and run the following command: `source /etc/profile.d/default_module.sh`. You're now ready to use your GUI-enabled EC2 Instance. 
+ +``` bash +ubuntu@ip-1-2-3-4:~$ source /etc/profile.d/default_module.sh +ubuntu@ip-1-2-3-4:~$ vivado -version +vivado v2024.1 (64-bit) +``` diff --git a/developer_resources/DCV.md b/developer_resources/DCV.md deleted file mode 100644 index 0fe3f62f2..000000000 --- a/developer_resources/DCV.md +++ /dev/null @@ -1,12 +0,0 @@ -# GUI FPGA Development Environment with NICE DCV - -[NICE DCV](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html) can be used create a virtual desktop on your FPGA Developer AMI instance. - -[NICE DCV](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html) is a high-performance remote -display protocol that provides customers with a secure way to deliver remote desktops and application streaming -from any cloud or data center to any device, over varying network conditions. - -With NICE DCV and Amazon EC2, customers can run graphics-intensive applications remotely on EC2 instances -and stream the results to simpler client machines, eliminating the need for expensive dedicated workstations. -Customers across a broad range of HPC workloads use NICE DCV for their remote visualization requirements. -Please refer to the [Official DCV documentation](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html) diff --git a/docs-rtd/source/RELEASE-NOTES.rst b/docs-rtd/source/RELEASE-NOTES.rst index 8413bd7c8..8932132ff 100644 --- a/docs-rtd/source/RELEASE-NOTES.rst +++ b/docs-rtd/source/RELEASE-NOTES.rst @@ -1,6 +1,17 @@ F2 Developer Kit Release Notes ============================== +.. _v211: + +v2.1.1 +------ + +- Added global register offset for the SDE IP. See `CL_SDE software examples <./hdk/cl/examples/cl-sde/software/src/README.html>`__. +- Added `CL_SDE software example `__ for a user-allocated DMA buffer. +- `Documentation <./hdk/docs/List-AFI-on-Marketplace.html>`__ to assist F2 customers with releasing AFIs and AMIs on the AWS Marketplace. 
+- `Documentation <./developer-resources/Amazon-DCV-Setup-Guide.html>`__ to assist in creating a virtual desktop based on the FPGA Developer AMI running graphics-intensive applications remotely on Amazon EC2 instances. +- Fixed the BW calculation and tolerance calculation in the test_hbm_perf_random in the `cl_mem_perf <./hdk/cl/examples/cl-mem-perf/verif/README.html#test-hbm-perf-randomsv-mem>`__. + .. _v210: v2.1.0 diff --git a/docs-rtd/source/all-links.rst b/docs-rtd/source/all-links.rst index 359f61a51..4d4f9ae62 100644 --- a/docs-rtd/source/all-links.rst +++ b/docs-rtd/source/all-links.rst @@ -70,6 +70,7 @@ General Documentation hdk/docs/AWS-Shell-ERRATA hdk/docs/AWS-Shell-Interface-Specification hdk/docs/Clock-Recipes-User-Guide + hdk/docs/List-AFI-on-Marketplace hdk/docs/on-premise-licensing-help hdk/docs/RTL-Simulation-Guide-for-HDK-Design-Flow hdk/docs/shell-floorplan diff --git a/docs-rtd/source/developer-resources/Amazon-DCV-Setup-Guide.rst b/docs-rtd/source/developer-resources/Amazon-DCV-Setup-Guide.rst new file mode 100644 index 000000000..f17c310ca --- /dev/null +++ b/docs-rtd/source/developer-resources/Amazon-DCV-Setup-Guide.rst @@ -0,0 +1,165 @@ +Using EC2 Instances with a GUI +============================== + +Table of Contents +----------------- + +- `What is Amazon DCV? 
<#what-is-amazon-dcv>`__ +- `Installing the Amazon DCV Server on an Amazon EC2 + Instance <#installing-the-amazon-dcv-server-on-an-amazon-ec2-instance>`__ + + - `Prerequisites <#prerequisites>`__ + - `Amazon DCV Server Installation <#amazon-dcv-server-installation>`__ + - `Post-Installation Checks <#post-installation-checks>`__ + - `Setting a Password for Your + Sessions <#setting-a-password-for-your-sessions>`__ + - `Setting Security Rules to Allow Traffic on Port + 8443 <#setting-security-rules-to-allow-traffic-on-port-8443>`__ + +- `Amazon DCV Client Installation <#amazon-dcv-client-installation>`__ +- `Basic Session Management <#basic-session-management>`__ + + - `Quick Session Startup <#quick-session-startup>`__ + +What is Amazon DCV? +------------------- + +`Amazon +DCV `__ +is a high-performance remote display protocol that provides customers +with a secure way to deliver remote desktops and application streaming +from any cloud or data center to any device, over varying network +conditions. With Amazon DCV and Amazon EC2, customers can run +graphics-intensive applications remotely on EC2 instances and stream the +results to simpler client machines, eliminating the need for expensive +dedicated workstations. + +This guide helps customers developing for AWS F2 instances create a +virtual desktop running on EC2 instances based on the `FPGA Developer +AMI <../User-Guide-AWS-EC2-FPGA-Development-Kit.html#fpga-developer-ami>`__. +The FPGA Developer AMI has pre-installed tools which are license free. +Combined with DCV, this enables development using Vivado or Vitis' +graphical Integrated Design Environment (IDE), which provides an +intuitive graphical user interface (GUI) to visualize FPGA development +in the cloud. + +Installing the Amazon DCV Server on an Amazon EC2 Instance +---------------------------------------------------------- + +Prerequisites +~~~~~~~~~~~~~ + +1. `Depenency + Installation `__ + + - ⚠️ DO NOT PERFORM STEP 3! 
Upgrading may impact the stability of + development kit software! + +2. `Protocol + Setup `__ +3. `Driver + Installation `__ + +Amazon DCV Server Installation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the `install procedure described +here `__, +follow steps 1 through 5, 7, and 8. When you get to step 9, do the +following: + +.. code:: bash + + sudo apt --fix-broken install + sudo apt install -y mesa-utils + sudo dpkg -i nice-dcv-gl_2024.0.1096-1_amd64.ubuntu<2404 or 2004>.deb + +Post-Installation Checks +~~~~~~~~~~~~~~~~~~~~~~~~ + +`This section of the post-installation +check `__ +should be run to ensure that all aspects of the setup are working as +expected. + +Setting a Password for Your Sessions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order to connect to an Amazon DCV session, you must have a password +set for your user on the EC2 instance. This can be done with this +command: + +.. code:: bash + + sudo passwd $USER + +Setting Security Rules to Allow Traffic on Port 8443 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order for Amazon DCV to communicate with your EC2 instance, TCP and +UDP traffic must be allowed on port 8443. This can be accomplished by +updating the security group you used to launch your instance. + +Amazon DCV Client Installation +------------------------------ + +The `Amazon DCV client `__ should be +installed on your local machine and is used to view your virtual +desktop on your EC2 instance. + +Basic Session Management +------------------------ + +To begin, run the following two commands: + +.. code:: bash + + sudo systemctl isolate multi-user.target (Ubuntu 20.04 only) + sudo systemctl restart dcvserver.service + sudo systemctl restart dcvsessionlauncher.service + +Next, refer to the `session management user guide +here `__. +This guide will provide you with all of the information you need to +customize and manage your Amazon DCV sessions. 
+ +Quick Session Startup +~~~~~~~~~~~~~~~~~~~~~ + +To start a session, use the following command: + +.. code:: bash + + dcv create-session $your_session_number + dcv list-sessions + Session: '1' (owner:ubuntu type:virtual) + +You may give your session any number you like, but no two sessions may +have the same number. + +From this point, you can access your session using the Amazon DCV client +on your local machine or via the `DCV console in your web browser `__. + +Enter ``https://user@ec2_instance_ip_address:8443`` into the +``Hostname or IP Address`` box and click ``Connect``. Next, click "Trust +and Connect". + +Enter the password you set in `Post-Installation +Checks <#post-installation-checks>`__ in the ``Password`` box and click +``Login``. + +At this point, you should see your session begin and a virtual desktop +displayed after a brief delay. + +Any popups about not having a license may be safely ignored. This is a +known issue with DCV. + +Now, open a terminal and run the following command: +``source /etc/profile.d/default_module.sh``. You're now ready to use +your GUI-enabled EC2 Instance. + +.. code:: bash + + ubuntu@ip-1-2-3-4:~$ source /etc/profile.d/default_module.sh + ubuntu@ip-1-2-3-4:~$ vivado -version + vivado v2024.1 (64-bit) diff --git a/docs-rtd/source/developer-resources/DCV.rst b/docs-rtd/source/developer-resources/DCV.rst deleted file mode 100644 index d8cae13a2..000000000 --- a/docs-rtd/source/developer-resources/DCV.rst +++ /dev/null @@ -1,24 +0,0 @@ -GUI FPGA Development Environment with NICE DCV -============================================== - -`NICE -DCV `__ -can be used create a virtual desktop on your FPGA Developer AMI -instance. - -`NICE -DCV `__ -is a high-performance remote display protocol that provides customers -with a secure way to deliver remote desktops and application streaming -from any cloud or data center to any device, over varying network -conditions. 
- -With NICE DCV and Amazon EC2, customers can run graphics-intensive -applications remotely on EC2 instances and stream the results to simpler -client machines, eliminating the need for expensive dedicated -workstations. Customers across a broad range of HPC workloads use NICE -DCV for their remote visualization requirements. Please refer to the -`Official DCV -documentation `__ - -`Back to Home <../index.html>`__ \ No newline at end of file diff --git a/docs-rtd/source/hdk/README.rst b/docs-rtd/source/hdk/README.rst index 9eebdbf1a..77730980c 100644 --- a/docs-rtd/source/hdk/README.rst +++ b/docs-rtd/source/hdk/README.rst @@ -809,6 +809,7 @@ Additional HDK Documentation docs/shell-floorplan docs/AWS-Fpga-Pcie-Memory-Map docs/RTL-Simulation-Guide-for-HDK-Design-Flow + docs/List-AFI-on-Marketplace docs/on-premise-licensing-help docs/Supported-DDR-Modes docs/Virtual-JTAG-XVC diff --git a/docs-rtd/source/hdk/docs/List-AFI-on-Marketplace.rst b/docs-rtd/source/hdk/docs/List-AFI-on-Marketplace.rst new file mode 100644 index 000000000..254dbdefe --- /dev/null +++ b/docs-rtd/source/hdk/docs/List-AFI-on-Marketplace.rst @@ -0,0 +1,50 @@ +Listing Your AFI on AWS Marketplace +################################### + +The AWS Marketplace enables you to sell your FPGA accelerator solutions to other AWS customers. You can list your AFI (Amazon FPGA Image) bundled with an AMI (Amazon Machine Image) that contains all necessary software components. AWS handles the metering, billing, and payment processing, allowing you to focus on your solution. + +Prerequisites +------------- + +Before listing your AFI on the AWS Marketplace: + +1. Register as an AWS Marketplace seller at https://aws.amazon.com/marketplace/management/ + +2. 
Prepare your solution: + + * Create your AFI using the supported workflows in the `AWS FPGA Developer Kit <./../README.html#build-accelerator-afi-using-hdk-design-flow>`__ + * Build an `AMI `__ that includes all required software components: + + - Device drivers + - Runtime engines + - Libraries + - Documentation + + * Test your complete solution thoroughly + +3. Initiate AMI scanning via the `AMI Scanning page `__. + +Important Considerations +------------------------ + +* AFIs are always sold bundled with an AMI under a single product code +* AFIs are instance-type specific (e.g., an AFI created for F1 instances cannot be used with F2 instances) + +Submission Process +------------------ + +1. Follow the `process `__ for submitting your product for publication + +2. Review and Verification by AWS Marketplace team + + * Submission creates a case for the AWS Seller Operations team + * AWS Support Engineer processes the product request and publish it to limited state + * Upon completion of limited publishing, the AWS Support Engineer notifies the seller for verification and approval to go live + * Seller conducts testing and validation + +3. Final Publication + + * Seller provides formal AWS Marketplace approval + * Seller Operations team proceeds with public release + +For detailed information about AWS Marketplace policies and best practices, visit the `AWS Marketplace Seller Guide `__. 
diff --git a/docs-rtd/source/index.rst b/docs-rtd/source/index.rst index 8429d5c5b..88c6d0044 100644 --- a/docs-rtd/source/index.rst +++ b/docs-rtd/source/index.rst @@ -42,7 +42,7 @@ Table of Contents sdk/README vitis/README - developer-resources/DCV.rst + developer-resources/Amazon-DCV-Setup-Guide.rst ERRATA RELEASE-NOTES diff --git a/docs-rtd/source/sitemap.xml b/docs-rtd/source/sitemap.xml index 8b7773259..bf6157b3d 100644 --- a/docs-rtd/source/sitemap.xml +++ b/docs-rtd/source/sitemap.xml @@ -2,185 +2,189 @@ https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/ERRATA.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/RELEASE-NOTES.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/User-Guide-AWS-EC2-FPGA-Development-Kit.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/all-links.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 - https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/developer-resources/DCV.html - 2025-05-14T00:00:01+00:00 + https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/developer-resources/Amazon-DCV-Setup-Guide.html + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/cl/CHECKLIST-BEFORE-BUILDING-CL.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/cl/examples/CL-TEMPLATE/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/cl/examples/cl-dram-hbm-dma/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 
https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/cl/examples/cl-dram-hbm-dma/verif/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/cl/examples/cl-mem-perf/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/cl/examples/cl-mem-perf/verif/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/cl/examples/cl-sde/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/cl/examples/cl-sde/software/src/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/cl/examples/cl-sde/verif/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/AWS-CLI-FPGA-Commands.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/AWS-CLK-GEN-spec.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/AWS-Fpga-Pcie-Memory-Map.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/AWS-Shell-ERRATA.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/AWS-Shell-Interface-Specification.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/Clock-Recipes-User-Guide.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 + + + https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/List-AFI-on-Marketplace.html + 2025-06-26T00:00:01+00:00 
https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/RTL-Simulation-Guide-for-HDK-Design-Flow.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/Supported-DDR-Modes.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/Virtual-JTAG-XVC.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/XDMA-Install.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/on-premise-licensing-help.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/hdk/docs/shell-floorplan.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/index.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/sdk/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/sdk/apps/virtual-ethernet/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/sdk/apps/virtual-ethernet/doc/SDE-HW-Guide.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/sdk/apps/virtual-ethernet/doc/Virtual-Ethernet-Application-Guide.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/sdk/docs/F2-Software-Performance-Optimization-Guide.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/sdk/docs/Load-Times.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 
https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/sdk/userspace/fpga_mgmt_examples/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/sdk/userspace/fpga_mgmt_tools/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/vitis/ERRATA.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/vitis/README.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/genindex.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 https://awsdocs-fpga-f2.readthedocs-hosted.com/latest/en/search.html - 2025-05-14T00:00:01+00:00 + 2025-06-26T00:00:01+00:00 \ No newline at end of file diff --git a/docs-rtd/source/spelling_wordlist.txt b/docs-rtd/source/spelling_wordlist.txt index 8404dc5c6..4d7807aed 100644 --- a/docs-rtd/source/spelling_wordlist.txt +++ b/docs-rtd/source/spelling_wordlist.txt @@ -271,3 +271,5 @@ GiB TiB uncomment github +Startup +popups diff --git a/hdk/cl/examples/cl_mem_perf/verif/tests/cl_mem_perf_utils.svh b/hdk/cl/examples/cl_mem_perf/verif/tests/cl_mem_perf_utils.svh index 76011f6c5..5186e8690 100644 --- a/hdk/cl/examples/cl_mem_perf/verif/tests/cl_mem_perf_utils.svh +++ b/hdk/cl/examples/cl_mem_perf/verif/tests/cl_mem_perf_utils.svh @@ -184,14 +184,24 @@ task print_cl_hbm_perf_kernel_bandwidth_performance(logic [31:0] selected_channe tb.peek_ocl(.addr(`RD_TIMER_HI_REG), .data(rd_timer[63:32])); $display("RD_TIMER value = %016d", rd_timer); - wr_bw = (wr_timer == 0) ? 0 : ((wr_cyc_count * (axlen + 1) * $countones(selected_channels))/(wr_timer * 4.0)); - rd_bw = (rd_timer == 0) ? 
0 : ((rd_cyc_count * (axlen + 1) * $countones(selected_channels))/(rd_timer * 4.0)); + // BW Calculation Notes + // wr_cyc_count increments by $countones(selected_channels) + // bus width is 32B (256b), + // axlen is burst len - 1 + // clock cycle is ~4ns + wr_bw = (wr_timer == 0) ? 0 : ((wr_cyc_count * (axlen + 1) * 32) / (wr_timer * 4.0)); + rd_bw = (rd_timer == 0) ? 0 : ((rd_cyc_count * (axlen + 1) * 32) / (rd_timer * 4.0)); + $display("=======PERFORMANCE INFO============="); $display("Write BW = %-0.2f GB/s", wr_bw); $display("Read BW = %-0.2f GB/s", rd_bw); - expected_wr_bandwidth = (400.0 * $pow( $countones(selected_channels), 3 ) / $pow( 32.0, 3 )); - expected_rd_bandwidth = (340.0 * $pow( $countones(selected_channels), 3 ) / $pow( 32.0, 3 )); + // Starting from theoretical max of bus x channels = 450GB/s + // Adjust by number of used channels + // Adjust by axlen, wraddr overhead cycles + // 0.90 tolerance + expected_wr_bandwidth = 0.90 * (450.0 * (axlen/ (axlen+1)) * ($countones(selected_channels) / 32.0)); + expected_rd_bandwidth = 0.90 * (350.0 * ($countones(selected_channels) / 32.0)); if (wr_bw < expected_wr_bandwidth) begin $error("Write Bandwidth of %3.1f is below %3.1f GB/s", wr_bw, expected_wr_bandwidth); diff --git a/hdk/cl/examples/cl_sde/software/runtime/Makefile b/hdk/cl/examples/cl_sde/software/runtime/Makefile index a2ef29ea1..3c88a269d 100644 --- a/hdk/cl/examples/cl_sde/software/runtime/Makefile +++ b/hdk/cl/examples/cl_sde/software/runtime/Makefile @@ -30,14 +30,17 @@ STATS_PERIOD = 0 INCLUDES=-I$(SDE_LIB_DIR) -I$(SDK_USERSPACE_DIR)/include -I$(SDK_USERSPACE_DIR)/fpga_libs/fpga_mgmt LDFLAGS = -L$(SDK_USERSPACE_DIR)/lib/so LDLIBS = -lfpga_mgmt -OPT=-DFPGA_ALLOW_NON_ROOT -DCONFIG_LOGLEVEL=1 +# Global offsets are available if your design has an offset register set compared to the original CL_SDE example. 
+GLOBAL_SDE_OFFSET=-DGLOBAL_SDE_OFFSET=0x0 +GLOBAL_ATG_OFFSET=-DGLOBAL_ATG_OFFSET=0x0 +OPT=-DFPGA_ALLOW_NON_ROOT -DCONFIG_LOGLEVEL=1 $(GLOBAL_SDE_OFFSET) $(GLOBAL_ATG_OFFSET) CFLAGS=$(OPT) -g -Wall -Werror -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) SRC=$(SDE_LIB_DIR)/sde_hw_ctrl.c $(SDE_LIB_DIR)/sde_dma_buffer.c $(SDE_LIB_DIR)/sde_mgmt.c $(SDE_LIB_DIR)/sde_utility.c $(SDE_LIB_DIR)/sde_mem.c -SDE_EXAMPLES = sde_c2h_perf_test sde_h2c_perf_test sde_c2h_simple sde_h2c_simple sde_loopback_simple +SDE_EXAMPLES = sde_c2h_perf_test sde_h2c_perf_test sde_c2h_simple sde_h2c_simple sde_loopback_simple sde_c2h_user_buffers .PHONY: all clean -all: check_env ve_install sde_examples ve_run +all: check_env ve_install sde_examples ve_run ve_install: check_env $(APP_SCRIPTS_DIR)/virtual_ethernet_install.py $(APP_INSTALL_DIR) @@ -73,3 +76,6 @@ sde_h2c_simple: $(SDE_EXAMPLE_DIR)/sde_h2c_simple.c $(SRC) check_env sde_loopback_simple: $(SDE_EXAMPLE_DIR)/sde_loopback_simple.c $(SRC) check_env gcc $< $(SRC) -o $@ -mavx2 $(LDFLAGS) $(LDLIBS) $(CFLAGS) + +sde_c2h_user_buffers: $(SDE_EXAMPLE_DIR)/sde_c2h_user_buffers.c $(SRC) check_env + gcc $< $(SRC) -o $@ -mavx2 $(LDFLAGS) $(LDLIBS) $(CFLAGS) diff --git a/hdk/cl/examples/cl_sde/software/src/README.md b/hdk/cl/examples/cl_sde/software/src/README.md index e3409faed..a19766010 100644 --- a/hdk/cl/examples/cl_sde/software/src/README.md +++ b/hdk/cl/examples/cl_sde/software/src/README.md @@ -11,6 +11,7 @@ This directory contains example applications demonstrating the usage of the Stre - `slot_id` (int): Specifies the FPGA image slot for subsequent SDE library operations. - `direction` (enum): Defines the data transfer direction (C2H: Card-to-Host, H2C: Host-to-Card, LOOPBACK). Determines which subsystem buffers are allocated. - `packet_size` (size_t): Defines the buffer size in bytes for data transfers with the CL_SDE. 
+ - `layout` (enum): Defines the DMA Buffers used for DMA (SINGLE: each buffer descriptor represents the same buffer that is repeatedly used, MULTI: each buffer descriptor represents a unique buffer, USER_MANAGED: buffers are allocated and managed by the user and each descriptor points to a unique buffer) - **Returns**: 0 on success, non-zero value on error. * **`sde_mgmt_init_and_cfg`** @@ -36,6 +37,14 @@ This directory contains example applications demonstrating the usage of the Stre - `subsystem` (enum): Specifies the subsystem to check (C2H: Card-to-Host, H2C: Host-to-Card). - **Returns**: 0 if no errors detected, non-zero value indicating specific error condition. +* **`sde_mgmt_set_dma_buffers`** + Sets the DMA buffers to buffers allocated by the user. + - `slot_id` (int): Identifies the FPGA image slot containing the CL_SDE. + - `subsystem` (enum): Specifies the subsystem to check (C2H: Card-to-Host, H2C: Host-to-Card). + - `sde_buffers` (struct sde_buffer*): Specifies the array of user buffers to be used by the CL_SDE. + - `num_buffers` (size_t): Specifies the number of user buffers passed in. + - **Returns**: 0 on success, non-zero value on error. + * **`sde_mgmt_cfg`** Configures the CL_SDE using parameters established during initialization. - **Returns**: 0 on success, non-zero value on error. @@ -60,6 +69,12 @@ This directory contains example applications demonstrating the usage of the Stre - `size` (size_t): Total number of bytes to be read from the card. - **Returns**: 0 on success, non-zero value on error. +* **`sde_mgmt_read_md`** + Reads the next valid write-back metadata struct. This struct specifies the status of a Card-to-Host DMA transfer, including the size of the transferred data. + - `slot_id` (int): Identifies the FPGA image slot containing the CL_SDE. + - `md` (struct sde_md*): Valid pointer that will be populated with the metadata for the Card-to-Host DMA transfer. + - **Returns**: 0 on success, non-zero value on error or timeout. 
+ * **`sde_mgmt_read_data`** Transfers data from internal DMA buffers to the user-provided buffer. - `slot_id` (int): Identifies the FPGA image slot containing the CL_SDE. @@ -173,6 +188,16 @@ Build the sde_examples from the $(HDK)/cl/examples/cl_sde/software/runtime direc - `sudo ./sde_h2c_perf_test ` - `sudo ./sde_h2c_perf_test 1 131072 0` +### 6. Card-to-Host Transfer with User Managed Buffers +- File: `sde_c2h_user_buffers` +- Demonstrates basic card-to-host data transfer with user managed buffers +- Configures SDE for reading data into a user-managed DMA buffer + +#### Usage + +- `sudo ./sde_c2h_user_buffers ` +- `sudo ./sde_c2h_user_buffers 1 4096 0` + ## Performance Metrics Each example provides performance metrics: diff --git a/hdk/cl/examples/cl_sde/software/src/sde_c2h_perf_test.c b/hdk/cl/examples/cl_sde/software/src/sde_c2h_perf_test.c index 90d83c76c..1f1d9e0b8 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_c2h_perf_test.c +++ b/hdk/cl/examples/cl_sde/software/src/sde_c2h_perf_test.c @@ -50,12 +50,10 @@ #define C2H_DESC_COALESCE_CNT 32 -void print_timing(double start_time, int pkt_size, size_t num_packets); - int main(int argc, char **argv) { struct sde_parameters params; - double start_time; + double start_time, end_time; int ret = 0; ret = sde_parse_args(argc, argv, ¶ms, "sde_c2h_perf_test"); @@ -100,7 +98,8 @@ int main(int argc, char **argv) { num_packets+=num_descriptors; } - print_timing(start_time, params.pkt_size, num_packets); + end_time = sde_get_curr_time(); + print_timing(start_time, end_time, params.pkt_size, num_packets, SDE_EXAMPLE_DIR_C2H); cleanup: ret |= sde_mgmt_close(params.slot_id); @@ -116,16 +115,3 @@ int main(int argc, char **argv) { return ret; } - -void print_timing(double start_time, int pkt_size, size_t num_packets) { - double curr_time = sde_get_curr_time(); - double total_run_time = (curr_time - start_time); - double c2h_mpps = (((double)num_packets)/1e6) / total_run_time; - double c2h_bw = (((double) num_packets * 
(double) pkt_size)/1e9)/total_run_time ; - - printf ("Start Time = %.2f, Current Time = %.2f\n", start_time, curr_time); - printf ("Total Run time: %.2f secs\n", total_run_time); - printf ("Total Number of Packets: %ld\n", num_packets); - printf ("c2h_mpps: %.3f MPPS\n", c2h_mpps); - printf ("c2h BW: %.3f GB/s\n", c2h_bw); -} diff --git a/hdk/cl/examples/cl_sde/software/src/sde_c2h_simple.c b/hdk/cl/examples/cl_sde/software/src/sde_c2h_simple.c index e165130f0..4266f5bd7 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_c2h_simple.c +++ b/hdk/cl/examples/cl_sde/software/src/sde_c2h_simple.c @@ -56,12 +56,10 @@ #define C2H_DESC_COALESCE_CNT 32 -void print_timing(double start_time, int pkt_size, size_t num_packets); - int main(int argc, char **argv) { struct sde_parameters params; - double start_time; + double start_time, end_time; int ret = 0; ret = sde_parse_args(argc, argv, ¶ms, "sde_c2h_simple"); @@ -100,7 +98,8 @@ int main(int argc, char **argv) { num_packets+=num_descriptors; } - print_timing(start_time, params.pkt_size, num_packets); + end_time = sde_get_curr_time(); + print_timing(start_time, end_time, params.pkt_size, num_packets, SDE_EXAMPLE_DIR_C2H); cleanup: free(data_ptr); @@ -117,16 +116,3 @@ int main(int argc, char **argv) { return ret; } - -void print_timing(double start_time, int pkt_size, size_t num_packets) { - double curr_time = sde_get_curr_time(); - double total_run_time = (curr_time - start_time); - double c2h_mpps = (((double)num_packets)/1e6) / total_run_time; - double c2h_bw = (((double) num_packets * (double) pkt_size)/1e9)/total_run_time ; - - printf ("Start Time = %.2f, Current Time = %.2f\n", start_time, curr_time); - printf ("Total Run time: %.2f secs\n", total_run_time); - printf ("Total Number of Packets: %ld\n", num_packets); - printf ("c2h_mpps: %.3f MPPS\n", c2h_mpps); - printf ("c2h BW: %.3f GB/s\n", c2h_bw); -} diff --git a/hdk/cl/examples/cl_sde/software/src/sde_c2h_user_buffers.c 
b/hdk/cl/examples/cl_sde/software/src/sde_c2h_user_buffers.c new file mode 100644 index 000000000..0d01cfc90 --- /dev/null +++ b/hdk/cl/examples/cl_sde/software/src/sde_c2h_user_buffers.c @@ -0,0 +1,161 @@ +/* + * Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +/* + * This example demonstrates how to configure the SDE for card to host DMA using user-managed DMA buffers instead of + * library-allocated buffers. It shows how to allocate a hugepage, partition it into multiple buffers, and use them for DMA operations. + * + * 0. Prerequisites: + * - Must be run on an F2 instance with an FPGA at the FPGA image slot matching the command-line option + * - Source the sdk by navigating to the root of this repo and running `source ./sdk_setup.sh` + * - The CL_SDE must be loaded on the FPGA that matches the slot_id passed to this program + * - The APP_PF of the FPGA card must have bus mastering enabled (check with `lspci -d 1d0f:f002 -vv`, + * should show `BusMaster+`) + * - To enable bus mastering if needed: `sudo setpci -s 4.w=6` + * - Hugepages must be allocated as this example uses them for DMA buffer management + * - To allocate hugepages: `sudo sysctl -w vm.nr_hugepages=` + * + * 1. Compile the example: + * `make sde_c2h_user_buffers` + * + * 2. Run the example: + * `sudo ./sde_c2h_user_buffers ` + * + * 3. 
Example output: + * `sudo ./sde_c2h_user_buffers 1 1024 0` + * Start Time = 0, Current Time = 0 + * Total Run time: 0 secs + * Total Number of Packets: 1 + * c2h_mpps: 0 MPPS + * c2h BW: 0 GB/s + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define C2H_DESC_COALESCE_CNT 32 +#define NUM_BUFFERS 64 +#define HUGEPAGE_SIZE 2097152 + +int partition_user_buffers(struct sde_buffer* sde_buffers, size_t num_buffers, size_t pkt_size, uint64_t physical_address); + +int main(int argc, char **argv) { + + struct sde_parameters params; + double start_time, end_time; + int ret = 0; + + ret = sde_parse_args(argc, argv, &params, "sde_c2h_user_buffers"); + fail_on(ret, err, "Unable to parse arguments"); + + fail_on_with_code(params.slot_id >= FPGA_SLOT_MAX, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "Invalid slot_id %d", params.slot_id); + + uint64_t dma_buffer_va = 0; + uint64_t dma_buffer_pa = 0; + ret = fpga_dma_mem_map_huge(&dma_buffer_va, &dma_buffer_pa); + fail_on(ret, err, "failed to map hugepage for DMA buffers"); + size_t dma_buffer_size = HUGEPAGE_SIZE; + + struct sde_buffer sde_buffers[NUM_BUFFERS]; + ret = partition_user_buffers(sde_buffers, NUM_BUFFERS, params.pkt_size, dma_buffer_pa); + // partition_user_buffers leaves sde_buffers uninitialized on failure, so never hand them to the SDE. + fail_on(ret, err, "failed to partition hugepage into DMA buffers"); + + ret = sde_mgmt_init(params.slot_id, SDE_EXAMPLE_DIR_C2H, params.pkt_size, SDE_BUFFER_USER_MANAGED); + fail_on(ret, err, "failed to init sde_mgmt"); + + ret = sde_mgmt_reset(params.slot_id); + fail_on(ret, err, "failed to reset sde_mgmt"); + + ret = sde_mgmt_set_dma_buffers(params.slot_id, SDE_SUBSYSTEM_C2H, sde_buffers, NUM_BUFFERS); + fail_on(ret, err, "failed to set user-managed DMA buffers"); + + ret = sde_mgmt_cfg(params.slot_id); + fail_on(ret, err, "failed to cfg sde_mgmt"); + + size_t num_descriptors = params.pkt_cnt < C2H_DESC_COALESCE_CNT ? params.pkt_cnt : C2H_DESC_COALESCE_CNT; + size_t num_packets = 0; + + // Starting the timer after the SDE is already configured although it is not yet running. 
+ start_time = sde_get_curr_time(); + struct sde_md c2h_metadata; + while (num_packets < (params.pkt_cnt)) { + ret = sde_mgmt_check_status(params.slot_id, SDE_SUBSYSTEM_C2H); + fail_on(ret, cleanup, "Error checking status"); + + // Start the card to host DMA (read) by posting the descriptors for the buffers. + size_t descriptors_posted = num_descriptors; + ret = sde_mgmt_post_desc(params.slot_id, SDE_SUBSYSTEM_C2H, &descriptors_posted); + fail_on(ret, cleanup, "Error posting descriptors"); + + for (size_t i = 0; i < descriptors_posted; ++i) { + ret = sde_mgmt_read_md(params.slot_id, &c2h_metadata); + fail_on(ret, cleanup, "Error reading metadata for each descriptor written."); + fail_on_with_code(!c2h_metadata.valid, cleanup, ret, FPGA_ERR_SOFTWARE_PROBLEM, "c2h_metadata is not valid"); + // user_bits and eop can also be checked before processing the data in the buffer. + } + + num_packets+=descriptors_posted; + } + + end_time = sde_get_curr_time(); + print_timing(start_time, end_time, params.pkt_size, num_packets, SDE_EXAMPLE_DIR_C2H); + +cleanup: + ret |= sde_mgmt_close(params.slot_id); + ret |= fpga_dma_mem_unmap(&dma_buffer_va, dma_buffer_size); + +err: + if (ret) { + printf("Error: (%d) %s\n", ret, sde_mgmt_strerror(ret)); + const char *long_help = sde_mgmt_strerror_long(ret); + if (long_help) { + printf("%s\n", long_help); + } + } + + return ret; +} + +int partition_user_buffers(struct sde_buffer* sde_buffers, size_t num_buffers, size_t pkt_size, uint64_t physical_address) { + int ret = 0; + fail_on_with_code(sde_buffers == NULL, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "Invalid buffer pointer"); + fail_on_with_code(num_buffers == 0, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "Invalid number of buffers"); + fail_on_with_code(pkt_size == 0, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "Invalid packet size"); + fail_on_with_code(physical_address == 0, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "Invalid physical address"); + fail_on_with_code(pkt_size * num_buffers > HUGEPAGE_SIZE, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "Packet 
size * number of buffers exceeds hugepage size"); + + memset(sde_buffers, 0, sizeof(struct sde_buffer) * num_buffers); + + // Partition the hugepage into NUM_BUFFERS buffers. + for (size_t i = 0; i < num_buffers; ++i) { + sde_buffers[i].data_pa = (pkt_size * i) + physical_address; + sde_buffers[i].length = pkt_size; + } + +err: + return ret; +} diff --git a/hdk/cl/examples/cl_sde/software/src/sde_h2c_perf_test.c b/hdk/cl/examples/cl_sde/software/src/sde_h2c_perf_test.c index 90eec1ad3..8ecc293ef 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_h2c_perf_test.c +++ b/hdk/cl/examples/cl_sde/software/src/sde_h2c_perf_test.c @@ -50,12 +50,10 @@ #define H2C_DESC_COALESCE_CNT 32 -void print_timing(double start_time, int pkt_size, size_t num_packets); - int main(int argc, char **argv) { struct sde_parameters params; - double start_time; + double start_time, end_time; int ret = 0; ret = sde_parse_args(argc, argv, ¶ms, "sde_h2c_perf_test"); @@ -100,7 +98,8 @@ int main(int argc, char **argv) { num_packets+=num_descriptors; } - print_timing(start_time, params.pkt_size, num_packets); + end_time = sde_get_curr_time(); + print_timing(start_time, end_time, params.pkt_size, num_packets, SDE_EXAMPLE_DIR_H2C); cleanup: ret |= sde_mgmt_close(params.slot_id); @@ -116,16 +115,3 @@ int main(int argc, char **argv) { return ret; } - -void print_timing(double start_time, int pkt_size, size_t num_packets) { - double curr_time = sde_get_curr_time(); - double total_run_time = (curr_time - start_time); - double h2c_mpps = (((double)num_packets)/1e6) / total_run_time; - double h2c_bw = (((double) num_packets * (double) pkt_size)/1e9)/total_run_time ; - - printf ("Start Time = %.2f, Current Time = %.2f\n", start_time, curr_time); - printf ("Total Run time: %.2f secs\n", total_run_time); - printf ("Total Number of Packets: %ld\n", num_packets); - printf ("h2c_mpps: %.3f MPPS\n", h2c_mpps); - printf ("h2c BW: %.3f GB/s\n", h2c_bw); -} \ No newline at end of file diff --git 
a/hdk/cl/examples/cl_sde/software/src/sde_h2c_simple.c b/hdk/cl/examples/cl_sde/software/src/sde_h2c_simple.c index 36d434454..7c25192c3 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_h2c_simple.c +++ b/hdk/cl/examples/cl_sde/software/src/sde_h2c_simple.c @@ -53,12 +53,10 @@ #define H2C_DESC_COALESCE_CNT 32 -void print_timing(double start_time, int pkt_size, size_t num_packets); - int main(int argc, char **argv) { struct sde_parameters params; - double start_time; + double start_time, end_time; int ret = 0; ret = sde_parse_args(argc, argv, ¶ms, "sde_h2c_simple"); @@ -99,7 +97,8 @@ int main(int argc, char **argv) { num_packets+=num_descriptors; } - print_timing(start_time, params.pkt_size, num_packets); + end_time = sde_get_curr_time(); + print_timing(start_time, end_time, params.pkt_size, num_packets, SDE_EXAMPLE_DIR_H2C); cleanup: free(data_ptr); @@ -116,16 +115,3 @@ int main(int argc, char **argv) { return ret; } - -void print_timing(double start_time, int pkt_size, size_t num_packets) { - double curr_time = sde_get_curr_time(); - double total_run_time = (curr_time - start_time); - double h2c_mpps = (((double)num_packets)/1e6) / total_run_time; - double h2c_bw = (((double) num_packets * (double) pkt_size)/1e9)/total_run_time ; - - printf ("Start Time = %.2f, Current Time = %.2f\n", start_time, curr_time); - printf ("Total Run time: %.2f secs\n", total_run_time); - printf ("Total Number of Packets: %ld\n", num_packets); - printf ("h2c_mpps: %.3f MPPS\n", h2c_mpps); - printf ("h2c BW: %.3f GB/s\n", h2c_bw); -} diff --git a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_dma_buffer.c b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_dma_buffer.c index 9b84d50bc..529d98c6f 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_dma_buffer.c +++ b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_dma_buffer.c @@ -32,13 +32,15 @@ int sde_dma_buffer_init(struct sde_dma_buffer* dma_buffer, enum SDE_BUFFER_LAYOU dma_buffer->subsystem = subsystem; 
dma_buffer->pkt_size = pkt_size; dma_buffer->desc_element_size = subsystem == SDE_SUBSYSTEM_C2H ? sizeof(struct c2h_desc) : sizeof(struct h2c_desc); - dma_buffer->mem = mem; dma_buffer->ctrl = ctrl; - ret = sde_mem_get_desc(dma_buffer->mem, dma_buffer->subsystem, &dma_buffer->desc_va, &dma_buffer->desc_pa); + ret = sde_mem_get_desc(mem, dma_buffer->subsystem, &dma_buffer->desc_va, &dma_buffer->desc_pa); fail_on(ret, err, "Failed to get descriptor"); - ret = sde_mem_get_buffers(dma_buffer->mem, dma_buffer->subsystem, &dma_buffer->buffers, &dma_buffer->num_buffers); + if (dma_buffer->layout != SDE_BUFFER_USER_MANAGED) { + dma_buffer->num_desc = SDE_NUM_DESC; + ret = sde_mem_get_buffers(mem, dma_buffer->subsystem, &dma_buffer->buffers, &dma_buffer->num_buffers); + } err: return ret; @@ -70,14 +72,35 @@ static uint32_t get_next_start_dw(uint32_t curr_start_dw) { return(next_start_dw); } +int sde_dma_buffer_set_dma_buffers(struct sde_dma_buffer* dma_buffer, struct sde_buffer* sde_buffers, size_t num_buffers) { + int ret = 0; + + fail_on_with_code(dma_buffer == NULL, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "dma_buffer is NULL"); + fail_on_with_code(sde_buffers == NULL, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "sde_buffers is NULL"); + fail_on_with_code(num_buffers == 0, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "num_buffers is 0"); + fail_on_with_code(dma_buffer->layout != SDE_BUFFER_USER_MANAGED, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "dma_buffer->layout is not SDE_BUFFER_USER_MANAGED"); + + // Log the caller-supplied count; dma_buffer->num_buffers is not assigned until below. + log_debug("dma_buffer->subsystem = %d", dma_buffer->subsystem); + log_debug("num_buffers = %zu", num_buffers); + + dma_buffer->buffers = sde_buffers; + dma_buffer->num_buffers = num_buffers; + dma_buffer->num_desc = num_buffers; + +err: + return ret; +} + int sde_dma_init_desc_buffer(struct sde_dma_buffer* dma_buffer) { int ret = 0; fail_on_with_code(dma_buffer == NULL, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "dma_buffer is NULL"); + 
fail_on_with_code(dma_buffer->num_buffers == 0, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "dma_buffer->num_buffers is 0"); + fail_on_with_code(dma_buffer->buffers == NULL, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "dma_buffer->buffers is NULL"); if (dma_buffer->subsystem == SDE_SUBSYSTEM_C2H) { struct c2h_desc* desc = (struct c2h_desc*) dma_buffer->desc_va; - for (size_t i = 0; i < SDE_NUM_DESC; ++i) { + for (size_t i = 0; i < dma_buffer->num_desc; ++i) { size_t buffer_index = i % dma_buffer->num_buffers; desc[i].length = dma_buffer->buffers[buffer_index].length; desc[i].phys_addr = dma_buffer->buffers[buffer_index].data_pa; @@ -88,7 +111,7 @@ int sde_dma_init_desc_buffer(struct sde_dma_buffer* dma_buffer) { uint32_t next_dw = get_next_start_dw(current_dw); struct h2c_desc* desc = (struct h2c_desc*) dma_buffer->desc_va; - for (size_t i = 0; i < SDE_NUM_DESC; ++i) { + for (size_t i = 0; i < dma_buffer->num_desc; ++i) { size_t buffer_index = i % dma_buffer->num_buffers; desc[i].length = dma_buffer->buffers[buffer_index].length; desc[i].phys_addr = dma_buffer->buffers[buffer_index].data_pa; @@ -105,7 +128,6 @@ int sde_dma_init_desc_buffer(struct sde_dma_buffer* dma_buffer) { } } - dma_buffer->num_desc = SDE_NUM_DESC; dma_buffer->curr_desc_index_to_post = 0; dma_buffer->current_buffer_index = 0; diff --git a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_dma_buffer.h b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_dma_buffer.h index 1d8ccbcf1..38bbc07f4 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_dma_buffer.h +++ b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_dma_buffer.h @@ -30,7 +30,6 @@ struct sde_dma_buffer { enum SDE_BUFFER_LAYOUT layout; enum SDE_SUBSYSTEM subsystem; - struct sde_mem* mem; struct sde_hw_ctrl* ctrl; uint64_t desc_va; @@ -51,6 +50,7 @@ struct sde_dma_buffer { int sde_dma_buffer_init(struct sde_dma_buffer* dma_buffer, enum SDE_BUFFER_LAYOUT layout, enum SDE_SUBSYSTEM subsystem, size_t pkt_size, struct sde_mem* mem, struct sde_hw_ctrl* 
ctrl); int sde_dma_buffer_close(struct sde_dma_buffer* dma_buffer); +int sde_dma_buffer_set_dma_buffers(struct sde_dma_buffer* dma_buffer, struct sde_buffer* sde_buffers, size_t num_buffers); int sde_dma_init_desc_buffer(struct sde_dma_buffer* dma_buffer); int sde_dma_post_desc(struct sde_dma_buffer* dma_buffer, size_t* num_desc); int sde_dma_read_data(struct sde_dma_buffer* dma_buffer, void* data, size_t size); diff --git a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_enums.h b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_enums.h index d2c914dab..de27846c4 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_enums.h +++ b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_enums.h @@ -15,6 +15,8 @@ #pragma once +#include + enum SDE_EXAMPLE_DIR { SDE_EXAMPLE_DIR_C2H, SDE_EXAMPLE_DIR_H2C, @@ -23,7 +25,8 @@ enum SDE_EXAMPLE_DIR { enum SDE_BUFFER_LAYOUT { SDE_BUFFER_LAYOUT_SINGLE, - SDE_BUFFER_LAYOUT_MULTI + SDE_BUFFER_LAYOUT_MULTI, + SDE_BUFFER_USER_MANAGED, }; enum SDE_SUBSYSTEM { @@ -39,6 +42,13 @@ enum SDE_ERROR { SDE_METADATA_VALID_TIMEOUT = 0x1004, }; +struct sde_buffer { + uint64_t data_va; + uint64_t data_pa; + uint32_t length; + uint32_t alloc_length; +}; + #define SDE_ERR2STR(error) \ ((error) == SDE_UNEXPECTED_REGISTER_VALUE) ? "unexpected-register-value" : \ ((error) == SDE_ALLOCATION_FAILURE) ? 
"allocation-failure" : \ diff --git a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_hw_regs.h b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_hw_regs.h index bf07e1308..a1b1a6130 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_hw_regs.h +++ b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_hw_regs.h @@ -77,21 +77,23 @@ struct h2c_status { #define STATUS_WB_ERR_MASK 0x4 #define STATUS_WB_ERR_SHIFT 2 -#define SDE_ATG_TX_CTRL_REG 0x0 +/* ATG and General Configuration Registers */ +#define SDE_ATG_CTRL_BASE_REG GLOBAL_ATG_OFFSET +#define SDE_ATG_TX_CTRL_REG (0x0 + SDE_ATG_CTRL_BASE_REG) -#define SDE_ATG0_DATA_REG 0x4 -#define SDE_ATG0_SIZE_REG 0x8 +#define SDE_ATG0_DATA_REG (0x4 + SDE_ATG_CTRL_BASE_REG) +#define SDE_ATG0_SIZE_REG (0x8 + SDE_ATG_CTRL_BASE_REG) -#define SDE_ATG1_DATA_REG 0xC -#define SDE_ATG1_SIZE_REG 0x10 +#define SDE_ATG1_DATA_REG (0xC + SDE_ATG_CTRL_BASE_REG) +#define SDE_ATG1_SIZE_REG (0x10 + SDE_ATG_CTRL_BASE_REG) -#define SDE_RX_CONTROL_REG 0x180 +#define SDE_RX_CONTROL_REG (0x180 + SDE_ATG_CTRL_BASE_REG) #define RCR_LOOPBACK_EN_MASK 0x0000001 #define RCR_LOOPBACK_EN_SHIFT 0 #define RCR_BACKPRESSURE_EN_MASK 0x00000010 #define RCR_BACKPRESSURE_EN_SHIFT 4 -#define SDE_GENERAL_PURPOSE_CFG_REG 0x2000 +#define SDE_GENERAL_PURPOSE_CFG_REG (0x2000 + SDE_ATG_CTRL_BASE_REG) #define GPCR_SDE_RESET_MASK 0x00000001 #define GPCR_SDE_RESET_SHIFT 0 #define GPCR_LOOPBACK_MASK 0x00000002 @@ -104,24 +106,25 @@ struct h2c_status { /* SDE REGISTERS */ /* PF0, BAR4*/ +#define SDE_BASE_ADDR GLOBAL_SDE_OFFSET /* C2H Descriptor RAM */ -#define SDE_C2H_DESC_RAM_MAP_OFFSET 0x0 +#define SDE_C2H_DESC_RAM_MAP_OFFSET (0x0 + SDE_BASE_ADDR) /* H2C Descriptor RAM */ -#define SDE_H2C_DESC_RAM_MAP_OFFSET 0x1000 +#define SDE_H2C_DESC_RAM_MAP_OFFSET (0x1000 + SDE_BASE_ADDR) /* Configuration Registers */ -#define SDE_CONFIG_REG_MAP_OFFSET 0x3000 +#define SDE_CONFIG_REG_MAP_OFFSET (0x3000 + SDE_BASE_ADDR) /* PCIS CSRs */ #define SDE_PCIS_CSRS_REGS_OFFSET (0x0 + 
SDE_CONFIG_REG_MAP_OFFSET) -#define SDE_SW_RESET_REG (0x0 + PCIS_CSRS_REGS_OFFSET) +#define SDE_SW_RESET_REG (0x0 + SDE_PCIS_CSRS_REGS_OFFSET) #define SWRR_SW_RST_MASK 0x1 #define SWRR_SW_RST_SHIFT 0 -#define SDE_INFO_REG (0x4 + PCIS_CSRS_REGS_OFFSET) +#define SDE_INFO_REG (0x4 + SDE_PCIS_CSRS_REGS_OFFSET) #define SDEIR_C2H_PRESENT_MASK 0x1 #define SDEIR_C2H_PRESENT_SHIFT 0 #define SDEIR_H2C_PRESENT_MASK 0x10000 diff --git a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mem.c b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mem.c index c519e931b..9ad0b5860 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mem.c +++ b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mem.c @@ -112,7 +112,7 @@ int sde_mem_init(struct sde_mem* mem, enum SDE_BUFFER_LAYOUT c2h_layout, enum SD mem->c2h_layout = c2h_layout; mem->c2h_num_buffers = 0; - if (direction != SDE_EXAMPLE_DIR_H2C) { + if (direction != SDE_EXAMPLE_DIR_H2C && c2h_layout != SDE_BUFFER_USER_MANAGED) { ret = dma_buffer_init(c2h_layout, pkt_size, &mem->c2h_buffers, &mem->c2h_num_buffers); fail_on(ret, err, "dma_buffer_init c2h failed"); } @@ -120,7 +120,7 @@ int sde_mem_init(struct sde_mem* mem, enum SDE_BUFFER_LAYOUT c2h_layout, enum SD mem->h2c_layout = h2c_layout; mem->h2c_num_buffers = 0; - if (direction != SDE_EXAMPLE_DIR_C2H) { + if (direction != SDE_EXAMPLE_DIR_C2H && h2c_layout != SDE_BUFFER_USER_MANAGED) { ret = dma_buffer_init(h2c_layout, pkt_size, &mem->h2c_buffers, &mem->h2c_num_buffers); fail_on(ret, err, "dma_buffer_init h2c failed"); } diff --git a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mem.h b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mem.h index 78205aa4f..e4f149dee 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mem.h +++ b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mem.h @@ -22,14 +22,6 @@ #include #include #include -#include - -struct sde_buffer { - uint64_t data_va; - uint64_t data_pa; - uint32_t length; - uint32_t alloc_length; -}; struct 
sde_writeback_mem { uint64_t memory_va; diff --git a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mgmt.c b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mgmt.c index 010a9031f..9fcf1eec2 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mgmt.c +++ b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mgmt.c @@ -51,7 +51,7 @@ struct sde_mgmt { #define SDE_SLOT_MAX 8 static struct sde_mgmt priv_sde_mgmt[SDE_SLOT_MAX]; -int sde_mgmt_init(int slot_id, enum SDE_EXAMPLE_DIR direction, size_t packet_size) { +int sde_mgmt_init(int slot_id, enum SDE_EXAMPLE_DIR direction, size_t packet_size, enum SDE_BUFFER_LAYOUT layout) { int ret = 0; fail_on_with_code(slot_id >= SDE_SLOT_MAX, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "slot_id %d is out of range", slot_id); @@ -61,7 +61,7 @@ int sde_mgmt_init(int slot_id, enum SDE_EXAMPLE_DIR direction, size_t packet_siz sde_mgmt->direction = direction; - ret = sde_mem_init(&sde_mgmt->mem, SDE_BUFFER_LAYOUT_MULTI /*c2h_layout*/, SDE_BUFFER_LAYOUT_MULTI /*h2c_layout*/, sde_mgmt->direction, packet_size); + ret = sde_mem_init(&sde_mgmt->mem, layout, layout, sde_mgmt->direction, packet_size); fail_on(ret, err, "failed to init mem"); ret = sde_hw_init(&sde_mgmt->hw_ctrl, slot_id); @@ -74,7 +74,7 @@ int sde_mgmt_init(int slot_id, enum SDE_EXAMPLE_DIR direction, size_t packet_siz sde_mgmt->c2h_status = (struct c2h_status*)c2h_status_va; - ret = sde_dma_buffer_init(&sde_mgmt->c2h_buffer, SDE_BUFFER_LAYOUT_SINGLE, SDE_SUBSYSTEM_C2H, packet_size, &sde_mgmt->mem, &sde_mgmt->hw_ctrl); + ret = sde_dma_buffer_init(&sde_mgmt->c2h_buffer, layout, SDE_SUBSYSTEM_C2H, packet_size, &sde_mgmt->mem, &sde_mgmt->hw_ctrl); fail_on(ret, err, "failed to init c2h_buffer"); uint64_t h2c_status_va; @@ -84,7 +84,7 @@ int sde_mgmt_init(int slot_id, enum SDE_EXAMPLE_DIR direction, size_t packet_siz sde_mgmt->h2c_status = (struct h2c_status*) h2c_status_va; - ret = sde_dma_buffer_init(&sde_mgmt->h2c_buffer, SDE_BUFFER_LAYOUT_SINGLE, SDE_SUBSYSTEM_H2C, packet_size, 
&sde_mgmt->mem, &sde_mgmt->hw_ctrl); + ret = sde_dma_buffer_init(&sde_mgmt->h2c_buffer, layout, SDE_SUBSYSTEM_H2C, packet_size, &sde_mgmt->mem, &sde_mgmt->hw_ctrl); fail_on(ret, err, "failed to init h2c_buffer"); uint64_t md_ring_va; @@ -103,7 +103,7 @@ int sde_mgmt_init(int slot_id, enum SDE_EXAMPLE_DIR direction, size_t packet_siz int sde_mgmt_init_and_cfg(int slot_id, enum SDE_EXAMPLE_DIR direction, size_t packet_size) { int ret = 0; - ret = sde_mgmt_init(slot_id, direction, packet_size); + ret = sde_mgmt_init(slot_id, direction, packet_size, SDE_BUFFER_LAYOUT_MULTI); fail_on(ret, err, "failed to init sde_mgmt"); ret = sde_mgmt_reset(slot_id); @@ -187,6 +187,28 @@ int sde_mgmt_check_status(int slot_id, enum SDE_SUBSYSTEM subsystem) { return ret; } +int sde_mgmt_set_dma_buffers(int slot_id, enum SDE_SUBSYSTEM subsystem, struct sde_buffer* sde_buffers, size_t num_buffers) { + int ret = 0; + + fail_on_with_code(slot_id < 0 || slot_id >= SDE_SLOT_MAX, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "slot_id %d is out of range", slot_id); + fail_on_with_code(num_buffers > SDE_NUM_DESC, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "num_buffers %zu is out of range", num_buffers); + fail_on_with_code(num_buffers == 0, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "num_buffers is 0"); + fail_on_with_code(sde_buffers == NULL, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "sde_buffers is NULL"); + fail_on_with_code(subsystem != SDE_SUBSYSTEM_C2H && subsystem != SDE_SUBSYSTEM_H2C, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "subsystem %d is not supported", subsystem); + struct sde_mgmt *sde_mgmt = &priv_sde_mgmt[slot_id]; + + if (subsystem == SDE_SUBSYSTEM_C2H) { + ret = sde_dma_buffer_set_dma_buffers(&sde_mgmt->c2h_buffer, sde_buffers, num_buffers); + fail_on(ret, err, "failed to set c2h descriptors"); + } else if (subsystem == SDE_SUBSYSTEM_H2C) { + ret = sde_dma_buffer_set_dma_buffers(&sde_mgmt->h2c_buffer, sde_buffers, num_buffers); + fail_on(ret, err, "failed to set h2c descriptors"); + } + +err: + return ret; +} + static int sde_mgmt_cfg_c2h(struct sde_mgmt *sde_mgmt) { int ret = 0; @@ -356,8 +378,12 @@ int sde_mgmt_start_read(int slot_id, size_t size) { return ret; } -static int 
sde_mgmt_read_md(struct sde_mgmt* sde_mgmt, struct sde_md* md) { +int sde_mgmt_read_md(int slot_id, struct sde_md* md) { int ret = 0; + fail_on_with_code(slot_id >= SDE_SLOT_MAX, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "slot_id %d is out of range", slot_id); + fail_on_with_code(md == NULL, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "md is NULL"); + + struct sde_mgmt *sde_mgmt = &priv_sde_mgmt[slot_id]; bool done = 0; size_t iters = 0; @@ -401,7 +427,7 @@ int sde_mgmt_read_data(int slot_id, void *data, size_t size) { size_t iter = 0; size_t data_to_read = 0; while (data_read < size) { - ret = sde_mgmt_read_md(sde_mgmt, &md); + ret = sde_mgmt_read_md(slot_id, &md); fail_on(ret, err, "failed to read md"); data_to_read = md.length < (size - data_read) ? md.length : (size - data_read); diff --git a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mgmt.h b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mgmt.h index cb63e6889..13f287c33 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mgmt.h +++ b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_mgmt.h @@ -25,13 +25,14 @@ #include #include -int sde_mgmt_init(int slot_id, enum SDE_EXAMPLE_DIR direction, size_t packet_size); +int sde_mgmt_init(int slot_id, enum SDE_EXAMPLE_DIR direction, size_t packet_size, enum SDE_BUFFER_LAYOUT layout); int sde_mgmt_init_and_cfg(int slot_id, enum SDE_EXAMPLE_DIR direction, size_t packet_size); int sde_mgmt_close(int slot_id); int sde_mgmt_reset(int slot_id); int sde_mgmt_check_status(int slot_id, enum SDE_SUBSYSTEM subsystem); +int sde_mgmt_set_dma_buffers(int slot_id, enum SDE_SUBSYSTEM subsystem, struct sde_buffer* sde_buffers, size_t num_buffers); int sde_mgmt_cfg(int slot_id); int sde_mgmt_wait_desc_credit(int slot_id, enum SDE_SUBSYSTEM subsystem, size_t num_desc); @@ -45,6 +46,7 @@ struct sde_md { }; int sde_mgmt_start_read(int slot_id, size_t size); +int sde_mgmt_read_md(int slot_id, struct sde_md* md); int sde_mgmt_read_data(int slot_id, void *data, size_t size); int 
sde_mgmt_prepare_write(int slot_id, void *data, size_t size); diff --git a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_utility.c b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_utility.c index 29667e2ca..123ee2e57 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_utility.c +++ b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_utility.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -112,3 +111,29 @@ double sde_get_curr_time() { gettimeofday(&curr_time, NULL); return((double) curr_time.tv_sec + ((double) curr_time.tv_usec / 1e6)); } + +void print_timing(double start_time, double end_time, int pkt_size, size_t num_packets, enum SDE_EXAMPLE_DIR test_direction) { + double total_run_time = (end_time - start_time); + double mpps = (((double)num_packets)/1e6) / total_run_time; + double bw = (((double) num_packets * (double) pkt_size)/1e9)/total_run_time ; + + // Default label keeps the pointer defined if an unexpected direction value is passed. + const char* str_direction = "unknown"; + switch (test_direction) { + case SDE_EXAMPLE_DIR_C2H: + str_direction = "c2h"; + break; + case SDE_EXAMPLE_DIR_H2C: + str_direction = "h2c"; + break; + case SDE_EXAMPLE_DIR_LOOPBACK: + str_direction = "loopback"; + break; + } + + printf ("Start Time = %.2f, Current Time = %.2f\n", start_time, end_time); + printf ("Total Run time: %.2f secs\n", total_run_time); + printf ("Total Number of Packets: %zu\n", num_packets); + printf ("%s_mpps: %.3f MPPS\n", str_direction, mpps); + printf ("%s BW: %.3f GB/s\n", str_direction, bw); +} diff --git a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_utility.h b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_utility.h index 6be59405d..bb9a18092 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_utility.h +++ b/hdk/cl/examples/cl_sde/software/src/sde_lib/sde_utility.h @@ -19,6 +19,7 @@ #pragma once +#include #include #include @@ -106,3 +107,15 @@ int sde_parse_args(int argc, char **argv, struct sde_parameters* params, const c // 
//============================================================================================================= double sde_get_curr_time(void); + +//============================================================================================================ +// +// print_timing() : Prints the elapsed time and bandwidth in MPPS and GB/s. +// double start_time : The beginning of the test after configuration is complete. +// double end_time : The end of the test after transfers are complete. +// int pkt_size : The number of bytes transferred with each packet. +// size_t num_packets : The number of packets transferred. +// enum SDE_EXAMPLE_DIR direction : The direction of the example (c2h, h2c, loopback). +// +//============================================================================================================= +void print_timing(double start_time, double end_time, int pkt_size, size_t num_packets, enum SDE_EXAMPLE_DIR direction); diff --git a/hdk/cl/examples/cl_sde/software/src/sde_loopback_simple.c b/hdk/cl/examples/cl_sde/software/src/sde_loopback_simple.c index b0a82f316..04484df1d 100644 --- a/hdk/cl/examples/cl_sde/software/src/sde_loopback_simple.c +++ b/hdk/cl/examples/cl_sde/software/src/sde_loopback_simple.c @@ -53,12 +53,10 @@ #define DESC_COALESCE_CNT 32 -void print_timing(double start_time, int pkt_size, size_t num_packets); - int main(int argc, char **argv) { struct sde_parameters params; - double start_time; + double start_time, end_time; int ret = 0; ret = sde_parse_args(argc, argv, ¶ms, "sde_loopback_simple"); @@ -119,7 +117,9 @@ int main(int argc, char **argv) { num_packets+=num_descriptors; } - print_timing(start_time, params.pkt_size, num_packets); + end_time = sde_get_curr_time(); + // Each packet was sent and received, so multiply pkt_size and num_packets by 2 for BW calculations. 
+ print_timing(start_time, end_time, params.pkt_size * 2, num_packets * 2, SDE_EXAMPLE_DIR_LOOPBACK); ret = memcmp(wr_data_ptr, rd_data_ptr, params.pkt_size * num_descriptors); fail_on_with_code(ret, cleanup, ret, FPGA_ERR_SOFTWARE_PROBLEM, "Error comparing data"); @@ -140,16 +140,3 @@ int main(int argc, char **argv) { return ret; } - -void print_timing(double start_time, int pkt_size, size_t num_packets) { - double curr_time = sde_get_curr_time(); - double total_run_time = (curr_time - start_time); - double loopback_mpps = (((double)num_packets)/1e6) / total_run_time; - double loopback_bw = (((double) num_packets * (double) pkt_size * 2)/1e9)/total_run_time ; - - printf ("Start Time = %.2f, Current Time = %.2f\n", start_time, curr_time); - printf ("Total Run time: %.2f secs\n", total_run_time); - printf ("Total Number of Packets: %ld\n", num_packets); - printf ("loopback_mpps: %.3f MPPS\n", loopback_mpps); - printf ("loopback BW: %.3f GB/s\n", loopback_bw); -} diff --git a/hdk/docs/List_AFI_on_Marketplace.md b/hdk/docs/List_AFI_on_Marketplace.md new file mode 100644 index 000000000..97a71e826 --- /dev/null +++ b/hdk/docs/List_AFI_on_Marketplace.md @@ -0,0 +1,39 @@ +# Listing Your AFI on AWS Marketplace + +The AWS Marketplace enables you to sell your FPGA accelerator solutions to other AWS customers. You can list your AFI (Amazon FPGA Image) bundled with an AMI (Amazon Machine Image) that contains all necessary software components. AWS handles the metering, billing, and payment processing, allowing you to focus on your solution. + +## Prerequisites + +Before listing your AFI on the AWS Marketplace: + +1. Register as an AWS Marketplace seller at https://aws.amazon.com/marketplace/management/ + +2. 
Prepare your solution: + - Create your AFI using the supported workflows in the [AWS FPGA Developer Kit](./../README.md#build-accelerator-afi-using-hdk-design-flow) + - Build an [AMI](https://docs.aws.amazon.com/marketplace/latest/userguide/ami-products.html) that includes all required software components: + * Device drivers + * Runtime engines + * Libraries + * Documentation + - Test your complete solution thoroughly + +3. Initiate AMI scanning via the [AMI Scanning page](https://docs.aws.amazon.com/marketplace/latest/userguide/product-and-ami-policies.html#security). + +## Important Considerations + +- AFIs are always sold bundled with an AMI under a single product code +- AFIs are instance-type specific (e.g., an AFI created for F1 instances cannot be used with F2 instances) + +## Submission Process + +1. Follow the [process](https://docs.aws.amazon.com/marketplace/latest/userguide/product-submission.html) for submitting your product for publication +2. Review and Verification by AWS Marketplace team + - Submission creates a case for the AWS Seller Operations team + - AWS Support Engineer processes the product request and publishes it in a limited state + - Upon completion of limited publishing, the AWS Support Engineer notifies the seller for verification and approval to go live + - Seller conducts testing and validation +3. Final Publication + - Seller provides formal AWS Marketplace approval + - Seller Operations team proceeds with public release + +For detailed information about AWS Marketplace policies and best practices, visit the [AWS Marketplace Seller Guide](https://docs.aws.amazon.com/marketplace/latest/userguide/what-is-marketplace.html). 
diff --git a/hdk/docs/RTL_Simulation_Guide_for_HDK_Design_Flow.md b/hdk/docs/RTL_Simulation_Guide_for_HDK_Design_Flow.md index f9152bcdf..38224e8f9 100644 --- a/hdk/docs/RTL_Simulation_Guide_for_HDK_Design_Flow.md +++ b/hdk/docs/RTL_Simulation_Guide_for_HDK_Design_Flow.md @@ -52,7 +52,7 @@ export CL_DIR=$PWD/hdk/cl/examples/cl_sde cd ${CL_DIR}/verif/scripts # Run the default test using the VCS SIM -export TEST_NAME=test_ddr +export TEST_NAME=test_simple_c2h make ${TEST_NAME} VCS=1 # To view the test log files (this is defined by SIM_DIR makefile variable) diff --git a/release_version.txt b/release_version.txt index de41c13ef..a3fb64d66 100644 --- a/release_version.txt +++ b/release_version.txt @@ -1 +1 @@ -RELEASE_VERSION=2.1.0 +RELEASE_VERSION=2.1.1 diff --git a/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_setup.py b/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_setup.py index e81c31db0..f39a84897 100755 --- a/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_setup.py +++ b/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_setup.py @@ -114,9 +114,6 @@ def setup_dpdk(dpdk_path, fpga_slot_str, eni_dbdf, eni_ethdev): # Remove then load igb_uio.ko cmd_exec("rmmod ./build/kernel/linux/igb_uio/igb_uio.ko >/dev/null 2>&1", False) - # set to permissive (Alma is default enabled, Ubuntu is default disable) - if distro.id() == "almalinux" : - cmd_exec("setenforce 0") cmd_exec("insmod ./build/kernel/linux/igb_uio/igb_uio.ko") # Bind the FPGA to to DPDK diff --git a/vitis_setup.sh b/vitis_setup.sh index 1113ec1ae..f561a7761 100644 --- a/vitis_setup.sh +++ b/vitis_setup.sh @@ -58,11 +58,8 @@ function check_xilinx_vitis { info_msg " https://repost.aws/questions?view=all\&sort=recent\&tagIds=TAc7ofO5tbQRO57aX1lBYbjA" return 1 fi - RELEASE_VER=$(basename $XILINX_VITIS) - RELEASE_VER=${RELEASE_VER:0:6} - export RELEASE_VER="${RELEASE_VER}" - export VIVADO_TOOL_VER=$(echo "${RELEASE_VER}" | tr -d '[:space:]') - info_msg "RELEASE_VER = ${RELEASE_VER}" + export 
VITIS_TOOL_VER=$(vivado -version | grep -o "v[0-9]\+\.[0-9]" | sed 's/v//') + info_msg "VITIS_TOOL_VER = ${VITIS_TOOL_VER}" return 0 } @@ -73,11 +70,10 @@ valid_tool_versions["2024.2"]="true" declare -A valid_os valid_os["Ubuntu"]="true" -valid_os["AlmaLinux"]="true" function check_os_and_tool_ver { - if [[ "${valid_tool_versions[${VIVADO_TOOL_VER}]}" != "true" ]]; then - err_msg "Unsupported Vivado tool version detected: ${VIVADO_TOOL_VER}!!!" + if [[ "${valid_tool_versions[${VITIS_TOOL_VER}]}" != "true" ]]; then + err_msg "Unsupported Vivado tool version detected: ${VITIS_TOOL_VER}!!!" info_msg "Supported versions are: 2024.1 and 2024.2" return 1 fi @@ -85,7 +81,7 @@ function check_os_and_tool_ver { echo "Distro: ${distro}" if [[ "${valid_os[${distro}]}" != "true" ]]; then err_msg "Unsupported OS detected!!!" - info_msg "Supported OS are: Ubuntu 24.04, Ubuntu 20.04, and AlmaLinux 9.4" + info_msg "Supported OS are: Ubuntu 24.04 and Ubuntu 20.04" return 1 fi return 0 @@ -134,17 +130,10 @@ function set_up_vitis_examples { mkdir -p $VITIS_DIR/examples/ cd $VITIS_DIR/examples/ - if ! git clone "${vitis_exs_repo_url}" --recurse-submodules; then + if ! git clone "${vitis_exs_repo_url}" -b "${VITIS_TOOL_VER}" --recurse-submodules; then err_msg "Couldn't clone in ${vitis_exs_repo_name} repo!" return 1 fi - - cd $vitis_exs_repo_name - if ! git checkout "${VIVADO_TOOL_VER}"; then - err_msg "Couldn't check out ${VIVADO_TOOL_VER} branch of ${vitis_exs_repo_name}" - return 1 - fi - cd .. fi if [[ -d "${VITIS_DIR}/examples/vitis_examples" && ! -L "${VITIS_DIR}/examples/vitis_examples" ]]; then @@ -184,20 +173,16 @@ function sha256_check { function get_sha_file { missing_sha="${missing_xsa}.sha256" - if [ ! -e "${vitis_xpfm_dir}/${missing_sha}" ]; then - if ! sudo wget "${vitis_xsa_s3_url}/${missing_sha}" -O "${vitis_xpfm_dir}/${missing_sha}" -q; then + sha_path="${destination_dir}/${missing_sha}" + if [ ! -e "${sha_path}" ]; then + if ! 
sudo wget "${vitis_xsa_s3_url}/${missing_sha}" -O "${sha_path}" -q; then err_msg "Download of Vitis XSA SHA256 file ${missing_sha} failed!" return 1 fi fi - sha_path="" - if [[ "${missing_xsa}" != "${vitis_xpfm}" ]]; then - sha_path="${vitis_xpfm_dir}/${missing_xsa%.*}/${missing_xsa}" - else - sha_path="${vitis_xpfm_dir}/${missing_xsa}" - fi - if ! sha256_check "${vitis_xpfm_dir}/${missing_xsa}.sha256" "${sha_path}"; then + xsa_path="${destination_dir}/${missing_xsa}" + if ! sha256_check "${sha_path}" "${xsa_path}"; then sha_mismatches=1 fi return 0 @@ -205,25 +190,31 @@ function get_sha_file { function get_xsa_file { - if [ ! -e "${vitis_xpfm_dir}/${missing_xsa}" ]; then - if ! sudo wget "${vitis_xsa_s3_url}/${missing_xsa}" -O "${vitis_xpfm_dir}/${missing_xsa}" -q; then - err_msg "Download of Vitis XSA file ${missing_xsa} failed!" - return 1 - fi - if [[ "${missing_xsa}" != "${vitis_xpfm}" ]]; then - # Gets the stem of the XSA file - destination_dir=$(echo "${missing_xsa%.*}") - sudo mv "${vitis_xpfm_dir}/${missing_xsa}" "${vitis_xpfm_dir}/${destination_dir}" - fi - else - info_msg "Vitis XSA file $missing_xsa already downloaded!" + # Set up the directory where the XSA file will live + destination_dir="${vitis_xpfm_dir}" + category_dir="" + if [[ "${missing_xsa}" != "${vitis_xpfm}" ]]; then + # Gets the stem of the XSA file: hw, hw_emu, sw + category_dir=$(echo "${missing_xsa%.*}") + destination_dir="${destination_dir}/${category_dir}" + fi + + # Remove the old XSA + missing_xsa_file_extension="${missing_xsa##*.}" + sudo rm -f "${destination_dir}/*.${missing_xsa_file_extension}" + + # Grab the new XSA + if ! sudo wget "${vitis_xsa_s3_url}/${missing_xsa}" -O "${destination_dir}/${missing_xsa}" -q; then + err_msg "Download of Vitis XSA file ${missing_xsa} failed!" 
+ return 1 fi + return 0 } function create_xsa_dirs { - vitis_xpfm_dir=/opt/Xilinx/Vitis/${VIVADO_TOOL_VER}/platforms + vitis_xpfm_dir=$XILINX_VITIS/platforms vitis_hw_dir=$vitis_xpfm_dir/hw vitis_hw_emu_dir=$vitis_xpfm_dir/hw_emu vitis_sw_dir=$vitis_xpfm_dir/sw @@ -235,11 +226,12 @@ function create_xsa_dirs { ) for dir in "${xsa_dirs[@]}"; do - if [ ! -d $dir ]; then - if ! sudo mkdir -p $dir; then - err_msg "Couldn't create directory $dir!" - return 1 - fi + if [ -d $dir ]; then + sudo rm -rf $dir + fi + if ! sudo mkdir -p $dir; then + err_msg "Couldn't create directory $dir!" + return 1 fi done return 0 @@ -258,16 +250,15 @@ xsa_map["2024.2"]="202420_1" function setup_xsa { info_msg "Installing supporting libraries" - export DEBIAN_FRONTEND=noninteractive - sudo DEBIAN_FRONTEND=noninteractive $XILINX_VITIS/scripts/installLibs.sh >>/dev/null + sudo $XILINX_VITIS/scripts/installLibs.sh >>/dev/null rm installLibs.sh_* # Get XSA for right tool version - xsa_for_tool_ver="${xsa_map[${VIVADO_TOOL_VER}]}" + xsa_for_tool_ver="${xsa_map[${VITIS_TOOL_VER}]}" XSA="xilinx_aws-vu47p-f2_${xsa_for_tool_ver}" export SHELL_EMU_VERSION="${XSA}" - XSA_S3_BASE_DIR="xsa_f2_${xsa_dir_map[${VIVADO_TOOL_VER}]}" + XSA_S3_BASE_DIR="xsa_f2_${xsa_dir_map[${VITIS_TOOL_VER}]}" if ! create_xsa_dirs; then return 1 @@ -329,12 +320,6 @@ xrt_install_map["Ubuntu_install_cmd"]="sudo dpkg -i" xrt_install_map["Ubuntu_xrt_pkg_prefix"]="amd64-xrt" xrt_install_map["Ubuntu_aws_pkg_prefix"]="amd64-aws" -xrt_install_map["AlmaLinux_pkg_ext"]="rpm" -xrt_install_map["AlmaLinux_install_cmd"]="sudo dnf install -y" -xrt_install_map["AlmaLinux_xrt_pkg_prefix"]="x86_64-xrt" -xrt_install_map["AlmaLinux_aws_pkg_prefix"]="x86_64-aws" - - function build_and_install_xrt { if ! sudo -E ./$xrt_deps_script_path; then err_msg "Couldn't install XRT dependencies!" @@ -384,7 +369,7 @@ function set_up_xrt_repo { if [[ ! 
-d $xrt_repo_name ]]; then info_msg "Cloning XRT repo into home directory: ${HOME}" info_msg "This directory may be moved to any destination, once the XRT install is complete!" - if ! git clone $xrt_repo_url --recurse-submodules; then + if ! git clone "${xrt_repo_url}" -b "${xrt_branch}" --recurse-submodules; then err_msg "Couldn't clone XRT repository!" cd $AWS_FPGA_REPO_DIR && return 1 fi @@ -393,16 +378,6 @@ function set_up_xrt_repo { fi cd $xrt_repo_name - if ! git fetch; then - err_msg "Couldn't fetch updated references for XRT repo!" - cd $AWS_FPGA_REPO_DIR && return 1 - fi - - if ! git checkout $xrt_branch; then - err_msg "Couldn't checkout branch: ${xrt_branch}!" - cd $AWS_FPGA_REPO_DIR && return 1 - fi - if ! git checkout $xrt_working_commit_hash; then err_msg "Couldn't checkout compatible commit: ${xrt_working_commit_hash}!" cd $AWS_FPGA_REPO_DIR && return 1 @@ -412,8 +387,8 @@ function set_up_xrt_repo { declare -A commit_hash_map -commit_hash_map["2024.1"]="7c27d759993a184bb7782fa9af0b3b7a92de5d32" -commit_hash_map["2024.2"]="a3f9984cdcf687d1cd960b3b6270a097516de3b5" +commit_hash_map["2024.1"]="a0729c69dba1ec05856d3008fbf9994a665f276c" +commit_hash_map["2024.2"]="d8cf77af92e92324b038d787773b78fb7a44f812" function set_up_xrt_vars { @@ -431,8 +406,8 @@ function set_up_xrt_vars { xrt_build_script_run="build/build.sh -noert" xrt_build_release_dir="build/Release" xrt_setup_script_path="${xrt_path}/setup.sh" - xrt_branch="${VIVADO_TOOL_VER}" - xrt_working_commit_hash="${commit_hash_map[${VIVADO_TOOL_VER}]}" + xrt_branch="${VITIS_TOOL_VER}" + xrt_working_commit_hash="${commit_hash_map[${VITIS_TOOL_VER}]}" return 0 } @@ -503,7 +478,6 @@ function pre_flight_checks { esac done - export NEEDRESTART_MODE=a export DEBIAN_FRONTEND="noninteractive" if ! source $script_dir/shared/bin/set_common_functions.sh; then