diff --git a/.gitignore b/.gitignore index c116c93b5..ebd40eddc 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,34 @@ nohup.out # Shell DCP files are stored in S3 and downloaded by hdk_setup.sh hdk/common/shell_*/build/checkpoints/from_aws/SH_CL_BB_routed.dcp* +# HDK generated files +hdk/cl/examples/*/build/create-afi/ +hdk/cl/examples/*/build/scripts/.srcs/ +hdk/cl/examples/*/software/runtime/sh_dpi_tasks +hdk/cl/examples/cl_hello_world/software/runtime/test_hello_world +hdk/cl/examples/cl_hello_world_vhdl/software/runtime/test_hello_world_vhdl +hdk/cl/examples/cl_uram_example/software/runtime/test_uram_example +hdk/cl_uram_example/software/runtime/test_uram_example +hdk/common/shell_*/build/debug_probes/ +hdk/common/verif/models/sh_bfm/cl_ports_sh_bfm.vh + +# SDAccel generated files +SDAccel/**/*.d +SDAccel/**/*.o.cmd +SDAccel/aws_platform/xilinx_aws-*/hw/xilinx_aws-vu9p-*.dsa* +SDAccel/tools/*.d +SDAccel/tools/*.o +SDAccel/tools/xbsak + +# SDK generated files +sdk/linux_kernel_drivers/edma/.*.o.cmd +sdk/linux_kernel_drivers/edma/.*.ko.cmd +sdk/linux_kernel_drivers/edma/.tmp_versions +sdk/linux_kernel_drivers/xdma/.*.o.cmd +sdk/linux_kernel_drivers/xdma/.*.ko.cmd +sdk/linux_kernel_drivers/xdma/.tmp_versions +sdk/userspace/lib/ + # Generated checkpoints checkpoints @@ -56,5 +84,13 @@ vivado*.log # Pycharm projects .idea/ +# Eclipse projects +.project +.pydevproject +.settings + # Python compiled code -*.pyc \ No newline at end of file +*.pyc + +# pytest artifacts +.cache/ diff --git a/FAQs.md b/FAQs.md index 4cf8f1738..aec490754 100644 --- a/FAQs.md +++ b/FAQs.md @@ -139,7 +139,7 @@ Use [delete-fpga-image](./hdk/docs/delete_fpga_image.md) carefully. Once all AFI **Q: Can I bring my own bitstream for loading on an F1 FPGA?** -No. There is no mechanism for loading a bitstream directly onto the FPGAs of an F1 instance. All Custom Logic is loaded onto the FPGA by calling `$ fpga-local-load-image` tool at [AWS FPGA SDK](https://github.com/aws/aws-fpga/sdk). +No. There is no mechanism for loading a bitstream directly onto the FPGAs of an F1 instance. All Custom Logic is loaded onto the FPGA by calling the `fpga-load-local-image` tool from the [AWS FPGA SDK](./sdk). Developers create an AFI by creating a Vivado Design Checkpoint (DCP) and submitting that DCP to AWS using `aws ec2 create-fpga-image` API. AWS creates the AFI and bitstream from that DCP and returns a unique AFI ID referencing that AFI. @@ -211,7 +211,7 @@ Yes, examples are in the [examples directory](./hdk/cl/examples): The [cl_hello_world example](./hdk/cl/examples/cl_hello_world) is an RTL/Verilog simple example to build and test the Custom Logic development process, it does not use any of the external interfaces of the FPGA except the PCIe to "peek" and "poke" registers in the memory space of the CL inside the FPGA. -The [cl_dram_dma example](.hdk/cl/examples/cl_dram_dma) provides expanded features that demonstrates the use and connectivity for many of the Shell/CL interfaces and functionality. +The [cl_dram_dma example](./hdk/cl/examples/cl_dram_dma) provides expanded features that demonstrate the use and connectivity for many of the Shell/CL interfaces and functionality. **Q: How do I get access to AWS FPGA Developer AMI?** @@ -288,7 +288,7 @@ Both. 
The FPGA PCIe memory address space can be mmap() to both kernel and userspace. **Q: How do I change what AFI is loaded in an FPGA?** -Changing the AFI loaded in an FPGA is done using the `fpga-clear-local-image` and `fpga-load-local-image` APIs from the [FPGA Image Management tools](./sdk/userspace/fpga_mgmt_tools). Note that to ensure your AFI is loaded to a consistent state, a loaded FPGA slot must be cleared with `fpga-clear-local-image` before loading another FPGA image. The `fpga-load-local-image` command takes the AFI ID and requests it to be programmed into the identified FPGA. The AWS infrastructure manages the actual FPGA image and programming of the FPGA using Partial Reconfiguration capabilities of the FPGA. The AFI image is not stored in the F1 instance nor AMI. The AFI image can’t be read or modified by the instance as there isn't a direct access to programming the FPGA from the instance. A user may call `fpga-load-local-image` at any time during the life of an instance, and may call `fpga-load-local-image` any number of times. +Changing the AFI loaded in an FPGA is done using the `fpga-clear-local-image` and `fpga-load-local-image` APIs from the [FPGA Image Management tools](./sdk/userspace/fpga_mgmt_tools). The `fpga-load-local-image` command takes the AFI ID and requests it to be programmed into the identified FPGA. The AWS infrastructure manages the actual FPGA image and programming of the FPGA using the Partial Reconfiguration capabilities of the FPGA. The AFI image is stored in neither the F1 instance nor the AMI. The AFI image can’t be read or modified by the instance, as the instance has no direct access to program the FPGA. A user may call `fpga-load-local-image` at any time during the life of an instance, and may call `fpga-load-local-image` any number of times; a sketch of a typical clear/load/verify sequence appears at the end of this section. @@ -446,6 +446,10 @@ You would need a valid [on premise license](./hdk/docs/on_premise_licensing_help > * The license included on FPGA Developer AMI Versions 1.3.0_a and earlier expires on October 31 2017. > * If you see the above error, please update to FPGA Developer AMI Version 1.3.3 or later. > * All FPGA Developer AMI Versions 1.3.0_a and earlier will be deprecated once Version 1.3.3 is released. +> * If you are using the FPGA Developer AMI Version 1.3.3 or later, please check if the environment variable `XILINXD_LICENSE_FILE` is set to `/opt/Xilinx/license/XilinxAWS.lic` +> * If you still face the above error, please contact us on the forums and we'd be happy to help further. -* If you are using the FPGA Developer AMI Version 1.3.3 or later, please check if the environment variable `XILINXD_LICENSE_FILE` is set to `/opt/Xilinx/license/XilinxAWS.lic` -* If you still face the above error, please contact us on the forums and we'd be happy to help further. +**Q: Why does Vivado in GUI mode show up blank or as an empty window?** + +We have seen this issue when running RDP in 32-bit color mode, where Vivado shows up as a blank window. +Please modify the RDP options to choose a color depth of less than 32 bits and try reconnecting. 
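For reference, the clear/load/verify flow described in the AFI question above looks like the following sketch on an F1 instance. The slot number and AFI ID are placeholders; see the [FPGA Image Management tools](./sdk/userspace/fpga_mgmt_tools) README for the authoritative options.

```
# Clear slot 0 so the next load starts from a consistent state
$ sudo fpga-clear-local-image -S 0

# Load the desired AFI into slot 0 (agfi-0123456789abcdef0 is a placeholder ID)
$ sudo fpga-load-local-image -S 0 -I agfi-0123456789abcdef0

# Confirm the slot now reports the expected AFI ID in the "loaded" state
$ sudo fpga-describe-local-image -S 0 -H
```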
diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 000000000..ce0c571ce --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,492 @@ +#!/usr/bin/env groovy + +//============================================================================= +// Pipeline parameters +//============================================================================= +properties([parameters([ + string(name: 'branch', defaultValue: ''), + booleanParam(name: 'test_markdown_links', defaultValue: true), + booleanParam(name: 'test_hdk_scripts', defaultValue: true), + booleanParam(name: 'test_sims', defaultValue: true), + booleanParam(name: 'test_runtime_software', defaultValue: true), + booleanParam(name: 'test_fdf', defaultValue: true), + booleanParam(name: 'test_sdaccel_scripts', defaultValue: true), + booleanParam(name: 'test_sdaccel_builds', defaultValue: true), + booleanParam(name: 'debug_dcp_gen', defaultValue: false, description: 'Only run FDF on cl_hello_world. Overrides test_*.'), + booleanParam(name: 'debug_fdf_uram', defaultValue: false, description: 'Debug the FDF for cl_uram_example.') + +])]) + +//============================================================================= +// Configuration +//============================================================================= +boolean test_markdown_links = params.get('test_markdown_links') +boolean test_hdk_scripts = params.get('test_hdk_scripts') +boolean test_sims = params.get('test_sims') +boolean test_runtime_software = params.get('test_runtime_software') +boolean test_fdf = params.get('test_fdf') +boolean test_sdaccel_scripts = params.get('test_sdaccel_scripts') +boolean test_sdaccel_builds = params.get('test_sdaccel_builds') + +def runtime_sw_cl_names = ['cl_dram_dma', 'cl_hello_world'] +def fdf_cl_names = ['cl_dram_dma', 'cl_hello_world', 'cl_hello_world_vhdl', + 'cl_uram_example_uram_option_2', 'cl_uram_example_uram_option_3', 'cl_uram_example_uram_option_4'] + +boolean debug_dcp_gen = params.get('debug_dcp_gen') +if (debug_dcp_gen) { + fdf_cl_names = ['cl_hello_world'] + test_markdown_links = false + test_sims = false + test_runtime_software = false + test_sdaccel_scripts = false +} + +boolean debug_fdf_uram = params.get('debug_fdf_uram') +if (debug_fdf_uram) { + fdf_cl_names = ['cl_uram_example_uram_option_2', 'cl_uram_example_uram_option_3', 'cl_uram_example_uram_option_4'] + test_markdown_links = false + test_sims = false + test_runtime_software = false + test_sdaccel_scripts = false +} +//============================================================================= +// Globals +//============================================================================= + +// Map that contains top level stages +def top_parallel_stages = [:] + +// Task to Label map +def task_label = [ + 'create-afi': 't2-l-50', + 'simulation': 'c4xl', + 'dcp_gen': 'c4-4xl', + 'runtime': 'f1-2xl', + 'runtime-all-slots': 'f1-16xl', + 'source_scripts': 'c4xl', + 'md_links': 'c4xl', + 'find_tests': 't2-l-50', + 'sdaccel_builds': 'c4-4xl' +] + +// Get serializable entry set +@NonCPS def entrySet(m) {m.collect {k, v -> [key: k, value: v]}} + + +//============================================================================= +// Shared Tests +//============================================================================= + +if (test_markdown_links) { + top_parallel_stages['Test Markdown Links'] = { + stage('Test Markdown Links') { + String report_file = 'test_md_links.xml' + node(task_label.get('md_links')) { + checkout scm + try { + sh """ + set -e + source 
$WORKSPACE/shared/tests/bin/setup_test_env.sh + pytest -v $WORKSPACE/shared/tests/test_md_links.py --junit-xml $WORKSPACE/${report_file} + """ + } finally { + junit healthScaleFactor: 10.0, testResults: report_file + } + } + } + } +} + +//============================================================================= +// HDK Tests +//============================================================================= + +if (test_hdk_scripts) { + top_parallel_stages['Test HDK Scripts'] = { + stage('Test HDK Scripts') { + String report_file = 'test_hdk_scripts.xml' + node(task_label.get('source_scripts')) { + checkout scm + try { + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_env.sh + pytest -v $WORKSPACE/hdk/tests/test_hdk_scripts.py --junit-xml $WORKSPACE/${report_file} + """ + } finally { + junit healthScaleFactor: 10.0, testResults: report_file + } + } + } + } +} + +if (test_sims) { + top_parallel_stages['Run Sims'] = { + stage('Run Sims') { + def cl_names = ['cl_dram_dma', 'cl_hello_world'] + def sim_nodes = [:] + for (x in cl_names) { + String cl_name = x + String node_name = "Sims ${cl_name}" + String key = "test_${cl_name}__" + String report_file = "test_sims_${cl_name}.xml" + sim_nodes[node_name] = { + node(task_label.get('simulation')) { + checkout scm + try { + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_hdk_env.sh + pytest -v $WORKSPACE/hdk/tests/simulation_tests/test_sims.py -k \"${key}\" --junit-xml $WORKSPACE/${report_file} + """ + } catch (exc) { + echo "${node_name} failed: archiving results" + archiveArtifacts artifacts: "hdk/cl/examples/${cl_name}/verif/sim/**", fingerprint: true + throw exc + } finally { + junit healthScaleFactor: 10.0, testResults: report_file + } + } + } + } + parallel sim_nodes + } + } +} + +if (test_runtime_software) { + top_parallel_stages['Test Runtime Software'] = { + stage('Test Runtime Software') { + def nodes = [:] + for (x in runtime_sw_cl_names) { + String cl_name = x + String node_name = "Test Runtime Software ${cl_name}" + String test = "hdk/tests/test_load_afi.py::TestLoadAfi::test_precompiled_${cl_name}" + String report_file = "test_runtime_software_${cl_name}.xml" + nodes[node_name] = { + node(task_label.get('runtime')) { + checkout scm + try { + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_sdk_env.sh + pytest -v ${test} --junit-xml $WORKSPACE/${report_file} + """ + } finally { + junit healthScaleFactor: 10.0, testResults: report_file + } + } + } + } + parallel nodes + } + } +} + +if (test_fdf) { + // Top level stage for FDF + // Each CL will have its own parallel FDF stage under this one. 
+ top_parallel_stages['FDF'] = { + stage('FDF') { + def fdf_stages = [:] + for (x in fdf_cl_names) { + String cl_name_with_options = x + String cl_name = cl_name_with_options + switch (cl_name_with_options) { + case "cl_uram_example_uram_option_2": + case "cl_uram_example_uram_option_3": + case "cl_uram_example_uram_option_4": + cl_name = "cl_uram_example" + break; + } + String fdf_stage_name = "FDF ${cl_name_with_options}" + fdf_stages[fdf_stage_name] = { + stage(fdf_stage_name) { + String build_dir = "hdk/cl/examples/${cl_name}/build" + String dcp_stash_name = "dcp_tarball_${cl_name_with_options}" + String dcp_stash_dir = "${build_dir}/checkpoints/to_aws" + String afi_stash_name = "afi_${cl_name_with_options}" + String afi_stash_dir = "${build_dir}/create-afi" + node(task_label.get('dcp_gen')) { + String test = "hdk/tests/test_gen_dcp.py::TestGenDcp::test_${cl_name_with_options}" + String report_file = "test_dcp_${cl_name_with_options}.xml" + checkout scm + // Clean out the to_aws directory to make sure there are no artifacts left over from a previous build + try { + sh """ + rm -rf ${dcp_stash_dir} + """ + } catch(exc) { + // Ignore any errors + echo "Failed to clean ${dcp_stash_dir}" + } + echo "Generate DCP for ${cl_name_with_options}" + try { + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_hdk_env.sh + pytest -v ${test} --junit-xml $WORKSPACE/${report_file} + """ + } catch (exc) { + echo "${cl_name_with_options} DCP generation failed: archiving results" + archiveArtifacts artifacts: "${build_dir}/**", fingerprint: true + throw exc + } finally { + junit healthScaleFactor: 10.0, testResults: report_file + } + try { + stash name: dcp_stash_name, includes: "${dcp_stash_dir}/**" + } catch (exc) { + echo "stash ${dcp_stash_name} failed" + } + } + node(task_label.get('create-afi')) { + echo "Generate AFI for ${cl_name_with_options}" + checkout scm + String test = "hdk/tests/test_create_afi.py::TestCreateAfi::test_${cl_name_with_options}" + String report_file = "test_create_afi_${cl_name_with_options}.xml" + // Clean out the stash directories to make sure there are no artifacts left over from a previous build + try { + sh """ + rm -rf ${dcp_stash_dir} + """ + } catch(exc) { + // Ignore any errors + echo "Failed to clean ${dcp_stash_dir}" + } + try { + sh """ + rm -rf ${afi_stash_dir} + """ + } catch(exc) { + // Ignore any errors + echo "Failed to clean ${afi_stash_dir}" + } + try { + unstash name: dcp_stash_name + } catch (exc) { + echo "unstash ${dcp_stash_name} failed" + throw exc + } + try { + // There is a Xilinx bug that causes the following error during hdk_setup.sh if multiple + // processes are doing it at the same time: + // WARNING: [Common 17-1221] Tcl app 'xsim' is out of date for this release. Please run tclapp::reset_tclstore and reinstall the app. 
+ // ERROR: [Common 17-685] Unable to load Tcl app xilinx::xsim + // ERROR: [Common 17-69] Command failed: ERROR: [Common 17-685] Unable to load Tcl app xilinx::xsim + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_env.sh + pytest -v ${test} --junit-xml $WORKSPACE/${report_file} + """ + } catch (exc) { + echo "${cl_name_with_options} AFI generation failed: archiving results" + archiveArtifacts artifacts: "${build_dir}/to_aws/**", fingerprint: true + throw exc + } finally { + junit healthScaleFactor: 10.0, testResults: report_file + } + try { + stash name: afi_stash_name, includes: "${afi_stash_dir}/**" + } catch (exc) { + echo "stash ${afi_stash_name} failed" + throw exc + } + } + node(task_label.get('runtime')) { + String test = "hdk/tests/test_load_afi.py::TestLoadAfi::test_${cl_name_with_options}" + String report_file = "test_load_afi_${cl_name_with_options}.xml" + checkout scm + echo "Test AFI for ${cl_name_with_options} on F1 instance" + // Clean out the stash directories to make sure there are no artifacts left over from a previous build + try { + sh """ + rm -rf ${afi_stash_dir} + """ + } catch(exc) { + // Ignore any errors + echo "Failed to clean ${afi_stash_dir}" + } + try { + unstash name: afi_stash_name + } catch (exc) { + echo "unstash ${afi_stash_name} failed" + throw exc + } + try { + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_sdk_env.sh + pytest -v ${test} --junit-xml $WORKSPACE/${report_file} + """ + } finally { + junit healthScaleFactor: 10.0, testResults: report_file + } + } + } + } + } + parallel fdf_stages + } + } +} + +//============================================================================= +// SDAccel Tests +//============================================================================= + +if (test_sdaccel_scripts) { + top_parallel_stages['Test SDAccel Scripts'] = { + stage('Test SDAccel Scripts') { + String report_file = 'test_sdaccel_scripts.xml' + node(task_label.get('source_scripts')) { + checkout scm + try { + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_env.sh + pytest -v $WORKSPACE/SDAccel/tests/test_sdaccel_scripts.py --junit-xml $WORKSPACE/${report_file} + """ + } finally { + junit healthScaleFactor: 10.0, testResults: report_file + } + } + } + } +} + +if (test_sdaccel_builds) { + top_parallel_stages['Run SDAccel Tests'] = { + def sdaccel_build_stages = [:] + String sdaccel_examples_list = 'sdaccel_examples_list.json' + + stage ('Find SDAccel tests') { + + String report_file = 'test_find_sdaccel_examples.xml' + + node(task_label.get('find_tests')) { + + checkout scm + + try { + sh """ + rm -rf ${sdaccel_examples_list} + """ + } catch(error) { + // Ignore any errors + echo "Failed to clean ${sdaccel_examples_list}" + } + + try { + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_env.sh + pytest -v $WORKSPACE/SDAccel/tests/test_find_sdaccel_examples.py --junit-xml $WORKSPACE/${report_file} + """ + } catch (exc) { + echo "Could not find tests. Please check the repository." 
+ throw exc + } finally { + junit healthScaleFactor: 10.0, testResults: report_file + } + + //def list_map = readJSON file: sdaccel_examples_list + // Just run the hello world example for now + def list_map = [ 'Hello_World': 'SDAccel/examples/xilinx/getting_started/host/helloworld_ocl' ] + + for ( def e in entrySet(list_map) ) { + + String build_name = e.key + String example_path = e.value + String sw_emu_stage_name = "SDAccel SW_EMU ${build_name}" + String hw_emu_stage_name = "SDAccel HW_EMU ${build_name}" + String hw_stage_name = "SDAccel HW ${build_name}" + String create_afi_stage_name = "SDAccel AFI ${build_name}" + + String sw_emu_report_file = "sdaccel_sw_emu_${build_name}.xml" + String hw_emu_report_file = "sdaccel_hw_emu_${build_name}.xml" + String hw_report_file = "sdaccel_hw_${build_name}.xml" + + String xclbin_stash_name = "xclbin_${build_name}_stash" + + + sdaccel_build_stages[build_name] = { + + stage(sw_emu_stage_name) { + timeout (time: 1, unit: 'HOURS') { + node(task_label.get('sdaccel_builds')) { + + checkout scm + + try { + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_hdk_env.sh + source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh + pytest -v $WORKSPACE/SDAccel/tests/test_run_sdaccel_examples.py::TestRunSDAccelExamples::test_sw_emu --examplePath ${example_path} --junit-xml $WORKSPACE/${sw_emu_report_file} + + """ + } catch (error) { + echo "${sw_emu_stage_name} SW EMU Build generation failed" + throw error + } finally { + junit healthScaleFactor: 0.0, testResults: sw_emu_report_file + } + } + } + } + + stage(hw_emu_stage_name) { + timeout (time: 1, unit: 'HOURS') { + node(task_label.get('sdaccel_builds')) { + try { + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_hdk_env.sh + source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh + pytest -v $WORKSPACE/SDAccel/tests/test_run_sdaccel_examples.py::TestRunSDAccelExamples::test_hw_emu --examplePath ${example_path} --junit-xml $WORKSPACE/${hw_emu_report_file} + """ + } catch (error) { + echo "${hw_emu_stage_name} HW EMU Build generation failed" + } finally { + junit healthScaleFactor: 0.0, testResults: hw_emu_report_file + } + } + } + } + + stage(hw_stage_name) { + timeout (time: 7, unit: 'HOURS') { + node(task_label.get('sdaccel_builds')) { + try { + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_hdk_env.sh + source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh + pytest -v $WORKSPACE/SDAccel/tests/test_run_sdaccel_examples.py::TestRunSDAccelExamples::test_hw_build --examplePath ${example_path} --junit-xml $WORKSPACE/${hw_report_file} + """ + } catch (error) { + echo "${hw_stage_name} HW Build generation failed" + throw error + } finally { + junit healthScaleFactor: 0.0, testResults: hw_report_file + } + } + } + } + + } // sdaccel_build_stages[ e.key ] + } // for ( e in list_map ) + + parallel sdaccel_build_stages + } + } + } +} + +//============================================================================= +// SDK Tests +//============================================================================= + +parallel top_parallel_stages diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 4b7e35cb1..a28b86b74 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -26,6 +26,21 @@ * 1 DDR controller implemented in the SH (always available) * 3 DDR controllers implemented in the CL (configurable number of implemented controllers allowed) +## Release 1.3.4 (See [ERRATA](./ERRATA.md) for unsupported features) + * EDMA/XDMA Driver improvements + * 
Additional SDAccel Platforms + * 1DDR for faster build times and smaller expanded shell + * RTL Kernel Debug adds support for virtual JTAG debug on RTL kernels + * IP Integrator GUI (HLx) improvements + * CL\_DRAM\_DMA fixes and improvements + * Dual master support + * Simulation environment fixes and improvements + * AXI/AXIL Protocol checkers + * Shell model improvements + * SW co-simulation support on cl\_hello\_world + * DDR Model patch + * Updated SH\_DDR module in preparation for upcoming feature release + ## Release 1.3.3 (See [ERRATA](./ERRATA.md) for unsupported features) * New FPGA Image APIs for deleteing and reading/editing attributes @@ -179,10 +194,10 @@ The following major features are included in this HDK release: #### 1. New Shell, with modified Shell/CL interface. Changes are covered in: -* [New Shell Stable: 0x04151701](./hdk/common/shell_v04151701) +* New Shell Stable: 0x04151701: ./hdk/common/shell_v04151701 * cl_ports.vh have the updated port list * [AWS_Shell_Interface_Specification.md](./hdk/docs/AWS_Shell_Interface_Specification.md) has been updated -* Updated the xdc timing constrains under [constraints](./hdk/common/shell_v032117d7/build/constraints) to match the new interfaces +* Updated the xdc timing constraints under [constraints](./hdk/common/shell_v071417d3/build/constraints) to match the new interfaces * Updated [CL HELLO WORLD](./hdk/cl/examples/cl_hello_world) example to use the new cl_ports.vh * DCP for the latest shell v04151701. AWS Shell DCP is stored in S3 and fetched/verified when `hdk_setup.sh` script is sourced. @@ -362,7 +377,7 @@ Additional tunable auxiliary clocks are generated by the Shell and fed to the CL #### 24. Embed the HDK version and Shell Version as part of git tree * [hdk_version.txt](./hdk/hdk_version.txt) -* [shell_version.txt](./hdk/common/shell_stable) +* [shell_version.txt](./hdk/common/shell_v071417d3) #### 25. Initial Release of SDAccel and OpenCL Support (NA) diff --git a/SDAccel/FAQ.md b/SDAccel/FAQ.md index d3b69f61b..fd08504f2 100644 --- a/SDAccel/FAQ.md +++ b/SDAccel/FAQ.md @@ -55,8 +55,7 @@ A: A: SDAccel flow does not allow clocks running less that 60 MHz kernel clock, therefore, you will need to debug further using [HLS Debug suggestions](./docs/SDAccel_HLS_Debug.md) ## Q: Using the .xcp file generated from xocc results in an error? -A: Directly using the .xcp file without conversion to .xclbin file will result in an error - Error: ... invalid binary. See [Instructions on how to create AFI and subsequent execution process](../README.md#create-an-amazon-fpga-image-afi-for-your-kernel) - +A: Directly using the .xcp file without conversion to .xclbin file will result in an error - Error: ... invalid binary. 
See [Instructions on how to create AFI and subsequent execution process](./README.md#createafi) # Additional Resources diff --git a/SDAccel/Makefile b/SDAccel/Makefile index d3658be57..579750321 100644 --- a/SDAccel/Makefile +++ b/SDAccel/Makefile @@ -34,8 +34,16 @@ else $(info XILINX_SDX = $(XILINX_SDX)) endif -INSTALL_ROOT = /opt/Xilinx/SDx/2017.1 +INSTALL_ROOT = /opt/Xilinx/SDx/2017.1.rte DSA = xilinx_aws-vu9p-f1_4ddr-xpr-2pr_4_0 +OPENCLLIB := +OPENCLFILE = $(XILINX_SDX)/runtime/platforms/$(DSA)/sw/lib/x86_64/libxilinxopencl.so + +ifeq ($(wildcard $(OPENCLFILE)),) + OPENCLLIB := ${XILINX_SDX}/lib/lnx64.o/libxilinxopencl.so +else + OPENCLLIB := $(OPENCLFILE) +endif all: make -C ${SDACCEL_DIR}/userspace/src debug=$(debug) ec2=$(ec2) @@ -56,7 +64,8 @@ install: install -m 755 ${SDACCEL_DIR}/tools/awssak/xbsak $(INSTALL_ROOT)/runtime/bin install -m 755 ${XILINX_SDX}/runtime/bin/xclbincat $(INSTALL_ROOT)/runtime/bin install -m 755 ${XILINX_SDX}/runtime/bin/xclbinsplit $(INSTALL_ROOT)/runtime/bin - install -m 755 ${XILINX_SDX}/lib/lnx64.o/libxilinxopencl.so $(INSTALL_ROOT)/runtime/lib/x86_64 + #install -m 755 ${XILINX_SDX}/lib/lnx64.o/libxilinxopencl.so $(INSTALL_ROOT)/runtime/lib/x86_64 + install -m 755 ${OPENCLLIB} $(INSTALL_ROOT)/runtime/lib/x86_64 install -m 755 ${XILINX_SDX}/lib/lnx64.o/libstdc++.so* $(INSTALL_ROOT)/runtime/lib/x86_64 @echo "Generating SDAccel F1 runtime environment setup script, $(INSTALL_ROOT)/setup.sh for bash" @echo "export XILINX_OPENCL=$(INSTALL_ROOT)" > $(INSTALL_ROOT)/setup.sh diff --git a/SDAccel/README.md b/SDAccel/README.md index e8128c3d0..2d108f87f 100644 --- a/SDAccel/README.md +++ b/SDAccel/README.md @@ -44,7 +44,7 @@ It is highly recommended you read the documentation and utilize software and har * Launch an instance using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) which comes pre-installed with SDAccel and required licenses. * You may use this F1 instance to [build your host application and Xilinx FPGA binary](#createapp), however, it may be more cost efficient to either: * Launch the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) on a lower cost EC2 instance, with a minimum of 30GiB RAM), **OR** - * Follow the [On-Premises Instructions](../../hdk/docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. + * Follow the [On-Premises Instructions](../hdk/docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. * Setup AWS IAM permissions for creating FPGA Images (CreateFpgaImage and DescribeFpgaImages). [EC2 API Permissions are described in more detail](http://docs.aws.amazon.com/AWSEC2/latest/APIReference/ec2-api-permissions.html). It is highly recommended that you validate your AWS IAM permissions prior to proceeding with this quick start. By calling the [DescribeFpgaImages API](../hdk/docs/describe_fpga_images.md) you can check that your IAM permissions are correct. * [Setup AWS CLI and S3 Bucket](docs/Setup_AWS_CLI_and_S3_Bucket.md) to enable AFI creation. * Install optional [packages](packages.txt) required to run all examples. If you do not install these packages, some examples may not work properly. The setup scripts will warn you of any missing packages. 
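The IAM-validation step in the bullet list above can be exercised directly from the command line before starting a build; a minimal sketch using the same DescribeFpgaImages API referenced there (the region is a placeholder):

```
# Succeeds only if the active credentials are allowed to call DescribeFpgaImages
$ aws ec2 describe-fpga-images --owners self --region us-east-1
```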
@@ -195,6 +195,12 @@ Here are the steps: * Copy the \*.awsxclbin AWS FPGA binary file to the new instance * Copy any data files required for execution to the new instance * [Clone the github repository to the new F1 instance and install runtime drivers](#gitsetenv) + * Clone the github repository to the new F1 instance and install runtime drivers +``` + $ git clone https://github.com/aws/aws-fpga.git $AWS_FPGA_REPO_DIR + $ cd $AWS_FPGA_REPO_DIR + $ source sdaccel_setup.sh +``` * Ensure the host application can find and load the \*.awsxclbin AWS FPGA binary file. %s Done\n", __func__); diff --git a/sdk/linux_kernel_drivers/edma/edma_dev.c b/sdk/linux_kernel_drivers/edma/edma_dev.c @@ -360,7 +360,7 @@ static int edma_dev_release(struct inode *inode, struct file *file) BUG_ON(!file->private_data); - spin_lock(&device_private_data->edma_spin_lock); + mutex_lock(&device_private_data->edma_mutex); @@ -373,13 +373,20 @@ static int edma_dev_release(struct inode *inode, struct file *file) /* Get both read and write lock and update status to releasing. * This makes sure that outstanding read/write transactions will finish and * new will not start. Only open() will unset the RELEASING state. */ - spin_lock(&device_private_data->write_ebcs.ebcs_spin_lock); - spin_lock(&device_private_data->read_ebcs.ebcs_spin_lock); + mutex_lock(&device_private_data->write_ebcs.ebcs_mutex); + mutex_lock(&device_private_data->read_ebcs.ebcs_mutex); - // !!! Super important that for every spinlock of ebcs (read and write) check for releasing status !!! + // !!! Super important that for every lock of ebcs (read and write) check for releasing status !!! set_bit(EDMA_STATE_QUEUE_RELEASING_BIT, &device_private_data->state); + // Now that we signaled to the other threads that we want to release + // we can unlock the mutexes + // the code in the read/write/fsync function should always check the + // EDMA_STATE_QUEUE_RELEASING_BIT often to stop quickly + mutex_unlock(&device_private_data->read_ebcs.ebcs_mutex); + mutex_unlock(&device_private_data->write_ebcs.ebcs_mutex); + if(test_bit(EDMA_STATE_READ_IN_PROGRESS_BIT, &device_private_data->state) || test_bit(EDMA_STATE_WRITE_IN_PROGRESS_BIT, @@ -396,8 +403,6 @@ static int edma_dev_release(struct inode *inode, struct file *file) || test_bit(EDMA_STATE_FSYNC_IN_PROGRESS_BIT, &device_private_data->state)); - spin_unlock(&device_private_data->read_ebcs.ebcs_spin_lock); - spin_unlock(&device_private_data->write_ebcs.ebcs_spin_lock); // First, set the DEV_RELEASING flag so all other tasks are notified // disable hardware interrupts (note - we could still have interrupts inflight or interrupt routine in execution @@ -414,7 +419,7 @@ static int edma_dev_release(struct inode *inode, struct file *file) file->private_data = NULL; } - spin_unlock(&device_private_data->edma_spin_lock); + mutex_unlock(&device_private_data->edma_mutex); edma_dbg("\n-->%s Done\n", __func__); @@ -441,7 +446,7 @@ static ssize_t edma_dev_read(struct file *filp, char *buffer, size_t len, edma_dbg("\n-->%s Reading %zu bytes from %s in offset 0x%llx\n", __func__, len, filp->f_path.dentry->d_name.name, *off); read_ebcs = &private_data->read_ebcs; - spin_lock(&read_ebcs->ebcs_spin_lock); + mutex_lock(&read_ebcs->ebcs_mutex); u64_stats_update_begin(&private_data->stats.syncp); private_data->stats.read_requests++; @@ -616,7 +621,7 @@ static ssize_t edma_dev_read(struct file *filp, char *buffer, size_t len, edma_dev_read_done: clear_bit(EDMA_STATE_READ_IN_PROGRESS_BIT,&private_data->state); - spin_unlock(&read_ebcs->ebcs_spin_lock); + 
mutex_unlock(&read_ebcs->ebcs_mutex); edma_dbg("\n-->%s Done\n", __func__); @@ -641,7 +646,7 @@ static ssize_t edma_dev_write(struct file *filp, const char *buff, size_t len, edma_dbg("\n--> %s Writing %zu bytes to %s in offset 0x%llx\n", __func__, len, filp->f_path.dentry->d_name.name, *off); write_ebcs = &private_data->write_ebcs; - spin_lock(&write_ebcs->ebcs_spin_lock); + mutex_lock(&write_ebcs->ebcs_mutex); if(is_releasing(&((struct edma_queue_private_data*)filp->private_data)->state)) goto edma_dev_write_done; @@ -651,7 +656,7 @@ static ssize_t edma_dev_write(struct file *filp, const char *buff, size_t len, set_bit(EDMA_STATE_WRITE_IN_PROGRESS_BIT, &private_data->state); - ret = wait_is_fsync_running(private_data, &write_ebcs->ebcs_spin_lock); + ret = wait_is_fsync_running(private_data, &write_ebcs->ebcs_mutex); if(unlikely(ret)) goto edma_dev_write_done; @@ -672,9 +677,9 @@ static ssize_t edma_dev_write(struct file *filp, const char *buff, size_t len, u64_stats_update_end(&private_data->stats.syncp); //wait for write to be processed - spin_unlock(&write_ebcs->ebcs_spin_lock); + mutex_unlock(&write_ebcs->ebcs_mutex); usleep_range(SLEEP_MINIMUM_USEC, SLEEP_MAXIMUM_USEC); - spin_lock(&write_ebcs->ebcs_spin_lock); + mutex_lock(&write_ebcs->ebcs_mutex); //if releasing no need to wait if(unlikely(is_releasing(&private_data->state))) @@ -718,16 +723,16 @@ static ssize_t edma_dev_write(struct file *filp, const char *buff, size_t len, private_data->stats.dma_submit_error++; u64_stats_update_end(&private_data->stats.syncp); - spin_unlock(&write_ebcs->ebcs_spin_lock); + mutex_unlock(&write_ebcs->ebcs_mutex); usleep_range(SLEEP_MINIMUM_USEC, SLEEP_MAXIMUM_USEC); - spin_lock(&write_ebcs->ebcs_spin_lock); + mutex_lock(&write_ebcs->ebcs_mutex); //if releasing no need to wait if(unlikely(is_releasing(&private_data->state))) goto edma_dev_write_done; //if now fsync is running we should wait... - ret = wait_is_fsync_running(private_data, &write_ebcs->ebcs_spin_lock); + ret = wait_is_fsync_running(private_data, &write_ebcs->ebcs_mutex); if(unlikely(ret)) goto edma_dev_write_done; if(edma_backend_submit_m2s_request((u64*)request->phys_data, copy_to_rquest_size, write_ebcs->dma_queue_handle, *off)) { @@ -762,7 +767,7 @@ static ssize_t edma_dev_write(struct file *filp, const char *buff, size_t len, clear_bit(EDMA_STATE_WRITE_IN_PROGRESS_BIT, &private_data->state); - spin_unlock(&write_ebcs->ebcs_spin_lock); + mutex_unlock(&write_ebcs->ebcs_mutex); edma_dbg("\n--> %s done. RetVal is %zd\n", __func__, (data_copied == 0 ? 
ret : data_copied)); @@ -806,6 +811,7 @@ static int edma_dev_fsync(struct file *filp, loff_t start, loff_t end, int datas bool ebcs_is_clean = false; struct edma_buffer_control_structure* write_ebcs; struct edma_queue_private_data* private_data = (struct edma_queue_private_data*)filp->private_data; + u32 sched_limit = 1; (void)start; (void)end; @@ -820,7 +826,7 @@ static int edma_dev_fsync(struct file *filp, loff_t start, loff_t end, int datas edma_dbg("\n--> %s Fsyncing %s \n", __func__, filp->f_path.dentry->d_name.name); write_ebcs = &private_data->write_ebcs; - spin_lock(&write_ebcs->ebcs_spin_lock); + mutex_lock(&write_ebcs->ebcs_mutex); if(unlikely(is_releasing(&private_data->state))) goto edma_dev_fsync_done; @@ -835,9 +841,14 @@ static int edma_dev_fsync(struct file *filp, loff_t start, loff_t end, int datas == write_ebcs->next_to_use) ebcs_is_clean = true; else { - spin_unlock(&write_ebcs->ebcs_spin_lock); - usleep_range(SLEEP_MINIMUM_USEC, SLEEP_MAXIMUM_USEC); - spin_lock(&write_ebcs->ebcs_spin_lock); + mutex_unlock(&write_ebcs->ebcs_mutex); + + if ((sched_limit % NUM_POLLS_PER_SCHED) == 0) { + schedule(); + } + sched_limit++; + + mutex_lock(&write_ebcs->ebcs_mutex); if(unlikely(is_releasing(&private_data->state))) goto edma_dev_fsync_done; @@ -847,7 +858,7 @@ static int edma_dev_fsync(struct file *filp, loff_t start, loff_t end, int datas edma_dev_fsync_done: clear_bit(EDMA_STATE_FSYNC_IN_PROGRESS_BIT, &private_data->state); - spin_unlock(&write_ebcs->ebcs_spin_lock); + mutex_unlock(&write_ebcs->ebcs_mutex); edma_dbg("\n--> %s done.\n", __func__); @@ -984,16 +995,18 @@ static ssize_t print_queue_stats(struct device* dev, struct device_attribute* at device_private_data = (struct edma_queue_private_data *)dev_get_drvdata(dev); if(!device_private_data) - char_count = sprintf(buf, "No Statistics available. The device is not in use."); + char_count = scnprintf(buf, PAGE_SIZE, + "No Statistics available. 
The device is not in use."); else - char_count = sprintf(buf, + char_count = scnprintf(buf, PAGE_SIZE, "read_requests_submitted - %llu\n" "read_requests_completed - %llu\n" "write_requests_submitted - %llu\n" "write_requests_completed - %llu\n" "fsync_count - %llu\n" "no_space_left_error - %llu\n" + "dma_submit_error - %llu\n" "fsync_busy_count - %llu\n" "read_timeouts_error - %llu\n" "opened_times - %llu\n", @@ -1003,6 +1016,7 @@ static ssize_t print_queue_stats(struct device* dev, struct device_attribute* at device_private_data[MINOR(dev->devt)].stats.write_completed_bytes, device_private_data[MINOR(dev->devt)].stats.fsync_count, device_private_data[MINOR(dev->devt)].stats.no_space_left_error, + device_private_data[MINOR(dev->devt)].stats.dma_submit_error, device_private_data[MINOR(dev->devt)].stats.fsync_busy_count, device_private_data[MINOR(dev->devt)].stats.read_timeouts_error, device_private_data[MINOR(dev->devt)].stats.opened_times); @@ -1059,6 +1073,8 @@ static struct device* edma_add_queue_device(struct class* edma_class, void* rx_h edma_queues->device_private_data[minor_index].write_ebcs.dma_queue_handle = tx_handle; edma_queues->device_private_data[minor_index].read_ebcs.dma_queue_handle = rx_handle; + mutex_init(&edma_queues->device_private_data[minor_index].edma_mutex); + edma_queue_device_done: return edmaCharDevice; } diff --git a/sdk/linux_kernel_drivers/edma/edma_dev.h b/sdk/linux_kernel_drivers/edma/edma_dev.h index 3dbe04183..8b60bcee2 100644 --- a/sdk/linux_kernel_drivers/edma/edma_dev.h +++ b/sdk/linux_kernel_drivers/edma/edma_dev.h @@ -90,7 +90,7 @@ struct edma_buffer_control_structure{ struct transient_buffer transient_buffer; struct request* request; - spinlock_t ebcs_spin_lock; + struct mutex ebcs_mutex; u32 next_to_use; u32 next_to_clean; u32 ebcs_depth; @@ -105,7 +105,7 @@ struct edma_queue_private_data struct edma_buffer_control_structure read_ebcs; struct edma_buffer_control_structure write_ebcs; struct edma_queue_stats stats; - spinlock_t edma_spin_lock; + struct mutex edma_mutex; unsigned long state; struct edma_device *dma_device; } ____cacheline_aligned; diff --git a/sdk/linux_kernel_drivers/edma/edma_install.md b/sdk/linux_kernel_drivers/edma/edma_install.md index 6582b7d65..3aa60bbe3 100644 --- a/sdk/linux_kernel_drivers/edma/edma_install.md +++ b/sdk/linux_kernel_drivers/edma/edma_install.md @@ -1,13 +1,13 @@ # Elastic DMA (EDMA) Installation and Frequently Asked Questions -EDMA is a Linux kernel driver provided by AWS for using DMA and/or User-defined interrupts for AWS FPGAs. Please see [EDMA README](./README.md) for details. +EDMA is a Linux kernel driver provided by AWS for using DMA and/or User-defined interrupts for AWS FPGAs. Please see [EDMA README](README.md) for details. # Table of Contents 1. [Q: How do I know if the EDMA driver is available and installed?](#howIKnow) 2. [Q: How do I get the source code of the `edma` driver and compile it?](#howToCompile) -3. [Q: How can I make sure the installed driver will be preserved following a kernel update?](#howToUpgradeKernel) +3. [Q: How can I make sure the installed driver will be preserved following a kernel update?](#howToUpdateKernel) 4. 
[Q: What PCIe Vendor-ID and Device-ID does EDMA driver support](#howToDIDnVID) @@ -30,7 +30,7 @@ The developer can operate these DMA queues and interrupts directly from Linux userspace **Q: How do I get the source code of the `edma` driver and compile it?** -Amazon `edma` driver is included in [AWS FPGA SDK](https://github.com/aws/aws-fpga/master/blob/sdk/kernel_drivers/edma) for integration with other Linux distributions, please follow the next set of steps: +The Amazon `edma` driver is included in the [AWS FPGA SDK](.) for integration with other Linux distributions; please follow the next set of steps: __**Step 1**__: Make sure you have `gcc` and `linux kernel source code` installed in your machine: diff --git a/sdk/linux_kernel_drivers/edma/libxdma.c b/sdk/linux_kernel_drivers/edma/libxdma.c index c48dd9331..3e82f56f7 100644 --- a/sdk/linux_kernel_drivers/edma/libxdma.c +++ b/sdk/linux_kernel_drivers/edma/libxdma.c @@ -1,10 +1,26 @@ -/* - * Driver for XDMA for Xilinx XDMA IP core +/******************************************************************************* + * + * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. * - * Copyright (C) 2007-2017 Xilinx, Inc. + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie <karen.xie@xilinx.com> - */ + * + ******************************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ @@ -13,31 +29,50 @@ #include #include #include -//#include #include +#include #include "libxdma.h" #include "libxdma_api.h" /* SECTION: Module licensing */ -#define DRV_MODULE_NAME "edma" #ifdef __LIBXDMA_MOD__ +#include "version.h" +#define DRV_MODULE_NAME "libxdma" #define DRV_MODULE_DESC "Xilinx XDMA Base Driver" -#define DRV_MODULE_VERSION "1.0.29" #define DRV_MODULE_RELDATE "Feb. 2017" static char version[] = - DRV_MODULE_DESC " " DRV_MODULE_NAME - " v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + DRV_MODULE_DESC " " DRV_MODULE_NAME " v" DRV_MODULE_VERSION "\n"; MODULE_AUTHOR("Xilinx, Inc."); MODULE_DESCRIPTION(DRV_MODULE_DESC); MODULE_VERSION(DRV_MODULE_VERSION); MODULE_LICENSE("GPL v2"); +#endif +/* Module Parameters */ +static unsigned int poll_mode; +module_param(poll_mode, uint, 0644); +MODULE_PARM_DESC(poll_mode, "Set 1 for hw polling, default is 0 (interrupts)"); + +static unsigned int interrupt_mode; +#ifdef INTERNAL_TESTING +module_param(interrupt_mode, uint, 0644); +MODULE_PARM_DESC(interrupt_mode, "0 - MSI-x , 1 - MSI, 2 - Legacy"); +#endif + +static unsigned int enable_credit_mp; +#ifdef INTERNAL_TESTING +module_param(enable_credit_mp, uint, 0644); +MODULE_PARM_DESC(enable_credit_mp, "Set 1 to enable credit feature, default is 0 (no credit control)"); #endif +unsigned int desc_blen_max = XDMA_DESC_BLEN_MAX; +module_param(desc_blen_max, uint, 0644); +MODULE_PARM_DESC(desc_blen_max, "per descriptor max. 
buffer length, default is (1 << 28) - 1"); + /* * xdma device management * maintains a list of the xdma devices @@ -48,6 +83,11 @@ static DEFINE_MUTEX(xdev_mutex); static LIST_HEAD(xdev_rcu_list); static DEFINE_SPINLOCK(xdev_rcu_lock); +#ifndef list_last_entry +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) +#endif + static inline void xdev_list_add(struct xdma_dev *xdev) { mutex_lock(&xdev_mutex); @@ -62,13 +102,16 @@ static inline void xdev_list_add(struct xdma_dev *xdev) list_add_tail(&xdev->list_head, &xdev_list); mutex_unlock(&xdev_mutex); - dbg_init("xdev 0x%p, idx %d.\n", xdev, xdev->idx); + dbg_init("dev %s, xdev 0x%p, xdma idx %d.\n", + dev_name(&xdev->pdev->dev), xdev, xdev->idx); spin_lock(&xdev_rcu_lock); list_add_tail_rcu(&xdev->rcu_node, &xdev_rcu_list); spin_unlock(&xdev_rcu_lock); } +#undef list_last_entry + static inline void xdev_list_remove(struct xdma_dev *xdev) { mutex_lock(&xdev_mutex); @@ -87,7 +130,7 @@ struct xdma_dev *xdev_find_by_pdev(struct pci_dev *pdev) mutex_lock(&xdev_mutex); list_for_each_entry_safe(xdev, tmp, &xdev_list, list_head) { - if (xdev->pci_dev == pdev) { + if (xdev->pdev == pdev) { mutex_unlock(&xdev_mutex); return xdev; } @@ -97,14 +140,40 @@ struct xdma_dev *xdev_find_by_pdev(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(xdev_find_by_pdev); +static inline int debug_check_dev_hndl(const char *fname, struct pci_dev *pdev, + void *hndl) +{ + struct xdma_dev *xdev; + + if (!pdev) + return -EINVAL; + + xdev = xdev_find_by_pdev(pdev); + if (!xdev) { + pr_info("%s pdev 0x%p, hndl 0x%p, NO match found!\n", + fname, pdev, hndl); + return -EINVAL; + } + if (xdev != hndl) { + pr_err("%s pdev 0x%p, hndl 0x%p != 0x%p!\n", + fname, pdev, hndl, xdev); + return -EINVAL; + } -static void engine_msix_teardown(struct xdma_engine *engine); + return 0; +} +#ifdef __LIBXDMA_DEBUG__ /* SECTION: Function definitions */ -inline void write_register(u32 value, void *iomem) +inline void __write_register(const char *fn, u32 value, void *iomem, unsigned long off) { + pr_err("%s: w reg 0x%lx(0x%p), 0x%x.\n", fn, off, iomem, value); iowrite32(value, iomem); } +#define write_register(v,mem,off) __write_register(__func__, v, mem, off) +#else +#define write_register(v,mem,off) iowrite32(v, mem) +#endif inline u32 read_register(void *iomem) { @@ -121,58 +190,82 @@ static inline u64 build_u64(u64 hi, u64 lo) return ((hi & 0xFFFFFFFULL) << 32) | (lo & 0xFFFFFFFFULL); } -static u64 find_feature_id(const struct xdma_dev *lro) +static void check_nonzero_interrupt_status(struct xdma_dev *xdev) { - u64 low = 0; - u64 high = 0; -#define FEATURE_ID 0x031000 + struct interrupt_regs *reg = (struct interrupt_regs *) + (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); + u32 w; - low = ioread32(lro->bar[lro->user_bar_idx] + FEATURE_ID); - high = ioread32(lro->bar[lro->user_bar_idx] + FEATURE_ID + 8); - return low | (high << 32); + w = read_register(®->user_int_enable); + if (w) + pr_info("%s xdma%d user_int_enable = 0x%08x\n", + dev_name(&xdev->pdev->dev), xdev->idx, w); + + w = read_register(®->channel_int_enable); + if (w) + pr_info("%s xdma%d channel_int_enable = 0x%08x\n", + dev_name(&xdev->pdev->dev), xdev->idx, w); + + w = read_register(®->user_int_request); + if (w) + pr_info("%s xdma%d user_int_request = 0x%08x\n", + dev_name(&xdev->pdev->dev), xdev->idx, w); + w = read_register(®->channel_int_request); + if (w) + pr_info("%s xdma%d channel_int_request = 0x%08x\n", + dev_name(&xdev->pdev->dev), xdev->idx, w); + + w = 
read_register(®->user_int_pending); + if (w) + pr_info("%s xdma%d user_int_pending = 0x%08x\n", + dev_name(&xdev->pdev->dev), xdev->idx, w); + w = read_register(®->channel_int_pending); + if (w) + pr_info("%s xdma%d channel_int_pending = 0x%08x\n", + dev_name(&xdev->pdev->dev), xdev->idx, w); } /* channel_interrupts_enable -- Enable interrupts we are interested in */ -static void channel_interrupts_enable(struct xdma_dev *lro, u32 mask) +static void channel_interrupts_enable(struct xdma_dev *xdev, u32 mask) { struct interrupt_regs *reg = (struct interrupt_regs *) - (lro->bar[lro->config_bar_idx] + XDMA_OFS_INT_CTRL); + (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - write_register(mask, ®->channel_int_enable_w1s); + write_register(mask, ®->channel_int_enable_w1s, XDMA_OFS_INT_CTRL); } /* channel_interrupts_disable -- Disable interrupts we not interested in */ -static void channel_interrupts_disable(struct xdma_dev *lro, u32 mask) +static void channel_interrupts_disable(struct xdma_dev *xdev, u32 mask) { struct interrupt_regs *reg = (struct interrupt_regs *) - (lro->bar[lro->config_bar_idx] + XDMA_OFS_INT_CTRL); + (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - write_register(mask, ®->channel_int_enable_w1c); + write_register(mask, ®->channel_int_enable_w1c, XDMA_OFS_INT_CTRL); } /* user_interrupts_enable -- Enable interrupts we are interested in */ -static void user_interrupts_enable(struct xdma_dev *lro, u32 mask) +static void user_interrupts_enable(struct xdma_dev *xdev, u32 mask) { struct interrupt_regs *reg = (struct interrupt_regs *) - (lro->bar[lro->config_bar_idx] + XDMA_OFS_INT_CTRL); + (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - write_register(mask, ®->user_int_enable_w1s); + write_register(mask, ®->user_int_enable_w1s, XDMA_OFS_INT_CTRL); } /* user_interrupts_disable -- Disable interrupts we not interested in */ -static void user_interrupts_disable(struct xdma_dev *lro, u32 mask) +static void user_interrupts_disable(struct xdma_dev *xdev, u32 mask) { struct interrupt_regs *reg = (struct interrupt_regs *) - (lro->bar[lro->config_bar_idx] + XDMA_OFS_INT_CTRL); + (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - write_register(mask, ®->user_int_enable_w1c); + write_register(mask, ®->user_int_enable_w1c, XDMA_OFS_INT_CTRL); } /* read_interrupts -- Print the interrupt controller status */ -static u32 read_interrupts(struct xdma_dev *lro) +static u32 read_interrupts(struct xdma_dev *xdev) { struct interrupt_regs *reg = (struct interrupt_regs *) - (lro->bar[lro->config_bar_idx] + XDMA_OFS_INT_CTRL); + (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); u32 lo; u32 hi; @@ -188,6 +281,59 @@ static u32 read_interrupts(struct xdma_dev *lro) return build_u32(hi, lo); } +void enable_perf(struct xdma_engine *engine) +{ + u32 w; + + w = XDMA_PERF_CLEAR; + write_register(w, &engine->regs->perf_ctrl, + (unsigned long)(&engine->regs->perf_ctrl) - + (unsigned long)(&engine->regs)); + read_register(&engine->regs->identifier); + w = XDMA_PERF_AUTO | XDMA_PERF_RUN; + write_register(w, &engine->regs->perf_ctrl, + (unsigned long)(&engine->regs->perf_ctrl) - + (unsigned long)(&engine->regs)); + read_register(&engine->regs->identifier); + + dbg_perf("IOCTL_XDMA_PERF_START\n"); + +} +EXPORT_SYMBOL_GPL(enable_perf); + +#ifdef INTERNAL_TESTING +void get_perf_stats(struct xdma_engine *engine) +{ + u32 hi; + u32 lo; + + BUG_ON(!engine); + + if (!engine->xdma_perf) { + pr_info("%s perf struct not set up.\n", engine->name); + return; + } + + hi = 0; + lo = 
read_register(&engine->regs->completed_desc_count); + engine->xdma_perf->iterations = build_u64(hi, lo); + + hi = read_register(&engine->regs->perf_cyc_hi); + lo = read_register(&engine->regs->perf_cyc_lo); + + engine->xdma_perf->clock_cycle_count = build_u64(hi, lo); + + hi = read_register(&engine->regs->perf_dat_hi); + lo = read_register(&engine->regs->perf_dat_lo); + engine->xdma_perf->data_cycle_count = build_u64(hi, lo); + + hi = read_register(&engine->regs->perf_pnd_hi); + lo = read_register(&engine->regs->perf_pnd_lo); + engine->xdma_perf->pending_count = build_u64(hi, lo); +} +EXPORT_SYMBOL_GPL(get_perf_stats); +#endif + static void engine_reg_dump(struct xdma_engine *engine) { u32 w; @@ -195,35 +341,35 @@ static void engine_reg_dump(struct xdma_engine *engine) BUG_ON(!engine); w = read_register(&engine->regs->identifier); - dbg_init("%s: ioread32(0x%p) = 0x%08x (id).\n", + pr_info("%s: ioread32(0x%p) = 0x%08x (id).\n", engine->name, &engine->regs->identifier, w); w &= BLOCK_ID_MASK; if (w != BLOCK_ID_HEAD) { - dbg_init("%s: engine id missing, 0x%08x exp. 0xad4bXX01.\n", - engine->name, w); + pr_info("%s: engine id missing, 0x%08x exp. & 0x%x = 0x%x\n", + engine->name, w, BLOCK_ID_MASK, BLOCK_ID_HEAD); return; } /* extra debugging; inspect complete engine set of registers */ w = read_register(&engine->regs->status); - dbg_init("%s: ioread32(0x%p) = 0x%08x (status).\n", + pr_info("%s: ioread32(0x%p) = 0x%08x (status).\n", engine->name, &engine->regs->status, w); w = read_register(&engine->regs->control); - dbg_init("%s: ioread32(0x%p) = 0x%08x (control)\n", + pr_info("%s: ioread32(0x%p) = 0x%08x (control)\n", engine->name, &engine->regs->control, w); w = read_register(&engine->sgdma_regs->first_desc_lo); - dbg_init("%s: ioread32(0x%p) = 0x%08x (first_desc_lo)\n", + pr_info("%s: ioread32(0x%p) = 0x%08x (first_desc_lo)\n", engine->name, &engine->sgdma_regs->first_desc_lo, w); w = read_register(&engine->sgdma_regs->first_desc_hi); - dbg_init("%s: ioread32(0x%p) = 0x%08x (first_desc_hi)\n", + pr_info("%s: ioread32(0x%p) = 0x%08x (first_desc_hi)\n", engine->name, &engine->sgdma_regs->first_desc_hi, w); w = read_register(&engine->sgdma_regs->first_desc_adjacent); - dbg_init("%s: ioread32(0x%p) = 0x%08x (first_desc_adjacent).\n", + pr_info("%s: ioread32(0x%p) = 0x%08x (first_desc_adjacent).\n", engine->name, &engine->sgdma_regs->first_desc_adjacent, w); w = read_register(&engine->regs->completed_desc_count); - dbg_init("%s: ioread32(0x%p) = 0x%08x (completed_desc_count).\n", + pr_info("%s: ioread32(0x%p) = 0x%08x (completed_desc_count).\n", engine->name, &engine->regs->completed_desc_count, w); w = read_register(&engine->regs->interrupt_enable_mask); - dbg_init("%s: ioread32(0x%p) = 0x%08x (interrupt_enable_mask)\n", + pr_info("%s: ioread32(0x%p) = 0x%08x (interrupt_enable_mask)\n", engine->name, &engine->regs->interrupt_enable_mask, w); } @@ -234,7 +380,94 @@ static void engine_reg_dump(struct xdma_engine *engine) * * @return -1 on failure, status register otherwise */ -static u32 engine_status_read(struct xdma_engine *engine, int clear, int dump) +static void engine_status_dump(struct xdma_engine *engine) +{ + u32 v = engine->status; + char buffer[256]; + char *buf = buffer; + int len = 0; + + len = sprintf(buf, "SG engine %s status: 0x%08x: ", engine->name, v); + + if ((v & XDMA_STAT_BUSY)) + len += sprintf(buf + len, "BUSY,"); + if ((v & XDMA_STAT_DESC_STOPPED)) + len += sprintf(buf + len, "DESC_STOPPED,"); + if ((v & XDMA_STAT_DESC_COMPLETED)) + len += sprintf(buf + len, 
"DESC_COMPL,"); + + /* common H2C & C2H */ + if ((v & XDMA_STAT_COMMON_ERR_MASK)) { + if ((v & XDMA_STAT_ALIGN_MISMATCH)) + len += sprintf(buf + len, "ALIGN_MISMATCH "); + if ((v & XDMA_STAT_MAGIC_STOPPED)) + len += sprintf(buf + len, "MAGIC_STOPPED "); + if ((v & XDMA_STAT_INVALID_LEN)) + len += sprintf(buf + len, "INVLIAD_LEN "); + if ((v & XDMA_STAT_IDLE_STOPPED)) + len += sprintf(buf + len, "IDLE_STOPPED "); + buf[len - 1] = ','; + } + + if ((engine->dir == DMA_TO_DEVICE)) { + /* H2C only */ + if ((v & XDMA_STAT_H2C_R_ERR_MASK)) { + len += sprintf(buf + len, "R:"); + if ((v & XDMA_STAT_H2C_R_UNSUPP_REQ)) + len += sprintf(buf + len, "UNSUPP_REQ "); + if ((v & XDMA_STAT_H2C_R_COMPL_ABORT)) + len += sprintf(buf + len, "COMPL_ABORT "); + if ((v & XDMA_STAT_H2C_R_PARITY_ERR)) + len += sprintf(buf + len, "PARITY "); + if ((v & XDMA_STAT_H2C_R_HEADER_EP)) + len += sprintf(buf + len, "HEADER_EP "); + if ((v & XDMA_STAT_H2C_R_UNEXP_COMPL)) + len += sprintf(buf + len, "UNEXP_COMPL "); + buf[len - 1] = ','; + } + + if ((v & XDMA_STAT_H2C_W_ERR_MASK)) { + len += sprintf(buf + len, "W:"); + if ((v & XDMA_STAT_H2C_W_DECODE_ERR)) + len += sprintf(buf + len, "DECODE_ERR "); + if ((v & XDMA_STAT_H2C_W_SLAVE_ERR)) + len += sprintf(buf + len, "SLAVE_ERR "); + buf[len - 1] = ','; + } + + } else { + /* C2H only */ + if ((v & XDMA_STAT_C2H_R_ERR_MASK)) { + len += sprintf(buf + len, "R:"); + if ((v & XDMA_STAT_C2H_R_DECODE_ERR)) + len += sprintf(buf + len, "DECODE_ERR "); + if ((v & XDMA_STAT_C2H_R_SLAVE_ERR)) + len += sprintf(buf + len, "SLAVE_ERR "); + buf[len - 1] = ','; + } + } + + /* common H2C & C2H */ + if ((v & XDMA_STAT_DESC_ERR_MASK)) { + len += sprintf(buf + len, "DESC_ERR:"); + if ((v & XDMA_STAT_DESC_UNSUPP_REQ)) + len += sprintf(buf + len, "UNSUPP_REQ "); + if ((v & XDMA_STAT_DESC_COMPL_ABORT)) + len += sprintf(buf + len, "COMPL_ABORT "); + if ((v & XDMA_STAT_DESC_PARITY_ERR)) + len += sprintf(buf + len, "PARITY "); + if ((v & XDMA_STAT_DESC_HEADER_EP)) + len += sprintf(buf + len, "HEADER_EP "); + if ((v & XDMA_STAT_DESC_UNEXP_COMPL)) + len += sprintf(buf + len, "UNEXP_COMPL "); + buf[len - 1] = ','; + } + + buf[len - 1] = '\0'; + pr_info("%s\n", buffer); +} + +static u32 engine_status_read(struct xdma_engine *engine, bool clear, bool dump) { u32 value; @@ -251,23 +484,8 @@ static u32 engine_status_read(struct xdma_engine *engine, int clear, int dump) value = engine->status = read_register(&engine->regs->status); if (dump) - dbg_sg("SG engine %s status: 0x%08x: %s%s%s%s%s%s%s%s%s.\n", - engine->name, (u32)engine->status, - (value & XDMA_STAT_BUSY) ? "BUSY " : "IDLE ", - (value & XDMA_STAT_DESC_STOPPED) ? - "DESC_STOPPED " : "", - (value & XDMA_STAT_DESC_COMPLETED) ? - "DESC_COMPLETED " : "", - (value & XDMA_STAT_ALIGN_MISMATCH) ? - "ALIGN_MISMATCH " : "", - (value & XDMA_STAT_MAGIC_STOPPED) ? - "MAGIC_STOPPED " : "", - (value & XDMA_STAT_FETCH_STOPPED) ? - "FETCH_STOPPED " : "", - (value & XDMA_STAT_READ_ERROR) ? "READ_ERROR " : "", - (value & XDMA_STAT_DESC_ERROR) ? "DESC_ERROR " : "", - (value & XDMA_STAT_IDLE_STOPPED) ? 
- "IDLE_STOPPED " : ""); + engine_status_dump(engine); + return value; } @@ -287,13 +505,24 @@ static void xdma_engine_stop(struct xdma_engine *engine) w |= (u32)XDMA_CTRL_IE_MAGIC_STOPPED; w |= (u32)XDMA_CTRL_IE_READ_ERROR; w |= (u32)XDMA_CTRL_IE_DESC_ERROR; - w |= (u32)XDMA_CTRL_IE_DESC_STOPPED; - w |= (u32)XDMA_CTRL_IE_DESC_COMPLETED; + if (poll_mode) { + w |= (u32) XDMA_CTRL_POLL_MODE_WB; + } else { + w |= (u32)XDMA_CTRL_IE_DESC_STOPPED; + w |= (u32)XDMA_CTRL_IE_DESC_COMPLETED; + + /* Disable IDLE STOPPED for MM */ + if ((engine->streaming && (engine->dir == DMA_FROM_DEVICE)) || + (engine->xdma_perf)) + w |= (u32)XDMA_CTRL_IE_IDLE_STOPPED; + } dbg_tfr("Stopping SG DMA %s engine; writing 0x%08x to 0x%p.\n", engine->name, w, (u32 *)&engine->regs->control); - write_register(w, &engine->regs->control); + write_register(w, &engine->regs->control, + (unsigned long)(&engine->regs->control) - + (unsigned long)(&engine->regs)); /* dummy read of status register to flush all previous writes */ dbg_tfr("xdma_engine_stop(%s) done\n", engine->name); } @@ -304,6 +533,21 @@ static void engine_start_mode_config(struct xdma_engine *engine) BUG_ON(!engine); + /* If a perf test is running, enable the engine interrupts */ + if (engine->xdma_perf) { + w = XDMA_CTRL_IE_DESC_STOPPED; + w |= XDMA_CTRL_IE_DESC_COMPLETED; + w |= XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; + w |= XDMA_CTRL_IE_MAGIC_STOPPED; + w |= XDMA_CTRL_IE_IDLE_STOPPED; + w |= XDMA_CTRL_IE_READ_ERROR; + w |= XDMA_CTRL_IE_DESC_ERROR; + + write_register(w, &engine->regs->interrupt_enable_mask, + (unsigned long)(&engine->regs->interrupt_enable_mask) - + (unsigned long)(&engine->regs)); + } + /* write control register of SG DMA engine */ w = (u32)XDMA_CTRL_RUN_STOP; w |= (u32)XDMA_CTRL_IE_READ_ERROR; @@ -311,17 +555,27 @@ static void engine_start_mode_config(struct xdma_engine *engine) w |= (u32)XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; w |= (u32)XDMA_CTRL_IE_MAGIC_STOPPED; - w |= (u32)XDMA_CTRL_IE_DESC_STOPPED; - w |= (u32)XDMA_CTRL_IE_DESC_COMPLETED; + if (poll_mode) { + w |= (u32)XDMA_CTRL_POLL_MODE_WB; + } else { + w |= (u32)XDMA_CTRL_IE_DESC_STOPPED; + w |= (u32)XDMA_CTRL_IE_DESC_COMPLETED; + + if ((engine->streaming && (engine->dir == DMA_FROM_DEVICE)) || + (engine->xdma_perf)) + w |= (u32)XDMA_CTRL_IE_IDLE_STOPPED; - /* set non-incremental addressing mode */ - if (engine->non_incr_addr) - w |= (u32)XDMA_CTRL_NON_INCR_ADDR; + /* set non-incremental addressing mode */ + if (engine->non_incr_addr) + w |= (u32)XDMA_CTRL_NON_INCR_ADDR; + } dbg_tfr("iowrite32(0x%08x to 0x%p) (control)\n", w, (void *)&engine->regs->control); /* start the engine */ - write_register(w, &engine->regs->control); + write_register(w, &engine->regs->control, + (unsigned long)(&engine->regs->control) - + (unsigned long)(&engine->regs)); /* dummy read of status register to flush all previous writes */ w = read_register(&engine->regs->status); @@ -372,12 +626,16 @@ static struct xdma_transfer *engine_start(struct xdma_engine *engine) w = cpu_to_le32(PCI_DMA_L(transfer->desc_bus)); dbg_tfr("iowrite32(0x%08x to 0x%p) (first_desc_lo)\n", w, (void *)&engine->sgdma_regs->first_desc_lo); - write_register(w, &engine->sgdma_regs->first_desc_lo); + write_register(w, &engine->sgdma_regs->first_desc_lo, + (unsigned long)(&engine->sgdma_regs->first_desc_lo) - + (unsigned long)(&engine->sgdma_regs)); /* write upper 32-bit of bus address of transfer first descriptor */ w = cpu_to_le32(PCI_DMA_H(transfer->desc_bus)); dbg_tfr("iowrite32(0x%08x to 0x%p) (first_desc_hi)\n", w, (void 
*)&engine->sgdma_regs->first_desc_hi); - write_register(w, &engine->sgdma_regs->first_desc_hi); + write_register(w, &engine->sgdma_regs->first_desc_hi, + (unsigned long)(&engine->sgdma_regs->first_desc_hi) - + (unsigned long)(&engine->sgdma_regs)); if (transfer->desc_adjacent > 0) { extra_adj = transfer->desc_adjacent - 1; @@ -386,7 +644,9 @@ static struct xdma_transfer *engine_start(struct xdma_engine *engine) } dbg_tfr("iowrite32(0x%08x to 0x%p) (first_desc_adjacent)\n", extra_adj, (void *)&engine->sgdma_regs->first_desc_adjacent); - write_register(extra_adj, &engine->sgdma_regs->first_desc_adjacent); + write_register(extra_adj, &engine->sgdma_regs->first_desc_adjacent, + (unsigned long)(&engine->sgdma_regs->first_desc_adjacent) - + (unsigned long)(&engine->sgdma_regs)); dbg_tfr("ioread32(0x%p) (dummy read flushes writes).\n", &engine->regs->status); @@ -425,7 +685,11 @@ struct xdma_transfer *engine_transfer_completion(struct xdma_engine *engine, struct xdma_transfer *transfer) { BUG_ON(!engine); - BUG_ON(!transfer); + + if (unlikely(!transfer)) { + pr_info("%s: xfer empty.\n", engine->name); + return NULL; + } /* synchronous I/O? */ /* awake task on transfer's wait queue */ @@ -438,14 +702,20 @@ struct xdma_transfer *engine_service_transfer_list(struct xdma_engine *engine, struct xdma_transfer *transfer, u32 *pdesc_completed) { BUG_ON(!engine); - BUG_ON(!transfer); BUG_ON(!pdesc_completed); + if (unlikely(!transfer)) { + pr_info("%s xfer empty, pdesc completed %u.\n", + engine->name, *pdesc_completed); + return NULL; + } + /* * iterate over all the transfers completed by the engine, * except for the last (i.e. use > instead of >=). */ - while (transfer && (*pdesc_completed > transfer->desc_num)) { + while (transfer && (!transfer->cyclic) && + (*pdesc_completed > transfer->desc_num)) { /* remove this transfer from pdesc_completed */ *pdesc_completed -= transfer->desc_num; dbg_tfr("%s engine completed non-cyclic xfer 0x%p (%d desc)\n", @@ -487,15 +757,16 @@ static void engine_err_handle(struct xdma_engine *engine, */ if (engine->status & XDMA_STAT_BUSY) { value = read_register(&engine->regs->status); - if (value & XDMA_STAT_BUSY) - dbg_tfr("%s engine has errors but is still BUSY\n", + if ((value & XDMA_STAT_BUSY) && printk_ratelimit()) + pr_info("%s has errors but is still BUSY\n", engine->name); } - dbg_tfr("Aborted %s engine transfer 0x%p\n", engine->name, transfer); - dbg_tfr("%s engine was %d descriptors into transfer (with %d desc)\n", - engine->name, desc_completed, transfer->desc_num); - dbg_tfr("%s engine status = %d\n", engine->name, engine->status); + if (printk_ratelimit()) { + pr_info("%s, s 0x%x, aborted xfer 0x%p, cmpl %d/%d\n", + engine->name, engine->status, transfer, desc_completed, + transfer->desc_num); + } /* mark transfer as failed */ transfer->state = TRANSFER_STATE_FAILED; @@ -505,31 +776,34 @@ static void engine_err_handle(struct xdma_engine *engine, struct xdma_transfer *engine_service_final_transfer(struct xdma_engine *engine, struct xdma_transfer *transfer, u32 *pdesc_completed) { - u32 err_flags; BUG_ON(!engine); - BUG_ON(!transfer); BUG_ON(!pdesc_completed); - err_flags = XDMA_STAT_MAGIC_STOPPED; - err_flags |= XDMA_STAT_ALIGN_MISMATCH; - err_flags |= XDMA_STAT_READ_ERROR; - err_flags |= XDMA_STAT_DESC_ERROR; - /* inspect the current transfer */ - if (transfer) { - if (engine->status & err_flags) { + if (unlikely(!transfer)) { + pr_info("%s xfer empty, pdesc completed %u.\n", + engine->name, *pdesc_completed); + return NULL; + } else { + if (((engine->dir == 
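Reviewer note: `engine_start()` above programs one 64-bit first-descriptor bus address as two 32-bit writes via `PCI_DMA_L()`/`PCI_DMA_H()` (names from the hunk; the definitions below are the obvious reading, stated as an assumption):

```c
#include <stdint.h>
#include <stdio.h>

/* Assumed equivalents of the driver's PCI_DMA_L()/PCI_DMA_H() helpers. */
#define DMA_LO(addr) ((uint32_t)((uint64_t)(addr) & 0xffffffffu))
#define DMA_HI(addr) ((uint32_t)(((uint64_t)(addr) >> 32) & 0xffffffffu))

int main(void)
{
	uint64_t desc_bus = 0x0000000123456000ULL;

	/* The hunk writes lo first, then hi, matching hardware that only
	 * consumes the pair once both words are programmed. */
	printf("first_desc_lo = 0x%08x\n", DMA_LO(desc_bus)); /* 0x23456000 */
	printf("first_desc_hi = 0x%08x\n", DMA_HI(desc_bus)); /* 0x00000001 */
	return 0;
}
```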
						DMA_FROM_DEVICE) &&
+			(engine->status & XDMA_STAT_C2H_ERR_MASK)) ||
+		    ((engine->dir == DMA_TO_DEVICE) &&
+			(engine->status & XDMA_STAT_H2C_ERR_MASK))) {
+			pr_info("engine %s, status error 0x%x.\n",
+				engine->name, engine->status);
+			engine_status_dump(engine);
 			engine_err_handle(engine, transfer, *pdesc_completed);
-			return transfer;
+			goto transfer_del;
 		}
 
 		if (engine->status & XDMA_STAT_BUSY)
-			dbg_tfr("Engine %s is unexpectedly busy - ignoring\n",
+			pr_debug("engine %s is unexpectedly busy - ignoring\n",
 				engine->name);
 
 		/* the engine stopped on current transfer? */
 		if (*pdesc_completed < transfer->desc_num) {
 			transfer->state = TRANSFER_STATE_FAILED;
-			dbg_tfr("%s, xfer 0x%p, stopped half-way, %d/%d.\n",
+			pr_info("%s, xfer 0x%p, stopped half-way, %d/%d.\n",
 				engine->name, transfer, *pdesc_completed,
 				transfer->desc_num);
 		} else {
@@ -538,15 +812,18 @@ struct xdma_transfer *engine_service_final_transfer(struct xdma_engine *engine,
 			dbg_tfr("*pdesc_completed=%d, transfer->desc_num=%d",
 				*pdesc_completed, transfer->desc_num);
 
-			/*
-			 * if the engine stopped on this transfer,
-			 * it should be the last
-			 */
-			WARN_ON(*pdesc_completed > transfer->desc_num);
+			if (!transfer->cyclic) {
+				/*
+				 * if the engine stopped on this transfer,
+				 * it should be the last
+				 */
+				WARN_ON(*pdesc_completed > transfer->desc_num);
+			}
 
 			/* mark transfer as successfully completed */
 			transfer->state = TRANSFER_STATE_COMPLETED;
 		}
 
+transfer_del:
 		/* remove completed transfer from list */
 		list_del(engine->transfer_list.next);
 		/* add to dequeued number of descriptors during this run */
@@ -557,11 +834,204 @@ struct xdma_transfer *engine_service_final_transfer(struct xdma_engine *engine,
 		 * transfer has completed
 		 */
 		transfer = engine_transfer_completion(engine, transfer);
-	}
+	}
 
 	return transfer;
 }
 
+#ifdef INTERNAL_TESTING
+static void engine_service_perf(struct xdma_engine *engine, u32 desc_completed)
+{
+	BUG_ON(!engine);
+
+	/* performance measurement is running? */
+	if (engine->xdma_perf) {
+		/* a descriptor was completed? */
+		if (engine->status & XDMA_STAT_DESC_COMPLETED) {
+			engine->xdma_perf->iterations = desc_completed;
+			dbg_perf("transfer->xdma_perf->iterations=%d\n",
+				engine->xdma_perf->iterations);
+		}
+
+		/* a descriptor stopped the engine?
*/ + if (engine->status & XDMA_STAT_DESC_STOPPED) { + engine->xdma_perf->stopped = 1; + /* + * wake any XDMA_PERF_IOCTL_STOP waiting for + * the performance run to finish + */ + wake_up_interruptible(&engine->xdma_perf_wq); + dbg_perf("transfer->xdma_perf stopped\n"); + } + } +} +#endif + +static void engine_transfer_dequeue(struct xdma_engine *engine) +{ + struct xdma_transfer *transfer; + + BUG_ON(!engine); + + /* pick first transfer on the queue (was submitted to the engine) */ + transfer = list_entry(engine->transfer_list.next, struct xdma_transfer, + entry); + if (!transfer || transfer != &engine->cyclic_req->xfer) { + pr_info("%s, xfer 0x%p != 0x%p.\n", + engine->name, transfer, &engine->cyclic_req->xfer); + return; + } + dbg_tfr("%s engine completed cyclic transfer 0x%p (%d desc).\n", + engine->name, transfer, transfer->desc_num); + /* remove completed transfer from list */ + list_del(engine->transfer_list.next); +} + +static int engine_ring_process(struct xdma_engine *engine) +{ + struct xdma_result *result; + int start; + int eop_count = 0; + + BUG_ON(!engine); + result = engine->cyclic_result; + BUG_ON(!result); + + /* where we start receiving in the ring buffer */ + start = engine->rx_tail; + + /* iterate through all newly received RX result descriptors */ + dbg_tfr("%s, result %d, 0x%x, len 0x%x.\n", + engine->name, engine->rx_tail, result[engine->rx_tail].status, + result[engine->rx_tail].length); + while (result[engine->rx_tail].status && !engine->rx_overrun) { + /* EOP bit set in result? */ + if (result[engine->rx_tail].status & RX_STATUS_EOP){ + eop_count++; + } + + /* increment tail pointer */ + engine->rx_tail = (engine->rx_tail + 1) % CYCLIC_RX_PAGES_MAX; + + dbg_tfr("%s, head %d, tail %d, 0x%x, len 0x%x.\n", + engine->name, engine->rx_head, engine->rx_tail, + result[engine->rx_tail].status, + result[engine->rx_tail].length); + + /* overrun? */ + if (engine->rx_tail == engine->rx_head) { + dbg_tfr("%s: overrun\n", engine->name); + /* flag to user space that overrun has occurred */ + engine->rx_overrun = 1; + } + } + + return eop_count; +} + +static int engine_service_cyclic_polled(struct xdma_engine *engine) +{ + int eop_count = 0; + int rc = 0; + struct xdma_poll_wb *writeback_data; + u32 sched_limit = 0; + + BUG_ON(!engine); + BUG_ON(engine->magic != MAGIC_ENGINE); + + writeback_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; + + while (eop_count == 0) { + if (sched_limit != 0) { + if ((sched_limit % NUM_POLLS_PER_SCHED) == 0) + schedule(); + } + sched_limit++; + + /* Monitor descriptor writeback address for errors */ + if ((writeback_data->completed_desc_count) & WB_ERR_MASK) { + rc = -1; + break; + } + + eop_count = engine_ring_process(engine); + } + + if (eop_count == 0) { + engine_status_read(engine, 1, 0); + if ((engine->running) && !(engine->status & XDMA_STAT_BUSY)) { + /* transfers on queue? 
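Reviewer note: `engine_ring_process()` above advances a tail index through a fixed ring of RX result slots and flags an overrun when the tail catches the head. A compressed user-space model of that bookkeeping (ring size and field names simplified):

```c
#include <stdio.h>

#define RING_PAGES 8	/* stand-in for CYCLIC_RX_PAGES_MAX */

struct ring {
	int head;	/* next slot the reader will consume */
	int tail;	/* next slot the hardware fills */
	int overrun;
};

/* Advance the tail past 'completed' newly filled slots; stop early and
 * flag an overrun if the producer laps the consumer. */
static void ring_advance(struct ring *r, int completed)
{
	while (completed-- > 0 && !r->overrun) {
		r->tail = (r->tail + 1) % RING_PAGES;
		if (r->tail == r->head)
			r->overrun = 1;	/* reader must drain, then clear */
	}
}

int main(void)
{
	struct ring r = { .head = 0, .tail = 0, .overrun = 0 };

	ring_advance(&r, RING_PAGES);	/* fill the entire ring */
	printf("tail=%d overrun=%d\n", r.tail, r.overrun); /* tail=0 overrun=1 */
	return 0;
}
```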
*/ + if (!list_empty(&engine->transfer_list)) + engine_transfer_dequeue(engine); + + engine_service_shutdown(engine); + } + } + + return rc; +} + +static int engine_service_cyclic_interrupt(struct xdma_engine *engine) +{ + int eop_count = 0; + struct xdma_transfer *xfer; + + BUG_ON(!engine); + BUG_ON(engine->magic != MAGIC_ENGINE); + + engine_status_read(engine, 1, 0); + + eop_count = engine_ring_process(engine); + /* + * wake any reader on EOP, as one or more packets are now in + * the RX buffer + */ + xfer = &engine->cyclic_req->xfer; + if(enable_credit_mp){ + if (eop_count > 0) { + //engine->eop_found = 1; + } + wake_up_interruptible(&xfer->wq); + }else{ + if (eop_count > 0) { + /* awake task on transfer's wait queue */ + dbg_tfr("wake_up_interruptible() due to %d EOP's\n", eop_count); + engine->eop_found = 1; + wake_up_interruptible(&xfer->wq); + } + } + + /* engine was running but is no longer busy? */ + if ((engine->running) && !(engine->status & XDMA_STAT_BUSY)) { + /* transfers on queue? */ + if (!list_empty(&engine->transfer_list)) + engine_transfer_dequeue(engine); + + engine_service_shutdown(engine); + } + + return 0; +} + +/* must be called with engine->lock already acquired */ +static int engine_service_cyclic(struct xdma_engine *engine) +{ + int rc = 0; + + dbg_tfr("engine_service_cyclic()"); + + BUG_ON(!engine); + BUG_ON(engine->magic != MAGIC_ENGINE); + + if (poll_mode) + rc = engine_service_cyclic_polled(engine); + else + rc = engine_service_cyclic_interrupt(engine); + + return rc; +} + + static void engine_service_resume(struct xdma_engine *engine) { struct xdma_transfer *transfer_started; @@ -570,16 +1040,17 @@ static void engine_service_resume(struct xdma_engine *engine) /* engine stopped? */ if (!engine->running) { + /* in the case of shutdown, let it finish what's in the Q */ + if (!list_empty(&engine->transfer_list)) { + /* (re)start engine */ + transfer_started = engine_start(engine); + pr_info("re-started %s engine with pending xfer 0x%p\n", + engine->name, transfer_started); /* engine was requested to be shutdown? */ - if (engine->shutdown & ENGINE_SHUTDOWN_REQUEST) { + } else if (engine->shutdown & ENGINE_SHUTDOWN_REQUEST) { engine->shutdown |= ENGINE_SHUTDOWN_IDLE; /* awake task on engine's shutdown wait queue */ wake_up_interruptible(&engine->shutdown_wq); - } else if (!list_empty(&engine->transfer_list)) { - /* (re)start engine */ - transfer_started = engine_start(engine); - dbg_tfr("re-started %s engine with pending xfer 0x%p\n", - engine->name, transfer_started); } else { dbg_tfr("no pending transfers, %s engine stays idle.\n", engine->name); @@ -587,7 +1058,7 @@ static void engine_service_resume(struct xdma_engine *engine) } else { /* engine is still running? 
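Reviewer note: the EOP handling in `engine_service_cyclic_interrupt()` above pairs with a reader blocked on the transfer's wait queue. The minimal shape of that handshake, with illustrative names (the real condition also involves `engine->eop_found` and ring state):

```c
#include <linux/wait.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(rx_wq);
static int eop_found;

/* interrupt / bottom-half side: record the event, then wake readers */
static void producer_saw_eop(void)
{
	eop_found = 1;
	wake_up_interruptible(&rx_wq);
}

/* read() side: sleep until an end-of-packet marker arrives */
static int consumer_wait(void)
{
	return wait_event_interruptible(rx_wq, eop_found);
}
```

Setting the flag before the wake matters: `wait_event_interruptible()` re-checks the condition after waking, so a wake with a stale flag is harmless while the reverse order can lose an event.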
*/ if (list_empty(&engine->transfer_list)) { - dbg_tfr("no queued transfers but %s engine running!\n", + pr_warn("no queued transfers but %s engine running!\n", engine->name); WARN_ON(1); } @@ -602,14 +1073,20 @@ static void engine_service_resume(struct xdma_engine *engine) * @engine pointer to struct xdma_engine * */ -static int engine_service(struct xdma_engine *engine) +static int engine_service(struct xdma_engine *engine, int desc_writeback) { struct xdma_transfer *transfer = NULL; - u32 desc_count; - int rc = 0; + u32 desc_count = desc_writeback & WB_COUNT_MASK; + u32 err_flag = desc_writeback & WB_ERR_MASK; + int rv = 0; + struct xdma_poll_wb *wb_data; BUG_ON(!engine); + /* If polling detected an error, signal to the caller */ + if (err_flag) + rv = -1; + /* Service the engine */ if (!engine->running) { dbg_tfr("Engine was not running!!! Clearing status\n"); @@ -622,13 +1099,15 @@ static int engine_service(struct xdma_engine *engine) * engine status. For polled mode descriptor completion, this read is * unnecessary and is skipped to reduce latency */ - engine_status_read(engine, 1, 0); + if ((desc_count == 0) || (err_flag != 0)) + engine_status_read(engine, 1, 0); /* * engine was running but is no longer busy, or writeback occurred, * shut down */ - if (engine->running && !(engine->status & XDMA_STAT_BUSY)) + if ((engine->running && !(engine->status & XDMA_STAT_BUSY)) || + (desc_count != 0)) engine_service_shutdown(engine); /* @@ -637,8 +1116,8 @@ static int engine_service(struct xdma_engine *engine) * from HW. In polled mode descriptor completion, this read is * unnecessary and is skipped to reduce latency */ - desc_count = read_register(&engine->regs->completed_desc_count); - + if (!desc_count) + desc_count = read_register(&engine->regs->completed_desc_count); dbg_tfr("desc_count = %d\n", desc_count); /* transfers on queue? 
*/ @@ -653,6 +1132,10 @@ static int engine_service(struct xdma_engine *engine) dbg_tfr("Engine completed %d desc, %d not yet dequeued\n", (int)desc_count, (int)desc_count - engine->desc_dequeued); + +#ifdef INTERNAL_TESTING + engine_service_perf(engine, desc_count); +#endif } /* account for already dequeued transfers during this engine run */ @@ -667,10 +1150,16 @@ static int engine_service(struct xdma_engine *engine) */ transfer = engine_service_final_transfer(engine, transfer, &desc_count); + /* Before starting engine again, clear the writeback data */ + if (poll_mode) { + wb_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; + wb_data->completed_desc_count = 0; + } + /* Restart the engine following the servicing */ engine_service_resume(engine); - return rc; + return 0; } /* engine_service_work */ @@ -687,20 +1176,117 @@ static void engine_service_work(struct work_struct *work) dbg_tfr("engine_service() for %s engine %p\n", engine->name, engine); - engine_service(engine); + if (engine->cyclic_req) + engine_service_cyclic(engine); + else + engine_service(engine, 0); /* re-enable interrupts for this engine */ - if(engine->lro->msix_enabled){ + if (engine->xdev->msix_enabled){ write_register(engine->interrupt_enable_mask_value, - &engine->regs->interrupt_enable_mask_w1s); - }else{ - channel_interrupts_enable(engine->lro, engine->irq_bitmask); - } + &engine->regs->interrupt_enable_mask_w1s, + (unsigned long)(&engine->regs->interrupt_enable_mask_w1s) - + (unsigned long)(&engine->regs)); + } else + channel_interrupts_enable(engine->xdev, engine->irq_bitmask); + /* unlock the engine */ spin_unlock_irqrestore(&engine->lock, flags); } -static irqreturn_t user_irq_service(int irq, struct xdma_irq *user_irq) +static u32 engine_service_wb_monitor(struct xdma_engine *engine, + u32 expected_wb) +{ + struct xdma_poll_wb *wb_data; + u32 desc_wb = 0; + u32 sched_limit = 0; + unsigned long timeout; + + BUG_ON(!engine); + wb_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; + + /* + * Poll the writeback location for the expected number of + * descriptors / error events This loop is skipped for cyclic mode, + * where the expected_desc_count passed in is zero, since it cannot be + * determined before the function is called + */ + + timeout = jiffies + (POLL_TIMEOUT_SECONDS * HZ); + while (expected_wb != 0) { + desc_wb = wb_data->completed_desc_count; + + if (desc_wb & WB_ERR_MASK) + break; + else if (desc_wb == expected_wb) + break; + + /* RTO - prevent system from hanging in polled mode */ + if (time_after(jiffies, timeout)) { + dbg_tfr("Polling timeout occurred"); + dbg_tfr("desc_wb = 0x%08x, expected 0x%08x\n", desc_wb, + expected_wb); + if ((desc_wb & WB_COUNT_MASK) > expected_wb) + desc_wb = expected_wb | WB_ERR_MASK; + + break; + } + + /* + * Define NUM_POLLS_PER_SCHED to limit how much time is spent + * in the scheduler + */ + + if (sched_limit != 0) { + if ((sched_limit % NUM_POLLS_PER_SCHED) == 0) + schedule(); + } + sched_limit++; + } + + return desc_wb; +} + +static int engine_service_poll(struct xdma_engine *engine, + u32 expected_desc_count) +{ + struct xdma_poll_wb *writeback_data; + u32 desc_wb = 0; + unsigned long flags; + int rv = 0; + + BUG_ON(!engine); + BUG_ON(engine->magic != MAGIC_ENGINE); + + writeback_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; + + if ((expected_desc_count & WB_COUNT_MASK) != expected_desc_count) { + dbg_tfr("Queued descriptor count is larger than supported\n"); + return -1; + } + + /* + * Poll the writeback location for the expected 
number of + * descriptors / error events This loop is skipped for cyclic mode, + * where the expected_desc_count passed in is zero, since it cannot be + * determined before the function is called + */ + + desc_wb = engine_service_wb_monitor(engine, expected_desc_count); + + spin_lock_irqsave(&engine->lock, flags); + dbg_tfr("%s service.\n", engine->name); + if (engine->cyclic_req) { + rv = engine_service_cyclic(engine); + } else { + rv = engine_service(engine, desc_wb); + } + spin_unlock_irqrestore(&engine->lock, flags); + + return rv; +} + +static irqreturn_t user_irq_service(int irq, struct xdma_user_irq *user_irq) { unsigned long flags; @@ -728,23 +1314,21 @@ static irqreturn_t xdma_isr(int irq, void *dev_id) { u32 ch_irq; u32 user_irq; - struct xdma_dev *lro; + u32 mask; + struct xdma_dev *xdev; struct interrupt_regs *irq_regs; - int user_irq_bit; - struct xdma_engine *engine; - int channel; - dbg_irq("(irq=%d) <<<< INTERRUPT SERVICE ROUTINE\n", irq); + dbg_irq("(irq=%d, dev 0x%p) <<<< ISR.\n", irq, dev_id); BUG_ON(!dev_id); - lro = (struct xdma_dev *)dev_id; + xdev = (struct xdma_dev *)dev_id; - if (!lro) { - WARN_ON(!lro); - dbg_irq("xdma_isr(irq=%d) lro=%p ??\n", irq, lro); + if (!xdev) { + WARN_ON(!xdev); + dbg_irq("xdma_isr(irq=%d) xdev=%p ??\n", irq, xdev); return IRQ_NONE; } - irq_regs = (struct interrupt_regs *)(lro->bar[lro->config_bar_idx] + + irq_regs = (struct interrupt_regs *)(xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); /* read channel interrupt requests */ @@ -755,40 +1339,65 @@ static irqreturn_t xdma_isr(int irq, void *dev_id) * disable all interrupts that fired; these are re-enabled individually * after the causing module has been fully serviced. */ - channel_interrupts_disable(lro, ch_irq); + if (ch_irq) + channel_interrupts_disable(xdev, ch_irq); /* read user interrupts - this read also flushes the above write */ user_irq = read_register(&irq_regs->user_int_request); dbg_irq("user_irq = 0x%08x\n", user_irq); - for (user_irq_bit = 0; user_irq_bit < MAX_USER_IRQ; user_irq_bit++) { - if (user_irq & (1 << user_irq_bit)) - user_irq_service(user_irq_bit, &lro->user_irq[user_irq_bit]); + if (user_irq) { + int user = 0; + u32 mask = 1; + int max = xdev->h2c_channel_max; + + for (; user < max && user_irq; user++, mask <<= 1) { + if (user_irq & mask) { + user_irq &= ~mask; + user_irq_service(irq, &xdev->user_irq[user]); + } + } } - /* iterate over H2C (PCIe read) */ - for (channel = 0; channel < XDMA_CHANNEL_NUM_MAX; channel++) { - engine = &lro->engine_h2c[channel]; - /* engine present and its interrupt fired? */ - if ((engine->magic == MAGIC_ENGINE) && - (engine->irq_bitmask & ch_irq)) { - dbg_tfr("schedule_work(engine=%p)\n", engine); - schedule_work(&engine->work); + mask = ch_irq & xdev->mask_irq_h2c; + if (mask) { + int channel = 0; + int max = xdev->h2c_channel_max; + + /* iterate over H2C (PCIe read) */ + for (channel = 0; channel < max && mask; channel++) { + struct xdma_engine *engine = &xdev->engine_h2c[channel]; + + /* engine present and its interrupt fired? */ + if((engine->irq_bitmask & mask) && + (engine->magic == MAGIC_ENGINE)) { + mask &= ~engine->irq_bitmask; + dbg_tfr("schedule_work, %s.\n", engine->name); + schedule_work(&engine->work); + } } } - /* iterate over C2H (PCIe write) */ - for (channel = 0; channel < XDMA_CHANNEL_NUM_MAX; channel++) { - engine = &lro->engine_c2h[channel]; - /* engine present and its interrupt fired? 
*/ - if ((engine->magic == MAGIC_ENGINE) && - (engine->irq_bitmask & ch_irq)) { - dbg_tfr("schedule_work(engine=%p)\n", engine); - schedule_work(&engine->work); + mask = ch_irq & xdev->mask_irq_c2h; + if (mask) { + int channel = 0; + int max = xdev->c2h_channel_max; + + /* iterate over C2H (PCIe write) */ + for (channel = 0; channel < max && mask; channel++) { + struct xdma_engine *engine = &xdev->engine_c2h[channel]; + + /* engine present and its interrupt fired? */ + if((engine->irq_bitmask & mask) && + (engine->magic == MAGIC_ENGINE)) { + mask &= ~engine->irq_bitmask; + dbg_tfr("schedule_work, %s.\n", engine->name); + schedule_work(&engine->work); + } } } - lro->irq_count++; + xdev->irq_count++; return IRQ_HANDLED; } @@ -799,12 +1408,12 @@ static irqreturn_t xdma_isr(int irq, void *dev_id) */ static irqreturn_t xdma_user_irq(int irq, void *dev_id) { - struct xdma_irq *user_irq; + struct xdma_user_irq *user_irq; dbg_irq("(irq=%d) <<<< INTERRUPT SERVICE ROUTINE\n", irq); BUG_ON(!dev_id); - user_irq = (struct xdma_irq *)dev_id; + user_irq = (struct xdma_user_irq *)dev_id; return user_irq_service(irq, user_irq); } @@ -816,7 +1425,7 @@ static irqreturn_t xdma_user_irq(int irq, void *dev_id) */ static irqreturn_t xdma_channel_irq(int irq, void *dev_id) { - struct xdma_dev *lro; + struct xdma_dev *xdev; struct xdma_engine *engine; struct interrupt_regs *irq_regs; @@ -824,21 +1433,23 @@ static irqreturn_t xdma_channel_irq(int irq, void *dev_id) BUG_ON(!dev_id); engine = (struct xdma_engine *)dev_id; - lro = engine->lro; + xdev = engine->xdev; - if (!lro) { - WARN_ON(!lro); - dbg_irq("xdma_channel_irq(irq=%d) lro=%p ??\n", irq, lro); + if (!xdev) { + WARN_ON(!xdev); + dbg_irq("xdma_channel_irq(irq=%d) xdev=%p ??\n", irq, xdev); return IRQ_NONE; } - irq_regs = (struct interrupt_regs *)(lro->bar[lro->config_bar_idx] + + irq_regs = (struct interrupt_regs *)(xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); /* Disable the interrupt for this engine */ write_register(engine->interrupt_enable_mask_value, - &engine->regs->interrupt_enable_mask_w1c); - + &engine->regs->interrupt_enable_mask_w1c, + (unsigned long) + (&engine->regs->interrupt_enable_mask_w1c) - + (unsigned long)(&engine->regs)); /* Dummy read to flush the above write */ read_register(&irq_regs->channel_int_pending); /* Schedule the bottom half */ @@ -848,29 +1459,29 @@ static irqreturn_t xdma_channel_irq(int irq, void *dev_id) * RTO - need to protect access here if multiple MSI-X are used for * user interrupts */ - lro->irq_count++; + xdev->irq_count++; return IRQ_HANDLED; } /* * Unmap the BAR regions that had been mapped earlier using map_bars() */ -static void unmap_bars(struct xdma_dev *lro, struct pci_dev *dev) +static void unmap_bars(struct xdma_dev *xdev, struct pci_dev *dev) { int i; for (i = 0; i < XDMA_BAR_NUM; i++) { /* is this BAR mapped? */ - if (lro->bar[i]) { + if (xdev->bar[i]) { /* unmap BAR */ - pci_iounmap(dev, lro->bar[i]); + pci_iounmap(dev, xdev->bar[i]); /* mark as unmapped */ - lro->bar[i] = NULL; + xdev->bar[i] = NULL; } } } -static int map_single_bar(struct xdma_dev *lro, struct pci_dev *dev, int idx) +static int map_single_bar(struct xdma_dev *xdev, struct pci_dev *dev, int idx) { resource_size_t bar_start; resource_size_t bar_len; @@ -880,17 +1491,17 @@ static int map_single_bar(struct xdma_dev *lro, struct pci_dev *dev, int idx) bar_len = pci_resource_len(dev, idx); map_len = bar_len; - lro->bar[idx] = NULL; + xdev->bar[idx] = NULL; /* do not map BARs with length 0. Note that start MAY be 0! 
*/ if (!bar_len) { - dbg_init("BAR #%d is not present - skipping\n", idx); + //pr_info("BAR #%d is not present - skipping\n", idx); return 0; } /* BAR size exceeds maximum desired mapping? */ if (bar_len > INT_MAX) { - dbg_init("Limit BAR %d mapping from %llu to %d bytes\n", idx, + pr_info("Limit BAR %d mapping from %llu to %d bytes\n", idx, (u64)bar_len, INT_MAX); map_len = (resource_size_t)INT_MAX; } @@ -899,46 +1510,47 @@ static int map_single_bar(struct xdma_dev *lro, struct pci_dev *dev, int idx) * address space */ dbg_init("BAR%d: %llu bytes to be mapped.\n", idx, (u64)map_len); - lro->bar[idx] = pci_iomap(dev, idx, map_len); + xdev->bar[idx] = pci_iomap(dev, idx, map_len); - if (!lro->bar[idx]) { - dbg_init("Could not map BAR %d.\n", idx); + if (!xdev->bar[idx]) { + pr_info("Could not map BAR %d.\n", idx); return -1; } - dbg_init("BAR%d at 0x%llx mapped at 0x%p, length=%llu(/%llu)\n", idx, - (u64)bar_start, lro->bar[idx], (u64)map_len, (u64)bar_len); + pr_info("BAR%d at 0x%llx mapped at 0x%p, length=%llu(/%llu)\n", idx, + (u64)bar_start, xdev->bar[idx], (u64)map_len, (u64)bar_len); return (int)map_len; } -static int is_config_bar(struct xdma_dev *lro, int idx) +static int is_config_bar(struct xdma_dev *xdev, int idx) { u32 irq_id = 0; u32 cfg_id = 0; int flag = 0; u32 mask = 0xffff0000; /* Compare only XDMA ID's not Version number */ struct interrupt_regs *irq_regs = - (struct interrupt_regs *) (lro->bar[idx] + XDMA_OFS_INT_CTRL); + (struct interrupt_regs *) (xdev->bar[idx] + XDMA_OFS_INT_CTRL); struct config_regs *cfg_regs = - (struct config_regs *)(lro->bar[idx] + XDMA_OFS_CONFIG); + (struct config_regs *)(xdev->bar[idx] + XDMA_OFS_CONFIG); irq_id = read_register(&irq_regs->identifier); cfg_id = read_register(&cfg_regs->identifier); - if (((irq_id & mask)== IRQ_BLOCK_ID) && ((cfg_id & mask)== CONFIG_BLOCK_ID)) { + if (((irq_id & mask)== IRQ_BLOCK_ID) && + ((cfg_id & mask)== CONFIG_BLOCK_ID)) { dbg_init("BAR %d is the XDMA config BAR\n", idx); flag = 1; } else { - dbg_init("BAR %d is not XDMA config BAR\n", idx); - dbg_init("BAR %d is NOT the XDMA config BAR: 0x%x, 0x%x.\n", idx, irq_id, cfg_id); + dbg_init("BAR %d is NOT the XDMA config BAR: 0x%x, 0x%x.\n", + idx, irq_id, cfg_id); flag = 0; } return flag; } -static void identify_bars(struct xdma_dev *lro, int *bar_id_list, int num_bars, +static void identify_bars(struct xdma_dev *xdev, int *bar_id_list, int num_bars, int config_bar_pos) { /* @@ -955,11 +1567,11 @@ static void identify_bars(struct xdma_dev *lro, int *bar_id_list, int num_bars, * correctly with both 32-bit and 64-bit BARs. 
*/ - BUG_ON(!lro); + BUG_ON(!xdev); BUG_ON(!bar_id_list); - dbg_init("lro 0x%p, bars %d, config at %d.\n", - lro, num_bars, config_bar_pos); + dbg_init("xdev 0x%p, bars %d, config at %d.\n", + xdev, num_bars, config_bar_pos); switch (num_bars) { case 1: @@ -968,39 +1580,38 @@ static void identify_bars(struct xdma_dev *lro, int *bar_id_list, int num_bars, case 2: if (config_bar_pos == 0) { - lro->bypass_bar_idx = bar_id_list[1]; - dbg_init("bypass bar %d.\n", lro->bypass_bar_idx); + xdev->bypass_bar_idx = bar_id_list[1]; } else if (config_bar_pos == 1) { - lro->user_bar_idx = bar_id_list[0]; - dbg_init("user bar %d.\n", lro->user_bar_idx); + xdev->user_bar_idx = bar_id_list[0]; } else { - dbg_init("2, XDMA config BAR unexpected %d.\n", + pr_info("2, XDMA config BAR unexpected %d.\n", config_bar_pos); } break; case 3: case 4: - if (config_bar_pos == 1 || config_bar_pos == 2) { + if ((config_bar_pos == 1) || (config_bar_pos == 2)) { /* user bar at bar #0 */ - lro->user_bar_idx = bar_id_list[0]; + xdev->user_bar_idx = bar_id_list[0]; /* bypass bar at the last bar */ - lro->bypass_bar_idx = bar_id_list[num_bars - 1]; - dbg_init("bypass bar %d, user bar %d.\n", - lro->bypass_bar_idx, lro->user_bar_idx); + xdev->bypass_bar_idx = bar_id_list[num_bars - 1]; } else { - dbg_init("3/4, XDMA config BAR unexpected %d.\n", + pr_info("3/4, XDMA config BAR unexpected %d.\n", config_bar_pos); } break; default: /* Should not occur - warn user but safe to continue */ - dbg_init("Unexpected number of BARs (%d)\n", num_bars); - dbg_init("Only XDMA config BAR accessible\n"); + pr_info("Unexpected # BARs (%d), XDMA config BAR only.\n", + num_bars); break; } + pr_info("%d BARs: config %d, user %d, bypass %d.\n", + num_bars, config_bar_pos, xdev->user_bar_idx, + xdev->bypass_bar_idx); } /* map_bars() -- map device regions into kernel virtual address space @@ -1008,9 +1619,9 @@ static void identify_bars(struct xdma_dev *lro, int *bar_id_list, int num_bars, * Map the device memory regions into kernel virtual address space after * verifying their sizes respect the minimum sizes needed */ -static int map_bars(struct xdma_dev *lro, struct pci_dev *dev) +static int map_bars(struct xdma_dev *xdev, struct pci_dev *dev) { - int rc; + int rv; int i; int bar_id_list[XDMA_BAR_NUM]; int bar_id_idx = 0; @@ -1020,22 +1631,22 @@ static int map_bars(struct xdma_dev *lro, struct pci_dev *dev) for (i = 0; i < XDMA_BAR_NUM; i++) { int bar_len; - bar_len = map_single_bar(lro, dev, i); + bar_len = map_single_bar(xdev, dev, i); if (bar_len == 0) { continue; } else if (bar_len < 0) { - rc = -1; + rv = -EINVAL; goto fail; } /* Try to identify BAR as XDMA control BAR */ - if ((bar_len >= XDMA_BAR_SIZE) && (lro->config_bar_idx < 0)) { + if ((bar_len >= XDMA_BAR_SIZE) && (xdev->config_bar_idx < 0)) { - if (is_config_bar(lro, i)) { - lro->config_bar_idx = i; + if (is_config_bar(xdev, i)) { + xdev->config_bar_idx = i; config_bar_pos = bar_id_idx; - dbg_init("config bar %d, pos %d.\n", - lro->config_bar_idx, config_bar_pos); + pr_info("config bar %d, pos %d.\n", + xdev->config_bar_idx, config_bar_pos); } } @@ -1044,24 +1655,433 @@ static int map_bars(struct xdma_dev *lro, struct pci_dev *dev) } /* The XDMA config BAR must always be present */ - if (lro->config_bar_idx < 0) { - dbg_init("Failed to detect XDMA config BAR\n"); - rc = -1; + if (xdev->config_bar_idx < 0) { + pr_info("Failed to detect XDMA config BAR\n"); + rv = -EINVAL; goto fail; } - identify_bars(lro, bar_id_list, bar_id_idx, config_bar_pos); + identify_bars(xdev, bar_id_list, 
bar_id_idx, config_bar_pos); /* successfully mapped all required BAR regions */ - rc = 0; - goto success; + return 0; + fail: /* unwind; unmap any BARs that we did map */ - unmap_bars(lro, dev); -success: - return rc; + unmap_bars(xdev, dev); + return rv; +} + +/* + * MSI-X interrupt: + * vectors, followed by vectors + */ + +/* + * RTO - code to detect if MSI/MSI-X capability exists is derived + * from linux/pci/msi.c - pci_msi_check_device + */ + +#ifndef arch_msi_check_device +int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) +{ + return 0; +} +#endif + +/* type = PCI_CAP_ID_MSI or PCI_CAP_ID_MSIX */ +static int msi_msix_capable(struct pci_dev *dev, int type) +{ + struct pci_bus *bus; + int ret; + + if (!dev || dev->no_msi) + return 0; + + for (bus = dev->bus; bus; bus = bus->parent) + if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) + return 0; + + ret = arch_msi_check_device(dev, 1, type); + if (ret) + return 0; + + if (!pci_find_capability(dev, type)) + return 0; + + return 1; +} + +static void disable_msi_msix(struct xdma_dev *xdev, struct pci_dev *pdev) +{ + if (xdev->msix_enabled) { + pci_disable_msix(pdev); + xdev->msix_enabled = 0; + } else if (xdev->msi_enabled) { + pci_disable_msi(pdev); + xdev->msi_enabled = 0; + } +} + +static int enable_msi_msix(struct xdma_dev *xdev, struct pci_dev *pdev) +{ + int rv = 0; + + BUG_ON(!xdev); + BUG_ON(!pdev); + + if (!interrupt_mode && msi_msix_capable(pdev, PCI_CAP_ID_MSIX)) { + int req_nvec = xdev->c2h_channel_max + xdev->h2c_channel_max + + xdev->user_max; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) + dbg_init("Enabling MSI-X\n"); + rv = pci_alloc_irq_vectors(pdev, req_nvec, req_nvec, + PCI_IRQ_MSIX); +#else + int i; + + dbg_init("Enabling MSI-X\n"); + for (i = 0; i < req_nvec; i++) + xdev->entry[i].entry = i; + + rv = pci_enable_msix(pdev, xdev->entry, req_nvec); +#endif + if (rv < 0) + dbg_init("Couldn't enable MSI-X mode: %d\n", rv); + + xdev->msix_enabled = 1; + + } else if (interrupt_mode == 1 && + msi_msix_capable(pdev, PCI_CAP_ID_MSI)) { + /* enable message signalled interrupts */ + dbg_init("pci_enable_msi()\n"); + rv = pci_enable_msi(pdev); + if (rv < 0) + dbg_init("Couldn't enable MSI mode: %d\n", rv); + xdev->msi_enabled = 1; + + } else { + dbg_init("MSI/MSI-X not detected - using legacy interrupts\n"); + } + + return rv; +} + +static void pci_check_intr_pend(struct pci_dev *pdev) +{ + u16 v; + + pci_read_config_word(pdev, PCI_STATUS, &v); + if (v & PCI_STATUS_INTERRUPT) { + pr_info("%s PCI STATUS Interrupt pending 0x%x.\n", + dev_name(&pdev->dev), v); + pci_write_config_word(pdev, PCI_STATUS, PCI_STATUS_INTERRUPT); + } +} + +static void pci_keep_intx_enabled(struct pci_dev *pdev) +{ + /* workaround to a h/w bug: + * when msix/msi become unavaile, default to legacy. + * However the legacy enable was not checked. 
+ * If the legacy was disabled, no ack then everything stuck + */ + u16 pcmd, pcmd_new; + + pci_read_config_word(pdev, PCI_COMMAND, &pcmd); + pcmd_new = pcmd & ~PCI_COMMAND_INTX_DISABLE; + if (pcmd_new != pcmd) { + pr_info("%s: clear INTX_DISABLE, 0x%x -> 0x%x.\n", + dev_name(&pdev->dev), pcmd, pcmd_new); + pci_write_config_word(pdev, PCI_COMMAND, pcmd_new); + } +} + +static void prog_irq_msix_user(struct xdma_dev *xdev, bool clear) +{ + /* user */ + struct interrupt_regs *int_regs = (struct interrupt_regs *) + (xdev->bar[xdev->config_bar_idx] + + XDMA_OFS_INT_CTRL); + u32 i = xdev->c2h_channel_max + xdev->h2c_channel_max; + u32 max = i + xdev->user_max; + int j; + + for (j = 0; i < max; j++) { + u32 val = 0; + int k; + int shift = 0; + + if (clear) + i += 4; + else + for (k = 0; k < 4 && i < max; i++, k++, shift += 8) + val |= (i & 0x1f) << shift; + + write_register(val, &int_regs->user_msi_vector[j], + XDMA_OFS_INT_CTRL + + ((unsigned long)&int_regs->user_msi_vector[j] - + (unsigned long)int_regs)); + + dbg_init("vector %d, 0x%x.\n", j, val); + } +} + +static void prog_irq_msix_channel(struct xdma_dev *xdev, bool clear) +{ + struct interrupt_regs *int_regs = (struct interrupt_regs *) + (xdev->bar[xdev->config_bar_idx] + + XDMA_OFS_INT_CTRL); + u32 max = xdev->c2h_channel_max + xdev->h2c_channel_max; + u32 i; + int j; + + /* engine */ + for (i = 0, j = 0; i < max; j++) { + u32 val = 0; + int k; + int shift = 0; + + if (clear) + i += 4; + else + for (k = 0; k < 4 && i < max; i++, k++, shift += 8) + val |= (i & 0x1f) << shift; + + write_register(val, &int_regs->channel_msi_vector[j], + XDMA_OFS_INT_CTRL + + ((unsigned long)&int_regs->channel_msi_vector[j] - + (unsigned long)int_regs)); + dbg_init("vector %d, 0x%x.\n", j, val); + } +} + +static void irq_msix_channel_teardown(struct xdma_dev *xdev) +{ + struct xdma_engine *engine; + int j = 0; + int i = 0; + + if (!xdev->msix_enabled) + return; + + prog_irq_msix_channel(xdev, 1); + + engine = xdev->engine_h2c; + for (i = 0; i < xdev->h2c_channel_max; i++, j++, engine++) { + if (!engine->msix_irq_line) + break; + dbg_sg("Release IRQ#%d for engine %p\n", engine->msix_irq_line, + engine); + free_irq(engine->msix_irq_line, engine); + } + + engine = xdev->engine_c2h; + for (i = 0; i < xdev->c2h_channel_max; i++, j++, engine++) { + if (!engine->msix_irq_line) + break; + dbg_sg("Release IRQ#%d for engine %p\n", engine->msix_irq_line, + engine); + free_irq(engine->msix_irq_line, engine); + } +} + +static int irq_msix_channel_setup(struct xdma_dev *xdev) +{ + int i; + int j = xdev->h2c_channel_max; + int rv = 0; + u32 vector; + struct xdma_engine *engine; + + BUG_ON(!xdev); + if (!xdev->msix_enabled) + return 0; + + engine = xdev->engine_h2c; + for (i = 0; i < xdev->h2c_channel_max; i++, engine++) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) + vector = pci_irq_vector(xdev->pdev, i); +#else + vector = xdev->entry[i].vector; +#endif + rv = request_irq(vector, xdma_channel_irq, 0, xdev->mod_name, + engine); + if (rv) { + pr_info("requesti irq#%d failed %d, engine %s.\n", + vector, rv, engine->name); + return rv; + } + pr_info("engine %s, irq#%d.\n", engine->name, vector); + engine->msix_irq_line = vector; + } + + engine = xdev->engine_c2h; + for (i = 0; i < xdev->c2h_channel_max; i++, j++, engine++) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) + vector = pci_irq_vector(xdev->pdev, j); +#else + vector = xdev->entry[j].vector; +#endif + rv = request_irq(vector, xdma_channel_irq, 0, xdev->mod_name, + engine); + if (rv) { + pr_info("requesti 
irq#%d failed %d, engine %s.\n", + vector, rv, engine->name); + return rv; + } + pr_info("engine %s, irq#%d.\n", engine->name, vector); + engine->msix_irq_line = vector; + } + + return 0; +} + +static void irq_msix_user_teardown(struct xdma_dev *xdev) +{ + int i; + int j = xdev->h2c_channel_max + xdev->c2h_channel_max; + + BUG_ON(!xdev); + + if (!xdev->msix_enabled) + return; + + prog_irq_msix_user(xdev, 1); + + for (i = 0; i < xdev->user_max; i++, j++) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) + u32 vector = pci_irq_vector(xdev->pdev, j); +#else + u32 vector = xdev->entry[j].vector; +#endif + dbg_init("user %d, releasing IRQ#%d\n", i, vector); + free_irq(vector, &xdev->user_irq[i]); + } +} + +static int irq_msix_user_setup(struct xdma_dev *xdev) +{ + int i; + int j = xdev->h2c_channel_max + xdev->c2h_channel_max; + int rv = 0; + + /* vectors set in probe_scan_for_msi() */ + for (i = 0; i < xdev->user_max; i++, j++) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) + u32 vector = pci_irq_vector(xdev->pdev, j); +#else + u32 vector = xdev->entry[j].vector; +#endif + rv = request_irq(vector, xdma_user_irq, 0, xdev->mod_name, + &xdev->user_irq[i]); + if (rv) { + pr_info("user %d couldn't use IRQ#%d, %d\n", + i, vector, rv); + break; + } + pr_info("%d-USR-%d, IRQ#%d with 0x%p\n", xdev->idx, i, vector, + &xdev->user_irq[i]); + } + + /* If any errors occur, free IRQs that were successfully requested */ + if (rv) { + for (i--, j--; i >= 0; i--, j--) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) + u32 vector = pci_irq_vector(xdev->pdev, j); +#else + u32 vector = xdev->entry[j].vector; +#endif + free_irq(vector, &xdev->user_irq[i]); + } + } + + return rv; +} + +static int irq_msi_setup(struct xdma_dev *xdev, struct pci_dev *pdev) +{ + int rv; + + xdev->irq_line = (int)pdev->irq; + rv = request_irq(pdev->irq, xdma_isr, 0, xdev->mod_name, xdev); + if (rv) + dbg_init("Couldn't use IRQ#%d, %d\n", pdev->irq, rv); + else + dbg_init("Using IRQ#%d with 0x%p\n", pdev->irq, xdev); + + return rv; +} + +static int irq_legacy_setup(struct xdma_dev *xdev, struct pci_dev *pdev) +{ + u32 w; + u8 val; + void *reg; + int rv; + + pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &val); + dbg_init("Legacy Interrupt register value = %d\n", val); + if (val > 1) { + val--; + w = (val<<24) | (val<<16) | (val<<8)| val; + /* Program IRQ Block Channel vactor and IRQ Block User vector + * with Legacy interrupt value */ + reg = xdev->bar[xdev->config_bar_idx] + 0x2080; // IRQ user + write_register(w, reg, 0x2080); + write_register(w, reg+0x4, 0x2084); + write_register(w, reg+0x8, 0x2088); + write_register(w, reg+0xC, 0x208C); + reg = xdev->bar[xdev->config_bar_idx] + 0x20A0; // IRQ Block + write_register(w, reg, 0x20A0); + write_register(w, reg+0x4, 0x20A4); + } + + xdev->irq_line = (int)pdev->irq; + rv = request_irq(pdev->irq, xdma_isr, IRQF_SHARED, xdev->mod_name, + xdev); + if (rv) + dbg_init("Couldn't use IRQ#%d, %d\n", pdev->irq, rv); + else + dbg_init("Using IRQ#%d with 0x%p\n", pdev->irq, xdev); + + return rv; +} + +static void irq_teardown(struct xdma_dev *xdev) +{ + if (xdev->msix_enabled) { + irq_msix_channel_teardown(xdev); + irq_msix_user_teardown(xdev); + } else if (xdev->irq_line != -1) { + dbg_init("Releasing IRQ#%d\n", xdev->irq_line); + free_irq(xdev->irq_line, xdev); + } +} + +static int irq_setup(struct xdma_dev *xdev, struct pci_dev *pdev) +{ + pci_keep_intx_enabled(pdev); + + if (xdev->msix_enabled) { + int rv = irq_msix_channel_setup(xdev); + if (rv) + return rv; + rv = 
irq_msix_user_setup(xdev); + if (rv) + return rv; + prog_irq_msix_channel(xdev, 0); + prog_irq_msix_user(xdev, 0); + + return 0; + } else if (xdev->msi_enabled) + return irq_msi_setup(xdev, pdev); + + return irq_legacy_setup(xdev, pdev); } +#ifdef __LIBXDMA_DEBUG__ static void dump_desc(struct xdma_desc *desc_virt) { int j; @@ -1081,7 +2101,7 @@ static void dump_desc(struct xdma_desc *desc_virt) le32_to_cpu(*p), field_name[j]); p++; } - dbg_desc("\n"); + pr_info("\n"); } static void transfer_dump(struct xdma_transfer *transfer) @@ -1089,15 +2109,17 @@ static void transfer_dump(struct xdma_transfer *transfer) int i; struct xdma_desc *desc_virt = transfer->desc_virt; - pr_info("transfer 0x%p, state 0x%x, f 0x%x, dir %d, len %u, last %d.\n", + pr_info("xfer 0x%p, state 0x%x, f 0x%x, dir %d, len %u, last %d.\n", transfer, transfer->state, transfer->flags, transfer->dir, - transfer->xfer_len, transfer->last_in_request); + transfer->len, transfer->last_in_request); + pr_info("transfer 0x%p, desc %d, bus 0x%llx, adj %d.\n", transfer, transfer->desc_num, (u64)transfer->desc_bus, transfer->desc_adjacent); for (i = 0; i < transfer->desc_num; i += 1) dump_desc(desc_virt + i); } +#endif /* __LIBXDMA_DEBUG__ */ /* xdma_desc_alloc() - Allocate cache-coherent array of N descriptors. * @@ -1113,28 +2135,19 @@ static void transfer_dump(struct xdma_transfer *transfer) * @return Virtual address of the first descriptor * */ -static struct xdma_desc *xdma_desc_alloc(struct pci_dev *dev, int number, - dma_addr_t *desc_bus_p) +static void transfer_desc_init(struct xdma_transfer *transfer, int count) { - struct xdma_desc *desc_virt; /* virtual address */ - dma_addr_t desc_bus; /* bus address */ + struct xdma_desc *desc_virt = transfer->desc_virt; + dma_addr_t desc_bus = transfer->desc_bus; int i; - int adj = number - 1; + int adj = count - 1; int extra_adj; u32 temp_control; - BUG_ON(number < 1); - - /* allocate a set of cache-coherent contiguous pages */ - desc_virt = (struct xdma_desc *)pci_alloc_consistent(dev, - number * sizeof(struct xdma_desc), desc_bus_p); - if (!desc_virt) - return NULL; - /* get bus address of the first descriptor */ - desc_bus = *desc_bus_p; + BUG_ON(count > XDMA_TRANSFER_MAX_DESC); /* create singly-linked list for SG DMA controller */ - for (i = 0; i < number - 1; i++) { + for (i = 0; i < count - 1; i++) { /* increment bus address to next in array */ desc_bus += sizeof(struct xdma_desc); @@ -1167,9 +2180,6 @@ static struct xdma_desc *xdma_desc_alloc(struct pci_dev *dev, int number, temp_control = DESC_MAGIC; desc_virt[i].control = cpu_to_le32(temp_control); - - /* return the virtual address of the first descriptor */ - return desc_virt; } /* xdma_desc_link() - Link two descriptors @@ -1240,7 +2250,7 @@ static void xdma_desc_adjacent(struct xdma_desc *desc, int next_adjacent) } /* xdma_desc_control -- Set complete control field of a descriptor. */ -static void xdma_desc_control(struct xdma_desc *first, u32 control_field) +static void xdma_desc_control_set(struct xdma_desc *first, u32 control_field) { /* remember magic and adjacent number */ u32 control = le32_to_cpu(first->control) & ~(LS_BYTE_MASK); @@ -1252,21 +2262,30 @@ static void xdma_desc_control(struct xdma_desc *first, u32 control_field) first->control = cpu_to_le32(control); } -/* xdma_desc_free - Free cache-coherent linked list of N descriptors. +/* xdma_desc_clear -- Clear bits in control field of a descriptor. 
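Reviewer note: `transfer_desc_init()` above chains a contiguous, pre-allocated descriptor array into a singly linked list by bus address, replacing the per-transfer `xdma_desc_alloc()`. A user-space model of the chaining (descriptor struct reduced to the next-pointer fields; the real driver stores these little-endian via `cpu_to_le32()` and the caller terminates the last entry):

```c
#include <stdint.h>
#include <stdio.h>

struct desc {
	uint32_t next_lo;
	uint32_t next_hi;
};

/* Chain 'count' descriptors contiguous at bus address 'bus': entry i
 * points at entry i + 1; the final entry is null-terminated.
 * Caller guarantees count >= 1. */
static void chain(struct desc *d, uint64_t bus, int count)
{
	for (int i = 0; i < count - 1; i++) {
		bus += sizeof(struct desc);
		d[i].next_lo = (uint32_t)bus;
		d[i].next_hi = (uint32_t)(bus >> 32);
	}
	d[count - 1].next_lo = 0;
	d[count - 1].next_hi = 0;
}

int main(void)
{
	struct desc ring[4];

	chain(ring, 0x1000, 4);
	printf("desc0.next = 0x%x\n", ring[0].next_lo);	/* 0x1008 */
	return 0;
}
```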
*/ +static void xdma_desc_control_clear(struct xdma_desc *first, u32 clear_mask) +{ + /* remember magic and adjacent number */ + u32 control = le32_to_cpu(first->control); + + BUG_ON(clear_mask & ~(LS_BYTE_MASK)); + + /* merge adjacent and control field */ + control &= (~clear_mask); + /* write control and next_adjacent */ + first->control = cpu_to_le32(control); +} + +/* xdma_desc_done - recycle cache-coherent linked list of descriptors. * * @dev Pointer to pci_dev * @number Number of descriptors to be allocated * @desc_virt Pointer to (i.e. virtual address of) first descriptor in list * @desc_bus Bus address of first descriptor in list */ -static void xdma_desc_free(struct pci_dev *dev, int number, - struct xdma_desc *desc_virt, dma_addr_t desc_bus) +static inline void xdma_desc_done(struct xdma_desc *desc_virt) { - BUG_ON(!desc_virt); - BUG_ON(number < 0); - /* free contiguous list */ - pci_free_consistent(dev, number * sizeof(struct xdma_desc), desc_virt, - desc_bus); + memset(desc_virt, 0, XDMA_TRANSFER_MAX_DESC * sizeof(struct xdma_desc)); } /* xdma_desc() - Fill a descriptor with the transfer details @@ -1283,9 +2302,6 @@ static void xdma_desc_free(struct pci_dev *dev, int number, static void xdma_desc_set(struct xdma_desc *desc, dma_addr_t rc_bus_addr, u64 ep_addr, int len, int dir) { - /* length (in bytes) must be a non-negative multiple of four */ -// BUG_ON(len & 3); - /* transfer length */ desc->bytes = cpu_to_le32(len); if (dir == DMA_TO_DEVICE) { @@ -1305,6 +2321,33 @@ static void xdma_desc_set(struct xdma_desc *desc, dma_addr_t rc_bus_addr, } } +/* + * should hold the engine->lock; + */ +static void transfer_abort(struct xdma_engine *engine, + struct xdma_transfer *transfer) +{ + struct xdma_transfer *head; + + BUG_ON(!engine); + BUG_ON(!transfer); + BUG_ON(transfer->desc_num == 0); + + pr_info("abort transfer 0x%p, desc %d, engine desc queued %d.\n", + transfer, transfer->desc_num, engine->desc_dequeued); + + head = list_entry(engine->transfer_list.next, struct xdma_transfer, + entry); + if (head == transfer) + list_del(engine->transfer_list.next); + else + pr_info("engine %s, transfer 0x%p NOT found, 0x%p.\n", + engine->name, transfer, head); + + if (transfer->state == TRANSFER_STATE_SUBMITTED) + transfer->state = TRANSFER_STATE_ABORTED; +} + /* transfer_queue() - Queue a DMA transfer on the engine * * @engine DMA engine doing the transfer @@ -1315,23 +2358,35 @@ static void xdma_desc_set(struct xdma_desc *desc, dma_addr_t rc_bus_addr, static int transfer_queue(struct xdma_engine *engine, struct xdma_transfer *transfer) { - int rc = 0; + int rv = 0; struct xdma_transfer *transfer_started; + struct xdma_dev *xdev; unsigned long flags; BUG_ON(!engine); + BUG_ON(!engine->xdev); BUG_ON(!transfer); BUG_ON(transfer->desc_num == 0); dbg_tfr("transfer_queue(transfer=0x%p).\n", transfer); - /* lock the engine state */ + xdev = engine->xdev; + if (xdma_device_flag_check(xdev, XDEV_FLAG_OFFLINE)) { + pr_info("dev 0x%p offline, transfer 0x%p not queued.\n", + xdev, transfer); + return -EBUSY; + } + + /* lock the engine state */ spin_lock_irqsave(&engine->lock, flags); + engine->prev_cpu = get_cpu(); put_cpu(); /* engine is being shutdown; do not accept new transfers */ if (engine->shutdown & ENGINE_SHUTDOWN_REQUEST) { - rc = -1; + pr_info("engine %s offline, transfer 0x%p not queued.\n", + engine->name, transfer); + rv = -EBUSY; goto shutdown; } @@ -1357,8 +2412,8 @@ static int transfer_queue(struct xdma_engine *engine, /* unlock the engine state */ dbg_tfr("engine->running = %d\n", 
engine->running); spin_unlock_irqrestore(&engine->lock, flags); - return rc; -}; + return rv; +} static void engine_alignments(struct xdma_engine *engine) { @@ -1392,76 +2447,269 @@ static void engine_alignments(struct xdma_engine *engine) } } -static void engine_destroy(struct xdma_dev *lro, struct xdma_engine *engine) +static void engine_free_resource(struct xdma_engine *engine) +{ + struct xdma_dev *xdev = engine->xdev; + + /* Release memory use for descriptor writebacks */ + if (engine->poll_mode_addr_virt) { + dbg_sg("Releasing memory for descriptor writeback\n"); + dma_free_coherent(&xdev->pdev->dev, + sizeof(struct xdma_poll_wb), + engine->poll_mode_addr_virt, + engine->poll_mode_bus); + dbg_sg("Released memory for descriptor writeback\n"); + engine->poll_mode_addr_virt = NULL; + } + + if (engine->desc) { + dbg_init("device %s, engine %s pre-alloc desc 0x%p,0x%llx.\n", + dev_name(&xdev->pdev->dev), engine->name, + engine->desc, engine->desc_bus); + dma_free_coherent(&xdev->pdev->dev, + XDMA_TRANSFER_MAX_DESC * sizeof(struct xdma_desc), + engine->desc, engine->desc_bus); + engine->desc = NULL; + } + + if (engine->cyclic_result) { + dma_free_coherent(&xdev->pdev->dev, + CYCLIC_RX_PAGES_MAX * sizeof(struct xdma_result), + engine->cyclic_result, engine->cyclic_result_bus); + engine->cyclic_result = NULL; + } +} + +static void engine_destroy(struct xdma_dev *xdev, struct xdma_engine *engine) { - BUG_ON(!lro); + BUG_ON(!xdev); BUG_ON(!engine); dbg_sg("Shutting down engine %s%d", engine->name, engine->channel); /* Disable interrupts to stop processing new events during shutdown */ - write_register(0x0, &engine->regs->interrupt_enable_mask); + write_register(0x0, &engine->regs->interrupt_enable_mask, + (unsigned long)(&engine->regs->interrupt_enable_mask) - + (unsigned long)(&engine->regs)); + + if (enable_credit_mp && engine->streaming && + engine->dir == DMA_FROM_DEVICE) { + u32 reg_value = (0x1 << engine->channel) << 16; + struct sgdma_common_regs *reg = (struct sgdma_common_regs *) + (xdev->bar[xdev->config_bar_idx] + + (0x6*TARGET_SPACING)); + write_register(reg_value, ®->credit_mode_enable_w1c, 0); + } - engine_msix_teardown(engine); + /* Release memory use for descriptor writebacks */ + engine_free_resource(engine); memset(engine, 0, sizeof(struct xdma_engine)); /* Decrement the number of engines available */ - lro->engines_num--; + xdev->engines_num--; } -static void engine_msix_teardown(struct xdma_engine *engine) +/** + *engine_cyclic_stop() - stop a cyclic transfer running on an SG DMA engine + * + *engine->lock must be taken + */ +struct xdma_transfer *engine_cyclic_stop(struct xdma_engine *engine) { - BUG_ON(!engine); - if (engine->msix_irq_line) { - dbg_sg("Release IRQ#%d for engine %p\n", engine->msix_irq_line, - engine); - free_irq(engine->msix_irq_line, engine); + struct xdma_transfer *transfer = 0; + + /* transfers on queue? 
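Reviewer note: `engine_free_resource()` above tears down each coherent buffer with the same size, CPU address and bus address it was allocated with, and NULLs the pointer so a second teardown is a no-op. A sketch of that alloc/free pairing with the same kernel APIs (`wb_buf` and the one-field writeback struct are illustrative):

```c
#include <linux/pci.h>
#include <linux/dma-mapping.h>

struct poll_wb { u32 completed_desc_count; };	/* illustrative */

struct wb_buf {
	struct poll_wb *virt;
	dma_addr_t bus;
};

static int wb_alloc(struct pci_dev *pdev, struct wb_buf *b)
{
	b->virt = dma_alloc_coherent(&pdev->dev, sizeof(*b->virt),
				     &b->bus, GFP_KERNEL);
	return b->virt ? 0 : -ENOMEM;
}

static void wb_free(struct pci_dev *pdev, struct wb_buf *b)
{
	if (b->virt) {
		dma_free_coherent(&pdev->dev, sizeof(*b->virt),
				  b->virt, b->bus);
		b->virt = NULL;		/* makes double-free a no-op */
	}
}
```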
*/ + if (!list_empty(&engine->transfer_list)) { + /* pick first transfer on the queue (was submitted to engine) */ + transfer = list_entry(engine->transfer_list.next, + struct xdma_transfer, entry); + BUG_ON(!transfer); + + xdma_engine_stop(engine); + + if (transfer->cyclic) { + if (engine->xdma_perf) + dbg_perf("Stopping perf transfer on %s\n", + engine->name); + else + dbg_perf("Stopping cyclic transfer on %s\n", + engine->name); + /* make sure the handler sees correct transfer state */ + transfer->cyclic = 1; + /* + * set STOP flag and interrupt on completion, on the + * last descriptor + */ + xdma_desc_control_set( + transfer->desc_virt + transfer->desc_num - 1, + XDMA_DESC_COMPLETED | XDMA_DESC_STOPPED); + } else { + dbg_sg("(engine=%p) running transfer is not cyclic\n", + engine); + } + } else { + dbg_sg("(engine=%p) found not running transfer.\n", engine); } + return transfer; } +EXPORT_SYMBOL_GPL(engine_cyclic_stop); -static int engine_msix_setup(struct xdma_engine *engine) +static int engine_writeback_setup(struct xdma_engine *engine) { - int rc = 0; - u32 vector; - struct xdma_dev *lro; + u32 w; + struct xdma_dev *xdev; + struct xdma_poll_wb *writeback; BUG_ON(!engine); - lro = engine->lro; - BUG_ON(!lro); - - vector = lro->entry[lro->engines_num + MAX_USER_IRQ].vector; + xdev = engine->xdev; + BUG_ON(!xdev); - dbg_init("Requesting IRQ#%d for engine %p\n", vector, engine); - rc = request_irq(vector, xdma_channel_irq, 0, DRV_MODULE_NAME, engine); - if (rc) { - dbg_init("Unable to request_irq for engine %d\n", - lro->engines_num); - } else { - dbg_init("Requested IRQ#%d for engine %d\n", vector, - lro->engines_num); - engine->msix_irq_line = vector; - } + /* + * RTO - doing the allocation per engine is wasteful since a full page + * is allocated each time - better to allocate one page for the whole + * device during probe() and set per-engine offsets here + */ + writeback = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; + writeback->completed_desc_count = 0; + + dbg_init("Setting writeback location to 0x%llx for engine %p", + engine->poll_mode_bus, engine); + w = cpu_to_le32(PCI_DMA_L(engine->poll_mode_bus)); + write_register(w, &engine->regs->poll_mode_wb_lo, + (unsigned long)(&engine->regs->poll_mode_wb_lo) - + (unsigned long)(&engine->regs)); + w = cpu_to_le32(PCI_DMA_H(engine->poll_mode_bus)); + write_register(w, &engine->regs->poll_mode_wb_hi, + (unsigned long)(&engine->regs->poll_mode_wb_hi) - + (unsigned long)(&engine->regs)); - return rc; + return 0; } + /* engine_create() - Create an SG DMA engine bookkeeping data structure * * An SG DMA engine consists of the resources for a single-direction transfer * queue; the SG DMA hardware, the software queue and interrupt handling. * * @dev Pointer to pci_dev - * @offset byte address offset in BAR[lro->config_bar_idx] resource for the + * @offset byte address offset in BAR[xdev->config_bar_idx] resource for the * SG DMA * controller registers. 
* @dir: DMA_TO/FROM_DEVICE * @streaming Whether the engine is attached to AXI ST (rather than MM) */ -static int engine_init(struct xdma_engine *engine, struct xdma_dev *lro, - int offset, enum dma_data_direction dir, int channel) +static int engine_init_regs(struct xdma_engine *engine) { u32 reg_value; - int sgdma_offset = offset + SGDMA_OFFSET_FROM_CHANNEL; - int rc; + int rv = 0; + + write_register(XDMA_CTRL_NON_INCR_ADDR, &engine->regs->control_w1c, + (unsigned long)(&engine->regs->control_w1c) - + (unsigned long)(&engine->regs)); + + engine_alignments(engine); + + /* Configure error interrupts by default */ + reg_value = XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; + reg_value |= XDMA_CTRL_IE_MAGIC_STOPPED; + reg_value |= XDMA_CTRL_IE_MAGIC_STOPPED; + reg_value |= XDMA_CTRL_IE_READ_ERROR; + reg_value |= XDMA_CTRL_IE_DESC_ERROR; + + /* if using polled mode, configure writeback address */ + if (poll_mode) { + rv = engine_writeback_setup(engine); + if (rv) { + dbg_init("%s descr writeback setup failed.\n", + engine->name); + goto fail_wb; + } + } else { + /* enable the relevant completion interrupts */ + reg_value |= XDMA_CTRL_IE_DESC_STOPPED; + reg_value |= XDMA_CTRL_IE_DESC_COMPLETED; + + if (engine->streaming && engine->dir == DMA_FROM_DEVICE) + reg_value |= XDMA_CTRL_IE_IDLE_STOPPED; + } + + /* Apply engine configurations */ + write_register(reg_value, &engine->regs->interrupt_enable_mask, + (unsigned long)(&engine->regs->interrupt_enable_mask) - + (unsigned long)(&engine->regs)); + + engine->interrupt_enable_mask_value = reg_value; + + /* only enable credit mode for AXI-ST C2H */ + if (enable_credit_mp && engine->streaming && + engine->dir == DMA_FROM_DEVICE) { + + struct xdma_dev *xdev = engine->xdev; + u32 reg_value = (0x1 << engine->channel) << 16; + struct sgdma_common_regs *reg = (struct sgdma_common_regs *) + (xdev->bar[xdev->config_bar_idx] + + (0x6*TARGET_SPACING)); + + write_register(reg_value, ®->credit_mode_enable_w1s, 0); + } + + return 0; + +fail_wb: + return rv; +} + +static int engine_alloc_resource(struct xdma_engine *engine) +{ + struct xdma_dev *xdev = engine->xdev; + + engine->desc = dma_alloc_coherent(&xdev->pdev->dev, + XDMA_TRANSFER_MAX_DESC * sizeof(struct xdma_desc), + &engine->desc_bus, GFP_KERNEL); + if (!engine->desc) { + pr_warn("dev %s, %s pre-alloc desc OOM.\n", + dev_name(&xdev->pdev->dev), engine->name); + goto err_out; + } + + if (poll_mode) { + engine->poll_mode_addr_virt = dma_alloc_coherent( + &xdev->pdev->dev, + sizeof(struct xdma_poll_wb), + &engine->poll_mode_bus, GFP_KERNEL); + if (!engine->poll_mode_addr_virt) { + pr_warn("%s, %s poll pre-alloc writeback OOM.\n", + dev_name(&xdev->pdev->dev), engine->name); + goto err_out; + } + } + + if (engine->streaming && engine->dir == DMA_FROM_DEVICE) { + engine->cyclic_result = dma_alloc_coherent(&xdev->pdev->dev, + CYCLIC_RX_PAGES_MAX * sizeof(struct xdma_result), + &engine->cyclic_result_bus, GFP_KERNEL); + + if (!engine->cyclic_result) { + pr_warn("%s, %s pre-alloc result OOM.\n", + dev_name(&xdev->pdev->dev), engine->name); + goto err_out; + } + } + + return 0; + +err_out: + engine_free_resource(engine); + return -ENOMEM; +} + +static int engine_init(struct xdma_engine *engine, struct xdma_dev *xdev, + int offset, enum dma_data_direction dir, int channel) +{ + int rv; + u32 val; + + dbg_init("channel %d, offset 0x%x, dir %d.\n", channel, offset, dir); /* set magic */ engine->magic = MAGIC_ENGINE; @@ -1470,22 +2718,24 @@ static int engine_init(struct xdma_engine *engine, struct xdma_dev *lro, /* engine 
interrupt request bit */ engine->irq_bitmask = (1 << XDMA_ENG_IRQ_NUM) - 1; - engine->irq_bitmask <<= (lro->engines_num * XDMA_ENG_IRQ_NUM); - engine->bypass_offset = lro->engines_num * BYPASS_MODE_SPACING; + engine->irq_bitmask <<= (xdev->engines_num * XDMA_ENG_IRQ_NUM); + engine->bypass_offset = xdev->engines_num * BYPASS_MODE_SPACING; - /* initialize spinlock */ - spin_lock_init(&engine->lock); - /* initialize transfer_list */ - INIT_LIST_HEAD(&engine->transfer_list); /* parent */ - engine->lro = lro; + engine->xdev = xdev; /* register address */ - engine->regs = (lro->bar[lro->config_bar_idx] + offset); - engine->sgdma_regs = (lro->bar[lro->config_bar_idx] + sgdma_offset); + engine->regs = (xdev->bar[xdev->config_bar_idx] + offset); + engine->sgdma_regs = xdev->bar[xdev->config_bar_idx] + offset + + SGDMA_OFFSET_FROM_CHANNEL; + val = read_register(&engine->regs->identifier); + if (val & 0x8000U) + engine->streaming = 1; + /* remember SG DMA direction */ engine->dir = dir; - sprintf(engine->name, "%s%d", - (dir == DMA_TO_DEVICE) ? "H2C" : "C2H", channel); + sprintf(engine->name, "%d-%s%d-%s", xdev->idx, + (dir == DMA_TO_DEVICE) ? "H2C" : "C2H", channel, + engine->streaming ? "ST" : "MM"); dbg_init("engine %p name %s irq_bitmask=0x%08x\n", engine, engine->name, (int)engine->irq_bitmask); @@ -1493,428 +2743,582 @@ static int engine_init(struct xdma_engine *engine, struct xdma_dev *lro, /* initialize the deferred work for transfer completion */ INIT_WORK(&engine->work, engine_service_work); - /* Configure per-engine MSI-X vector if MSI-X is enabled */ - if (lro->msix_enabled) { - rc = engine_msix_setup(engine); - if (rc) { - dbg_init("MSI-X config for engine %p failed\n", engine); - return rc; - } - } - - lro->engines_num++; - - /* initialize wait queue */ - init_waitqueue_head(&engine->shutdown_wq); - - write_register(XDMA_CTRL_NON_INCR_ADDR, &engine->regs->control_w1c); - - engine_alignments(engine); - - /* Configure error interrupts by default */ - reg_value = XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; - reg_value |= XDMA_CTRL_IE_MAGIC_STOPPED; - reg_value |= XDMA_CTRL_IE_MAGIC_STOPPED; - reg_value |= XDMA_CTRL_IE_READ_ERROR; - reg_value |= XDMA_CTRL_IE_DESC_ERROR; - - /* enable the relevant completion interrupts */ - reg_value |= XDMA_CTRL_IE_DESC_STOPPED; - reg_value |= XDMA_CTRL_IE_DESC_COMPLETED; + if (dir == DMA_TO_DEVICE) + xdev->mask_irq_h2c |= engine->irq_bitmask; + else + xdev->mask_irq_c2h |= engine->irq_bitmask; + xdev->engines_num++; - /* Apply engine configurations */ - write_register(reg_value, &engine->regs->interrupt_enable_mask); + rv = engine_alloc_resource(engine); + if (rv) + return rv; - engine->interrupt_enable_mask_value = reg_value; + rv = engine_init_regs(engine); + if (rv) + return rv; - /* all engine setup completed successfully */ return 0; } /* transfer_destroy() - free transfer */ -static void transfer_destroy(struct xdma_dev *lro, - struct xdma_transfer *transfer, bool force) +static void transfer_destroy(struct xdma_dev *xdev, struct xdma_transfer *xfer) { /* free descriptors */ - xdma_desc_free(lro->pci_dev, transfer->desc_num, transfer->desc_virt, - transfer->desc_bus); + xdma_desc_done(xfer->desc_virt); + if (xfer->last_in_request && (xfer->flags & XFER_FLAG_NEED_UNMAP)) { + struct sg_table *sgt = xfer->sgt; - if ((force || transfer->last_in_request) && - (transfer->flags & XFER_FLAG_NEED_UNMAP)) { - struct sg_table *sgt = transfer->sgt; if (sgt->nents) { - pci_unmap_sg(lro->pci_dev, sgt->sgl, sgt->nents, - transfer->dir); + pci_unmap_sg(xdev->pdev, sgt->sgl, 
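Reviewer note: `engine_init()` above now keys off bit 15 of the channel identifier register (`val & 0x8000U`) to tell AXI-ST from AXI-MM and bakes that into the engine name. A stand-alone model of the naming, with the bit meaning taken from the hunk:

```c
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static void engine_name(char *buf, size_t len, int dev_idx, int channel,
			int is_h2c, uint32_t identifier)
{
	int streaming = !!(identifier & 0x8000u);	/* per the hunk */

	snprintf(buf, len, "%d-%s%d-%s", dev_idx,
		 is_h2c ? "H2C" : "C2H", channel,
		 streaming ? "ST" : "MM");
}

int main(void)
{
	char name[32];

	engine_name(name, sizeof(name), 0, 1, 0, 0x8000);
	puts(name);	/* 0-C2H1-ST */
	return 0;
}
```

Encoding the device index and MM/ST mode into the name pays off in the many ratelimited `pr_info()` messages added elsewhere in this patch, which identify engines by name only.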
sgt->nents, + xfer->dir); sgt->nents = 0; } } - - /* free transfer */ - kfree(transfer); } static int transfer_build(struct xdma_engine *engine, - struct xdma_transfer *transfer, u64 ep_addr, - struct scatterlist **sgl_p, unsigned int nents) + struct xdma_request_cb *req, unsigned int desc_max) { - struct scatterlist *sg = *sgl_p; + struct xdma_transfer *xfer = &req->xfer; + struct sw_desc *sdesc = &(req->sdesc[req->sw_desc_idx]); int i = 0; int j = 0; - dma_addr_t cont_addr = sg_dma_address(sg); - unsigned int cont_len = sg_dma_len(sg); - unsigned int next_len = 0; - - dbg_desc("sg %d/%u: addr=0x%llx, len=0x%x\n", - i, nents, cont_addr, cont_len); - for (i = 1, sg = sg_next(sg); i < nents; i++, sg = sg_next(sg)) { - dma_addr_t next_addr = sg_dma_address(sg); - next_len = sg_dma_len(sg); - - dbg_desc("sg %d/%u: addr=0x%llx, len=0x%x, cont 0x%llx,0x%x.\n", - i, nents, next_addr, next_len, cont_addr, cont_len); - /* contiguous ? */ - if (next_addr == (cont_addr + cont_len)) { - cont_len += next_len; - continue; - } - dbg_desc("DESC %d: addr=0x%llx, 0x%x, ep_addr=0x%llx\n", - j, (u64)cont_addr, cont_len, (u64)ep_addr); + for (; i < desc_max; i++, j++, sdesc++) { + dbg_desc("sw desc %d/%u: 0x%llx, 0x%x, ep 0x%llx.\n", + i + req->sw_desc_idx, req->sw_desc_cnt, + sdesc->addr, sdesc->len, req->ep_addr); + /* fill in descriptor entry j with transfer details */ - xdma_desc_set(transfer->desc_virt + j, cont_addr, ep_addr, - cont_len, transfer->dir); - transfer->xfer_len += cont_len; + xdma_desc_set(xfer->desc_virt + j, sdesc->addr, req->ep_addr, + sdesc->len, xfer->dir); + xfer->len += sdesc->len; /* for non-inc-add mode don't increment ep_addr */ if (!engine->non_incr_addr) - ep_addr += cont_len; - - /* start new contiguous block */ - cont_addr = next_addr; - cont_len = next_len; - j++; - } - BUG_ON(j > nents); - - if (cont_len) { - dbg_desc("DESC %d: addr=0x%llx, 0x%x, ep_addr=0x%llx\n", - j, (u64)cont_addr, cont_len, (u64)ep_addr); - xdma_desc_set(transfer->desc_virt + j, cont_addr, ep_addr, - cont_len, transfer->dir); - transfer->xfer_len += cont_len; + req->ep_addr += sdesc->len; } - - *sgl_p = sg; - return j; + req->sw_desc_idx += desc_max; + return 0; } -static struct xdma_transfer *transfer_create(struct xdma_engine *engine, - u64 ep_addr, struct scatterlist **sgl_p, int nents) +static int transfer_init(struct xdma_engine *engine, struct xdma_request_cb *req) { - struct xdma_dev *lro = engine->lro; - struct xdma_transfer *transfer; + struct xdma_transfer *xfer = &req->xfer; + unsigned int desc_max = min_t(unsigned int, + req->sw_desc_cnt - req->sw_desc_idx, + XDMA_TRANSFER_MAX_DESC); int i = 0; int last = 0; u32 control; - transfer = kzalloc(sizeof(struct xdma_transfer), GFP_KERNEL); - if (!transfer) { - dbg_tfr("OOM.\n"); - return NULL; - } + memset(xfer, 0, sizeof(*xfer)); + + /* initialize wait queue */ + init_waitqueue_head(&xfer->wq); /* remember direction of transfer */ - transfer->dir = engine->dir; + xfer->dir = engine->dir; - /* allocate descriptor list */ - transfer->desc_virt = xdma_desc_alloc(lro->pci_dev, nents, - &transfer->desc_bus); - dbg_sg("transfer->desc_bus = 0x%llx.\n", (u64)transfer->desc_bus); + xfer->desc_virt = engine->desc; + xfer->desc_bus = engine->desc_bus; - last = transfer_build(engine, transfer, ep_addr, sgl_p, nents); + transfer_desc_init(xfer, desc_max); + + dbg_sg("transfer->desc_bus = 0x%llx.\n", (u64)xfer->desc_bus); + + transfer_build(engine, req, desc_max); /* terminate last descriptor */ - xdma_desc_link(transfer->desc_virt + last, 0, 0); + last = 
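
transfer_build() now walks the precomputed sw_desc array and advances the endpoint address per descriptor unless the engine is in non-incremental-address mode (a FIFO-style target keeps one fixed address). A small stand-alone illustration of that address rule, with made-up lengths:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t ep_addr = 0x1000;
		unsigned int lens[] = { 4096, 4096, 512 };
		int non_incr_addr = 0;	/* 1 would pin ep_addr, as for a FIFO */
		int i;

		for (i = 0; i < 3; i++) {
			printf("desc %d: ep 0x%llx, len %u\n",
			       i, (unsigned long long)ep_addr, lens[i]);
			if (!non_incr_addr)	/* same rule as transfer_build() */
				ep_addr += lens[i];
		}
		return 0;
	}
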
desc_max - 1; + xdma_desc_link(xfer->desc_virt + last, 0, 0); /* stop engine, EOP for AXI ST, req IRQ on last descriptor */ control = XDMA_DESC_STOPPED; control |= XDMA_DESC_EOP; control |= XDMA_DESC_COMPLETED; - xdma_desc_control(transfer->desc_virt + last, control); + xdma_desc_control_set(xfer->desc_virt + last, control); - last++; - /* last is the number of descriptors */ - transfer->desc_num = transfer->desc_adjacent = last; + xfer->desc_num = xfer->desc_adjacent = desc_max; - dbg_sg("transfer 0x%p has %d descriptors\n", transfer, - transfer->desc_num); + dbg_sg("transfer 0x%p has %d descriptors\n", xfer, xfer->desc_num); /* fill in adjacent numbers */ - for (i = 0; i < transfer->desc_num; i++) { - xdma_desc_adjacent(transfer->desc_virt + i, - transfer->desc_num - i - 1); - } + for (i = 0; i < xfer->desc_num; i++) + xdma_desc_adjacent(xfer->desc_virt + i, xfer->desc_num - i - 1); - /* initialize wait queue */ - init_waitqueue_head(&transfer->wq); + return 0; +} - return transfer; +#ifdef __LIBXDMA_DEBUG__ +static void sgt_dump(struct sg_table *sgt) +{ + int i; + struct scatterlist *sg = sgt->sgl; + + pr_info("sgt 0x%p, sgl 0x%p, nents %u/%u.\n", + sgt, sgt->sgl, sgt->nents, sgt->orig_nents); + + for (i = 0; i < sgt->orig_nents; i++, sg = sg_next(sg)) + pr_info("%d, 0x%p, pg 0x%p,%u+%u, dma 0x%llx,%u.\n", + i, sg, sg_page(sg), sg->offset, sg->length, + sg_dma_address(sg), sg_dma_len(sg)); } -static void transfer_abort(struct xdma_engine *engine, - struct xdma_transfer *transfer) +static void xdma_request_cb_dump(struct xdma_request_cb *req) { - struct xdma_transfer *head; + int i; - BUG_ON(!engine); - BUG_ON(!transfer); - BUG_ON(transfer->desc_num == 0); + pr_info("request 0x%p, total %u, ep 0x%llx, sw_desc %u, sgt 0x%p.\n", + req, req->total_len, req->ep_addr, req->sw_desc_cnt, req->sgt); + sgt_dump(req->sgt); + for (i = 0; i < req->sw_desc_cnt; i++) + pr_info("%d/%u, 0x%llx, %u.\n", + i, req->sw_desc_cnt, req->sdesc[i].addr, + req->sdesc[i].len); +} +#endif - head = list_entry(engine->transfer_list.next, struct xdma_transfer, - entry); - if (head == transfer) - list_del(engine->transfer_list.next); +static void xdma_request_free(struct xdma_request_cb *req) +{ + if (((unsigned long)req) >= VMALLOC_START && + ((unsigned long)req) < VMALLOC_END) + vfree(req); else - pr_info("engine %s, transfer 0x%p NOT found, 0x%p.\n", - engine->name, transfer, head); + kfree(req); +} + +static struct xdma_request_cb * xdma_request_alloc(unsigned int sdesc_nr) +{ + struct xdma_request_cb *req; + unsigned int size = sizeof(struct xdma_request_cb) + + sdesc_nr * sizeof(struct sw_desc); + + req = kzalloc(size, GFP_KERNEL); + if (!req) { + req = vmalloc(size); + if (req) + memset(req, 0, size); + } + if (!req) { + pr_info("OOM, %u sw_desc, %u.\n", sdesc_nr, size); + return NULL; + } + + return req; +} + +static struct xdma_request_cb * xdma_init_request(struct sg_table *sgt, + u64 ep_addr) +{ + struct xdma_request_cb *req; + struct scatterlist *sg = sgt->sgl; + int max = sgt->nents; + int extra = 0; + int i, j = 0; + + for (i = 0; i < max; i++, sg = sg_next(sg)) { + unsigned int len = sg_dma_len(sg); + + if (unlikely(len > desc_blen_max)) + extra += (len + desc_blen_max - 1) / desc_blen_max; + } + +//pr_info("ep 0x%llx, desc %u+%u.\n", ep_addr, max, extra); + + max += extra; + req = xdma_request_alloc(max); + if (!req) + return NULL; + + req->sgt = sgt; + req->ep_addr = ep_addr; + + for (i = 0, sg = sgt->sgl; i < sgt->nents; i++, sg = sg_next(sg)) { + unsigned int tlen = sg_dma_len(sg); + dma_addr_t addr 
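
transfer_init() terminates the last descriptor with STOPPED|EOP|COMPLETED and then writes each descriptor's "adjacent" count, which tells the engine how many further descriptors follow contiguously so it can fetch them in bursts. The counts simply run down to zero:

	#include <stdio.h>

	int main(void)
	{
		int desc_num = 4, i;

		/* mirrors the xdma_desc_adjacent() loop in transfer_init() */
		for (i = 0; i < desc_num; i++)
			printf("desc %d: adjacent = %d\n", i, desc_num - i - 1);
		return 0;
	}
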
= sg_dma_address(sg); + + req->total_len += tlen; + while (tlen) { + req->sdesc[j].addr = addr; + if (tlen > desc_blen_max) { + req->sdesc[j].len = desc_blen_max; + addr += desc_blen_max; + tlen -= desc_blen_max; + } else { + req->sdesc[j].len = tlen; + tlen = 0; + } + j++; + } + } + BUG_ON(j > max); + + req->sw_desc_cnt = j; +#ifdef __LIBXDMA_DEBUG__ + xdma_request_cb_dump(req); +#endif + return req; } -int xdma_xfer_submit(void *channel, enum dma_data_direction dir, u64 ep_addr, - struct sg_table *sgt, int dma_mapped, int timeout_ms) +ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, + struct sg_table *sgt, bool dma_mapped, int timeout_ms) { + struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; + struct xdma_engine *engine; int rv = 0; ssize_t done = 0; - struct xdma_engine *engine = (struct xdma_engine *)channel; struct scatterlist *sg = sgt->sgl; - struct xdma_dev *lro; int nents; + enum dma_data_direction dir = write ? DMA_TO_DEVICE : DMA_FROM_DEVICE; + struct xdma_request_cb *req = NULL; + + if (!dev_hndl) + return -EINVAL; + + if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) + return -EINVAL; + + if (write == 1) { + if (channel >= xdev->h2c_channel_max) { + pr_warn("H2C channel %d >= %d.\n", + channel, xdev->h2c_channel_max); + return -EINVAL; + } + engine = &xdev->engine_h2c[channel]; + } else if (write == 0) { + if (channel >= xdev->c2h_channel_max) { + pr_warn("C2H channel %d >= %d.\n", + channel, xdev->c2h_channel_max); + return -EINVAL; + } + engine = &xdev->engine_c2h[channel]; + } else { + pr_warn("write %d, exp. 0|1.\n", write); + return -EINVAL; + } BUG_ON(!engine); BUG_ON(engine->magic != MAGIC_ENGINE); - lro = engine->lro; + xdev = engine->xdev; + if (xdma_device_flag_check(xdev, XDEV_FLAG_OFFLINE)) { + pr_info("xdev 0x%p, offline.\n", xdev); + return -EBUSY; + } + /* check the direction */ if (engine->dir != dir) { - dbg_tfr("channel 0x%p, %s, dir 0x%x/0x%x mismatch.\n", - channel, engine->name, engine->dir, dir); + pr_info("0x%p, %s, %d, W %d, 0x%x/0x%x mismatch.\n", + engine, engine->name, channel, write, engine->dir, dir); return -EINVAL; } if (!dma_mapped) { - nents = pci_map_sg(lro->pci_dev, sg, sgt->orig_nents, dir); + nents = pci_map_sg(xdev->pdev, sg, sgt->orig_nents, dir); if (!nents) { - dbg_tfr("map sgl failed, sgt 0x%p.\n", sgt); + pr_info("map sgl failed, sgt 0x%p.\n", sgt); return -EIO; } sgt->nents = nents; } else { BUG_ON(!sgt->nents); - nents = sgt->nents; } - + + req = xdma_init_request(sgt, ep_addr); + if (!req) { + rv = -ENOMEM; + goto unmap_sgl; + } + + dbg_tfr("%s, len %u sg cnt %u.\n", + engine->name, req->total_len, req->sw_desc_cnt); + + sg = sgt->sgl; + nents = req->sw_desc_cnt; while (nents) { unsigned long flags; - unsigned int xfer_nents = min_t(unsigned int, - nents, XDMA_TRANSFER_MAX_DESC); - struct xdma_transfer *transfer; + struct xdma_transfer *xfer; + + /* one transfer at a time */ + spin_lock(&engine->desc_lock); /* build transfer */ - transfer = transfer_create(engine, ep_addr, &sg, xfer_nents); - if (!transfer) { - dbg_tfr("OOM.\n"); - if (!dma_mapped) { - pci_unmap_sg(lro->pci_dev, sgt->sgl, - sgt->orig_nents, dir); - sgt->nents = 0; - } - return -ENOMEM; + rv = transfer_init(engine, req); + if (rv < 0) { + spin_unlock(&engine->desc_lock); + goto unmap_sgl; } + xfer = &req->xfer; if (!dma_mapped) - transfer->flags = XFER_FLAG_NEED_UNMAP; + xfer->flags = XFER_FLAG_NEED_UNMAP; /* last transfer for the given request? 
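
xdma_init_request() first sizes the request: any scatterlist entry longer than desc_blen_max (bounded by the new XDMA_DESC_BLEN_MAX, (1 << 28) - 1, in libxdma.h below) costs extra software descriptors, counted with a ceiling division, and the loop that follows actually splits the entry. A stand-alone check of the arithmetic, assuming desc_blen_max at its maximum:

	#include <stdio.h>

	#define DESC_BLEN_MAX ((1u << 28) - 1)	/* XDMA_DESC_BLEN_MAX */

	int main(void)
	{
		unsigned int len = 0x30000000;	/* one 768 MB scatterlist entry */
		unsigned int extra = 0;

		if (len > DESC_BLEN_MAX)	/* ceiling division, as in xdma_init_request() */
			extra = (len + DESC_BLEN_MAX - 1) / DESC_BLEN_MAX;
		printf("splits into %u sw descriptors\n", extra ? extra : 1);
		return 0;
	}
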
*/ - nents -= xfer_nents; + nents -= xfer->desc_num; if (!nents) { - transfer->last_in_request = 1; - transfer->sgt = sgt; + xfer->last_in_request = 1; + xfer->sgt = sgt; } - //transfer_dump(transfer); + dbg_tfr("xfer, %u, ep 0x%llx, done %lu, sg %u/%u.\n", + xfer->len, req->ep_addr, done, req->sw_desc_idx, + req->sw_desc_cnt); + +#ifdef __LIBXDMA_DEBUG__ + transfer_dump(xfer); +#endif - rv = transfer_queue(engine, transfer); + rv = transfer_queue(engine, xfer); if (rv < 0) { - dbg_tfr("unable to submit %s.\n", engine->name); - transfer_destroy(lro, transfer, 1); - return -ERESTARTSYS; + spin_unlock(&engine->desc_lock); + pr_info("unable to submit %s, %d.\n", engine->name, rv); + goto unmap_sgl; } - rv = wait_event_interruptible_timeout(transfer->wq, - (transfer->state != TRANSFER_STATE_SUBMITTED), - msecs_to_jiffies(timeout_ms)); + /* + * When polling, determine how many descriptors have been queued * on the engine to determine the writeback value expected + */ + if (poll_mode) { + unsigned int desc_count; + + spin_lock_irqsave(&engine->lock, flags); + desc_count = xfer->desc_num; + spin_unlock_irqrestore(&engine->lock, flags); + + dbg_tfr("%s poll desc_count=%d\n", + engine->name, desc_count); + rv = engine_service_poll(engine, desc_count); + + } else { + rv = wait_event_interruptible_timeout(xfer->wq, + (xfer->state != TRANSFER_STATE_SUBMITTED), + msecs_to_jiffies(timeout_ms)); + } spin_lock_irqsave(&engine->lock, flags); - switch(transfer->state) { + + switch(xfer->state) { case TRANSFER_STATE_COMPLETED: spin_unlock_irqrestore(&engine->lock, flags); - dbg_tfr("transfer %p, %u completed.\n", transfer, - transfer->xfer_len); - done += transfer->xfer_len; - ep_addr += transfer->xfer_len; - transfer_destroy(lro, transfer, 0); + + dbg_tfr("transfer %p, %u, ep 0x%llx compl, +%lu.\n", + xfer, xfer->len, req->ep_addr - xfer->len, done); + done += xfer->len; + rv = 0; break; case TRANSFER_STATE_FAILED: + pr_info("xfer 0x%p,%u, failed, ep 0x%llx.\n", + xfer, xfer->len, req->ep_addr - xfer->len); spin_unlock_irqrestore(&engine->lock, flags); - pr_info("transfer %p, %u failed.\n", transfer, - transfer->xfer_len); - transfer_destroy(lro, transfer, 1); - return -EIO; + +#ifdef __LIBXDMA_DEBUG__ + transfer_dump(xfer); + sgt_dump(sgt); +#endif + rv = -EIO; + break; default: /* transfer can still be in-flight */ - pr_info("xfer 0x%p,%u, state 0x%x, timed out.\n", - transfer, transfer->xfer_len, transfer->state); - transfer_abort(engine, transfer); - spin_unlock_irqrestore(&engine->lock, flags); + pr_info("xfer 0x%p,%u, s 0x%x timed out, ep 0x%llx.\n", + xfer, xfer->len, xfer->state, req->ep_addr); + engine_status_read(engine, 0, 1); + //engine_status_dump(engine); + transfer_abort(engine, xfer); xdma_engine_stop(engine); - transfer_dump(transfer); - transfer_destroy(lro, transfer, 1); - return -ERESTARTSYS; + spin_unlock_irqrestore(&engine->lock, flags); + +#ifdef __LIBXDMA_DEBUG__ + transfer_dump(xfer); + sgt_dump(sgt); +#endif + rv = -ERESTARTSYS; + break; } + + transfer_destroy(xdev, xfer); + spin_unlock(&engine->desc_lock); + + if (rv < 0) + goto unmap_sgl; } /* while (sg) */ +unmap_sgl: + if (!dma_mapped && sgt->nents) { + pci_unmap_sg(xdev->pdev, sgt->sgl, sgt->orig_nents, dir); + sgt->nents = 0; + } + + if (req) + xdma_request_free(req); + + if (rv < 0) + return rv; + return done; } EXPORT_SYMBOL_GPL(xdma_xfer_submit); -/* - * RTO - code to detect if MSI/MSI-X capability exists is derived - * from linux/pci/msi.c - pci_msi_check_device - */ - -#ifndef arch_msi_check_device -int 
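
xdma_xfer_submit() is now addressed by opaque device handle, channel number, and direction flag instead of an engine pointer, and it either polls the write-back counter or sleeps on the transfer wait queue. A hedged caller sketch, assuming kernel context, the exported prototype above, and a caller-owned, already populated sg_table (my_h2c_write and its arguments are illustrative):

	/* e.g. from a character-device write path; assumes <linux/scatterlist.h> */
	static ssize_t my_h2c_write(void *xdma_hndl, struct sg_table *sgt, u64 ep_addr)
	{
		/* channel 0, write == true (H2C), not pre-mapped, 10 s timeout */
		ssize_t done = xdma_xfer_submit(xdma_hndl, 0, true, ep_addr,
						sgt, false, 10000);
		if (done < 0)
			pr_err("xdma_xfer_submit failed: %zd\n", done);
		return done;	/* bytes actually transferred */
	}
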
arch_msi_check_device(struct pci_dev *dev, int nvec, int type) +#ifdef INTERNAL_TESTING +int xdma_performance_submit(struct xdma_dev *xdev, struct xdma_engine *engine) { - return 0; -} -#endif + u8 *buffer_virt; + u32 max_consistent_size = 128 * 32 * 1024; /* 1024 pages, 4MB */ + dma_addr_t buffer_bus; /* bus address */ + struct xdma_transfer *transfer; + u64 ep_addr = 0; + int num_desc_in_a_loop = 128; + int size_in_desc = engine->xdma_perf->transfer_size; + int size = size_in_desc * num_desc_in_a_loop; + int i; -/* type = PCI_CAP_ID_MSI or PCI_CAP_ID_MSIX */ -static int msi_msix_capable(struct pci_dev *dev, int type) -{ - struct pci_bus *bus; - int ret; + BUG_ON(size_in_desc > max_consistent_size); - if (!dev || dev->no_msi) - return 0; + if (size > max_consistent_size) { + size = max_consistent_size; + num_desc_in_a_loop = size / size_in_desc; + } - for (bus = dev->bus; bus; bus = bus->parent) - if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) - return 0; + buffer_virt = dma_alloc_coherent(&xdev->pdev->dev, size, + &buffer_bus, GFP_KERNEL); - ret = arch_msi_check_device(dev, 1, type); - if (ret) - return 0; + /* allocate transfer data structure */ + transfer = kzalloc(sizeof(struct xdma_transfer), GFP_KERNEL); + BUG_ON(!transfer); - if (!pci_find_capability(dev, type)) - return 0; + /* 0 = write engine (to_dev=0) , 1 = read engine (to_dev=1) */ + transfer->dir = engine->dir; + /* set number of descriptors */ + transfer->desc_num = num_desc_in_a_loop; + + /* allocate descriptor list */ + if (!engine->desc) { + engine->desc = dma_alloc_coherent(&xdev->pdev->dev, + num_desc_in_a_loop * sizeof(struct xdma_desc), + &engine->desc_bus, GFP_KERNEL); + BUG_ON(!engine->desc); + dbg_init("device %s, engine %s pre-alloc desc 0x%p,0x%llx.\n", + dev_name(&xdev->pdev->dev), engine->name, + engine->desc, engine->desc_bus); + } + transfer->desc_virt = engine->desc; + transfer->desc_bus = engine->desc_bus; + + transfer_desc_init(transfer, transfer->desc_num); + + dbg_sg("transfer->desc_bus = 0x%llx.\n", (u64)transfer->desc_bus); + + for (i = 0; i < transfer->desc_num; i++) { + struct xdma_desc *desc = transfer->desc_virt + i; + dma_addr_t rc_bus_addr = buffer_bus + size_in_desc * i; + + /* fill in descriptor entry with transfer details */ + xdma_desc_set(desc, rc_bus_addr, ep_addr, size_in_desc, + engine->dir); + } + + /* stop engine and request interrupt on last descriptor */ + xdma_desc_control_set(transfer->desc_virt, 0); + /* create a linked loop */ + xdma_desc_link(transfer->desc_virt + transfer->desc_num - 1, + transfer->desc_virt, transfer->desc_bus); + + transfer->cyclic = 1; + + /* initialize wait queue */ + init_waitqueue_head(&transfer->wq); + + //printk("=== Descriptor print for PERF \n"); + //transfer_dump(transfer); + + dbg_perf("Queueing XDMA I/O %s request for performance measurement.\n", + engine->dir ? 
"write (to dev)" : "read (from dev)"); + transfer_queue(engine, transfer); + return 0; - return 1; } +EXPORT_SYMBOL_GPL(xdma_performance_submit); +#endif static struct xdma_dev *alloc_dev_instance(struct pci_dev *pdev) { int i; - struct xdma_dev *lro; + struct xdma_dev *xdev; + struct xdma_engine *engine; BUG_ON(!pdev); /* allocate zeroed device book keeping structure */ - lro = kzalloc(sizeof(struct xdma_dev), GFP_KERNEL); - if (!lro) { - dbg_init("Could not kzalloc(xdma_dev).\n"); + xdev = kzalloc(sizeof(struct xdma_dev), GFP_KERNEL); + if (!xdev) { + pr_info("OOM, xdma_dev.\n"); return NULL; } + spin_lock_init(&xdev->lock); - lro->magic = MAGIC_DEVICE; - lro->config_bar_idx = -1; - lro->user_bar_idx = -1; - lro->bypass_bar_idx = -1; - lro->irq_line = -1; + xdev->magic = MAGIC_DEVICE; + xdev->config_bar_idx = -1; + xdev->user_bar_idx = -1; + xdev->bypass_bar_idx = -1; + xdev->irq_line = -1; /* create a driver to device reference */ - lro->pci_dev = pdev; - dbg_init("probe() lro = 0x%p\n", lro); + xdev->pdev = pdev; + dbg_init("xdev = 0x%p\n", xdev); /* Set up data user IRQ data structures */ - for (i = 0; i < MAX_USER_IRQ; i++) { - lro->user_irq[i].lro = lro; - spin_lock_init(&lro->user_irq[i].events_lock); - init_waitqueue_head(&lro->user_irq[i].events_wq); - lro->user_irq[i].handler = NULL; - lro->user_irq[i].user_idx = i; /* 0 ~ 15 */ + for (i = 0; i < 16; i++) { + xdev->user_irq[i].xdev = xdev; + spin_lock_init(&xdev->user_irq[i].events_lock); + init_waitqueue_head(&xdev->user_irq[i].events_wq); + xdev->user_irq[i].handler = NULL; + xdev->user_irq[i].user_idx = i; /* 0 based */ } - return lro; -} - -static int probe_scan_for_msi(struct xdma_dev *lro, struct pci_dev *pdev) -{ - int i; - int rc = 0; - int req_nvec = MAX_NUM_ENGINES + MAX_USER_IRQ; - - BUG_ON(!lro); - BUG_ON(!pdev); - - if (msi_msix_capable(pdev, PCI_CAP_ID_MSIX)) { - dbg_init("Enabling MSI-X\n"); - for (i = 0; i < req_nvec; i++) - lro->entry[i].entry = i; - - rc = pci_enable_msix(pdev, lro->entry, req_nvec); - if (rc < 0) - dbg_init("Couldn't enable MSI-X mode: rc = %d\n", rc); + engine = xdev->engine_h2c; + for (i = 0; i < XDMA_CHANNEL_NUM_MAX; i++, engine++) { + spin_lock_init(&engine->lock); + spin_lock_init(&engine->desc_lock); + INIT_LIST_HEAD(&engine->transfer_list); + init_waitqueue_head(&engine->shutdown_wq); + init_waitqueue_head(&engine->xdma_perf_wq); + } - lro->msix_enabled = 1; - } else if (msi_msix_capable(pdev, PCI_CAP_ID_MSI)) { - /* enable message signalled interrupts */ - dbg_init("pci_enable_msi()\n"); - rc = pci_enable_msi(pdev); - if (rc < 0) - dbg_init("Couldn't enable MSI mode: rc = %d\n", rc); - lro->msi_enabled = 1; - } else { - dbg_init("MSI/MSI-X not detected - using legacy interrupts\n"); + engine = xdev->engine_c2h; + for (i = 0; i < XDMA_CHANNEL_NUM_MAX; i++, engine++) { + spin_lock_init(&engine->lock); + spin_lock_init(&engine->desc_lock); + INIT_LIST_HEAD(&engine->transfer_list); + init_waitqueue_head(&engine->shutdown_wq); + init_waitqueue_head(&engine->xdma_perf_wq); } - return rc; + return xdev; } -static int request_regions(struct xdma_dev *lro, struct pci_dev *pdev) +static int request_regions(struct xdma_dev *xdev, struct pci_dev *pdev) { - int rc; + int rv; - BUG_ON(!lro); + BUG_ON(!xdev); BUG_ON(!pdev); dbg_init("pci_request_regions()\n"); - rc = pci_request_regions(pdev, DRV_MODULE_NAME); + rv = pci_request_regions(pdev, xdev->mod_name); /* could not request all regions? 
*/ - if (rc) { - dbg_init("pci_request_regions() = %d, device in use?\n", rc); + if (rv) { + dbg_init("pci_request_regions() = %d, device in use?\n", rv); /* assume device is in use so do not disable it later */ - lro->regions_in_use = 1; + xdev->regions_in_use = 1; } else { - lro->got_regions = 1; + xdev->got_regions = 1; } - return rc; + return rv; } static int set_dma_mask(struct pci_dev *pdev) { - int rc = 0; - BUG_ON(!pdev); dbg_init("sizeof(dma_addr_t) == %ld\n", sizeof(dma_addr_t)); @@ -1935,142 +3339,10 @@ static int set_dma_mask(struct pci_dev *pdev) dbg_init("Using a 32-bit DMA mask.\n"); } else { dbg_init("No suitable DMA possible.\n"); - rc = -1; - } - - return rc; -} - -static u32 build_vector_reg(u32 a, u32 b, u32 c, u32 d) -{ - u32 reg_val = 0; - - reg_val |= (a & 0x1f) << 0; - reg_val |= (b & 0x1f) << 8; - reg_val |= (c & 0x1f) << 16; - reg_val |= (d & 0x1f) << 24; - - return reg_val; -} - -static void write_msix_vectors(struct xdma_dev *lro) -{ - struct interrupt_regs *int_regs; - u32 reg_val; - - BUG_ON(!lro); - int_regs = (struct interrupt_regs *) - (lro->bar[lro->config_bar_idx] + XDMA_OFS_INT_CTRL); - - /* user irq MSI-X vectors */ - reg_val = build_vector_reg(0, 1, 2, 3); - write_register(reg_val, &int_regs->user_msi_vector[0]); - - reg_val = build_vector_reg(4, 5, 6, 7); - write_register(reg_val, &int_regs->user_msi_vector[1]); - - reg_val = build_vector_reg(8, 9, 10, 11); - write_register(reg_val, &int_regs->user_msi_vector[2]); - - reg_val = build_vector_reg(12, 13, 14, 15); - write_register(reg_val, &int_regs->user_msi_vector[3]); - - /* channel irq MSI-X vectors */ - reg_val = build_vector_reg(16, 17, 18, 19); - write_register(reg_val, &int_regs->channel_msi_vector[0]); - - reg_val = build_vector_reg(20, 21, 22, 23); - write_register(reg_val, &int_regs->channel_msi_vector[1]); -} - -static int msix_irq_setup(struct xdma_dev *lro) -{ - int i; - int rc; - - BUG_ON(!lro); - write_msix_vectors(lro); - - for (i = 0; i < MAX_USER_IRQ; i++) { - rc = request_irq(lro->entry[i].vector, xdma_user_irq, 0, - DRV_MODULE_NAME, &lro->user_irq[i]); - - if (rc) { - dbg_init("Couldn't use IRQ#%d, rc=%d\n", - lro->entry[i].vector, rc); - break; - } - - dbg_init("Using IRQ#%d with 0x%p\n", lro->entry[i].vector, - &lro->user_irq[i]); - } - - /* If any errors occur, free IRQs that were successfully requested */ - if (rc) { - while (--i >= 0) - free_irq(lro->entry[i].vector, &lro->user_irq[i]); - } - - return rc; -} - -static void irq_teardown(struct xdma_dev *lro) -{ - int i; - - BUG_ON(!lro); - - if (lro->msix_enabled) { - for (i = 0; i < MAX_USER_IRQ; i++) { - dbg_init("Releasing IRQ#%d\n", lro->entry[i].vector); - free_irq(lro->entry[i].vector, &lro->user_irq[i]); - } - } else if (lro->irq_line != -1) { - dbg_init("Releasing IRQ#%d\n", lro->irq_line); - free_irq(lro->irq_line, lro); - } -} - -static int irq_setup(struct xdma_dev *lro, struct pci_dev *pdev) -{ - int rc = 0; - u32 irq_flag; - u8 val; - void *reg; - u32 w; - - BUG_ON(!lro); - - if (lro->msix_enabled) { - rc = msix_irq_setup(lro); - } else { - if (!lro->msi_enabled){ - pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &val); - dbg_init("Legacy Interrupt register value = %d\n", val); - if (val > 1) { - val--; - w = (val<<24) | (val<<16) | (val<<8)| val; - // Program IRQ Block Channel vactor and IRQ Block User vector with Legacy interrupt value - reg = lro->bar[lro->config_bar_idx] + 0x2080; // IRQ user - write_register(w, reg); - write_register(w, reg+0x4); - write_register(w, reg+0x8); - write_register(w, reg+0xC); - reg = 
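
set_dma_mask() (its body is elided by this hunk) keeps the usual try-64-bit-then-32-bit fallback that the surrounding dbg_init() messages describe; the change is only that failure now returns -EINVAL instead of -1, so the error propagates as a proper errno. A minimal sketch of that pattern, assuming the generic dma_set_mask_and_coherent() helper rather than the driver's exact calls:

	/* assumes <linux/dma-mapping.h> and <linux/pci.h> */
	static int my_set_dma_mask(struct pci_dev *pdev)
	{
		if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)))
			return 0;	/* "Using a 64-bit DMA mask." */
		if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
			return 0;	/* "Using a 32-bit DMA mask." */
		return -EINVAL;		/* "No suitable DMA possible." */
	}
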
lro->bar[lro->config_bar_idx] + 0x20A0; // IRQ Block - write_register(w, reg); - write_register(w, reg+0x4); - } - } - irq_flag = lro->msi_enabled ? 0 : IRQF_SHARED; - lro->irq_line = (int)pdev->irq; - rc = request_irq(pdev->irq, xdma_isr, irq_flag, DRV_MODULE_NAME, lro); - if (rc) - dbg_init("Couldn't use IRQ#%d, rc=%d\n", pdev->irq, rc); - else - dbg_init("Using IRQ#%d with 0x%p\n", pdev->irq, lro); + return -EINVAL; } - return rc; + return 0; } static u32 get_engine_channel_id(struct engine_regs *regs) @@ -2094,32 +3366,34 @@ static u32 get_engine_id(struct engine_regs *regs) return (value & 0xffff0000U) >> 16; } -static void remove_engines(struct xdma_dev *lro) +static void remove_engines(struct xdma_dev *xdev) { struct xdma_engine *engine; - int channel; + int i; - BUG_ON(!lro); + BUG_ON(!xdev); /* iterate over channels */ - for (channel = 0; channel < XDMA_CHANNEL_NUM_MAX; channel++) { - engine = &lro->engine_h2c[channel]; + for (i = 0; i < xdev->h2c_channel_max; i++) { + engine = &xdev->engine_h2c[i]; if (engine->magic == MAGIC_ENGINE) { - dbg_sg("Remove %s, %d", engine->name, engine->channel); - engine_destroy(lro, engine); - dbg_sg("%s, %d removed", engine->name, engine->channel); + dbg_sg("Remove %s, %d", engine->name, i); + engine_destroy(xdev, engine); + dbg_sg("%s, %d removed", engine->name, i); } + } - engine = &lro->engine_c2h[channel]; + for (i = 0; i < xdev->c2h_channel_max; i++) { + engine = &xdev->engine_c2h[i]; if (engine->magic == MAGIC_ENGINE) { - dbg_sg("Remove %s, %d", engine->name, engine->channel); - engine_destroy(lro, engine); - dbg_sg("%s, %d removed", engine->name, engine->channel); + dbg_sg("Remove %s, %d", engine->name, i); + engine_destroy(xdev, engine); + dbg_sg("%s, %d removed", engine->name, i); } } } -static int probe_for_engine(struct xdma_dev *lro, enum dma_data_direction dir, +static int probe_for_engine(struct xdma_dev *xdev, enum dma_data_direction dir, int channel) { struct engine_regs *regs; @@ -2135,34 +3409,34 @@ static int probe_for_engine(struct xdma_dev *lro, enum dma_data_direction dir, * channels at 0x100 interval */ if (dir == DMA_TO_DEVICE) { engine_id_expected = XDMA_ID_H2C; - engine = &lro->engine_h2c[channel]; + engine = &xdev->engine_h2c[channel]; } else { offset += H2C_CHANNEL_OFFSET; engine_id_expected = XDMA_ID_C2H; - engine = &lro->engine_c2h[channel]; + engine = &xdev->engine_c2h[channel]; } - regs = lro->bar[lro->config_bar_idx] + offset; + regs = xdev->bar[xdev->config_bar_idx] + offset; engine_id = get_engine_id(regs); channel_id = get_engine_channel_id(regs); if ((engine_id != engine_id_expected) || (channel_id != channel)) { - dbg_tfr("%s %d engine, reg off 0x%x, id mismatch 0x%x,0x%x," + dbg_init("%s %d engine, reg off 0x%x, id mismatch 0x%x,0x%x," "exp 0x%x,0x%x, SKIP.\n", dir == DMA_TO_DEVICE ? "H2C" : "C2H", channel, offset, engine_id, channel_id, engine_id_expected, channel_id != channel); - return 0; + return -EINVAL; } - dbg_tfr("found AXI %s %d engine, reg. off 0x%x, id 0x%x,0x%x.\n", + dbg_init("found AXI %s %d engine, reg. off 0x%x, id 0x%x,0x%x.\n", dir == DMA_TO_DEVICE ? "H2C" : "C2H", channel, offset, engine_id, channel_id); /* allocate and initialize engine */ - rv = engine_init(engine, lro, offset, dir, channel); + rv = engine_init(engine, xdev, offset, dir, channel); if (rv != 0) { - dbg_tfr("failed to create AXI %s %d engine.\n", + pr_info("failed to create AXI %s %d engine.\n", dir == DMA_TO_DEVICE ? 
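
probe_for_engine() recognizes a channel by the identifier register: bits 31:16 must read back as XDMA_ID_H2C (0x1fc0) or XDMA_ID_C2H (0x1fc1), per the defines in libxdma.h. A stand-alone decode of a sample value:

	#include <stdio.h>
	#include <stdint.h>

	#define XDMA_ID_H2C 0x1fc0u
	#define XDMA_ID_C2H 0x1fc1u

	int main(void)
	{
		uint32_t identifier = 0x1fc10003;	/* sample register read */
		uint32_t engine_id = (identifier & 0xffff0000u) >> 16;

		if (engine_id == XDMA_ID_H2C)
			printf("H2C engine\n");
		else if (engine_id == XDMA_ID_C2H)
			printf("C2H engine\n");		/* matches this sample */
		else
			printf("not an XDMA engine\n");
		return 0;
	}
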
"H2C" : "C2H", channel); return rv; @@ -2171,34 +3445,50 @@ static int probe_for_engine(struct xdma_dev *lro, enum dma_data_direction dir, return 0; } -static int probe_engines(struct xdma_dev *lro) +static int probe_engines(struct xdma_dev *xdev) { - int channel; - int rc = 0; + int i; + int rv = 0; - BUG_ON(!lro); + BUG_ON(!xdev); /* iterate over channels */ - for (channel = 0; channel < XDMA_CHANNEL_NUM_MAX; channel++) { - rc = probe_for_engine(lro, DMA_TO_DEVICE, channel); - if (rc) - goto fail; + for (i = 0; i < xdev->h2c_channel_max; i++) { + rv = probe_for_engine(xdev, DMA_TO_DEVICE, i); + if (rv) + break; } + xdev->h2c_channel_max = i; - for (channel = 0; channel < XDMA_CHANNEL_NUM_MAX; channel++) { - rc = probe_for_engine(lro, DMA_FROM_DEVICE, channel); - if (rc) + for (i = 0; i < xdev->c2h_channel_max; i++) { + rv = probe_for_engine(xdev, DMA_FROM_DEVICE, i); + if (rv) break; } + xdev->c2h_channel_max = i; - return rc; + return 0; +} -fail: - dbg_init("Engine probing failed - unwinding\n"); - remove_engines(lro); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) +static void pci_enable_relaxed_ordering(struct pci_dev *pdev) +{ + pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN); +} +#else +static void pci_enable_relaxed_ordering(struct pci_dev *pdev) +{ + u16 v; + int pos; - return rc; + pos = pci_pcie_cap(pdev); + if (pos > 0) { + pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &v); + v |= PCI_EXP_DEVCTL_RELAX_EN; + pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, v); + } } +#endif static void pci_check_extended_tag(struct xdma_dev *xdev, struct pci_dev *pdev) { @@ -2215,8 +3505,7 @@ static void pci_check_extended_tag(struct xdma_dev *xdev, struct pci_dev *pdev) if (pos > 0) pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &cap); else { - pr_info("%s, unable to access pcie cap.\n", - dev_name(&pdev->dev)); + pr_info("pdev 0x%p, unable to access pcie cap.\n", pdev); return; } #endif @@ -2225,304 +3514,359 @@ static void pci_check_extended_tag(struct xdma_dev *xdev, struct pci_dev *pdev) return; /* extended tag not enabled */ + pr_info("0x%p EXT_TAG disabled.\n", pdev); + if (xdev->config_bar_idx < 0) { pr_info("pdev 0x%p, xdev 0x%p, config bar UNKNOWN.\n", pdev, xdev); - return; + return; } reg = xdev->bar[xdev->config_bar_idx] + XDMA_OFS_CONFIG + 0x4C; v = read_register(reg); v = (v & 0xFF) | (((u32)32) << 8); - write_register(v, reg); -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) -static void pci_enable_relaxed_ordering(struct pci_dev *pdev) -{ - pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_RELAX_EN); -} -#else -static void pci_enable_relaxed_ordering(struct pci_dev *pdev) -{ - u16 v; - int pos; - - pos = pci_pcie_cap(pdev); - if (pos > 0) { - pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &v); - v |= PCI_EXP_DEVCTL_RELAX_EN; - pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, v); - } -} -#endif - -static void pci_keep_intx_enabled(struct pci_dev *pdev) -{ - u16 pcmd, pcmd_new; - - pci_read_config_word(pdev, PCI_COMMAND, &pcmd); - pcmd_new = pcmd & ~PCI_COMMAND_INTX_DISABLE; - if (pcmd_new != pcmd) { - pr_info("%s: clear INTX_DISABLE, 0x%x -> 0x%x.\n", - dev_name(&pdev->dev), pcmd, pcmd_new); - pci_write_config_word(pdev, PCI_COMMAND, pcmd_new); - } -} - -static void pci_clear_intr_status(struct pci_dev *pdev) -{ - u16 v; - - pci_read_config_word(pdev, PCI_STATUS, &v); - if (v & PCI_STATUS_INTERRUPT) { - pr_info("%s: PCI STATUS Interrupt pending 0x%x.\n", - dev_name(&pdev->dev), v); - pci_write_config_word(pdev, PCI_STATUS, 
PCI_STATUS_INTERRUPT); - } + write_register(v, reg, XDMA_OFS_CONFIG + 0x4C); } -int xdma_device_open(struct pci_dev *pdev, xdma_channel_tuple **tuple_p) +void *xdma_device_open(const char *mname, struct pci_dev *pdev, int *user_max, + int *h2c_channel_max, int *c2h_channel_max) { - int i, j; - int rc = 0; - struct xdma_dev *lro = NULL; - xdma_channel_tuple *tuple; + struct xdma_dev *xdev = NULL; + int rv = 0; - tuple = kzalloc(sizeof(xdma_channel_tuple) * XDMA_CHANNEL_NUM_MAX, - GFP_KERNEL); - if (!tuple) - return -ENOMEM; + pr_info("%s device %s, 0x%p.\n", mname, dev_name(&pdev->dev), pdev); /* allocate zeroed device book keeping structure */ - lro = alloc_dev_instance(pdev); - if (!lro) - goto err_alloc; - - rc = pci_enable_device(pdev); - if (rc) { - dbg_init("pci_enable_device() failed, rc = %d.\n", rc); + xdev = alloc_dev_instance(pdev); + if (!xdev) + return NULL; + xdev->mod_name = mname; + xdev->user_max = *user_max; + xdev->h2c_channel_max = *h2c_channel_max; + xdev->c2h_channel_max = *c2h_channel_max; + + xdma_device_flag_set(xdev, XDEV_FLAG_OFFLINE); + xdev_list_add(xdev); + + if (xdev->user_max == 0 || xdev->user_max > MAX_USER_IRQ) + xdev->user_max = MAX_USER_IRQ; + if (xdev->h2c_channel_max == 0 || + xdev->h2c_channel_max > XDMA_CHANNEL_NUM_MAX) + xdev->h2c_channel_max = XDMA_CHANNEL_NUM_MAX; + if (xdev->c2h_channel_max == 0 || + xdev->c2h_channel_max > XDMA_CHANNEL_NUM_MAX) + xdev->c2h_channel_max = XDMA_CHANNEL_NUM_MAX; + + rv = pci_enable_device(pdev); + if (rv) { + dbg_init("pci_enable_device() failed, %d.\n", rv); goto err_enable; } - pci_clear_intr_status(pdev); + /* keep INTx enabled */ + pci_check_intr_pend(pdev); + /* enable relaxed ordering */ pci_enable_relaxed_ordering(pdev); + pci_check_extended_tag(xdev, pdev); + + /* force MRRS to be 512 */ + rv = pcie_set_readrq(pdev, 512); + if (rv) + pr_info("device %s, error set PCI_EXP_DEVCTL_READRQ: %d.\n", + dev_name(&pdev->dev), rv); + /* enable bus master capability */ - dbg_init("pci_set_master()\n"); pci_set_master(pdev); - rc = probe_scan_for_msi(lro, pdev); - if (rc < 0) - goto err_scan_msi; - - rc = request_regions(lro, pdev); - if (rc) + rv = request_regions(xdev, pdev); + if (rv) goto err_regions; - rc = map_bars(lro, pdev); - if (rc) + rv = map_bars(xdev, pdev); + if (rv) goto err_map; - pci_check_extended_tag(lro, pdev); - - rc = set_dma_mask(pdev); - if (rc) + rv = set_dma_mask(pdev); + if (rv) goto err_mask; - /* clear out all irq enable masks */ - channel_interrupts_disable(lro, ~0); - user_interrupts_disable(lro, ~0); - read_interrupts(lro); + check_nonzero_interrupt_status(xdev); + /* explicitly zero all interrupt enable masks */ + channel_interrupts_disable(xdev, ~0); + user_interrupts_disable(xdev, ~0); + read_interrupts(xdev); - pci_keep_intx_enabled(pdev); + rv = probe_engines(xdev); + if (rv) + goto err_engines; - rc = irq_setup(lro, pdev); - if (rc) - goto err_interrupts; + rv = enable_msi_msix(xdev, pdev); + if (rv < 0) + goto err_enable_msix; - rc = probe_engines(lro); - if (rc) - goto err_engines; + rv = irq_setup(xdev, pdev); + if (rv < 0) + goto err_interrupts; - channel_interrupts_enable(lro, ~0); + if (!poll_mode) + channel_interrupts_enable(xdev, ~0); /* Flush writes */ - read_interrupts(lro); + read_interrupts(xdev); - lro->feature_id = find_feature_id(lro); - - xdev_list_add(lro); + *user_max = xdev->user_max; + *h2c_channel_max = xdev->h2c_channel_max; + *c2h_channel_max = xdev->c2h_channel_max; - for (i = 0, j = 0; i < XDMA_CHANNEL_NUM_MAX; i++) { - if (lro->engine_h2c[i].magic ==
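
xdma_device_open() now takes the client module's name plus in/out channel-count limits and returns an opaque handle (NULL on failure) instead of filling a tuple array; passing 0 for a limit means "as many as the library supports". A hedged probe-side sketch (my_probe_xdma and "my_driver" are illustrative):

	static void *my_probe_xdma(struct pci_dev *pdev)
	{
		int user_max = 0;	/* 0 = accept the library maximum */
		int h2c_max = 0, c2h_max = 0;
		void *hndl = xdma_device_open("my_driver", pdev,
					      &user_max, &h2c_max, &c2h_max);

		if (hndl)
			pr_info("%d user IRQs, %d H2C + %d C2H channels\n",
				user_max, h2c_max, c2h_max);
		return hndl;
	}
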
MAGIC_ENGINE) { - tuple[j].h2c = &lro->engine_h2c[i]; - tuple[j].c2h = &lro->engine_c2h[i]; - j++; - } - } - *tuple_p = tuple; - return j; + xdma_device_flag_clear(xdev, XDEV_FLAG_OFFLINE); + return (void *)xdev; -err_engines: - remove_engines(lro); - irq_teardown(lro); err_interrupts: + irq_teardown(xdev); +err_enable_msix: + disable_msi_msix(xdev, pdev); +err_engines: + remove_engines(xdev); err_mask: - unmap_bars(lro, pdev); + unmap_bars(xdev, pdev); err_map: - if (lro->got_regions) + if (xdev->got_regions) pci_release_regions(pdev); err_regions: - if (lro->msi_enabled) - pci_disable_msi(pdev); -err_scan_msi: - if (!lro->regions_in_use) + if (!xdev->regions_in_use) pci_disable_device(pdev); err_enable: - kfree(lro); -err_alloc: - kfree(tuple); - return rc; + xdev_list_remove(xdev); + kfree(xdev); + return NULL; } EXPORT_SYMBOL_GPL(xdma_device_open); -void xdma_device_close(struct pci_dev *pdev, xdma_channel_tuple *tuple) +void xdma_device_close(struct pci_dev *pdev, void *dev_hndl) { - struct xdma_dev *lro; + struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - if (!pdev) + dbg_init("pdev 0x%p, xdev 0x%p.\n", pdev, dev_hndl); + + if (!dev_hndl) return; - lro = xdev_find_by_pdev(pdev); - if (!lro) { - dbg_sg("remove(dev = 0x%p) empty.\n", pdev); + if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) return; - } + dbg_sg("remove(dev = 0x%p) where pdev->dev.driver_data = 0x%p\n", - pdev, lro); - if (lro->pci_dev != pdev) { + pdev, xdev); + if (xdev->pdev != pdev) { dbg_sg("pci_dev(0x%lx) != pdev(0x%lx)\n", - (unsigned long)lro->pci_dev, (unsigned long)pdev); + (unsigned long)xdev->pdev, (unsigned long)pdev); } - channel_interrupts_disable(lro, ~0); - user_interrupts_disable(lro, ~0); - read_interrupts(lro); + channel_interrupts_disable(xdev, ~0); + user_interrupts_disable(xdev, ~0); + read_interrupts(xdev); - remove_engines(lro); - irq_teardown(lro); - unmap_bars(lro, pdev); + irq_teardown(xdev); + disable_msi_msix(xdev, pdev); - if (lro->got_regions) - pci_release_regions(pdev); + remove_engines(xdev); + unmap_bars(xdev, pdev); - if (lro->msix_enabled) { - pci_disable_msix(pdev); - lro->msix_enabled = 0; - } else if (lro->msi_enabled) { - pci_disable_msi(pdev); - lro->msi_enabled = 0; + if (xdev->got_regions) { + dbg_init("pci_release_regions 0x%p.\n", pdev); + pci_release_regions(pdev); } - if (!lro->regions_in_use) + if (!xdev->regions_in_use) { + dbg_init("pci_disable_device 0x%p.\n", pdev); pci_disable_device(pdev); + } - xdev_list_remove(lro); - - kfree(lro); + xdev_list_remove(xdev); - if (tuple) - kfree(tuple); + kfree(xdev); } EXPORT_SYMBOL_GPL(xdma_device_close); -int xdma_device_restart(struct pci_dev *pdev) +void xdma_device_offline(struct pci_dev *pdev, void *dev_hndl) { - struct xdma_dev *lro; + struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; + struct xdma_engine *engine; + int i; - if (!pdev) - return -EINVAL; + if (!dev_hndl) + return; - lro = xdev_find_by_pdev(pdev); - if (!lro) { - dbg_sg("pdev 0x%p, no match found.\n", pdev); - return -EINVAL; - } - dbg_sg("NOT implemented.\n"); - return -EINVAL; -} -EXPORT_SYMBOL_GPL(xdma_device_restart); + if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) + return; -int xdma_user_isr_register(struct pci_dev *pdev, unsigned int mask, - irq_handler_t handler, const char *name, void *dev) -{ - struct xdma_dev *lro; - int i; +pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); + xdma_device_flag_set(xdev, XDEV_FLAG_OFFLINE); - if (!pdev) - return -EINVAL; + /* wait for all engines to be idle */ + for (i = 0; i < 
xdev->h2c_channel_max; i++) { + unsigned long flags; - lro = xdev_find_by_pdev(pdev); - if (!lro) { - dbg_irq("pdev 0x%p, no match found.\n", pdev); - return -EINVAL; + engine = &xdev->engine_h2c[i]; + + if (engine->magic == MAGIC_ENGINE) { + spin_lock_irqsave(&engine->lock, flags); + engine->shutdown |= ENGINE_SHUTDOWN_REQUEST; + + xdma_engine_stop(engine); + engine->running = 0; + spin_unlock_irqrestore(&engine->lock, flags); + } } - for (i = 0; i < MAX_USER_IRQ && mask; i++) { - unsigned int bit = (1 << i); + for (i = 0; i < xdev->c2h_channel_max; i++) { + unsigned long flags; - if ((bit & mask) == 0) - continue; + engine = &xdev->engine_c2h[i]; + if (engine->magic == MAGIC_ENGINE) { + spin_lock_irqsave(&engine->lock, flags); + engine->shutdown |= ENGINE_SHUTDOWN_REQUEST; - mask &= ~bit; - lro->user_irq[i].handler = handler; - lro->user_irq[i].name = name; - lro->user_irq[i].dev = dev; + xdma_engine_stop(engine); + engine->running = 0; + spin_unlock_irqrestore(&engine->lock, flags); + } + } + + /* turn off interrupts */ + channel_interrupts_disable(xdev, ~0); + user_interrupts_disable(xdev, ~0); + read_interrupts(xdev); + irq_teardown(xdev); + + pr_info("xdev 0x%p, done.\n", xdev); +} +EXPORT_SYMBOL_GPL(xdma_device_offline); + +void xdma_device_online(struct pci_dev *pdev, void *dev_hndl) +{ + struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; + struct xdma_engine *engine; + unsigned long flags; + int i; + + if (!dev_hndl) + return; + + if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) + return; + +pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); + + for (i = 0; i < xdev->h2c_channel_max; i++) { + engine = &xdev->engine_h2c[i]; + if (engine->magic == MAGIC_ENGINE) { + engine_init_regs(engine); + spin_lock_irqsave(&engine->lock, flags); + engine->shutdown &= ~ENGINE_SHUTDOWN_REQUEST; + spin_unlock_irqrestore(&engine->lock, flags); + } + } + + for (i = 0; i < xdev->c2h_channel_max; i++) { + engine = &xdev->engine_c2h[i]; + if (engine->magic == MAGIC_ENGINE) { + engine_init_regs(engine); + spin_lock_irqsave(&engine->lock, flags); + engine->shutdown &= ~ENGINE_SHUTDOWN_REQUEST; + spin_unlock_irqrestore(&engine->lock, flags); + } + } + + /* re-write the interrupt table */ + if (!poll_mode) { + irq_setup(xdev, pdev); + + channel_interrupts_enable(xdev, ~0); + user_interrupts_enable(xdev, xdev->mask_irq_user); + read_interrupts(xdev); + } + + xdma_device_flag_clear(xdev, XDEV_FLAG_OFFLINE); +pr_info("xdev 0x%p, done.\n", xdev); +} +EXPORT_SYMBOL_GPL(xdma_device_online); + +int xdma_device_restart(struct pci_dev *pdev, void *dev_hndl) +{ + struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; + + if (!dev_hndl) + return -EINVAL; + + if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) + return -EINVAL; + + pr_info("NOT implemented, 0x%p.\n", xdev); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(xdma_device_restart); + +int xdma_user_isr_register(void *dev_hndl, unsigned int mask, + irq_handler_t handler, void *dev) +{ + struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; + int i; + + if (!dev_hndl) + return -EINVAL; + + if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) + return -EINVAL; + + for (i = 0; i < xdev->user_max && mask; i++) { + unsigned int bit = (1 << i); + + if ((bit & mask) == 0) + continue; + + mask &= ~bit; + xdev->user_irq[i].handler = handler; + xdev->user_irq[i].dev = dev; } return 0; } EXPORT_SYMBOL_GPL(xdma_user_isr_register); -int xdma_user_isr_enable(struct pci_dev *pdev, unsigned int mask) +int xdma_user_isr_enable(void *dev_hndl, unsigned int mask) { - 
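
xdma_device_offline()/xdma_device_online() form a quiesce/resume pair: offline requests shutdown on every engine, stops it, and masks and tears down interrupts; online re-runs engine_init_regs() and re-arms the interrupt table, restoring xdev->mask_irq_user. A hedged sketch of bracketing an operation that makes the device temporarily unreachable, such as reprogramming the FPGA (the wrapper function is illustrative):

	static void my_reprogram_device(struct pci_dev *pdev, void *xdma_hndl)
	{
		xdma_device_offline(pdev, xdma_hndl);	/* stop engines, mask IRQs */

		/* ... reload the device image here ... */

		xdma_device_online(pdev, xdma_hndl);	/* re-init regs, re-arm IRQs */
	}
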
struct xdma_dev *lro; + struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - if (!pdev) + if (!dev_hndl) return -EINVAL; - lro = xdev_find_by_pdev(pdev); - if (!lro) { - dbg_irq("pdev 0x%p, no match found.\n", pdev); + if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) return -EINVAL; - } + xdev->mask_irq_user |= mask; /* enable user interrupts */ - user_interrupts_enable(lro, mask); - read_interrupts(lro); + user_interrupts_enable(xdev, mask); + read_interrupts(xdev); return 0; } EXPORT_SYMBOL_GPL(xdma_user_isr_enable); -int xdma_user_isr_disable(struct pci_dev *pdev, unsigned int mask) +int xdma_user_isr_disable(void *dev_hndl, unsigned int mask) { - struct xdma_dev *lro; - - if (!pdev) - return -EINVAL; + struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - lro = xdev_find_by_pdev(pdev); - if (!lro) { - dbg_irq("pdev 0x%p, no match found.\n", pdev); + if (!dev_hndl) return -EINVAL; - } - user_interrupts_disable(lro, mask); - read_interrupts(lro); + if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) + return -EINVAL; + + xdev->mask_irq_user &= ~mask; + user_interrupts_disable(xdev, mask); + read_interrupts(xdev); return 0; } @@ -2543,3 +3887,556 @@ static void __exit xdma_base_exit(void) module_init(xdma_base_init); module_exit(xdma_base_exit); #endif +/* makes an existing transfer cyclic */ +static void xdma_transfer_cyclic(struct xdma_transfer *transfer) +{ + /* link last descriptor to first descriptor */ + xdma_desc_link(transfer->desc_virt + transfer->desc_num - 1, + transfer->desc_virt, transfer->desc_bus); + /* remember transfer is cyclic */ + transfer->cyclic = 1; +} + +static int transfer_monitor_cyclic(struct xdma_engine *engine, + struct xdma_transfer *transfer, int timeout_ms) +{ + struct xdma_result *result; + int rc = 0; + + BUG_ON(!engine); + BUG_ON(!transfer); + + result = engine->cyclic_result; + BUG_ON(!result); + + if (poll_mode) { + int i ; + for (i = 0; i < 5; i++) { + rc = engine_service_poll(engine, 0); + if (rc) { + pr_info("%s service_poll failed %d.\n", + engine->name, rc); + rc = -ERESTARTSYS; + } + if (result[engine->rx_head].status) + return 0; + } + } else { + if (enable_credit_mp){ + dbg_tfr("%s: rx_head=%d,rx_tail=%d, wait ...\n", + engine->name, engine->rx_head, engine->rx_tail); + rc = wait_event_interruptible_timeout( transfer->wq, + (engine->rx_head!=engine->rx_tail || + engine->rx_overrun), + msecs_to_jiffies(timeout_ms)); + dbg_tfr("%s: wait returns %d, rx %d/%d, overrun %d.\n", + engine->name, rc, engine->rx_head, + engine->rx_tail, engine->rx_overrun); + } else { + rc = wait_event_interruptible_timeout( transfer->wq, + engine->eop_found, + msecs_to_jiffies(timeout_ms)); + dbg_tfr("%s: wait returns %d, eop_found %d.\n", + engine->name, rc, engine->eop_found); + } + } + + return 0; +} + +struct scatterlist *sglist_index(struct sg_table *sgt, unsigned int idx) +{ + struct scatterlist *sg = sgt->sgl; + int i; + + if (idx >= sgt->orig_nents) + return NULL; + + if (!idx) + return sg; + + for (i = 0; i < idx; i++, sg = sg_next(sg)) + ; + + return sg; +} + +static int copy_cyclic_to_user(struct xdma_engine *engine, int pkt_length, + int head, char __user *buf, size_t count) +{ + struct scatterlist *sg; + int more = pkt_length; + + BUG_ON(!engine); + BUG_ON(!buf); + + dbg_tfr("%s, pkt_len %d, head %d, user buf idx %u.\n", + engine->name, pkt_length, head, engine->user_buffer_index); + + sg = sglist_index(&engine->cyclic_sgt, head); + if (!sg) { + pr_info("%s, head %d OOR, sgl %u.\n", + engine->name, head, 
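
The user-interrupt API is handle-based too: xdma_user_isr_register() attaches a handler to the IRQs selected by mask, and xdma_user_isr_enable() unmasks them while recording the mask in xdev->mask_irq_user. A hedged sketch wiring user IRQ 0 (handler and wrapper names are illustrative; assumes <linux/interrupt.h>):

	static irqreturn_t my_user_irq_handler(int irq, void *dev_id)
	{
		/* acknowledge the user event / schedule follow-up work here */
		return IRQ_HANDLED;
	}

	static int my_hook_user_irq(void *xdma_hndl, void *dev)
	{
		int rv = xdma_user_isr_register(xdma_hndl, 1u << 0,
						my_user_irq_handler, dev);
		if (rv)
			return rv;
		return xdma_user_isr_enable(xdma_hndl, 1u << 0);
	}
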
engine->cyclic_sgt.orig_nents); + return -EIO; + } + + /* EOP found? Transfer anything from head to EOP */ + while (more) { + unsigned int copy = more > PAGE_SIZE ? PAGE_SIZE : more; + unsigned int blen = count - engine->user_buffer_index; + int rv; + + if (copy > blen) + copy = blen; + + dbg_tfr("%s sg %d, 0x%p, copy %u to user %u.\n", + engine->name, head, sg, copy, + engine->user_buffer_index); + + rv = copy_to_user(&buf[engine->user_buffer_index], + page_address(sg_page(sg)), copy); + if (rv) { + pr_info("%s copy_to_user %u failed %d\n", + engine->name, copy, rv); + return -EIO; + } + + more -= copy; + engine->user_buffer_index += copy; + + if (engine->user_buffer_index == count) { + /* user buffer used up */ + break; + } + + head++; + if (head >= CYCLIC_RX_PAGES_MAX) { + head = 0; + sg = engine->cyclic_sgt.sgl; + } else + sg = sg_next(sg); + } + + return pkt_length; +} + +static int complete_cyclic(struct xdma_engine *engine, char __user *buf, + size_t count) +{ + struct xdma_result *result; + int pkt_length = 0; + int fault = 0; + int eop = 0; + int head; + int rc = 0; + int num_credit = 0; + unsigned long flags; + + BUG_ON(!engine); + result = engine->cyclic_result; + BUG_ON(!result); + + spin_lock_irqsave(&engine->lock, flags); + + /* where the host currently is in the ring buffer */ + head = engine->rx_head; + + /* iterate over newly received results */ + while (engine->rx_head != engine->rx_tail||engine->rx_overrun) { + + WARN_ON(result[engine->rx_head].status==0); + + dbg_tfr("%s, result[%d].status = 0x%x length = 0x%x.\n", + engine->name, engine->rx_head, + result[engine->rx_head].status, + result[engine->rx_head].length); + + if ((result[engine->rx_head].status >> 16) != C2H_WB) { + pr_info("%s, result[%d].status 0x%x, no magic.\n", + engine->name, engine->rx_head, + result[engine->rx_head].status); + fault = 1; + } else if (result[engine->rx_head].length > PAGE_SIZE) { + pr_info("%s, result[%d].len 0x%x, > PAGE_SIZE 0x%lx.\n", + engine->name, engine->rx_head, + result[engine->rx_head].length, PAGE_SIZE); + fault = 1; + } else if (result[engine->rx_head].length == 0) { + pr_info("%s, result[%d].length 0x%x.\n", + engine->name, engine->rx_head, + result[engine->rx_head].length); + fault = 1; + /* valid result */ + } else { + pkt_length += result[engine->rx_head].length; + num_credit++; + /* seen eop? */ + //if (result[engine->rx_head].status & RX_STATUS_EOP) + if (result[engine->rx_head].status & RX_STATUS_EOP){ + eop = 1; + engine->eop_found = 1; + } + + dbg_tfr("%s, pkt_length=%d (%s)\n", + engine->name, pkt_length, + eop ? 
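
copy_cyclic_to_user() treats the receive pages as a ring: when head passes the last of the CYCLIC_RX_PAGES_MAX pages it wraps back to the first scatterlist entry. The index arithmetic in isolation:

	#include <stdio.h>

	#define CYCLIC_RX_PAGES_MAX 256

	int main(void)
	{
		int head = 254, i;

		/* walk three pages across the ring boundary, as the copy loop does */
		for (i = 0; i < 3; i++) {
			printf("page %d\n", head);	/* 254, 255, 0 */
			head = (head + 1) % CYCLIC_RX_PAGES_MAX;
		}
		return 0;
	}
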
"with EOP" : "no EOP yet"); + } + /* clear result */ + result[engine->rx_head].status = 0; + result[engine->rx_head].length = 0; + /* proceed head pointer so we make progress, even when fault */ + engine->rx_head = (engine->rx_head + 1) % CYCLIC_RX_PAGES_MAX; + + /* stop processing if a fault/eop was detected */ + if (fault || eop){ + break; + } + } + + spin_unlock_irqrestore(&engine->lock, flags); + + if (fault) + return -EIO; + + rc = copy_cyclic_to_user(engine, pkt_length, head, buf, count); + engine->rx_overrun = 0; + /* if copy is successful, release credits */ + if(rc > 0) + write_register(num_credit,&engine->sgdma_regs->credits, 0); + + return rc; +} + +ssize_t xdma_engine_read_cyclic(struct xdma_engine *engine, char __user *buf, + size_t count, int timeout_ms) +{ + int i = 0; + int rc = 0; + int rc_len = 0; + struct xdma_transfer *transfer; + + BUG_ON(!engine); + BUG_ON(engine->magic != MAGIC_ENGINE); + + transfer = &engine->cyclic_req->xfer; + BUG_ON(!transfer); + + engine->user_buffer_index = 0; + + do { + rc = transfer_monitor_cyclic(engine, transfer, timeout_ms); + if (rc < 0) + return rc; + rc = complete_cyclic(engine, buf, count); + if (rc < 0) + return rc; + rc_len += rc; + + i++; + if (i > 10) + break; + } while (!engine->eop_found); + + if(enable_credit_mp) + engine->eop_found = 0; + + return rc_len; +} + +static void sgt_free_with_pages(struct sg_table *sgt, int dir, + struct pci_dev *pdev) +{ + struct scatterlist *sg = sgt->sgl; + int npages = sgt->orig_nents; + int i; + + for (i = 0; i < npages; i++, sg = sg_next(sg)) { + struct page *pg = sg_page(sg); + dma_addr_t bus = sg_dma_address(sg); + + if (pg) { + if (pdev) + pci_unmap_page(pdev, bus, PAGE_SIZE, dir); + __free_page(pg); + } else + break; + } + sg_free_table(sgt); + memset(sgt, 0, sizeof(struct sg_table)); +} + +static int sgt_alloc_with_pages(struct sg_table *sgt, unsigned int npages, + int dir, struct pci_dev *pdev) +{ + struct scatterlist *sg; + int i; + + if (sg_alloc_table(sgt, npages, GFP_KERNEL)) { + pr_info("sgt OOM.\n"); + return -ENOMEM; + } + + sg = sgt->sgl; + for (i = 0; i < npages; i++, sg = sg_next(sg)) { + struct page *pg = alloc_page(GFP_KERNEL); + + if (!pg) { + pr_info("%d/%u, page OOM.\n", i, npages); + goto err_out; + } + + if (pdev) { + dma_addr_t bus = pci_map_page(pdev, pg, 0, PAGE_SIZE, + dir); + if (unlikely(pci_dma_mapping_error(pdev, bus))) { + pr_info("%d/%u, page 0x%p map err.\n", + i, npages, pg); + __free_page(pg); + goto err_out; + } + sg_dma_address(sg) = bus; + sg_dma_len(sg) = PAGE_SIZE; + } + sg_set_page(sg, pg, PAGE_SIZE, 0); + } + + sgt->orig_nents = sgt->nents = npages; + + return 0; + +err_out: + sgt_free_with_pages(sgt, dir, pdev); + return -ENOMEM; +} + +int xdma_cyclic_transfer_setup(struct xdma_engine *engine) +{ + struct xdma_dev *xdev; + struct xdma_transfer *xfer; + dma_addr_t bus; + unsigned long flags; + int i; + int rc; + + BUG_ON(!engine); + xdev = engine->xdev; + BUG_ON(!xdev); + + if (engine->cyclic_req) { + pr_info("%s: exclusive access already taken.\n", + engine->name); + return -EBUSY; + } + + spin_lock_irqsave(&engine->lock, flags); + + engine->rx_tail = 0; + engine->rx_head = 0; + engine->rx_overrun = 0; + engine->eop_found = 0; + + rc = sgt_alloc_with_pages(&engine->cyclic_sgt, CYCLIC_RX_PAGES_MAX, + engine->dir, xdev->pdev); + if (rc < 0) { + pr_info("%s cyclic pages %u OOM.\n", + engine->name, CYCLIC_RX_PAGES_MAX); + goto err_out; + } + + engine->cyclic_req = xdma_init_request(&engine->cyclic_sgt, 0); + if (!engine->cyclic_req) { + pr_info("%s cyclic 
request OOM.\n", engine->name); + rc = -ENOMEM; + goto err_out; + } + +#ifdef __LIBXDMA_DEBUG__ + xdma_request_cb_dump(engine->cyclic_req); +#endif + + rc = transfer_init(engine, engine->cyclic_req); + if (rc < 0) + goto err_out; + + xfer = &engine->cyclic_req->xfer; + + /* replace source addresses with result write-back addresses */ + memset(engine->cyclic_result, 0, + CYCLIC_RX_PAGES_MAX * sizeof(struct xdma_result)); + bus = engine->cyclic_result_bus; + for (i = 0; i < xfer->desc_num; i++) { + xfer->desc_virt[i].src_addr_lo = cpu_to_le32(PCI_DMA_L(bus)); + xfer->desc_virt[i].src_addr_hi = cpu_to_le32(PCI_DMA_H(bus)); + bus += sizeof(struct xdma_result); + } + /* set control of all descriptors */ + for (i = 0; i < xfer->desc_num; i++) { + xdma_desc_control_clear(xfer->desc_virt + i, LS_BYTE_MASK); + xdma_desc_control_set(xfer->desc_virt + i, + XDMA_DESC_EOP | XDMA_DESC_COMPLETED); + } + + /* make this a cyclic transfer */ + xdma_transfer_cyclic(xfer); + +#ifdef __LIBXDMA_DEBUG__ + transfer_dump(xfer); +#endif + + if(enable_credit_mp){ + //write_register(RX_BUF_PAGES,&engine->sgdma_regs->credits); + write_register(128, &engine->sgdma_regs->credits, 0); + } + + spin_unlock_irqrestore(&engine->lock, flags); + + /* start cyclic transfer */ + transfer_queue(engine, xfer); + + return 0; + + /* unwind on errors */ +err_out: + if (engine->cyclic_req) { + xdma_request_free(engine->cyclic_req); + engine->cyclic_req = NULL; + } + + if (engine->cyclic_sgt.orig_nents) { + sgt_free_with_pages(&engine->cyclic_sgt, engine->dir, + xdev->pdev); + engine->cyclic_sgt.orig_nents = 0; + engine->cyclic_sgt.nents = 0; + engine->cyclic_sgt.sgl = NULL; + } + + spin_unlock_irqrestore(&engine->lock, flags); + + return rc; +} + + +static int cyclic_shutdown_polled(struct xdma_engine *engine) +{ + BUG_ON(!engine); + + spin_lock(&engine->lock); + + dbg_tfr("Polling for shutdown completion\n"); + do { + engine_status_read(engine, 1, 0); + schedule(); + } while (engine->status & XDMA_STAT_BUSY); + + if ((engine->running) && !(engine->status & XDMA_STAT_BUSY)) { + dbg_tfr("Engine has stopped\n"); + + if (!list_empty(&engine->transfer_list)) + engine_transfer_dequeue(engine); + + engine_service_shutdown(engine); + } + + dbg_tfr("Shutdown completion polling done\n"); + spin_unlock(&engine->lock); + + return 0; +} + +static int cyclic_shutdown_interrupt(struct xdma_engine *engine) +{ + int rc; + + BUG_ON(!engine); + + rc = wait_event_interruptible_timeout(engine->shutdown_wq, + !engine->running, msecs_to_jiffies(10000)); + +#if 0 + if (rc) { + dbg_tfr("wait_event_interruptible=%d\n", rc); + return rc; + } +#endif + + if (engine->running) { + pr_info("%s still running?!, %d\n", engine->name, rc); + return -EINVAL; + } + + return rc; +} + +int xdma_cyclic_transfer_teardown(struct xdma_engine *engine) +{ + int rc; + struct xdma_dev *xdev = engine->xdev; + struct xdma_transfer *transfer; + unsigned long flags; + + transfer = engine_cyclic_stop(engine); + + spin_lock_irqsave(&engine->lock, flags); + if (transfer) { + dbg_tfr("%s: stop transfer 0x%p.\n", engine->name, transfer); + if (transfer != &engine->cyclic_req->xfer) { + pr_info("%s unexpected transfer 0x%p/0x%p\n", + engine->name, transfer, + &engine->cyclic_req->xfer); + } + } + /* allow engine to be serviced after stop request */ + spin_unlock_irqrestore(&engine->lock, flags); + + /* wait for engine to be no longer running */ + if (poll_mode) + rc = cyclic_shutdown_polled(engine); + else + rc = cyclic_shutdown_interrupt(engine); + + /* obtain spin lock to atomically 
remove resources */ + spin_lock_irqsave(&engine->lock, flags); + + if (engine->cyclic_req) { + xdma_request_free(engine->cyclic_req); + engine->cyclic_req = NULL; + } + + if (engine->cyclic_sgt.orig_nents) { + sgt_free_with_pages(&engine->cyclic_sgt, engine->dir, + xdev->pdev); + engine->cyclic_sgt.orig_nents = 0; + engine->cyclic_sgt.nents = 0; + engine->cyclic_sgt.sgl = NULL; + } + + spin_unlock_irqrestore(&engine->lock, flags); + + return 0; +} + +int engine_addrmode_set(struct xdma_engine *engine, unsigned long arg) +{ + int rv; + unsigned long dst; + u32 w = XDMA_CTRL_NON_INCR_ADDR; + + dbg_perf("IOCTL_XDMA_ADDRMODE_SET\n"); + rv = get_user(dst, (int __user *)arg); + + if (rv == 0) { + engine->non_incr_addr = !!dst; + if (engine->non_incr_addr) + write_register(w, &engine->regs->control_w1s, + (unsigned long)(&engine->regs->control_w1s) - + (unsigned long)(&engine->regs)); + else + write_register(w, &engine->regs->control_w1c, + (unsigned long)(&engine->regs->control_w1c) - + (unsigned long)(&engine->regs)); + } + engine_alignments(engine); + + return rv; +} + diff --git a/sdk/linux_kernel_drivers/edma/libxdma.h b/sdk/linux_kernel_drivers/edma/libxdma.h index d61814f09..48c8e9c34 100644 --- a/sdk/linux_kernel_drivers/edma/libxdma.h +++ b/sdk/linux_kernel_drivers/edma/libxdma.h @@ -1,7 +1,32 @@ +/******************************************************************************* + * + * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". 
+ * + * Karen Xie + * + ******************************************************************************/ #ifndef XDMA_LIB_H #define XDMA_LIB_H +#include #include +#include #include #include #include @@ -10,7 +35,6 @@ #include #include #include -#include /* Switch debug printing on/off */ #define XDMA_DEBUG 0 @@ -21,6 +45,9 @@ /* maximum amount of register space to map */ #define XDMA_BAR_SIZE (0x8000UL) +/* Use this definition to poll several times between calls to schedule */ +#define NUM_POLLS_PER_SCHED 100 + #define XDMA_CHANNEL_NUM_MAX (4) /* * interrupts per engine, rad2_vul.sv:237 @@ -38,7 +65,8 @@ #define XDMA_TRANSFER_MAX_DESC (2048) /* maximum size of a single DMA transfer descriptor */ -#define XDMA_DESC_MAX_BYTES ((1 << 18) - 1) +#define XDMA_DESC_BLEN_BITS 28 +#define XDMA_DESC_BLEN_MAX ((1 << (XDMA_DESC_BLEN_BITS)) - 1) /* bits of the SG DMA control register */ #define XDMA_CTRL_RUN_STOP (1UL << 0) @@ -58,10 +86,59 @@ #define XDMA_STAT_DESC_COMPLETED (1UL << 2) #define XDMA_STAT_ALIGN_MISMATCH (1UL << 3) #define XDMA_STAT_MAGIC_STOPPED (1UL << 4) -#define XDMA_STAT_FETCH_STOPPED (1UL << 5) +#define XDMA_STAT_INVALID_LEN (1UL << 5) #define XDMA_STAT_IDLE_STOPPED (1UL << 6) -#define XDMA_STAT_READ_ERROR (0x1FUL << 9) -#define XDMA_STAT_DESC_ERROR (0x1FUL << 19) + +#define XDMA_STAT_COMMON_ERR_MASK \ + (XDMA_STAT_ALIGN_MISMATCH | XDMA_STAT_MAGIC_STOPPED | \ + XDMA_STAT_INVALID_LEN) + +/* desc_error, C2H & H2C */ +#define XDMA_STAT_DESC_UNSUPP_REQ (1UL << 19) +#define XDMA_STAT_DESC_COMPL_ABORT (1UL << 20) +#define XDMA_STAT_DESC_PARITY_ERR (1UL << 21) +#define XDMA_STAT_DESC_HEADER_EP (1UL << 22) +#define XDMA_STAT_DESC_UNEXP_COMPL (1UL << 23) + +#define XDMA_STAT_DESC_ERR_MASK \ + (XDMA_STAT_DESC_UNSUPP_REQ | XDMA_STAT_DESC_COMPL_ABORT | \ + XDMA_STAT_DESC_PARITY_ERR | XDMA_STAT_DESC_HEADER_EP | \ + XDMA_STAT_DESC_UNEXP_COMPL) + +/* read error: H2C */ +#define XDMA_STAT_H2C_R_UNSUPP_REQ (1UL << 9) +#define XDMA_STAT_H2C_R_COMPL_ABORT (1UL << 10) +#define XDMA_STAT_H2C_R_PARITY_ERR (1UL << 11) +#define XDMA_STAT_H2C_R_HEADER_EP (1UL << 12) +#define XDMA_STAT_H2C_R_UNEXP_COMPL (1UL << 13) + +#define XDMA_STAT_H2C_R_ERR_MASK \ + (XDMA_STAT_H2C_R_UNSUPP_REQ | XDMA_STAT_H2C_R_COMPL_ABORT | \ + XDMA_STAT_H2C_R_PARITY_ERR | XDMA_STAT_H2C_R_HEADER_EP | \ + XDMA_STAT_H2C_R_UNEXP_COMPL) + +/* write error, H2C only */ +#define XDMA_STAT_H2C_W_DECODE_ERR (1UL << 14) +#define XDMA_STAT_H2C_W_SLAVE_ERR (1UL << 15) + +#define XDMA_STAT_H2C_W_ERR_MASK \ + (XDMA_STAT_H2C_W_DECODE_ERR | XDMA_STAT_H2C_W_SLAVE_ERR) + +/* read error: C2H */ +#define XDMA_STAT_C2H_R_DECODE_ERR (1UL << 9) +#define XDMA_STAT_C2H_R_SLAVE_ERR (1UL << 10) + +#define XDMA_STAT_C2H_R_ERR_MASK \ + (XDMA_STAT_C2H_R_DECODE_ERR | XDMA_STAT_C2H_R_SLAVE_ERR) + +/* all combined */ +#define XDMA_STAT_H2C_ERR_MASK \ + (XDMA_STAT_COMMON_ERR_MASK | XDMA_STAT_DESC_ERR_MASK | \ + XDMA_STAT_H2C_R_ERR_MASK | XDMA_STAT_H2C_W_ERR_MASK) + +#define XDMA_STAT_C2H_ERR_MASK \ + (XDMA_STAT_COMMON_ERR_MASK | XDMA_STAT_DESC_ERR_MASK | \ + XDMA_STAT_C2H_R_ERR_MASK) /* bits of the SGDMA descriptor control field */ #define XDMA_DESC_STOPPED (1UL << 0) @@ -79,16 +156,13 @@ #define XDMA_ID_H2C 0x1fc0U #define XDMA_ID_C2H 0x1fc1U -/* Specifies buffer size used for C2H AXI-ST mode */ -#define RX_BUF_BLOCK 4096 -#define RX_BUF_PAGES 256 -#define RX_BUF_SIZE (RX_BUF_PAGES * RX_BUF_BLOCK) -#define RX_RESULT_BUF_SIZE (RX_BUF_PAGES * sizeof(struct xdma_result)) +/* for C2H AXI-ST mode */ +#define CYCLIC_RX_PAGES_MAX 256 #define LS_BYTE_MASK 0x000000FFUL 
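The composed masks above fold the common, descriptor-level, and direction-specific error bits into a single test per engine direction. As a minimal sketch of how a driver might use them (the helper name `engine_status_is_error` is illustrative and not part of this diff):

```c
#include <linux/types.h>
#include <linux/dma-direction.h>

/* Illustrative helper: select the error mask matching the engine
 * direction (DMA_TO_DEVICE == H2C, DMA_FROM_DEVICE == C2H) and test
 * a status word read from the engine registers against it. */
static inline bool engine_status_is_error(u32 status,
					  enum dma_data_direction dir)
{
	u32 mask = (dir == DMA_TO_DEVICE) ? XDMA_STAT_H2C_ERR_MASK
					  : XDMA_STAT_C2H_ERR_MASK;

	return (status & mask) != 0;
}
```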
-#define BLOCK_ID_MASK 0xFFF00000UL -#define BLOCK_ID_HEAD 0x1FC00000UL +#define BLOCK_ID_MASK 0xFFF00000 +#define BLOCK_ID_HEAD 0x1FC00000 #define IRQ_BLOCK_ID 0x1fc20000UL #define CONFIG_BLOCK_ID 0x1fc30000UL @@ -109,6 +183,7 @@ #define H2C_CHANNEL_OFFSET 0x1000 #define SGDMA_OFFSET_FROM_CHANNEL 0x4000 #define CHANNEL_SPACING 0x100 +#define TARGET_SPACING 0x1000 #define BYPASS_MODE_SPACING 0x0100 @@ -123,25 +198,25 @@ #define VMEM_FLAGS (VM_IO | VM_RESERVED) #endif -/* disable debugging */ -#if (XDMA_DEBUG == 0) - #define dbg_io(...) - #define dbg_fops(...) - #define dbg_perf(fmt, ...) - #define dbg_sg(...) - #define dbg_tfr(...) - #define dbg_irq(...) - #define dbg_init(...) - #define dbg_desc(...) +#ifdef __LIBXDMA_DEBUG__ +#define dbg_io pr_err +#define dbg_fops pr_err +#define dbg_perf pr_err +#define dbg_sg pr_err +#define dbg_tfr pr_err +#define dbg_irq pr_err +#define dbg_init pr_err +#define dbg_desc pr_err #else - #define dbg_io pr_info - #define dbg_fops pr_info - #define dbg_perf pr_info - #define dbg_sg pr_info - #define dbg_tfr pr_info - #define dbg_irq pr_info - #define dbg_init pr_info - #define dbg_desc pr_info +/* disable debugging */ +#define dbg_io(...) +#define dbg_fops(...) +#define dbg_perf(...) +#define dbg_sg(...) +#define dbg_tfr(...) +#define dbg_irq(...) +#define dbg_init(...) +#define dbg_desc(...) #endif /* SECTION: Enum definitions */ @@ -149,7 +224,8 @@ enum transfer_state { TRANSFER_STATE_NEW = 0, TRANSFER_STATE_SUBMITTED, TRANSFER_STATE_COMPLETED, - TRANSFER_STATE_FAILED + TRANSFER_STATE_FAILED, + TRANSFER_STATE_ABORTED }; enum shutdown_state { @@ -203,6 +279,14 @@ struct engine_regs { u32 interrupt_enable_mask_w1s; u32 interrupt_enable_mask_w1c; u32 reserved_3[9]; /* padding */ + + u32 perf_ctrl; + u32 perf_cyc_lo; + u32 perf_cyc_hi; + u32 perf_dat_lo; + u32 perf_dat_hi; + u32 perf_pnd_lo; + u32 perf_pnd_hi; } __packed; struct engine_sgdma_regs { @@ -214,6 +298,7 @@ struct engine_sgdma_regs { u32 first_desc_hi; /* number of adjacent descriptors at first_desc */ u32 first_desc_adjacent; + u32 credits; } __packed; struct msix_vec_table_entry { @@ -247,6 +332,21 @@ struct interrupt_regs { u32 channel_msi_vector[8]; } __packed; +struct sgdma_common_regs { + u32 padding[8]; + u32 credit_mode_enable; + u32 credit_mode_enable_w1s; + u32 credit_mode_enable_w1c; +} __packed; + + +/* Structure for polled mode descriptor writeback */ +struct xdma_poll_wb { + u32 completed_desc_count; + u32 reserved_1[7]; +} __packed; + + /** * Descriptor for a single contiguous memory block transfer. 
* @@ -279,6 +379,11 @@ struct xdma_result { u32 reserved_1[6]; /* padding */ } __packed; +struct sw_desc { + dma_addr_t addr; + unsigned int len; +}; + /* Describes a (SG DMA) single transfer for the engine */ struct xdma_transfer { struct list_head entry; /* queue of non-completed transfers */ @@ -292,14 +397,27 @@ struct xdma_transfer { enum transfer_state state; /* state of the transfer */ unsigned int flags; #define XFER_FLAG_NEED_UNMAP 0x1 + int cyclic; /* flag if transfer is cyclic */ int last_in_request; /* flag if last within request */ - unsigned int xfer_len; + unsigned int len; struct sg_table *sgt; }; +struct xdma_request_cb { + struct sg_table *sgt; + unsigned int total_len; + u64 ep_addr; + + struct xdma_transfer xfer; + + unsigned int sw_desc_idx; + unsigned int sw_desc_cnt; + struct sw_desc sdesc[0]; +}; + struct xdma_engine { unsigned long magic; /* structure ID for sanity checks */ - struct xdma_dev *lro; /* parent device */ + struct xdma_dev *xdev; /* parent device */ char name[5]; /* name of this engine */ int version; /* version of this engine */ //dev_t cdevno; /* character device major:minor */ @@ -312,9 +430,11 @@ struct xdma_engine { /* Engine state, configuration and flags */ enum shutdown_state shutdown; /* engine shutdown mode */ + enum dma_data_direction dir; + int device_open; /* flag if engine node open, ST mode only */ int running; /* flag if the driver started engine */ int non_incr_addr; /* flag if non-incremental addressing used */ - enum dma_data_direction dir; + int streaming; int addr_align; /* source/dest alignment in bytes */ int len_granularity; /* transfer length multiple */ int addr_bits; /* HW datapath address width */ @@ -327,6 +447,24 @@ struct xdma_engine { /* Transfer list management */ struct list_head transfer_list; /* queue of transfers */ + /* Members applicable to AXI-ST C2H (cyclic) transfers */ + struct xdma_result *cyclic_result; + dma_addr_t cyclic_result_bus; /* bus addr for transfer */ + struct xdma_request_cb *cyclic_req; + struct sg_table cyclic_sgt; + u8 eop_found; /* used only for cyclic(rx:c2h) */ + + int rx_tail; /* follows the HW */ + int rx_head; /* where the SW reads from */ + int rx_overrun; /* flag if overrun occurred */ + + /* for copy from cyclic buffer to user buffer */ + unsigned int user_buffer_index; + + /* Members associated with polled mode support */ + u8 *poll_mode_addr_virt; /* virt addr for descriptor writeback */ + dma_addr_t poll_mode_bus; /* bus addr for descriptor writeback */ + /* Members associated with interrupt mode support */ wait_queue_head_t shutdown_wq; /* wait queue for shutdown sync */ spinlock_t lock; /* protects concurrent access */ @@ -334,29 +472,42 @@ struct xdma_engine { int msix_irq_line; /* MSI-X vector for this engine */ u32 irq_bitmask; /* IRQ bit mask for this engine */ struct work_struct work; /* Work queue for interrupt handling */ + + spinlock_t desc_lock; /* protects concurrent access */ + dma_addr_t desc_bus; + struct xdma_desc *desc; + + /* for performance test support */ + struct xdma_performance_ioctl *xdma_perf; /* perf test control */ + wait_queue_head_t xdma_perf_wq; /* Perf test sync */ }; -struct xdma_irq { - struct xdma_dev *lro; /* parent device */ - u8 user_idx; +struct xdma_user_irq { + struct xdma_dev *xdev; /* parent device */ + u8 user_idx; /* 0 ~ 15 */ u8 events_irq; /* accumulated IRQs */ spinlock_t events_lock; /* lock to safely update events_irq */ wait_queue_head_t events_wq; /* wait queue to sync waiting threads */ irq_handler_t handler; - const char *name;
void *dev; }; /* XDMA PCIe device specific book-keeping */ +#define XDEV_FLAG_OFFLINE 0x1 struct xdma_dev { struct list_head list_head; struct list_head rcu_node; unsigned long magic; /* structure ID for sanity checks */ - struct pci_dev *pci_dev; /* pci device struct from probe() */ + struct pci_dev *pdev; /* pci device struct from probe() */ int idx; /* dev index */ + const char *mod_name; /* name of module owning the dev */ + + spinlock_t lock; /* protects concurrent access */ + unsigned int flags; + /* PCIe BAR management */ void *__iomem bar[XDMA_BAR_NUM]; /* addresses for mapped BARs */ int user_bar_idx; /* BAR index of user logic */ @@ -365,16 +516,25 @@ struct xdma_dev { int regions_in_use; /* flag if dev was in use during probe() */ int got_regions; /* flag if probe() obtained the regions */ + int user_max; + int c2h_channel_max; + int h2c_channel_max; + /* Interrupt management */ int irq_count; /* interrupt counter */ int irq_line; /* flag if irq allocated successfully */ int msi_enabled; /* flag if msi was enabled for the device */ int msix_enabled; /* flag if msi-x was enabled for the device */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,12,0) struct msix_entry entry[32]; /* msi-x vector/entry table */ - struct xdma_irq user_irq[16]; /* user IRQ management */ +#endif + struct xdma_user_irq user_irq[16]; /* user IRQ management */ + unsigned int mask_irq_user; /* XDMA engine management */ int engines_num; /* Total engine count */ + u32 mask_irq_h2c; + u32 mask_irq_c2h; struct xdma_engine engine_h2c[XDMA_CHANNEL_NUM_MAX]; struct xdma_engine engine_c2h[XDMA_CHANNEL_NUM_MAX]; @@ -383,10 +543,72 @@ struct xdma_dev { u64 feature_id; }; +static inline int xdma_device_flag_check(struct xdma_dev *xdev, unsigned int f) +{ + unsigned long flags; + + spin_lock_irqsave(&xdev->lock, flags); + if (xdev->flags & f) { + spin_unlock_irqrestore(&xdev->lock, flags); + return 1; + } + spin_unlock_irqrestore(&xdev->lock, flags); + return 0; +} + +static inline int xdma_device_flag_test_n_set(struct xdma_dev *xdev, + unsigned int f) +{ + unsigned long flags; + int rv = 0; + + spin_lock_irqsave(&xdev->lock, flags); + if (xdev->flags & f) { + spin_unlock_irqrestore(&xdev->lock, flags); + rv = 1; + } else + xdev->flags |= f; + spin_unlock_irqrestore(&xdev->lock, flags); + return rv; +} + +static inline void xdma_device_flag_set(struct xdma_dev *xdev, unsigned int f) +{ + unsigned long flags; + + spin_lock_irqsave(&xdev->lock, flags); + xdev->flags |= f; + spin_unlock_irqrestore(&xdev->lock, flags); +} + +static inline void xdma_device_flag_clear(struct xdma_dev *xdev, unsigned int f) +{ + unsigned long flags; + + spin_lock_irqsave(&xdev->lock, flags); + xdev->flags &= ~f; + spin_unlock_irqrestore(&xdev->lock, flags); +} + void write_register(u32 value, void *iomem); u32 read_register(void *iomem); struct xdma_dev *xdev_find_by_pdev(struct pci_dev *pdev); +void xdma_device_offline(struct pci_dev *pdev, void *dev_handle); +void xdma_device_online(struct pci_dev *pdev, void *dev_handle); + +#ifdef INTERNAL_TESTING +int xdma_performance_submit(struct xdma_dev *xdev, struct xdma_engine *engine); +struct xdma_transfer *engine_cyclic_stop(struct xdma_engine *engine); +void enable_perf(struct xdma_engine *engine); +void get_perf_stats(struct xdma_engine *engine); + +int xdma_cyclic_transfer_setup(struct xdma_engine *engine); +int xdma_cyclic_transfer_teardown(struct xdma_engine *engine); +ssize_t xdma_engine_read_cyclic(struct xdma_engine *, char __user *, size_t, + int); +#endif +int engine_addrmode_set(struct 
xdma_engine *engine, unsigned long arg); #endif /* XDMA_LIB_H */ diff --git a/sdk/linux_kernel_drivers/edma/libxdma_api.h b/sdk/linux_kernel_drivers/edma/libxdma_api.h index cfab2bbd5..bf043eb12 100644 --- a/sdk/linux_kernel_drivers/edma/libxdma_api.h +++ b/sdk/linux_kernel_drivers/edma/libxdma_api.h @@ -1,19 +1,26 @@ +/******************************************************************************* + * + * Xilinx XDMA IP Core Linux Driver + * + * Copyright(c) Sidebranch. + * Copyright(c) Xilinx, Inc. + * + * Karen Xie + * Leon Woestenberg + * + ******************************************************************************/ + #ifndef __XDMA_BASE_API_H__ #define __XDMA_BASE_API_H__ #include -#include #include #include + /* * functions exported by the xdma driver */ -typedef struct { - void* h2c; - void* c2h; -} xdma_channel_tuple; - typedef struct { u64 write_submitted; u64 write_completed; @@ -39,13 +46,17 @@ typedef struct { * user interrupt will not enabled until xdma_user_isr_enable() * is called * @pdev: ptr to pci_dev - * @channel_list: a list of pointers to the xdma channels - * each channle is a tupple of C2H and H2C + * @mod_name: the module name to be used for request_irq + * @user_max: max # of user/event (interrupts) to be configured + * @channel_max: max # of c2h and h2c channels to be configured + * NOTE: if the user/channel provisioned is less than the max specified, + * libxdma will update the user_max/channel_max * returns - * < 0, error in initialization - * >=0, # of xdma channels (i.e., the size of the channel_list) + * an opaque handle (for libxdma to identify the device) + * NULL, in case of error */ -int xdma_device_open(struct pci_dev *pdev, xdma_channel_tuple **tuples); +void *xdma_device_open(const char *mod_name, struct pci_dev *pdev, + int *user_max, int *h2c_channel_max, int *c2h_channel_max); /* * xdma_device_close - prepare fpga for removal: disable all interrupts (users * @@ -54,8 +65,7 @@ int xdma_device_open(struct pci_dev *pdev, xdma_channel_tuple **tuples); * @pdev: ptr to struct pci_dev * @tuples: from xdma_device_open() */ -void xdma_device_close(struct pci_dev *pdev, xdma_channel_tuple *tuples); - +void xdma_device_close(struct pci_dev *pdev, void *dev_handle); /* * xdma_device_restart - restart the fpga * @@ -65,7 +75,7 @@ void xdma_device_close(struct pci_dev *pdev, xdma_channel_tuple *tuples); * return < 0 in case of error * TODO: exact error code will be defined later */ -int xdma_device_restart(struct pci_dev *pdev); +int xdma_device_restart(struct pci_dev *pdev, void *dev_handle); /* * xdma_user_isr_register - register a user ISR handler * @@ -86,8 +96,8 @@ int xdma_device_restart(struct pci_dev *pdev); * return < 0 in case of error * TODO: exact error code will be defined later */ -int xdma_user_isr_register(struct pci_dev *pdev, unsigned int mask, - irq_handler_t handler, const char *name, void *dev); +int xdma_user_isr_register(void *dev_hndl, unsigned int mask, + irq_handler_t handler, void *dev); /* * xdma_user_isr_enable/disable - enable or disable user interrupt * @@ -96,13 +106,15 @@ int xdma_user_isr_register(struct pci_dev *pdev, unsigned int mask, * return < 0 in case of error * TODO: exact error code will be defined later */ -int xdma_user_isr_enable(struct pci_dev *pdev, unsigned int mask); -int xdma_user_isr_disable(struct pci_dev *pdev, unsigned int mask); +int xdma_user_isr_enable(void *dev_hndl, unsigned int mask); +int xdma_user_isr_disable(void *dev_hndl, unsigned int mask);
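The user-ISR entry points above now take the opaque device handle instead of a `struct pci_dev`. A short sketch of registering and enabling a handler for user interrupt 0 under the new signatures (the handler and setup function names are hypothetical):

```c
#include <linux/interrupt.h>

/* Hypothetical handler for CL user interrupt 0 */
static irqreturn_t my_user_isr(int irq, void *dev_id)
{
	/* acknowledge the CL event or schedule follow-up work here */
	return IRQ_HANDLED;
}

static int my_setup_user_irq(void *xdma_handle, void *my_dev)
{
	/* bit 0 of the mask selects user interrupt 0 */
	int rv = xdma_user_isr_register(xdma_handle, 1 << 0, my_user_isr,
					my_dev);
	if (rv < 0)
		return rv;

	/* per the comment above, the interrupt stays disabled until
	 * xdma_user_isr_enable() is called */
	return xdma_user_isr_enable(xdma_handle, 1 << 0);
}
```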
/* * xdma_xfer_submit - submit data for dma operation (for both read and write) * This is a blocking call - * @channel: ptr to channel (obtained via xdma_device_open) + * @channel: channel number (< channel_max) + * == channel_max means libxdma can pick any channel available + * @write: true for host-to-card (H2C), false for card-to-host (C2H) * @offset: offset into the DDR/BRAM memory to read from or write to * @sg_tbl: the scatter-gather list of data buffers @@ -111,8 +123,8 @@ int xdma_user_isr_disable(struct pci_dev *pdev, unsigned int mask); * < 0 in case of error * TODO: exact error code will be defined later */ -int xdma_xfer_submit(void *channel, enum dma_data_direction dir, u64 offset, - struct sg_table *sgt, int dma_mapped, int timeout_ms); +ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, + struct sg_table *sgt, bool dma_mapped, int timeout_ms); /////////////////////missing API//////////////////// diff --git a/sdk/linux_kernel_drivers/edma/user_defined_interrupts_README.md b/sdk/linux_kernel_drivers/edma/user_defined_interrupts_README.md index 2365b361f..a98e47ae2 100644 --- a/sdk/linux_kernel_drivers/edma/user_defined_interrupts_README.md +++ b/sdk/linux_kernel_drivers/edma/user_defined_interrupts_README.md @@ -2,7 +2,7 @@ AWS FPGA provides options for Custom Logic (CL) to generate user-defined interrupt events, sent to the instance via MSI-X message. -At the hardware level, these interrupt event are defined in [AWS Shell Interface Specification](https://github.com/aws/aws-fpga/master/blob/hdl/docs/AWS_Shell_Interface_Specification.md) +At the hardware level, these interrupt events are defined in the [AWS Shell Interface Specification](../../../hdk/docs/AWS_Shell_Interface_Specification.md) @@ -69,9 +69,7 @@ The next example shows how an application can register to two events (aka user-d **Q: How can I toggle an interrupt event from within the CL?** -Toggling of user interrupt event by toggling the `cl_sh_apppf_int_req` interface to an MSI-X, which gets translated to an event in Linux userspace that an application can poll() on. Follow [AWS Shell Interface Spec](../hdk/docs/AWS_Shell_Interface_Specification.md) for the hardware interface details. - - +A user interrupt event is raised by toggling the `cl_sh_apppf_int_req` interface, which is translated into an MSI-X message and surfaced as an event in Linux userspace that an application can poll() on. Follow the [AWS Shell Interface Spec](../../../hdk/docs/AWS_Shell_Interface_Specification.md) for the hardware interface details. **Q: How do I stop interrupts/events?** @@ -132,10 +130,3 @@ EDMA implementation keeps a state per interrupt event that indicates it has been **Q: Which MSI-X entries are used for the user-defined interrupts?** The EDMA Linux kernel driver maps the CL user-defined interrupts to MSI-X entries 16 to 31, which is mapped to /dev/fpgaX/event0 through 15. This is handled by the EDMA driver and user intervention is not required. - - - -**Q: What if I want to build an in-kernel interrupt service routine for some of the user-defined interrupts?** - -A reference in-kernel driver interrupt handler is provided for user modification in [EDMA source directory](./src/example_kernel_interrupt.c).
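To complement the README's poll() description, here is a minimal userspace sketch of blocking on a user-defined interrupt event. The device path follows the /dev/fpgaX/eventN naming given above; the assumption that read() returns the accumulated event count is illustrative rather than taken from this diff:

```c
#include <fcntl.h>
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* event device per the README's /dev/fpgaX/eventN naming */
	int fd = open("/dev/fpga0/event0", O_RDONLY);
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	uint32_t events = 0;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* block until the CL toggles cl_sh_apppf_int_req */
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
		if (read(fd, &events, sizeof(events)) > 0)
			printf("user interrupt fired, count=%u\n", events);
	}

	close(fd);
	return 0;
}
```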
- diff --git a/sdk/linux_kernel_drivers/xdma/10-xdma.rules b/sdk/linux_kernel_drivers/xdma/10-xdma.rules old mode 100644 new mode 100755 index 77eca9f4e..6085f70e7 --- a/sdk/linux_kernel_drivers/xdma/10-xdma.rules +++ b/sdk/linux_kernel_drivers/xdma/10-xdma.rules @@ -1 +1 @@ -KERNEL=="xdma*",MODE="0666",SYMLINK="%k" +KERNEL=="xdma*",MODE="0666",SYMLINK="%k" diff --git a/sdk/linux_kernel_drivers/xdma/Makefile b/sdk/linux_kernel_drivers/xdma/Makefile old mode 100644 new mode 100755 index 9c5d223d6..78a83d076 --- a/sdk/linux_kernel_drivers/xdma/Makefile +++ b/sdk/linux_kernel_drivers/xdma/Makefile @@ -17,12 +17,12 @@ EXTRA_CFLAGS := -I$(topdir)/include $(XVC_FLAGS) #EXTRA_CFLAGS += -D__LIBXDMA_DEBUG__ ifneq ($(KERNELRELEASE),) - $(TARGET_MODULE)-objs := libxdma.o xdma_cdev.o cdev_ctrl.o cdev_events.o cdev_sgdma.o cdev_xvc.o xdma_mod.o + $(TARGET_MODULE)-objs := libxdma.o xdma_cdev.o cdev_ctrl.o cdev_events.o cdev_sgdma.o cdev_xvc.o cdev_bypass.o xdma_mod.o obj-m := $(TARGET_MODULE).o else BUILDSYSTEM_DIR:=/lib/modules/$(shell uname -r)/build PWD:=$(shell pwd) -all : +all : $(MAKE) -C $(BUILDSYSTEM_DIR) M=$(PWD) modules clean: @@ -30,9 +30,13 @@ clean: install: all $(MAKE) -C $(BUILDSYSTEM_DIR) M=$(PWD) modules_install - depmod -a + depmod -a install -m 644 10-xdma.rules /etc/udev/rules.d -rmmod -s xdma || true - -modprobe xdma + -modprobe xdma endif + +ifneq ($(ec2),1) + CFLAGS_xdma_mod.o := -DINTERNAL_TESTING +endif diff --git a/sdk/linux_kernel_drivers/xdma/cdev_bypass.c b/sdk/linux_kernel_drivers/xdma/cdev_bypass.c new file mode 100644 index 000000000..6f07c2a27 --- /dev/null +++ b/sdk/linux_kernel_drivers/xdma/cdev_bypass.c @@ -0,0 +1,168 @@ +#include "libxdma_api.h" +#include "xdma_cdev.h" + +#define write_register(v,mem,off) iowrite32(v, mem) + +static int copy_desc_data(struct xdma_transfer *transfer, char __user *buf, + size_t *buf_offset, size_t buf_size) +{ + int i; + int copy_err; + int rc = 0; + + BUG_ON(!buf); + BUG_ON(!buf_offset); + + /* Fill user buffer with descriptor data */ + for (i = 0; i < transfer->desc_num; i++) { + if (*buf_offset + sizeof(struct xdma_desc) <= buf_size) { + copy_err = copy_to_user(&buf[*buf_offset], + transfer->desc_virt + i, + sizeof(struct xdma_desc)); + + if (copy_err) { + dbg_sg("Copy to user buffer failed\n"); + *buf_offset = buf_size; + rc = -EINVAL; + } else { + *buf_offset += sizeof(struct xdma_desc); + } + } else { + rc = -ENOMEM; + } + } + + return rc; +} + +static ssize_t char_bypass_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct xdma_dev *xdev; + struct xdma_engine *engine; + struct xdma_cdev *xcdev = (struct xdma_cdev *)file->private_data; + struct xdma_transfer *transfer; + struct list_head *idx; + size_t buf_offset = 0; + int rc = 0; + + rc = xcdev_check(__func__, xcdev, 1); + if (rc < 0) + return rc; + xdev = xcdev->xdev; + engine = xcdev->engine; + + dbg_sg("In char_bypass_read()\n"); + + if (count & 3) { + dbg_sg("Buffer size must be a multiple of 4 bytes\n"); + return -EINVAL; + } + + if (!buf) { + dbg_sg("Caught NULL pointer\n"); + return -EINVAL; + } + + if (xdev->bypass_bar_idx < 0) { + dbg_sg("Bypass BAR not present - unsupported operation\n"); + return -ENODEV; + } + + spin_lock(&engine->lock); + + if (!list_empty(&engine->transfer_list)) { + list_for_each(idx, &engine->transfer_list) { + transfer = list_entry(idx, struct xdma_transfer, entry); + + rc = copy_desc_data(transfer, buf, &buf_offset, count); + } + } + + spin_unlock(&engine->lock); + + if (rc < 0) + return rc; + else + return 
buf_offset; +} + +static ssize_t char_bypass_write(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct xdma_dev *xdev; + struct xdma_engine *engine; + struct xdma_cdev *xcdev = (struct xdma_cdev *)file->private_data; + + u32 desc_data; + u32 *bypass_addr; + size_t buf_offset = 0; + int rc = 0; + int copy_err; + + rc = xcdev_check(__func__, xcdev, 1); + if (rc < 0) + return rc; + xdev = xcdev->xdev; + engine = xcdev->engine; + + if (count & 3) { + dbg_sg("Buffer size must be a multiple of 4 bytes\n"); + return -EINVAL; + } + + if (!buf) { + dbg_sg("Caught NULL pointer\n"); + return -EINVAL; + } + + if (xdev->bypass_bar_idx < 0) { + dbg_sg("Bypass BAR not present - unsupported operation\n"); + return -ENODEV; + } + + dbg_sg("In char_bypass_write()\n"); + + spin_lock(&engine->lock); + + /* Write descriptor data to the bypass BAR */ + bypass_addr = (u32 *)xdev->bar[xdev->bypass_bar_idx]; + bypass_addr += engine->bypass_offset; + while (buf_offset < count) { + copy_err = copy_from_user(&desc_data, &buf[buf_offset], + sizeof(u32)); + if (!copy_err) { + write_register(desc_data, bypass_addr, bypass_addr - engine->bypass_offset); + buf_offset += sizeof(u32); + rc = buf_offset; + } else { + dbg_sg("Error reading data from userspace buffer\n"); + rc = -EINVAL; + break; + } + } + + spin_unlock(&engine->lock); + + + return rc; +} + + +/* + * character device file operations for bypass operation + */ + +static const struct file_operations bypass_fops = { + .owner = THIS_MODULE, + .open = char_open, + .release = char_close, + .read = char_bypass_read, + .write = char_bypass_write, + .mmap = bridge_mmap, +}; + +void cdev_bypass_init(struct xdma_cdev *xcdev) +{ + cdev_init(&xcdev->cdev, &bypass_fops); +} diff --git a/sdk/linux_kernel_drivers/xdma/cdev_ctrl.c b/sdk/linux_kernel_drivers/xdma/cdev_ctrl.c index 31d823a34..404bbd7fa 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_ctrl.c +++ b/sdk/linux_kernel_drivers/xdma/cdev_ctrl.c @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". 
* * Karen Xie - * Leon Woestenberg * ******************************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ @@ -124,7 +136,7 @@ long char_ctrl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return rv; xdev = xcdev->xdev; -// pr_info("cmd 0x%x, xdev 0x%p, pdev 0x%p.\n", cmd, xdev, xdev->pdev); + pr_info("cmd 0x%x, xdev 0x%p, pdev 0x%p.\n", cmd, xdev, xdev->pdev); if (_IOC_TYPE(cmd) != XDMA_IOC_MAGIC) { pr_err("cmd %u, bad magic 0x%x/0x%x.\n", @@ -181,7 +193,7 @@ long char_ctrl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } /* maps the PCIe BAR into user space for memory-like access using mmap() */ -static int bridge_mmap(struct file *file, struct vm_area_struct *vma) +int bridge_mmap(struct file *file, struct vm_area_struct *vma) { struct xdma_dev *xdev; struct xdma_cdev *xcdev = (struct xdma_cdev *)file->private_data; diff --git a/sdk/linux_kernel_drivers/xdma/cdev_ctrl.h b/sdk/linux_kernel_drivers/xdma/cdev_ctrl.h index 1b9387b42..47e697cd6 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_ctrl.h +++ b/sdk/linux_kernel_drivers/xdma/cdev_ctrl.h @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie - * Leon Woestenberg * ******************************************************************************/ #ifndef _XDMA_IOCALLS_POSIX_H_ diff --git a/sdk/linux_kernel_drivers/xdma/cdev_events.c b/sdk/linux_kernel_drivers/xdma/cdev_events.c index fb29b42d9..514aaf43b 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_events.c +++ b/sdk/linux_kernel_drivers/xdma/cdev_events.c @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". 
* * Karen Xie - * Leon Woestenberg * ******************************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ diff --git a/sdk/linux_kernel_drivers/xdma/cdev_sgdma.c b/sdk/linux_kernel_drivers/xdma/cdev_sgdma.c index 5f78527ed..31854f92a 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_sgdma.c +++ b/sdk/linux_kernel_drivers/xdma/cdev_sgdma.c @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie - * Leon Woestenberg * ******************************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ @@ -16,6 +28,11 @@ #include "xdma_cdev.h" #include "cdev_sgdma.h" +/* Module Parameters */ +unsigned int sgdma_timeout = 10; +module_param(sgdma_timeout, uint, 0644); +MODULE_PARM_DESC(sgdma_timeout, "timeout in seconds for sgdma, default is 10 sec."); + /* * character device file operations for SG DMA engine */ @@ -129,7 +146,7 @@ static inline void xdma_io_cb_release(struct xdma_io_cb *cb) memset(cb, 0, sizeof(*cb)); } -static void char_sgdma_unmap_user_buf(struct xdma_io_cb *cb) +static void char_sgdma_unmap_user_buf(struct xdma_io_cb *cb, bool write) { int i; @@ -139,9 +156,11 @@ static void char_sgdma_unmap_user_buf(struct xdma_io_cb *cb) return; for (i = 0; i < cb->pages_nr; i++) { - if (cb->pages[i]) + if (cb->pages[i]) { + if (!write) + set_page_dirty_lock(cb->pages[i]); put_page(cb->pages[i]); - else + } else break; } @@ -215,7 +234,7 @@ static int char_sgdma_map_user_buf_to_sgl(struct xdma_io_cb *cb, bool write) flush_dcache_page(cb->pages[i]); sg_set_page(sg, cb->pages[i], nbytes, offset); -//pr_err("sg %d,0x%p, off %u, len %u, page 0x%p.\n", i, sg, offset, nbytes, cb->pages[i]); + buf += nbytes; len -= nbytes; } @@ -225,7 +244,7 @@ static int char_sgdma_map_user_buf_to_sgl(struct xdma_io_cb *cb, bool write) return 0; err_out: - char_sgdma_unmap_user_buf(cb); + char_sgdma_unmap_user_buf(cb, write); return rv; } @@ -271,12 +290,12 @@ static ssize_t char_sgdma_read_write(struct file *file, char __user *buf, return rv; res = xdma_xfer_submit(xdev, engine->channel, write, *pos, &cb.sgt, - 0, 10000); + 0, sgdma_timeout * 1000); //pr_err("xfer_submit return=%lld.\n", (s64)res); //interrupt_status(xdev); - char_sgdma_unmap_user_buf(&cb); + char_sgdma_unmap_user_buf(&cb, write); return res; } @@ -301,9 +320,13 @@ static ssize_t char_sgdma_read(struct file *file, char __user *buf, engine = xcdev->engine; - if (engine->streaming && engine->dir == DMA_FROM_DEVICE) + if (engine->streaming && engine->dir == DMA_FROM_DEVICE) { + rv = xdma_cyclic_transfer_setup(engine); + if (rv < 0 && rv != -EBUSY) + return rv; /* 600 
sec. timeout */ return xdma_engine_read_cyclic(engine, buf, count, 600000); + } return char_sgdma_read_write(file, (char *)buf, count, pos, 0); } @@ -413,6 +436,33 @@ static int ioctl_do_perf_get(struct xdma_engine *engine, unsigned long arg) return 0; } +static int ioctl_do_addrmode_set(struct xdma_engine *engine, unsigned long arg) +{ + return engine_addrmode_set(engine, arg); +} + +static int ioctl_do_addrmode_get(struct xdma_engine *engine, unsigned long arg) +{ + int rv; + unsigned long src; + + BUG_ON(!engine); + src = !!engine->non_incr_addr; + + dbg_perf("IOCTL_XDMA_ADDRMODE_GET\n"); + rv = put_user(src, (int __user *)arg); + + return rv; +} + +static int ioctl_do_align_get(struct xdma_engine *engine, unsigned long arg) +{ + BUG_ON(!engine); + + dbg_perf("IOCTL_XDMA_ALIGN_GET\n"); + return put_user(engine->addr_align, (int __user *)arg); +} + static long char_sgdma_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -433,15 +483,21 @@ static long char_sgdma_ioctl(struct file *file, unsigned int cmd, case IOCTL_XDMA_PERF_START: rv = ioctl_do_perf_start(engine, arg); break; - case IOCTL_XDMA_PERF_STOP: rv = ioctl_do_perf_stop(engine, arg); break; - case IOCTL_XDMA_PERF_GET: rv = ioctl_do_perf_get(engine, arg); break; - + case IOCTL_XDMA_ADDRMODE_SET: + rv = ioctl_do_addrmode_set(engine, arg); + break; + case IOCTL_XDMA_ADDRMODE_GET: + rv = ioctl_do_addrmode_get(engine, arg); + break; + case IOCTL_XDMA_ALIGN_GET: + rv = ioctl_do_align_get(engine, arg); + break; default: dbg_perf("Unsupported operation\n"); rv = -EINVAL; @@ -461,8 +517,12 @@ static int char_sgdma_open(struct inode *inode, struct file *file) xcdev = (struct xdma_cdev *)file->private_data; engine = xcdev->engine; - if (engine->streaming && engine->dir == DMA_FROM_DEVICE) - return xdma_cyclic_transfer_setup(engine); + if (engine->streaming && engine->dir == DMA_FROM_DEVICE) { + if (engine->device_open == 1) + return -EBUSY; + else + engine->device_open = 1; + } return 0; } @@ -479,8 +539,11 @@ static int char_sgdma_close(struct inode *inode, struct file *file) engine = xcdev->engine; - if (engine->streaming && engine->dir == DMA_FROM_DEVICE) - return xdma_cyclic_transfer_teardown(engine); + if (engine->streaming && engine->dir == DMA_FROM_DEVICE) { + engine->device_open = 0; + if (engine->cyclic_req) + return xdma_cyclic_transfer_teardown(engine); + } return 0; } diff --git a/sdk/linux_kernel_drivers/xdma/cdev_sgdma.h b/sdk/linux_kernel_drivers/xdma/cdev_sgdma.h index 395da22d7..c67bf99f5 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_sgdma.h +++ b/sdk/linux_kernel_drivers/xdma/cdev_sgdma.h @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". 
* * Karen Xie - * Leon Woestenberg * ******************************************************************************/ #ifndef _XDMA_IOCALLS_POSIX_H_ diff --git a/sdk/linux_kernel_drivers/xdma/cdev_xvc.c b/sdk/linux_kernel_drivers/xdma/cdev_xvc.c index 3a98dc1f6..adafa7fc8 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_xvc.c +++ b/sdk/linux_kernel_drivers/xdma/cdev_xvc.c @@ -1,13 +1,26 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie * ******************************************************************************/ - #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include "xdma_cdev.h" diff --git a/sdk/linux_kernel_drivers/xdma/cdev_xvc.h b/sdk/linux_kernel_drivers/xdma/cdev_xvc.h index ac07650b2..de9473a37 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_xvc.h +++ b/sdk/linux_kernel_drivers/xdma/cdev_xvc.h @@ -1,15 +1,26 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie - * Leon Woestenberg * ******************************************************************************/ - #ifndef __XVC_IOCTL_H__ #define __XVC_IOCTL_H__ diff --git a/sdk/linux_kernel_drivers/xdma/libxdma.c b/sdk/linux_kernel_drivers/xdma/libxdma.c old mode 100644 new mode 100755 index 3db6c6bf4..32523e500 --- a/sdk/linux_kernel_drivers/xdma/libxdma.c +++ b/sdk/linux_kernel_drivers/xdma/libxdma.c @@ -1,14 +1,27 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie - * Leon Woestenberg * ******************************************************************************/ + #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include @@ -53,6 +66,10 @@ static unsigned int enable_credit_mp; module_param(enable_credit_mp, uint, 0644); MODULE_PARM_DESC(enable_credit_mp, "Set 1 to enable creidt feature, default is 0 (no credit control)"); +unsigned int desc_blen_max = XDMA_DESC_BLEN_MAX; +module_param(desc_blen_max, uint, 0644); +MODULE_PARM_DESC(desc_blen_max, "per descriptor max. buffer length, default is (1 << 28) - 1"); + /* * xdma device management * maintains a list of the xdma devices @@ -287,7 +304,11 @@ void get_perf_stats(struct xdma_engine *engine) u32 lo; BUG_ON(!engine); - BUG_ON(!engine->xdma_perf); + + if (!engine->xdma_perf) { + pr_info("%s perf struct not set up.\n", engine->name); + return; + } hi = 0; lo = read_register(&engine->regs->completed_desc_count); @@ -362,7 +383,7 @@ static void engine_status_dump(struct xdma_engine *engine) int len = 0; len = sprintf(buf, "SG engine %s status: 0x%08x: ", engine->name, v); - + if ((v & XDMA_STAT_BUSY)) len += sprintf(buf + len, "BUSY,"); if ((v & XDMA_STAT_DESC_STOPPED)) @@ -370,7 +391,7 @@ static void engine_status_dump(struct xdma_engine *engine) if ((v & XDMA_STAT_DESC_COMPLETED)) len += sprintf(buf + len, "DESC_COMPL,"); - /* common H2C & C2H */ + /* common H2C & C2H */ if ((v & XDMA_STAT_COMMON_ERR_MASK)) { if ((v & XDMA_STAT_ALIGN_MISMATCH)) len += sprintf(buf + len, "ALIGN_MISMATCH "); @@ -408,7 +429,7 @@ static void engine_status_dump(struct xdma_engine *engine) len += sprintf(buf + len, "SLAVE_ERR "); buf[len - 1] = ','; } - + } else { /* C2H only */ if ((v & XDMA_STAT_C2H_R_ERR_MASK)) { @@ -421,7 +442,7 @@ static void engine_status_dump(struct xdma_engine *engine) } } - /* common H2C & C2H */ + /* common H2C & C2H */ if ((v & XDMA_STAT_DESC_ERR_MASK)) { len += sprintf(buf + len, "DESC_ERR:"); if ((v & XDMA_STAT_DESC_UNSUPP_REQ)) @@ -531,7 +552,7 @@ static void engine_start_mode_config(struct xdma_engine *engine) if (poll_mode) { w |= (u32)XDMA_CTRL_POLL_MODE_WB; - } else { + } else { w |= (u32)XDMA_CTRL_IE_DESC_STOPPED; w |= (u32)XDMA_CTRL_IE_DESC_COMPLETED; @@ -659,7 +680,11 @@ struct xdma_transfer *engine_transfer_completion(struct xdma_engine *engine, struct xdma_transfer *transfer) { BUG_ON(!engine); - BUG_ON(!transfer); + + if (unlikely(!transfer)) { + pr_info("%s: xfer empty.\n", engine->name); + return NULL; + } /* synchronous I/O? 
*/ /* awake task on transfer's wait queue */ @@ -674,7 +699,7 @@ struct xdma_transfer *engine_service_transfer_list(struct xdma_engine *engine, BUG_ON(!engine); BUG_ON(!pdesc_completed); - if (!transfer) { + if (unlikely(!transfer)) { pr_info("%s xfer empty, pdesc completed %u.\n", engine->name, *pdesc_completed); return NULL; @@ -737,7 +762,7 @@ static void engine_err_handle(struct xdma_engine *engine, engine->name, engine->status, transfer, desc_completed, transfer->desc_num); } - + /* mark transfer as failed */ transfer->state = TRANSFER_STATE_FAILED; xdma_engine_stop(engine); @@ -747,11 +772,14 @@ struct xdma_transfer *engine_service_final_transfer(struct xdma_engine *engine, struct xdma_transfer *transfer, u32 *pdesc_completed) { BUG_ON(!engine); - BUG_ON(!transfer); BUG_ON(!pdesc_completed); /* inspect the current transfer */ - if (transfer) { + if (unlikely(!transfer)) { + pr_info("%s xfer empty, pdesc completed %u.\n", + engine->name, *pdesc_completed); + return NULL; + } else { if (((engine->dir == DMA_FROM_DEVICE) && (engine->status & XDMA_STAT_C2H_ERR_MASK)) || ((engine->dir == DMA_TO_DEVICE) && @@ -764,7 +792,7 @@ struct xdma_transfer *engine_service_final_transfer(struct xdma_engine *engine, } if (engine->status & XDMA_STAT_BUSY) - dbg_tfr("Engine %s is unexpectedly busy - ignoring\n", + pr_debug("engine %s is unexpectedly busy - ignoring\n", engine->name); /* the engine stopped on current transfer? */ @@ -801,7 +829,7 @@ struct xdma_transfer *engine_service_final_transfer(struct xdma_engine *engine, * transfer has completed */ transfer = engine_transfer_completion(engine, transfer); - } + } return transfer; } @@ -841,8 +869,11 @@ static void engine_transfer_dequeue(struct xdma_engine *engine) /* pick first transfer on the queue (was submitted to the engine) */ transfer = list_entry(engine->transfer_list.next, struct xdma_transfer, entry); - BUG_ON(!transfer); - BUG_ON(transfer != &engine->cyclic_req->xfer); + if (!transfer || transfer != &engine->cyclic_req->xfer) { + pr_info("%s, xfer 0x%p != 0x%p.\n", + engine->name, transfer, &engine->cyclic_req->xfer); + return; + } dbg_tfr("%s engine completed cyclic transfer 0x%p (%d desc).\n", engine->name, transfer, transfer->desc_num); /* remove completed transfer from list */ @@ -1006,7 +1037,7 @@ static void engine_service_resume(struct xdma_engine *engine) if (!list_empty(&engine->transfer_list)) { /* (re)start engine */ transfer_started = engine_start(engine); - dbg_tfr("re-started %s engine with pending xfer 0x%p\n", + pr_info("re-started %s engine with pending xfer 0x%p\n", engine->name, transfer_started); /* engine was requested to be shutdown? 
*/ } else if (engine->shutdown & ENGINE_SHUTDOWN_REQUEST) { @@ -1149,7 +1180,7 @@ static void engine_service_work(struct work_struct *work) (unsigned long)(&engine->regs)); } else channel_interrupts_enable(engine->xdev, engine->irq_bitmask); - + /* unlock the engine */ spin_unlock_irqrestore(&engine->lock, flags); } @@ -1180,7 +1211,7 @@ static u32 engine_service_wb_monitor(struct xdma_engine *engine, break; else if (desc_wb == expected_wb) break; - + /* RTO - prevent system from hanging in polled mode */ if (time_after(jiffies, timeout)) { dbg_tfr("Polling timeout occurred"); @@ -1633,7 +1664,7 @@ static int map_bars(struct xdma_dev *xdev, struct pci_dev *dev) } /* - * MSI-X interrupt: + * MSI-X interrupt: * vectors, followed by vectors */ @@ -1778,7 +1809,7 @@ static void prog_irq_msix_user(struct xdma_dev *xdev, bool clear) else for (k = 0; k < 4 && i < max; i++, k++, shift += 8) val |= (i & 0x1f) << shift; - + write_register(val, &int_regs->user_msi_vector[j], XDMA_OFS_INT_CTRL + ((unsigned long)&int_regs->user_msi_vector[j] - @@ -1788,7 +1819,7 @@ static void prog_irq_msix_user(struct xdma_dev *xdev, bool clear) } } -static void prog_irq_msix_channel(struct xdma_dev *xdev, bool clear) +static void prog_irq_msix_channel(struct xdma_dev *xdev, bool clear) { struct interrupt_regs *int_regs = (struct interrupt_regs *) (xdev->bar[xdev->config_bar_idx] + @@ -1808,7 +1839,7 @@ static void prog_irq_msix_channel(struct xdma_dev *xdev, bool clear) else for (k = 0; k < 4 && i < max; i++, k++, shift += 8) val |= (i & 0x1f) << shift; - + write_register(val, &int_regs->channel_msi_vector[j], XDMA_OFS_INT_CTRL + ((unsigned long)&int_regs->channel_msi_vector[j] - @@ -1925,7 +1956,7 @@ static int irq_msix_user_setup(struct xdma_dev *xdev) { int i; int j = xdev->h2c_channel_max + xdev->c2h_channel_max; - int rv = 0; + int rv = 0; /* vectors set in probe_scan_for_msi() */ for (i = 0; i < xdev->user_max; i++, j++) { @@ -2457,7 +2488,7 @@ static void engine_destroy(struct xdma_dev *xdev, struct xdma_engine *engine) u32 reg_value = (0x1 << engine->channel) << 16; struct sgdma_common_regs *reg = (struct sgdma_common_regs *) (xdev->bar[xdev->config_bar_idx] + - (0x6*TARGET_SPACING)); + (0x6*TARGET_SPACING)); write_register(reg_value, ®->credit_mode_enable_w1c, 0); } @@ -2535,12 +2566,12 @@ static int engine_writeback_setup(struct xdma_engine *engine) dbg_init("Setting writeback location to 0x%llx for engine %p", engine->poll_mode_bus, engine); w = cpu_to_le32(PCI_DMA_L(engine->poll_mode_bus)); - write_register(w, &engine->regs->poll_mode_wb_lo, - (unsigned long)(&engine->regs->poll_mode_wb_lo) - + write_register(w, &engine->regs->poll_mode_wb_lo, + (unsigned long)(&engine->regs->poll_mode_wb_lo) - (unsigned long)(&engine->regs)); w = cpu_to_le32(PCI_DMA_H(engine->poll_mode_bus)); - write_register(w, &engine->regs->poll_mode_wb_hi, - (unsigned long)(&engine->regs->poll_mode_wb_hi) - + write_register(w, &engine->regs->poll_mode_wb_hi, + (unsigned long)(&engine->regs->poll_mode_wb_hi) - (unsigned long)(&engine->regs)); return 0; @@ -2608,7 +2639,7 @@ static int engine_init_regs(struct xdma_engine *engine) u32 reg_value = (0x1 << engine->channel) << 16; struct sgdma_common_regs *reg = (struct sgdma_common_regs *) (xdev->bar[xdev->config_bar_idx] + - (0x6*TARGET_SPACING)); + (0x6*TARGET_SPACING)); write_register(reg_value, ®->credit_mode_enable_w1s, 0); } @@ -2759,7 +2790,7 @@ static int transfer_build(struct xdma_engine *engine, if (!engine->non_incr_addr) req->ep_addr += sdesc->len; } - req->sw_desc_idx += 
desc_max; + req->sw_desc_idx += desc_max; return 0; } @@ -2785,7 +2816,7 @@ static int transfer_init(struct xdma_engine *engine, struct xdma_request_cb *req xfer->desc_bus = engine->desc_bus; transfer_desc_init(xfer, desc_max); - + dbg_sg("transfer->desc_bus = 0x%llx.\n", (u64)xfer->desc_bus); transfer_build(engine, req, desc_max); @@ -2821,7 +2852,7 @@ static void sgt_dump(struct sg_table *sgt) for (i = 0; i < sgt->orig_nents; i++, sg = sg_next(sg)) pr_info("%d, 0x%p, pg 0x%p,%u+%u, dma 0x%llx,%u.\n", i, sg, sg_page(sg), sg->offset, sg->length, - sg_dma_address(sg), sg_dma_len(sg)); + sg_dma_address(sg), sg_dma_len(sg)); } static void xdma_request_cb_dump(struct xdma_request_cb *req) @@ -2879,8 +2910,8 @@ static struct xdma_request_cb * xdma_init_request(struct sg_table *sgt, for (i = 0; i < max; i++, sg = sg_next(sg)) { unsigned int len = sg_dma_len(sg); - if (unlikely(len > XDMA_DESC_BLEN_MAX)) - extra += len >> XDMA_DESC_BLEN_BITS; + if (unlikely(len > desc_blen_max)) + extra += (len + desc_blen_max - 1) / desc_blen_max; } //pr_info("ep 0x%llx, desc %u+%u.\n", ep_addr, max, extra); @@ -2890,20 +2921,20 @@ static struct xdma_request_cb * xdma_init_request(struct sg_table *sgt, if (!req) return NULL; - req->sgt = sgt; + req->sgt = sgt; req->ep_addr = ep_addr; for (i = 0, sg = sgt->sgl; i < sgt->nents; i++, sg = sg_next(sg)) { unsigned int tlen = sg_dma_len(sg); - dma_addr_t addr = sg_dma_address(sg); + dma_addr_t addr = sg_dma_address(sg); req->total_len += tlen; while (tlen) { req->sdesc[j].addr = addr; - if (tlen > XDMA_DESC_BLEN_MAX) { - req->sdesc[j].len = XDMA_DESC_BLEN_MAX; - addr += XDMA_DESC_BLEN_MAX; - tlen -= XDMA_DESC_BLEN_MAX; + if (tlen > desc_blen_max) { + req->sdesc[j].len = desc_blen_max; + addr += desc_blen_max; + tlen -= desc_blen_max; } else { req->sdesc[j].len = tlen; tlen = 0; @@ -3002,7 +3033,7 @@ ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, /* one transfer at a time */ spin_lock(&engine->desc_lock); - /* build transfer */ + /* build transfer */ rv = transfer_init(engine, req); if (rv < 0) { spin_unlock(&engine->desc_lock); @@ -3095,9 +3126,10 @@ ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, rv = -ERESTARTSYS; break; } - spin_unlock(&engine->desc_lock); transfer_destroy(xdev, xfer); + spin_unlock(&engine->desc_lock); + if (rv < 0) goto unmap_sgl; } /* while (sg) */ @@ -3508,12 +3540,12 @@ void *xdma_device_open(const char *mname, struct pci_dev *pdev, int *user_max, if (xdev->user_max == 0 || xdev->user_max > MAX_USER_IRQ) xdev->user_max = MAX_USER_IRQ; if (xdev->h2c_channel_max == 0 || - xdev->h2c_channel_max > XDMA_CHANNEL_NUM_MAX) + xdev->h2c_channel_max > XDMA_CHANNEL_NUM_MAX) xdev->h2c_channel_max = XDMA_CHANNEL_NUM_MAX; if (xdev->c2h_channel_max == 0 || - xdev->c2h_channel_max > XDMA_CHANNEL_NUM_MAX) + xdev->c2h_channel_max > XDMA_CHANNEL_NUM_MAX) xdev->c2h_channel_max = XDMA_CHANNEL_NUM_MAX; - + rv = pci_enable_device(pdev); if (rv) { dbg_init("pci_enable_device() failed, %d.\n", rv); @@ -3666,7 +3698,7 @@ pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); unsigned long flags; engine = &xdev->engine_h2c[i]; - + if (engine->magic == MAGIC_ENGINE) { spin_lock_irqsave(&engine->lock, flags); engine->shutdown |= ENGINE_SHUTDOWN_REQUEST; @@ -3744,7 +3776,7 @@ pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); user_interrupts_enable(xdev, xdev->mask_irq_user); read_interrupts(xdev); } - + xdma_device_flag_clear(xdev, XDEV_FLAG_OFFLINE); pr_info("xdev 0x%p, done.\n", xdev); } @@ -3820,7 +3852,7 @@ int 
xdma_user_isr_disable(void *dev_hndl, unsigned int mask) if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) return -EINVAL; - + xdev->mask_irq_user &= ~mask; user_interrupts_disable(xdev, mask); read_interrupts(xdev); @@ -3867,7 +3899,7 @@ static int transfer_monitor_cyclic(struct xdma_engine *engine, BUG_ON(!result); if (poll_mode) { - int i ; + int i ; for (i = 0; i < 5; i++) { rc = engine_service_poll(engine, 0); if (rc) { @@ -3939,7 +3971,7 @@ static int copy_cyclic_to_user(struct xdma_engine *engine, int pkt_length, /* EOP found? Transfer anything from head to EOP */ while (more) { - unsigned int copy = more > PAGE_SIZE ? PAGE_SIZE : more; + unsigned int copy = more > PAGE_SIZE ? PAGE_SIZE : more; unsigned int blen = count - engine->user_buffer_index; int rv; @@ -3965,7 +3997,7 @@ static int copy_cyclic_to_user(struct xdma_engine *engine, int pkt_length, /* user buffer used up */ break; } - + head++; if (head >= CYCLIC_RX_PAGES_MAX) { head = 0; @@ -4004,7 +4036,7 @@ static int complete_cyclic(struct xdma_engine *engine, char __user *buf, WARN_ON(result[engine->rx_head].status==0); dbg_tfr("%s, result[%d].status = 0x%x length = 0x%x.\n", - engine->name, engine->rx_head, + engine->name, engine->rx_head, result[engine->rx_head].status, result[engine->rx_head].length); @@ -4026,7 +4058,7 @@ static int complete_cyclic(struct xdma_engine *engine, char __user *buf, /* valid result */ } else { pkt_length += result[engine->rx_head].length; - num_credit++; + num_credit++; /* seen eop? */ //if (result[engine->rx_head].status & RX_STATUS_EOP) if (result[engine->rx_head].status & RX_STATUS_EOP){ @@ -4054,9 +4086,9 @@ static int complete_cyclic(struct xdma_engine *engine, char __user *buf, if (fault) return -EIO; - + rc = copy_cyclic_to_user(engine, pkt_length, head, buf, count); - engine->rx_overrun = 0; + engine->rx_overrun = 0; /* if copy is successful, release credits */ if(rc > 0) write_register(num_credit,&engine->sgdma_regs->credits, 0); @@ -4079,7 +4111,7 @@ ssize_t xdma_engine_read_cyclic(struct xdma_engine *engine, char __user *buf, BUG_ON(!transfer); engine->user_buffer_index = 0; - + do { rc = transfer_monitor_cyclic(engine, transfer, timeout_ms); if (rc < 0) @@ -4163,7 +4195,7 @@ static int sgt_alloc_with_pages(struct sg_table *sgt, unsigned int npages, err_out: sgt_free_with_pages(sgt, dir, pdev); - return -ENOMEM; + return -ENOMEM; } int xdma_cyclic_transfer_setup(struct xdma_engine *engine) @@ -4255,10 +4287,10 @@ int xdma_cyclic_transfer_setup(struct xdma_engine *engine) /* unwind on errors */ err_out: if (engine->cyclic_req) { - xdma_request_free(engine->cyclic_req); + xdma_request_free(engine->cyclic_req); engine->cyclic_req = NULL; } - + if (engine->cyclic_sgt.orig_nents) { sgt_free_with_pages(&engine->cyclic_sgt, engine->dir, xdev->pdev); @@ -4346,7 +4378,7 @@ int xdma_cyclic_transfer_teardown(struct xdma_engine *engine) spin_unlock_irqrestore(&engine->lock, flags); /* wait for engine to be no longer running */ - if (poll_mode) + if (poll_mode) rc = cyclic_shutdown_polled(engine); else rc = cyclic_shutdown_interrupt(engine); @@ -4355,7 +4387,7 @@ int xdma_cyclic_transfer_teardown(struct xdma_engine *engine) spin_lock_irqsave(&engine->lock, flags); if (engine->cyclic_req) { - xdma_request_free(engine->cyclic_req); + xdma_request_free(engine->cyclic_req); engine->cyclic_req = NULL; } @@ -4371,3 +4403,29 @@ int xdma_cyclic_transfer_teardown(struct xdma_engine *engine) return 0; } + +int engine_addrmode_set(struct xdma_engine *engine, unsigned long arg) +{ + int rv; + 
unsigned long dst; + u32 w = XDMA_CTRL_NON_INCR_ADDR; + + dbg_perf("IOCTL_XDMA_ADDRMODE_SET\n"); + rv = get_user(dst, (int __user *)arg); + + if (rv == 0) { + engine->non_incr_addr = !!dst; + if (engine->non_incr_addr) + write_register(w, &engine->regs->control_w1s, + (unsigned long)(&engine->regs->control_w1s) - + (unsigned long)(&engine->regs)); + else + write_register(w, &engine->regs->control_w1c, + (unsigned long)(&engine->regs->control_w1c) - + (unsigned long)(&engine->regs)); + } + engine_alignments(engine); + + return rv; +} + diff --git a/sdk/linux_kernel_drivers/xdma/libxdma.h b/sdk/linux_kernel_drivers/xdma/libxdma.h old mode 100644 new mode 100755 index 3b58991d0..07d016c28 --- a/sdk/linux_kernel_drivers/xdma/libxdma.h +++ b/sdk/linux_kernel_drivers/xdma/libxdma.h @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie - * Leon Woestenberg * ******************************************************************************/ #ifndef XDMA_LIB_H @@ -419,6 +431,7 @@ struct xdma_engine { /* Engine state, configuration and flags */ enum shutdown_state shutdown; /* engine shutdown mode */ enum dma_data_direction dir; + int device_open; /* flag if engine node open, ST mode only */ int running; /* flag if the driver started engine */ int non_incr_addr; /* flag if non-incremental addressing used */ int streaming; @@ -594,5 +607,6 @@ int xdma_cyclic_transfer_setup(struct xdma_engine *engine); int xdma_cyclic_transfer_teardown(struct xdma_engine *engine); ssize_t xdma_engine_read_cyclic(struct xdma_engine *, char __user *, size_t, int); +int engine_addrmode_set(struct xdma_engine *engine, unsigned long arg); #endif /* XDMA_LIB_H */ diff --git a/sdk/linux_kernel_drivers/xdma/version.h b/sdk/linux_kernel_drivers/xdma/version.h old mode 100644 new mode 100755 index c7e2e3683..64b91799f --- a/sdk/linux_kernel_drivers/xdma/version.h +++ b/sdk/linux_kernel_drivers/xdma/version.h @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie - * Leon Woestenberg * ******************************************************************************/ #ifndef __XDMA_VERSION_H__ @@ -14,7 +26,7 @@ #define DRV_MOD_MAJOR 2017 #define DRV_MOD_MINOR 1 -#define DRV_MOD_PATCHLEVEL 38 +#define DRV_MOD_PATCHLEVEL 47 #define DRV_MODULE_VERSION \ __stringify(DRV_MOD_MAJOR) "." \ diff --git a/sdk/linux_kernel_drivers/xdma/xdma_cdev.c b/sdk/linux_kernel_drivers/xdma/xdma_cdev.c index ce0c331d6..8a3311618 100644 --- a/sdk/linux_kernel_drivers/xdma/xdma_cdev.c +++ b/sdk/linux_kernel_drivers/xdma/xdma_cdev.c @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie - * Leon Woestenberg * ******************************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ @@ -88,10 +100,13 @@ static int config_kobject(struct xdma_cdev *xcdev, enum cdev_type type) switch (type) { case CHAR_XDMA_H2C: case CHAR_XDMA_C2H: + case CHAR_BYPASS_H2C: + case CHAR_BYPASS_C2H: BUG_ON(!engine); rv = kobject_set_name(&xcdev->cdev.kobj, devnode_names[type], xdev->idx, engine->channel); break; + case CHAR_BYPASS: case CHAR_USER: case CHAR_CTRL: case CHAR_XVC: @@ -286,6 +301,18 @@ static int create_xcdev(struct xdma_pci_dev *xpdev, struct xdma_cdev *xcdev, minor = 10 + bar; cdev_event_init(xcdev); break; + case CHAR_BYPASS_H2C: + minor = 64 + engine->channel; + cdev_bypass_init(xcdev); + break; + case CHAR_BYPASS_C2H: + minor = 68 + engine->channel; + cdev_bypass_init(xcdev); + break; + case CHAR_BYPASS: + minor = 100; + cdev_bypass_init(xcdev); + break; default: pr_info("type 0x%x NOT supported.\n", type); return -EINVAL; @@ -354,6 +381,16 @@ void xpdev_destroy_interfaces(struct xdma_pci_dev *xpdev) destroy_xcdev(&xpdev->xvc_cdev); } + if (xpdev_flag_test(xpdev, XDF_CDEV_BYPASS)) { + /* iterate over channels */ + for (i = 0; i < xpdev->h2c_channel_max; i++) + /* remove DMA Bypass character device */ + destroy_xcdev(&xpdev->bypass_h2c_cdev[i]); + for (i = 0; i < xpdev->c2h_channel_max; i++) + destroy_xcdev(&xpdev->bypass_c2h_cdev[i]); + destroy_xcdev(&xpdev->bypass_cdev_base); + } + if (xpdev->major) unregister_chrdev_region(MKDEV(xpdev->major, XDMA_MINOR_BASE), XDMA_MINOR_COUNT); } @@ -416,6 +453,46 @@ int xpdev_create_interfaces(struct xdma_pci_dev *xpdev) xpdev_flag_set(xpdev, XDF_CDEV_SG); /* ??? 
Bypass */ + /* Initialize Bypass Character Device */ + if (xdev->bypass_bar_idx > 0){ + for (i = 0; i < xpdev->h2c_channel_max; i++) { + engine = &xdev->engine_h2c[i]; + + if (engine->magic != MAGIC_ENGINE) + continue; + + rv = create_xcdev(xpdev, &xpdev->bypass_h2c_cdev[i], i, + engine, CHAR_BYPASS_H2C); + if (rv < 0) { + pr_err("create h2c %d bypass I/F failed, %d.\n", + i, rv); + goto fail; + } + } + + for (i = 0; i < xpdev->c2h_channel_max; i++) { + engine = &xdev->engine_c2h[i]; + + if (engine->magic != MAGIC_ENGINE) + continue; + + rv = create_xcdev(xpdev, &xpdev->bypass_c2h_cdev[i], i, + engine, CHAR_BYPASS_C2H); + if (rv < 0) { + pr_err("create c2h %d bypass I/F failed, %d.\n", + i, rv); + goto fail; + } + } + + rv = create_xcdev(xpdev, &xpdev->bypass_cdev_base, + xdev->bypass_bar_idx, NULL, CHAR_BYPASS); + if (rv < 0) { + pr_err("create bypass failed %d.\n", rv); + goto fail; + } + xpdev_flag_set(xpdev, XDF_CDEV_BYPASS); + } /* initialize user character device */ if (xdev->user_bar_idx >= 0) { diff --git a/sdk/linux_kernel_drivers/xdma/xdma_cdev.h b/sdk/linux_kernel_drivers/xdma/xdma_cdev.h index b26de5130..47441fcaf 100644 --- a/sdk/linux_kernel_drivers/xdma/xdma_cdev.h +++ b/sdk/linux_kernel_drivers/xdma/xdma_cdev.h @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie - * Leon Woestenberg * ******************************************************************************/ #ifndef __XDMA_CHRDEV_H__ @@ -33,8 +45,11 @@ void cdev_ctrl_init(struct xdma_cdev *xcdev); void cdev_xvc_init(struct xdma_cdev *xcdev); void cdev_event_init(struct xdma_cdev *xcdev); void cdev_sgdma_init(struct xdma_cdev *xcdev); +void cdev_bypass_init(struct xdma_cdev *xcdev); void xpdev_destroy_interfaces(struct xdma_pci_dev *xpdev); int xpdev_create_interfaces(struct xdma_pci_dev *xpdev); +int bridge_mmap(struct file *file, struct vm_area_struct *vma); + #endif /* __XDMA_CHRDEV_H__ */ diff --git a/sdk/linux_kernel_drivers/xdma/xdma_ioctl.h b/sdk/linux_kernel_drivers/xdma/xdma_ioctl.h old mode 100644 new mode 100755 diff --git a/sdk/linux_kernel_drivers/xdma/xdma_mod.c b/sdk/linux_kernel_drivers/xdma/xdma_mod.c old mode 100644 new mode 100755 index 2974ce2fa..b1b66176b --- a/sdk/linux_kernel_drivers/xdma/xdma_mod.c +++ b/sdk/linux_kernel_drivers/xdma/xdma_mod.c @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. 
+ * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie - * Leon Woestenberg * ******************************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ @@ -80,8 +92,8 @@ static const struct pci_device_id pci_ids[] = { { PCI_DEVICE(0x10ee, 0x6828), }, { PCI_DEVICE(0x10ee, 0x6830), }, - { PCI_DEVICE(0x10ee, 0x6930), }, { PCI_DEVICE(0x10ee, 0x6928), }, + { PCI_DEVICE(0x10ee, 0x6930), }, { PCI_DEVICE(0x10ee, 0x6A28), }, { PCI_DEVICE(0x10ee, 0x6A30), }, { PCI_DEVICE(0x10ee, 0x6D30), }, @@ -94,10 +106,9 @@ static const struct pci_device_id pci_ids[] = { { PCI_DEVICE(0x10ee, 0x2808), }, +#ifdef INTERNAL_TESTING { PCI_DEVICE(0x1d0f, 0x1042), 0}, - - { PCI_DEVICE(0x1d0f, 0xF000), 0}, - { PCI_DEVICE(0x1d0f, 0xF001), 0}, +#endif {0,} }; MODULE_DEVICE_TABLE(pci, pci_ids); @@ -260,7 +271,24 @@ static void xdma_error_resume(struct pci_dev *pdev) pci_cleanup_aer_uncorrect_error_status(pdev); } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,13,0) +static void xdma_reset_prepare(struct pci_dev *pdev) +{ + struct xdma_pci_dev *xpdev = dev_get_drvdata(&pdev->dev); + + pr_info("dev 0x%p,0x%p.\n", pdev, xpdev); + xdma_device_offline(pdev, xpdev->xdev); +} + +static void xdma_reset_done(struct pci_dev *pdev) +{ + struct xdma_pci_dev *xpdev = dev_get_drvdata(&pdev->dev); + + pr_info("dev 0x%p,0x%p.\n", pdev, xpdev); + xdma_device_online(pdev, xpdev->xdev); +} + +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) static void xdma_reset_notify(struct pci_dev *pdev, bool prepare) { struct xdma_pci_dev *xpdev = dev_get_drvdata(&pdev->dev); @@ -278,7 +306,10 @@ static const struct pci_error_handlers xdma_err_handler = { .error_detected = xdma_error_detected, .slot_reset = xdma_slot_reset, .resume = xdma_error_resume, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,13,0) + .reset_prepare = xdma_reset_prepare, + .reset_done = xdma_reset_done, +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) .reset_notify = xdma_reset_notify, #endif }; @@ -294,9 +325,16 @@ static struct pci_driver pci_driver = { static int __init xdma_mod_init(void) { int rv; + extern unsigned int desc_blen_max; + extern unsigned int sgdma_timeout; pr_info("%s", version); + if (desc_blen_max > XDMA_DESC_BLEN_MAX) + desc_blen_max = XDMA_DESC_BLEN_MAX; + pr_info("desc_blen_max: 0x%x/%u, sgdma_timeout: %u sec.\n", + desc_blen_max, desc_blen_max, sgdma_timeout); + rv = xdma_cdev_init(); if (rv < 0) return rv; diff --git a/sdk/linux_kernel_drivers/xdma/xdma_mod.h b/sdk/linux_kernel_drivers/xdma/xdma_mod.h old mode 100644 new mode 100755 index cbf0328a0..0ede7a080 --- a/sdk/linux_kernel_drivers/xdma/xdma_mod.h +++ b/sdk/linux_kernel_drivers/xdma/xdma_mod.h @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP 
Core Linux Driver + * Copyright(c) 2015 - 2017 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". * * Karen Xie - * Leon Woestenberg * ******************************************************************************/ #ifndef __XDMA_MODULE_H__ diff --git a/sdk/tests/SDK_TESTING.md b/sdk/tests/SDK_TESTING.md new file mode 100644 index 000000000..f71bb9fdf --- /dev/null +++ b/sdk/tests/SDK_TESTING.md @@ -0,0 +1,3 @@ +# SDK Testing + +## Overview diff --git a/sdk/tests/conftest.py b/sdk/tests/conftest.py new file mode 100644 index 000000000..eb298bf9e --- /dev/null +++ b/sdk/tests/conftest.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python2.7 + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + +''' +pytest configuration +''' + +import pytest diff --git a/sdk/tests/test_sdk_scripts.py b/sdk/tests/test_sdk_scripts.py new file mode 100644 index 000000000..ed44c6126 --- /dev/null +++ b/sdk/tests/test_sdk_scripts.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python2.7 + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +from os.path import dirname, realpath +import pytest +import subprocess +import sys +import traceback +try: + import aws_fpga_utils + import aws_fpga_test_utils + from aws_fpga_test_utils.AwsFpgaTestBase import AwsFpgaTestBase +except ImportError as e: + traceback.print_tb(sys.exc_info()[2]) + print "error: {}\nMake sure to source hdk_setup.sh".format(sys.exc_info()[1]) + sys.exit(1) + +logger = aws_fpga_utils.get_logger(__name__) + +class TestSdkScripts(AwsFpgaTestBase): + ''' + Pytest test class. + + NOTE: Cannot have an __init__ method. + ''' + + @staticmethod + def setup_class(self): + ''' + Do any setup required for tests. 
+ ''' + AwsFpgaTestBase.setup_class(self, __file__) + return + + @pytest.mark.skip(reason="Not implemented") + def test_sdk_setup(self): + logger.info(self) + assert False + \ No newline at end of file diff --git a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c index 24580d09e..6576ca243 100644 --- a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c +++ b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c @@ -250,7 +250,12 @@ int fpga_mgmt_clear_local_image_sync(int slot_id, return ret; } -int fpga_mgmt_load_local_image(int slot_id, char *afi_id) +int fpga_mgmt_load_local_image(int slot_id, char *afi_id) +{ + return fpga_mgmt_load_local_image_flags(slot_id, afi_id, 0); +} + +int fpga_mgmt_load_local_image_flags(int slot_id, char *afi_id, uint32_t flags) { int ret; uint32_t len; @@ -263,7 +268,7 @@ int fpga_mgmt_load_local_image(int slot_id, char *afi_id) memset(&rsp, 0, sizeof(union afi_cmd)); /* initialize the command structure */ - fpga_mgmt_cmd_init_load(&cmd, &len, afi_id); + fpga_mgmt_cmd_init_load(&cmd, &len, afi_id, flags); /* send the command and wait for the response */ ret = fpga_mgmt_process_cmd(slot_id, &cmd, &rsp, &len); @@ -274,7 +279,15 @@ int fpga_mgmt_load_local_image(int slot_id, char *afi_id) return ret; } -int fpga_mgmt_load_local_image_sync(int slot_id, char *afi_id, +int fpga_mgmt_load_local_image_sync(int slot_id, char *afi_id, + uint32_t timeout, uint32_t delay_msec, + struct fpga_mgmt_image_info *info) +{ + return fpga_mgmt_load_local_image_sync_flags(slot_id, afi_id, 0, + timeout, delay_msec, info); +} + +int fpga_mgmt_load_local_image_sync_flags(int slot_id, char *afi_id, uint32_t flags, uint32_t timeout, uint32_t delay_msec, struct fpga_mgmt_image_info *info) { @@ -301,7 +314,7 @@ int fpga_mgmt_load_local_image_sync(int slot_id, char *afi_id, fail_on(ret != 0, out, "fpga_pci_get_resource_map failed"); /** Load the FPGA image (async completion) */ - ret = fpga_mgmt_load_local_image(slot_id, afi_id); + ret = fpga_mgmt_load_local_image_flags(slot_id, afi_id, flags); fail_on(ret, out, "fpga_mgmt_load_local_image_flags failed"); /** Wait until the status is "loaded" or timeout */ diff --git a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c index 098bc6039..233690a89 100644 --- a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c +++ b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c @@ -127,7 +127,8 @@ afi_get_next_id(void) * @param[in,out] len cmd len */ void -fpga_mgmt_cmd_init_load(union afi_cmd *cmd, uint32_t *len, const char *afi_id) +fpga_mgmt_cmd_init_load(union afi_cmd *cmd, uint32_t *len, const char *afi_id, + uint32_t flags) { assert(cmd); assert(len); @@ -145,7 +146,7 @@ fpga_mgmt_cmd_init_load(union afi_cmd *cmd, uint32_t *len, const char *afi_id) strncpy(req->ids.afi_id, afi_id, sizeof(req->ids.afi_id)); req->ids.afi_id[sizeof(req->ids.afi_id) - 1] = 0; - req->fpga_cmd_flags = 0; + req->fpga_cmd_flags = flags; *len = sizeof(struct afi_cmd_hdr) + payload_len; } diff --git a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h index 75efa5c11..cae0a8a21 100644 --- a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h +++ b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h @@ -52,7 +52,7 @@ int fpga_mgmt_process_cmd(int slot_id, void fpga_mgmt_cmd_init_metrics(union afi_cmd *cmd, uint32_t *len, uint32_t flags); void fpga_mgmt_cmd_init_load(union afi_cmd *cmd, uint32_t *len, - const char *afi_id); + const
char *afi_id, uint32_t flags); void fpga_mgmt_cmd_init_clear(union afi_cmd *cmd, uint32_t *len); int diff --git a/sdk/userspace/fpga_mgmt_tools/README.md b/sdk/userspace/fpga_mgmt_tools/README.md index b8d896fc0..c9fadf8b3 100644 --- a/sdk/userspace/fpga_mgmt_tools/README.md +++ b/sdk/userspace/fpga_mgmt_tools/README.md @@ -9,7 +9,7 @@ AWS provides the following set of command-line tools for Amazon FPGA Image (AFI) * Returns the status of the FPGA image for a specified FPGA image slot number. The *fpga-image-slot* parameter is an index that represents a given FPGA within an instance. Use `fpga-describe-local-image-slots` to return the available FPGA image slots for the instance. * **`fpga-load-local-image`** - * Loads the specified FPGA image to the specified slot number, and returns the status of the command. Note that to ensure your AFI is loaded to a consistent state, a loaded FPGA slot must be cleared with `fpga-clear-local-image` before loading another FPGA image. The *fpga-image-slot* parameter is an index that represents a given FPGA within an instance. Use `fpga-describe-local-image` to return the FPGA image status, and `fpga-describe-local-image-slots` to return the available FPGA image slots for the instance. + * Loads the specified FPGA image to the specified slot number, and returns the status of the command. The *fpga-image-slot* parameter is an index that represents a given FPGA within an instance. Use `fpga-describe-local-image` to return the FPGA image status, and `fpga-describe-local-image-slots` to return the available FPGA image slots for the instance. * **`fpga-clear-local-image`** * Clears the specified FPGA image slot, including FPGA internal and external memories that are used by the slot. The *fpga-image-slot* parameter is an index that represents a given FPGA within an instance. Use `fpga-describe-local-image` to return the FPGA image status, and `fpga-describe-local-image-slots` to return the available FPGA image slots for the instance. diff --git a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.c b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.c index 54109ef61..061882e4e 100644 --- a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.c +++ b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.c @@ -108,6 +108,7 @@ cli_show_image_info(struct fpga_mgmt_image_info *info) struct fpga_slot_spec slot_spec; int ret = FPGA_ERR_FAIL; uint32_t i; + uint64_t frequency; if (f1.show_headers) { printf("Type FpgaImageSlot FpgaImageId StatusName StatusCode ErrorName ErrorCode ShVersion\n"); @@ -232,6 +233,23 @@ cli_show_image_info(struct fpga_mgmt_image_info *info) printf(" write-count=%" PRIu64 "\n", ddr_if->write_count); printf(" read-count=%" PRIu64 "\n", ddr_if->read_count); } + + printf("Clock Group A Frequency (MHz)\n"); + for (i = 0; i < CLOCK_COUNT_A; i++) { + frequency = fmc->clocks[0].frequency[i] / 1000000; + printf("%" PRIu64 " ", frequency); + } + printf("\nClock Group B Frequency (MHz)\n"); + for (i = 0; i < CLOCK_COUNT_B; i++) { + frequency = fmc->clocks[1].frequency[i] / 1000000; + printf("%" PRIu64 " ", frequency); + } + printf("\nClock Group C Frequency (MHz)\n"); + for (i = 0; i < CLOCK_COUNT_C; i++) { + frequency = fmc->clocks[2].frequency[i] / 1000000; + printf("%" PRIu64 " ", frequency); + } + printf("\n"); } return 0; @@ -293,14 +311,17 @@ static int command_load(void) { int ret; + uint32_t flags = (f1.force_shell_reload) ?
FPGA_CMD_FORCE_SHELL_RELOAD : 0; + + + if (f1.async) { - ret = fpga_mgmt_load_local_image(f1.afi_slot, f1.afi_id); + ret = fpga_mgmt_load_local_image_flags(f1.afi_slot, f1.afi_id, flags); fail_on(ret != 0, err, "fpga_mgmt_load_local_image failed"); } else { struct fpga_mgmt_image_info info; memset(&info, 0, sizeof(struct fpga_mgmt_image_info)); - ret = fpga_mgmt_load_local_image_sync(f1.afi_slot, f1.afi_id, + ret = fpga_mgmt_load_local_image_sync_flags(f1.afi_slot, f1.afi_id, flags, f1.sync_timeout, f1.sync_delay_msec, &info); fail_on(ret != 0, err, "fpga_mgmt_load_local_image_sync failed"); diff --git a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.h b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.h index 00ea2998c..9969ef2df 100644 --- a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.h +++ b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.h @@ -99,6 +99,8 @@ struct ec2_fpga_cmd { bool rescan; /** Show mailbox device option */ bool show_mbox_device; + /** Reload the shell even if not required for AFI */ + bool force_shell_reload; /** Virtual DIP switch */ uint16_t v_dip_switch; /** Virtual JTAG TCP port */ diff --git a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd_parse.c b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd_parse.c index 00c5011f1..10fd0b058 100644 --- a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd_parse.c +++ b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd_parse.c @@ -158,7 +158,10 @@ static const char *load_afi_usage[] = { " --sync-timeout TIMEOUT", " Specify a timeout TIMEOUT (in seconds) for the sequence", " of operations that are performed in the synchronous (blocking)", - " mode", + " mode.", + " -F, --force-shell-reload", + " Reload the FPGA shell on AFI load, even if the next AFI", + " doesn't require it.", }; static const char *clear_afi_usage[] = { @@ -452,11 +455,12 @@ parse_args_load_afi(int argc, char *argv[]) {"headers", no_argument, 0, 'H' }, {"help", no_argument, 0, 'h' }, {"version", no_argument, 0, 'V' }, + {"force-shell-reload", no_argument, 0, 'F' }, {0, 0, 0, 0 }, }; int long_index = 0; - while ((opt = getopt_long(argc, argv, "S:I:r:s:AH?hV", + while ((opt = getopt_long(argc, argv, "S:I:r:s:AH?hVF", long_options, &long_index)) != -1) { switch (opt) { case 'S': { @@ -495,6 +499,10 @@ parse_args_load_afi(int argc, char *argv[]) f1.show_headers = true; break; } + case 'F': { + f1.force_shell_reload = true; + break; + } case 'V': { print_version(); get_parser_completed(opt); diff --git a/sdk/userspace/include/fpga_mgmt.h b/sdk/userspace/include/fpga_mgmt.h index 86ae60584..2c1b60310 100644 --- a/sdk/userspace/include/fpga_mgmt.h +++ b/sdk/userspace/include/fpga_mgmt.h @@ -139,15 +139,30 @@ int fpga_mgmt_clear_local_image_sync(int slot_id, uint32_t timeout, uint32_t delay_msec, struct fpga_mgmt_image_info *info); + +/** + * Wrapper for fpga_mgmt_load_local_image_flags, with flags set to 0 as a default + */ +int fpga_mgmt_load_local_image(int slot_id, char *afi_id); + /** * Asynchronously loads the specified FPGA image to the specified slot number. * * @param[in] slot_id the logical slot index * @param[in] afi_id The Amazon FPGA Image id to be loaded + * @param[in] flags flags to select various options from Common FPGA + * command flags * @returns 0 on success, non-zero on error */ -int fpga_mgmt_load_local_image(int slot_id, char *afi_id); +int fpga_mgmt_load_local_image_flags(int slot_id, char *afi_id, uint32_t flags); +/** + * Wrapper for fpga_mgmt_load_local_image_sync_flags, with flags set to 0 as a + * default.
+*/ +int fpga_mgmt_load_local_image_sync(int slot_id, char *afi_id, + uint32_t timeout, uint32_t delay_msec, + struct fpga_mgmt_image_info *info); /** * Synchronously loads the specified FPGA image slot to the specified slot * number. @@ -160,7 +175,7 @@ int fpga_mgmt_load_local_image(int slot_id, char *afi_id); * @param[in/out] info struct to populate with the slot description (or NULL) * @returns 0 on success, non-zero on error */ -int fpga_mgmt_load_local_image_sync(int slot_id, char *afi_id, +int fpga_mgmt_load_local_image_sync_flags(int slot_id, char *afi_id, uint32_t flags, uint32_t timeout, uint32_t delay_msec, struct fpga_mgmt_image_info *info); diff --git a/sdk/userspace/include/hal/fpga_common.h b/sdk/userspace/include/hal/fpga_common.h index d48c2990e..10351fb74 100644 --- a/sdk/userspace/include/hal/fpga_common.h +++ b/sdk/userspace/include/hal/fpga_common.h @@ -26,6 +26,17 @@ #define AFI_ID_STR_MAX 64 #define FPGA_DDR_IFS_MAX 4 +/** + * FPGA Mixed Mode Clock Manager (MMCM) config. + * + * MMCM Groups A, B, C are 0, 1, 2 respectively + */ +#define FPGA_MMCM_GROUP_MAX 3 +#define FPGA_MMCM_OUT_CLKS_MAX 7 +#define CLOCK_COUNT_A 4 +#define CLOCK_COUNT_B 2 +#define CLOCK_COUNT_C 2 + /** * Common FPGA command flags. */ @@ -36,9 +47,11 @@ enum { FPGA_CMD_GET_HW_METRICS = 1 << 1, /** return FPGA image hardware metrics (clear on read) */ FPGA_CMD_CLEAR_HW_METRICS = 1 << 2, + FPGA_CMD_FORCE_SHELL_RELOAD = 1 << 3, FPGA_CMD_ALL_FLAGS = FPGA_CMD_GET_HW_METRICS | - FPGA_CMD_CLEAR_HW_METRICS, + FPGA_CMD_CLEAR_HW_METRICS | + FPGA_CMD_FORCE_SHELL_RELOAD, }; /** @@ -212,6 +225,12 @@ struct fpga_ddr_if_metrics_common { uint64_t read_count; } __attribute__((packed)); +/** FPGA clock metrics common */ +struct fpga_clocks_common { + uint64_t frequency[FPGA_MMCM_OUT_CLKS_MAX]; +} __attribute__((packed)); + + /** FPGA metrics */ struct fpga_metrics_common { /** See FPGA_INT_STATUS_XYZ below */ @@ -247,6 +266,9 @@ struct fpga_metrics_common { /** FPGA DDR interface metrics */ struct fpga_ddr_if_metrics_common ddr_ifs[FPGA_DDR_IFS_MAX]; + + /** FPGA clock metrics */ + struct fpga_clocks_common clocks[FPGA_MMCM_GROUP_MAX]; } __attribute__((packed)); /** Common int_status */ diff --git a/sdk/userspace/include/utils/sh_dpi_tasks.h b/sdk/userspace/include/utils/sh_dpi_tasks.h new file mode 100644 index 000000000..a412fa379 --- /dev/null +++ b/sdk/userspace/include/utils/sh_dpi_tasks.h @@ -0,0 +1,54 @@ +// Amazon FPGA Hardware Development Kit +// +// Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Amazon Software License (the "License"). You may not use +// this file except in compliance with the License. A copy of the License is +// located at +// +// http://aws.amazon.com/asl/ +// +// or in the "license" file accompanying this file. This file is distributed on +// an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +// implied. See the License for the specific language governing permissions and +// limitations under the License.
+ +#ifndef SH_DPI_TASKS +#define SH_DPI_TASKS + +#include +#include +#include +#include + +#ifdef SV_TEST + #ifndef VIVADO_SIM + #include "svdpi.h" + #endif +#endif +#include + +extern void sv_printf(char *msg); +extern void sv_map_host_memory(uint8_t *memory); + +extern void cl_peek(uint64_t addr, uint32_t *data); +extern void cl_poke(uint64_t addr, uint32_t data); +extern void sv_int_ack(uint32_t int_num); +extern void sv_pause(uint32_t x); + +void test_main(uint32_t *exit_code); + +void host_memory_putc(uint64_t addr, uint8_t data); + +uint8_t host_memory_getc(uint64_t addr); + + +void cosim_printf(const char *format, ...); + + +void int_handler(uint32_t int_num); + +#define LOW_32b(a) ((uint32_t)((uint64_t)(a) & 0xffffffff)) +#define HIGH_32b(a) ((uint32_t)(((uint64_t)(a)) >> 32L)) + +#endif diff --git a/sdk/userspace/utils/sh_dpi_tasks.c b/sdk/userspace/utils/sh_dpi_tasks.c new file mode 100644 index 000000000..ab8aebdf3 --- /dev/null +++ b/sdk/userspace/utils/sh_dpi_tasks.c @@ -0,0 +1,46 @@ +#include + +void host_memory_putc(uint64_t addr, uint8_t data) +{ + *(uint8_t *)addr = data; +} + +uint8_t host_memory_getc(uint64_t addr) +{ + return *(uint8_t *)addr; +} + +void cosim_printf(const char *format, ...) +{ + static char sv_msg_buffer[256]; + va_list args; + + va_start(args, format); + /* bound the formatted output to the buffer size */ + vsnprintf(sv_msg_buffer, sizeof(sv_msg_buffer), format, args); +#ifdef SV_TEST + sv_printf(sv_msg_buffer); +#else + printf("%s", sv_msg_buffer); +#endif + + va_end(args); +} + +void int_handler(uint32_t int_num) +{ +// Vivado does not support svGetScopeFromName +#ifdef SV_TEST + #ifndef VIVADO_SIM + svScope scope; + scope = svGetScopeFromName("tb"); + svSetScope(scope); + #endif +#endif + + cosim_printf("Received interrupt %2d", int_num); + +#ifdef SV_TEST + sv_int_ack(int_num); +#endif + +} diff --git a/sdk_setup.sh b/sdk_setup.sh old mode 100755 new mode 100644 index a80a1560e..019d56240 --- a/sdk_setup.sh +++ b/sdk_setup.sh @@ -1,5 +1,3 @@ -#!/bin/bash - # # Copyright 2015-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # @@ -15,7 +13,30 @@ # permissions and limitations under the License. # -export SDK_DIR=${SDK_DIR:=$(pwd)/sdk} +# Script must be sourced from a bash shell or it will not work +# When being sourced $0 will be the interactive shell and $BASH_SOURCE will contain the script being sourced +# When being run $0 and $_ will be the same. +script=${BASH_SOURCE[0]} +if [ $script == $0 ]; then + echo "ERROR: You must source this script" + exit 2 +fi + +full_script=$(readlink -f $script) +script_name=$(basename $full_script) +script_dir=$(dirname $full_script) +current_dir=$(pwd) + +source $script_dir/shared/bin/message_functions.sh + +source $script_dir/shared/bin/set_AWS_FPGA_REPO_DIR.sh + +export SDK_DIR=${SDK_DIR:=$script_dir/sdk} + +# Update PYTHONPATH with libraries used for unit testing, removing any stale copy first +python_lib=$AWS_FPGA_REPO_DIR/shared/lib +export PYTHONPATH=$(echo $PYTHONPATH | sed -e 's/\(^\|:\)[^:]\+$python_lib\(:\|$\)/:/g; s/^://; s/:$//') +PYTHONPATH=$python_lib:$PYTHONPATH echo "Done setting environment variables." @@ -23,11 +44,11 @@ echo "Done setting environment variables." # Execute sdk_install.sh inside a subshell so the user's current # shell does not exit on errors from the install. # -bash $SDK_DIR/sdk_install.sh -RET=$? - -if [ $RET != 0 ]; then - echo "Error: AWS SDK install was unsuccessful, sdk_install.sh returned $RET" -else - echo "Done with AWS SDK setup." +cd $script_dir +if !
bash $SDK_DIR/sdk_install.sh; then + echo "Error: AWS SDK install was unsuccessful" + return 1 fi + +cd $current_dir +info_msg "$script_name PASSED" diff --git a/shared/bin/message_functions.sh b/shared/bin/message_functions.sh new file mode 100644 index 000000000..72a752fe6 --- /dev/null +++ b/shared/bin/message_functions.sh @@ -0,0 +1,33 @@ +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + +function info_msg { + echo -e "INFO: $1" +} + +function debug_msg { + if [[ "${debug:-0}" == 0 ]]; then + return + fi + echo -e "DEBUG: $1" +} + +function err_msg { + echo -e >&2 "ERROR: $1" +} + +function warn_msg { + echo -e "WARNING: $1" +} diff --git a/shared/bin/set_AWS_FPGA_REPO_DIR.sh b/shared/bin/set_AWS_FPGA_REPO_DIR.sh new file mode 100644 index 000000000..6ea57859a --- /dev/null +++ b/shared/bin/set_AWS_FPGA_REPO_DIR.sh @@ -0,0 +1,25 @@ +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + +# Make sure that AWS_FPGA_REPO_DIR is set to the location of this script. +if [[ ":$AWS_FPGA_REPO_DIR" == ':' ]]; then + debug_msg "AWS_FPGA_REPO_DIR not set so setting to $script_dir" + export AWS_FPGA_REPO_DIR=$script_dir +elif [[ $AWS_FPGA_REPO_DIR != $script_dir ]]; then + info_msg "Changing AWS_FPGA_REPO_DIR from $AWS_FPGA_REPO_DIR to $script_dir" + export AWS_FPGA_REPO_DIR=$script_dir +else + debug_msg "AWS_FPGA_REPO_DIR=$AWS_FPGA_REPO_DIR" +fi diff --git a/shared/lib/aws_fpga_test_utils/AwsFpgaTestBase.py b/shared/lib/aws_fpga_test_utils/AwsFpgaTestBase.py new file mode 100644 index 000000000..0d3467899 --- /dev/null +++ b/shared/lib/aws_fpga_test_utils/AwsFpgaTestBase.py @@ -0,0 +1,255 @@ +#!/usr/bin/env python2.7 + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + +''' +Base class for pytest modules + +See TESTING.md for details.
+''' + +import boto3 +import os +from os.path import basename, dirname, realpath +import pytest +import re +import subprocess +import sys +import traceback +try: + import aws_fpga_test_utils + from aws_fpga_test_utils import get_git_repo_root + import aws_fpga_utils +except ImportError as e: + traceback.print_tb(sys.exc_info()[2]) + print "error: {}\nMake sure to source hdk_setup.sh".format(sys.exc_info()[1]) + sys.exit(1) + +logger = aws_fpga_utils.get_logger(__name__) + +class AwsFpgaTestBase(object): + ''' + Pytest test class. + + NOTE: Cannot have an __init__ method. + + Load AFI created by test_create_afi.py + ''' + + @classmethod + def setup_class(cls, derived_cls, filename_of_test_class): + AwsFpgaTestBase.s3_bucket = 'aws-fpga-jenkins-testing' + AwsFpgaTestBase.__ec2_client = None + AwsFpgaTestBase.__s3_client = None + AwsFpgaTestBase.test_dir = dirname(realpath(filename_of_test_class)) + AwsFpgaTestBase.git_repo_dir = get_git_repo_root(dirname(filename_of_test_class)) + AwsFpgaTestBase.WORKSPACE = AwsFpgaTestBase.git_repo_dir + + # SDAccel locations + # Need to move to either a config file somewhere or a subclass + AwsFpgaTestBase.xilinx_sdaccel_examples_dir = AwsFpgaTestBase.git_repo_dir + "/SDAccel/examples/xilinx" + AwsFpgaTestBase.xilinx_sdaccel_examples_list_file = AwsFpgaTestBase.WORKSPACE + "/sdaccel_examples_list.json" + + if 'WORKSPACE' in os.environ: + assert os.environ['WORKSPACE'] == AwsFpgaTestBase.git_repo_dir, "WORKSPACE incorrect" + else: + os.environ['WORKSPACE'] = AwsFpgaTestBase.WORKSPACE + AwsFpgaTestBase.instance_type = aws_fpga_test_utils.get_instance_type() + AwsFpgaTestBase.num_slots = aws_fpga_test_utils.get_num_fpga_slots(AwsFpgaTestBase.instance_type) + return + + @staticmethod + def ec2_client(): + if not AwsFpgaTestBase.__ec2_client: + AwsFpgaTestBase.__ec2_client = boto3.client('ec2') + return AwsFpgaTestBase.__ec2_client + + @staticmethod + def s3_client(): + if not AwsFpgaTestBase.__s3_client: + AwsFpgaTestBase.__s3_client = boto3.client('s3') + return AwsFpgaTestBase.__s3_client + + @staticmethod + def assert_hdk_setup(): + assert 'AWS_FPGA_REPO_DIR' in os.environ, "AWS_FPGA_REPO_DIR not set. source {}/hdk_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ['AWS_FPGA_REPO_DIR'] == AwsFpgaTestBase.git_repo_dir, "AWS_FPGA_REPO_DIR not set to the repo dir. source {}/hdk_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert 'HDK_DIR' in os.environ, "HDK_DIR not set. source {}/hdk_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ['HDK_DIR'] == os.path.join(AwsFpgaTestBase.git_repo_dir, 'hdk'), "HDK_DIR incorrect. source {}/hdk_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + + @staticmethod + def assert_sdk_setup(): + assert 'SDK_DIR' in os.environ, "SDK_DIR not set. source {}/sdk_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ['SDK_DIR'] == os.path.join(AwsFpgaTestBase.git_repo_dir, 'sdk'), "SDK_DIR incorrect. source {}/sdk_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + + @staticmethod + def assert_sdaccel_setup(): + assert 'AWS_FPGA_REPO_DIR' in os.environ, "AWS_FPGA_REPO_DIR not set. source {}/sdaccel_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ['AWS_FPGA_REPO_DIR'] == AwsFpgaTestBase.git_repo_dir, "AWS_FPGA_REPO_DIR not set to the repo dir. source {}/sdaccel_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert 'HDK_DIR' in os.environ, "HDK_DIR not set. 
source {}/sdaccel_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ['HDK_DIR'] == os.path.join(AwsFpgaTestBase.git_repo_dir, 'hdk'), "HDK_DIR incorrect. source {}/sdaccel_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert 'SDK_DIR' in os.environ, "SDK_DIR not set. source {}/sdaccel_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ['SDK_DIR'] == os.path.join(AwsFpgaTestBase.git_repo_dir, 'sdk'), "SDK_DIR incorrect. source {}/sdaccel_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert 'SDACCEL_DIR' in os.environ, "SDACCEL_DIR not set. source {}/sdaccel_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ['SDACCEL_DIR'] == os.path.join(AwsFpgaTestBase.git_repo_dir, 'SDAccel'), "SDACCEL_DIR incorrect. source {}/sdaccel_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ.get('AWS_PLATFORM') is not None, "Environment Var AWS_PLATFORM not set. source {}/sdaccel_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ.get('XILINX_SDX') is not None, "Environment Var XILINX_SDX not set. Please check the AMI." + + @staticmethod + def running_on_f1_instance(): + ''' + Check to see if running on an F1 instance + ''' + instance_type = aws_fpga_test_utils.get_instance_type() + return re.match(r'f1\.', instance_type) + + @staticmethod + def run_cmd(cmd, echo=False, check=True): + if echo: + logger.info("Running: {}".format(cmd)) + p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (stdout_data, stderr_data) = p.communicate() + stdout_lines = stdout_data.split('\n') + stderr_lines = stderr_data.split('\n') + if check and p.returncode: + logger.error("Cmd failed with rc={}\ncmd: {}\nstdout:\n{}\nstderr:\n{}".format( + p.returncode, cmd, stdout_data, stderr_data)) + elif echo: + logger.info("rc={}\ncmd: {}\nstdout:\n{}\nstderr:\n{}\n".format(p.returncode, cmd, stdout_data, stderr_data)) + return (p.returncode, stdout_lines, stderr_lines) + + @staticmethod + def run_hdk_cmd(cmd, echo=False, check=True): + source_hdk_cmd = "source {}/hdk_setup.sh &> /dev/null".format(AwsFpgaTestBase.git_repo_dir) + cmd = source_hdk_cmd + " && " + cmd + return AwsFpgaTestBase.run_cmd(cmd, echo, check) + + @staticmethod + def run_sdk_cmd(cmd, echo=False, check=True): + source_sdk_cmd = "source {}/sdk_setup.sh &> /dev/null".format(AwsFpgaTestBase.git_repo_dir) + cmd = source_sdk_cmd + " && " + cmd + return AwsFpgaTestBase.run_cmd(cmd, echo, check) + + @staticmethod + def run_sdaccel_cmd(cmd, echo=False, check=True): + source_sdaccel_cmd = "source {}/sdaccel_setup.sh &> /dev/null".format(AwsFpgaTestBase.git_repo_dir) + cmd = source_sdaccel_cmd + " && " + cmd + return AwsFpgaTestBase.run_cmd(cmd, echo, check) + + @staticmethod + def get_cl_dir(cl): + return "{}/hdk/cl/examples/{}".format(AwsFpgaTestBase.WORKSPACE, cl) + + @staticmethod + def get_cl_to_aws_dir(cl): + return os.path.join(AwsFpgaTestBase.get_cl_dir(cl), 'build/checkpoints/to_aws') + + @staticmethod + def get_cl_afi_id_filename(cl): + return os.path.join(AwsFpgaTestBase.get_cl_dir(cl), 'build/create-afi/afi_ids.txt') + + @staticmethod + def get_cl_scripts_dir(cl): + return os.path.join(AwsFpgaTestBase.get_cl_dir(cl), 'build/scripts') + + @staticmethod + def get_cl_s3_dcp_tag(cl, option_tag): + ''' + @param option_tag: A tag that is unique for each build. + Required because a CL can be built with different options such as clock recipes.
+ ''' + assert option_tag != '' + return "jenkins/{}/{}/{}/dcp".format(os.environ['BUILD_TAG'], cl, option_tag) + + @staticmethod + def get_cl_s3_afi_tag(cl, option_tag): + ''' + @param option_tag: A tag that is unique for each build. + Required because a CL can be built with different options such as clock recipes. + ''' + assert option_tag != '' + return "jenkins/{}/{}/{}/create-afi/afi_ids.txt".format(os.environ['BUILD_TAG'], cl, option_tag) + + @staticmethod + def assert_afi_available(afi): + # Check the status of the afi + logger.info("Checking the status of {}".format(afi)) + afi_state = AwsFpgaTestBase.ec2_client().describe_fpga_images(FpgaImageIds=[afi])['FpgaImages'][0]['State']['Code'] + logger.info("{} state={}".format(afi, afi_state)) + assert afi_state == 'available' + + @staticmethod + def assert_afi_public(afi): + # Check the status of the afi + logger.info("Checking that {} is public".format(afi)) + loadPermissions = AwsFpgaTestBase.ec2_client().describe_fpga_image_attribute(FpgaImageId=afi, Attribute='loadPermission')['FpgaImageAttribute']['LoadPermissions'] + logger.info("{} loadPermissions:".format(afi)) + for loadPermission in loadPermissions: + if 'UserId' in loadPermission: + logger.info(" UserId={}".format(loadPermission['UserId'])) + else: + logger.info(" Group={}".format(loadPermission['Group'])) + is_public = AwsFpgaTestBase.ec2_client().describe_fpga_images(FpgaImageIds=[afi])['FpgaImages'][0]['Public'] + logger.info(" Public={}".format(is_public)) + assert is_public, "{} is not public. To make public:\n{}".format(afi, + "aws ec2 modify-fpga-image-attribute --fpga-image-id {} --load-permission \'Add=[{{Group=all}}]\'".format(afi)) + + @staticmethod + def fpga_clear_local_image(slot): + logger.info("Clearing FPGA slot {}".format(slot)) + (rc, stdout_lines, stderr_lines) = AwsFpgaTestBase.run_cmd("sudo fpga-clear-local-image -S {}".format(slot)) + assert rc == 0, "Clearing FPGA slot {} failed.".format(slot) + + @staticmethod + def fpga_load_local_image(agfi, slot): + logger.info("Loading {} into slot {}".format(agfi, slot)) + (rc, stdout_lines, stderr_lines) = AwsFpgaTestBase.run_cmd("sudo fpga-load-local-image -S {} -I {}".format(slot, agfi)) + assert rc == 0, "Failed to load {} in slot {}.".format(agfi, slot) + + @staticmethod + def check_fpga_afi_loaded(agfi, slot): + fpgaLocalImage = aws_fpga_test_utils.fpga_describe_local_image(slot) + assert fpgaLocalImage.statusName == 'loaded', "{} FPGA StatusName != loaded: {}".format(agfi, fpgaLocalImage.statusName) + assert fpgaLocalImage.statusCode == '0', "{} status code != 0: {}".format(agfi, fpgaLocalImage.statusCode) + assert fpgaLocalImage.errorName == 'ok', "{} FPGA ErrorName != ok: {}".format(agfi, fpgaLocalImage.errorName) + assert fpgaLocalImage.errorCode == '0', "{} ErrorCode != 0: {}".format(agfi, fpgaLocalImage.errorCode) + assert fpgaLocalImage.agfi == agfi, "Expected {}, actual {}".format(agfi, fpgaLocalImage.agfi) + return fpgaLocalImage + + @staticmethod + def fpga_get_virtual_led(slot, remove_dashes=False): + (rc, stdout_lines, stderr_lines) = AwsFpgaTestBase.run_cmd("sudo fpga-get-virtual-led -S {}".format(slot)) + assert rc == 0, "Failed to get virtual LEDs from slot {}.".format(slot) + value = stdout_lines[1] + if remove_dashes: + value = re.sub('-', '', value) + return value + + @staticmethod + def fpga_get_virtual_dip_switch(slot, remove_dashes=False): + (rc, stdout_lines, stderr_lines) = AwsFpgaTestBase.run_cmd("sudo fpga-get-virtual-dip-switch -S {}".format(slot)) + assert rc == 0, "Failed to get
virtual DIP switches from slot {}.".format(slot) + value = stdout_lines[1] + if remove_dashes: + value= re.sub('-', '', value) + return value + + @staticmethod + def fpga_set_virtual_dip_switch(value, slot): + value= re.sub('-', '', value) + (rc, stdout_lines, stderr_lines) = AwsFpgaTestBase.run_cmd("sudo fpga-set-virtual-dip-switch -S {} -D {}".format(slot, value)) + assert rc == 0, "Failed to set virtual DIP switches in slot {} to {}.".format(slot, value) + diff --git a/shared/lib/aws_fpga_test_utils/__init__.py b/shared/lib/aws_fpga_test_utils/__init__.py new file mode 100644 index 000000000..02f631293 --- /dev/null +++ b/shared/lib/aws_fpga_test_utils/__init__.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python2.7 + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and + +import git +import logging +import os +import re +import subprocess +import sys +import traceback +import urllib2 +try: + import aws_fpga_utils +except ImportError as e: + traceback.print_tb(sys.exc_info()[2]) + print "error: {}\nMake sure to source hdk_setup.sh".format(sys.exc_info()[1]) + sys.exit(1) + +logger = aws_fpga_utils.get_logger(__file__) + +def get_git_repo_root(path=None): + if not path: + path = os.getcwd() + repo = git.Repo(path, search_parent_directories=True) + repo_dir = repo.git.rev_parse("--show-toplevel") + return repo_dir + +def remove_edma_driver(): + logger.info("Removing the edma driver") + assert os.system('sudo rmmod edma-drv') == 0 + assert os.system('sudo rm -f /lib/modules/`uname -r`/edma-drv.ko') == 0 + assert os.system('sudo rm -f /etc/modules-load.d/edma.conf') == 0 + +def edma_driver_install_steps(): + logger.info("Running edma driver install steps") + assert os.system('echo \'edma\' | sudo tee -a /etc/modules-load.d/edma.conf') == 0 + assert os.system('cd $WORKSPACE/sdk/linux_kernel_drivers/edma && \ + make clean && \ + make && \ + sudo cp edma-drv.ko /lib/modules/`uname -r`/ && \ + sudo depmod && \ + sudo modprobe edma-drv') == 0 + +# Function to install the edma drivers +def install_edma_driver(): + logger.info("Installing the edma drivers") + + # Check if the file exists + if os.path.exists('/etc/modules-load.d/edma.conf'): + logger.info("Edma driver is already installed.") + remove_edma_driver() + edma_driver_install_steps() + +class FpgaLocalImage: + def __init__(self): + self.type = None + self.slot = None + self.agfi = None + self.statusName = None + self.statusCode = None + self.errorName = None + self.errorCode = None + self.shVersion = None + self.vendorId = None + self.deviceId = None + self.dbdf = None + return + + def describe_local_image(self, slot): + ''' +Example output: +$ sudo fpga-describe-local-image -S 0 -R -H +Type FpgaImageSlot FpgaImageId StatusName StatusCode ErrorName ErrorCode ShVersion +AFI 0 agfi-09c2a21805a8b9257 loaded 0 ok 0 0x0729172b +Type FpgaImageSlot VendorId DeviceId DBDF +AFIDEVICE 0 0x1d0f 0xf001 0000:00:1d.0 +''' + p = subprocess.Popen(['sudo', 'fpga-describe-local-image', '-S', str(slot), '-R', '-H'], stdout=subprocess.PIPE, 
stderr=subprocess.STDOUT) + (stdout_lines, stderr_lines) = p.communicate() + rc = p.returncode + if rc: + raise RuntimeError("fpga-describe-local-image failed with rc={}\nstdout:\n{}\nstderr:{}".format(rc, stdout_lines, stderr_lines)) + stdout_lines = stdout_lines.split('\n') + (self.type, self.slot, self.agfi, self.statusName, self.statusCode, self.errorName, self.errorCode, self.shVersion) = stdout_lines[1].split() + (type2, slot2, self.vendorId, self.deviceId, self.dbdf) = stdout_lines[3].split() + return + +def fpga_describe_local_image(slot): + fpgaLocalImage = FpgaLocalImage() + fpgaLocalImage.describe_local_image(slot) + return fpgaLocalImage + +def get_instance_id(): + instance_id = urllib2.urlopen('http://169.254.169.254/latest/meta-data/instance-id').read() + return instance_id + +def get_instance_type(): + instance_type = urllib2.urlopen('http://169.254.169.254/latest/meta-data/instance-type').read() + return instance_type + +def get_num_fpga_slots(instance_type): + if re.match(r'f1\.2xlarge', instance_type): + return 1 + elif re.match(r'f1\.16xlarge', instance_type): + return 8 + return 0 diff --git a/shared/lib/aws_fpga_utils/__init__.py b/shared/lib/aws_fpga_utils/__init__.py new file mode 100644 index 000000000..914ee9d0c --- /dev/null +++ b/shared/lib/aws_fpga_utils/__init__.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python2.7 + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + +import boto3 +import datetime +from datetime import datetime, timedelta +import logging +import re +import time + +def get_logger(name): + logger = logging.getLogger(name) + logger_console_handler = logging.StreamHandler() + logger_formatter = logging.Formatter('%(levelname)s:%(asctime)s: %(message)s') + logger_console_handler.setFormatter(logger_formatter) + logger.addHandler(logger_console_handler) + logger.setLevel(logging.INFO) + return logger + +logger = get_logger(__file__) + +def create_sns_subscription(topic_name, email, wait_for_confirmation=True): + WAIT_FOR_CONFIRMATION_DELAY = 10 + MAX_WAIT_FOR_CONFIRMATION_DELAY = timedelta(minutes=10) + sns_client = boto3.client('sns') + # Create the topic if it doesn't exist + # If it already exists just returns the ARN of the existing topic. 
+ topic_arn = sns_client.create_topic(Name=topic_name)['TopicArn'] + logger.debug("topic_arn={}".format(topic_arn)) + + # Search for existing subscription + subscription_found = False + list_resp = sns_client.list_subscriptions_by_topic(TopicArn=topic_arn) + if 'Subscriptions' in list_resp: + for subscription in list_resp['Subscriptions']: + if subscription['Endpoint'] == email and subscription['Protocol'] == 'email': + subscription_found = True + logger.debug("{} already subscribed to {}".format(email, topic_name)) + subscription_arn = subscription['SubscriptionArn'] + + # Create subscription if it doesn't already exist + if not subscription_found: + logger.info("Subscribing {} to the {} topic".format(email, topic_name)) + sub_resp = sns_client.subscribe(TopicArn=topic_arn, Protocol='email', Endpoint=email) + subscription_arn = sub_resp['SubscriptionArn'] + logger.info("Subscription created.") + logger.debug("Subscription ARN={}".format(subscription_arn)) + + if wait_for_confirmation: + # Make sure that subscription has been confirmed + arn_re = re.compile(r'^arn:aws:sns:') + subscription_confirmed = arn_re.match(subscription_arn) + if not subscription_confirmed: + logger.info("Waiting for subscription confirmation before continuing. Check your email.") + start_time = datetime.utcnow() + while not subscription_confirmed: + time.sleep(WAIT_FOR_CONFIRMATION_DELAY) + subscription_found = False + list_resp = sns_client.list_subscriptions_by_topic(TopicArn=topic_arn) + for subscription in list_resp['Subscriptions']: + if subscription['Endpoint'] == email and subscription['Protocol'] == 'email': + subscription_found = True + subscription_arn = subscription['SubscriptionArn'] + if not subscription_found: + logger.error("Subscription not found") + raise RuntimeError("Subscription not found") + subscription_confirmed = arn_re.match(subscription_arn) + if subscription_confirmed: + logger.info("Subscription confirmed") + else: + current_time = datetime.utcnow() + if (current_time - start_time) > MAX_WAIT_FOR_CONFIRMATION_DELAY: + logger.error("Timed out waiting for SNS subscription confirmation.") + raise RuntimeError("Timed out waiting for SNS subscription confirmation.") + + return topic_arn diff --git a/shared/tests/.gitignore b/shared/tests/.gitignore new file mode 100644 index 000000000..bbb4d0a70 --- /dev/null +++ b/shared/tests/.gitignore @@ -0,0 +1,2 @@ +# Output directory used by tests +output/ diff --git a/shared/tests/TESTING.md b/shared/tests/TESTING.md new file mode 100644 index 000000000..1e0e5aebf --- /dev/null +++ b/shared/tests/TESTING.md @@ -0,0 +1,122 @@ +# AWS-FPGA Testing + +## Table of Contents + +* [Overview](#overview) + * [Prerequisites](#prerequisites) + * [Running Tests](#running-tests) + * [References](#references) +* [Shared Testing](#shared-testing) +* [HDK Testing](#hdk-testing) +* [SDK Testing](#sdk-testing) +* [SDAccel Testing](#sdaccel-testing) +* [Jenkins Steps](#jenkins-steps) + +## Overview + +This repository is tested using [pytest](https://docs.pytest.org/en/latest/). +The pytest framework enables and encourages +[Test Driven Development](https://wiki.python.org/moin/TestDrivenDevelopment) so +that development of new features includes unit tests that verify that the feature is +implemented correctly. + +The release process will automatically run all unit tests on all pull requests, and all +the unit tests must pass before the pull request can be merged.
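+
+As a quick illustration, a minimal test module follows the pattern below. This sketch is
+hypothetical (it is not one of the files added by this change), but the import and the
+`AwsFpgaTestBase` helpers it uses are the ones introduced under `shared/lib` in this diff:
+
+```
+# test_example.py -- illustrative sketch only, not part of this change
+from aws_fpga_test_utils.AwsFpgaTestBase import AwsFpgaTestBase
+
+class TestExample(AwsFpgaTestBase):
+    '''Pytest test class. NOTE: Cannot have an __init__ method.'''
+
+    @staticmethod
+    def setup_class(self):
+        '''Do any setup required for tests.'''
+        AwsFpgaTestBase.setup_class(self, __file__)
+        return
+
+    def test_sdk_environment(self):
+        # Fails with a pointer to sdk_setup.sh if the SDK env was not sourced
+        AwsFpgaTestBase.assert_sdk_setup()
+```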
+ +All of the tests are contained in the following directories: +* shared/tests +* hdk/tests +* SDAccel/tests +* sdk/tests + +### Prerequisites + +The testing framework uses the following packages which must be installed prior to running tests: +* [pytest](#pytest) +* [GitPython](#gitpython) +* [boto3](#boto3) + +#### Pytest + +This package contains the pytest program and package that is used to run the tests. +If it is not installed on your system you can install it using the following command: + +```sudo pip install pytest --upgrade``` + +#### GitPython + +This package allows programmatic access to the git repository. +It is used to find the root directory of the repository as well as information about which files have +been changed by a pull request. + +Use the following command to install the package: + +```sudo pip install GitPython --upgrade``` + +#### Boto3 + +The boto3 package is the AWS Python API. +It can be used to start and terminate instances and any other API operation that you have +permissions for. + +Configuration of account credentials is explained in the [Quickstart](http://boto3.readthedocs.io/en/latest/guide/quickstart.html#configuration). + +The following command will install the latest release. + +```sudo pip install boto3 --upgrade``` + +### Running Tests + +Run tests in a module: + +`pytest -v `*`test_module.py`* + +Run tests in a directory: + +`pytest -v `*`test-dir`* + +To get a list of tests that will run without running them just add the `--collect-only` option: + +`pytest -v `*`test-dir`*` --collect-only` + +More details can be found on the [pytest web site](https://docs.pytest.org/en/latest/usage.html#specifying-tests-selecting-tests). + +### References + +* [pytest web site](https://docs.pytest.org/en/latest/index.html) +* [python.org PyTest Wiki](https://wiki.python.org/moin/PyTest) + +## Shared Testing + +``` +pytest shared/tests +``` + +### Markdown Broken Links Checking + +The following command will check all markdown files (*.md) in the repository for broken hyperlinks. +It first renders the markdown into HTML and then finds all of the links in the HTML and verifies +that the links point to valid URLs. + +``` +shared/tests/bin/check_md_links.py +``` + +This script can also be run as part of pytest using: +``` +pytest shared/tests/test_md_links.py +``` + +## HDK Testing + +## SDK Testing + +## SDAccel Testing + +## Jenkins Steps + +The commands for each Jenkins pipeline step are: + +* ``pytest -v shared/tests/test_md_links.py`` +* ``pytest -v hdk/tests/simulation_tests`` +* ``pytest -v hdk/tests/dcp_generation_tests -k test_cl_hello_world --input_key input_key --output_key output_key`` diff --git a/shared/tests/bin/check_md_links.py b/shared/tests/bin/check_md_links.py new file mode 100755 index 000000000..563bfe18b --- /dev/null +++ b/shared/tests/bin/check_md_links.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python2.7 + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied.
+# implied. See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script looks for broken hyperlinks in all markdown files (*.md) in the repository.
+# It returns 0 if it didn't find any broken links, or non-zero if it found broken links.
+#
+# Specifics:
+# Run it at the top of the aws-fpga* repository you cloned.
+# The algorithm is:
+# 1) Find all *.md files in the repo
+# 2) For each md file:
+#    - Render the markdown to xhtml5
+#    - Scan the html for links and anchors and save them in lists
+# 3) Check all of the links:
+#    - If it is an http link then use urllib2 to try to open the link.
+#      Exception: Doesn't test links to the AWS forum because that requires a login to access.
+#    - Check each link to other markdown files to make sure that the file exists
+#      and that if an anchor is specified in the link that the anchor exists.
+#    - Print out the details of each broken link or missing anchor.
+# 4) Display a summary of the results
+# 5) Return non-zero if there are broken links.
+#
+
+import argparse
+import git
+from HTMLParser import HTMLParser
+import io
+import logging
+import markdown
+import os
+import os.path
+from os.path import dirname, realpath
+import re
+import sys
+import traceback
+import urllib2
+try:
+    import aws_fpga_test_utils
+    import aws_fpga_utils
+except ImportError as e:
+    traceback.print_tb(sys.exc_info()[2])
+    print "error: {}\nMake sure to source hdk_setup.sh".format(sys.exc_info()[1])
+    sys.exit(1)
+
+logger = aws_fpga_utils.get_logger(__name__)
+
+class HtmlAnchorParser(HTMLParser):
+    '''
+    Class for parsing html to extract links and anchors.
+
+    It handles the start of each tag it finds and parses the tag type and its attributes.
+    A link is an "a" tag with an "href" attribute.
+    An anchor is any tag with an 'id' or 'name' attribute.
+
+    It saves the links in a list and the anchors in a dict so that it is easy
+    and efficient to check whether an anchor exists.
+    '''
+    def __init__(self):
+        HTMLParser.__init__(self)
+        self.anchors = {}
+        self.links = []
+        return
+
+    def handle_starttag(self, tag, attrs):
+        #logger.info("started {}".format(tag))
+        if tag == 'a':
+            for attr in attrs:
+                if attr[0] == 'href':
+                    #logger.info('link: {}'.format(attr[1]))
+                    self.links.append(attr[1])
+        for attr in attrs:
+            if attr[0] in ['id', 'name']:
+                #logger.info("{} attr: {}".format(tag, attr))
+                self.anchors[attr[1]] = 1
+        return
+
+def check_link(url):
+    '''
+    Checks a link whose URL starts with 'http'.
+
+    Ignores links that start with:
+    * https://forums.aws.amazon.com
+    because you have to be signed in to the forum for the link to be valid.
+
+    Uses urllib2 to parse the URL and check that it is valid.
+
+    @returns True if the link is valid, False otherwise.
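+
+    Example (illustrative, not from the test suite):
+        check_link('https://github.com/aws/aws-fpga')  # True while the page is reachable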
+    '''
+    logger.debug("Checking {}".format(url))
+    if re.match(r'https://forums\.aws\.amazon\.com/', url):
+        return True
+    try:
+        if not urllib2.urlparse.urlparse(url).netloc:
+            return False
+
+        website = urllib2.urlopen(url)
+        html = website.read()
+
+        if website.code != 200:
+            return False
+    except Exception:
+        logger.exception("Error opening {}".format(url))
+        return False
+    return True
+
+def contains_link(path):
+    '''
+    Returns True if any component of path is a symbolic link.
+
+    Used because github doesn't resolve paths that contain symbolic links.
+    '''
+    parent_dir = dirname(path)
+    if parent_dir == path:
+        return False
+    if os.path.islink(path):
+        logger.debug("Found link: {}".format(path))
+        return True
+    return contains_link(parent_dir)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--exclude', action='store', nargs='*', default=[], help="Paths to ignore")
+    parser.add_argument('--debug', action='store_true', default=False, help="Enable debug messages")
+    args = parser.parse_args()
+    if args.debug:
+        logger.setLevel(logging.DEBUG)
+
+    # Make sure we are running at the root of the repo
+    repo_dir = aws_fpga_test_utils.get_git_repo_root(dirname(__file__))
+    os.chdir(repo_dir)
+
+    num_links = 0   # total number of links we've found in .md files
+    num_broken = 0  # total number of links which are broken
+
+    # Get a list of markdown files
+    logger.debug("Getting list of .md files")
+    md_files = []
+    topdir = '.'
+    for root, dirs, files in os.walk(topdir):
+        for name in files:
+            if name.lower().endswith('.md'):
+                path = os.path.join(root, name)
+                path = os.path.relpath(path)
+                exclude = False
+                for exclude_path in args.exclude:
+                    if re.match(exclude_path, path):
+                        exclude = True
+                        break
+                if exclude:
+                    logger.warning("Ignoring {}".format(path))
+                    continue
+                md_files.append(path)
+    logger.debug("Found {} .md files".format(len(md_files)))
+
+    # Render the markdown files to xhtml5 and parse the HTML for links and anchors
+    md_info = {}
+    for md_file in md_files:
+        md_info[md_file] = {}
+        logger.debug("Rendering {} to html".format(md_file))
+        md_info[md_file]['html'] = markdown.markdown(io.open(md_file, 'r', encoding='utf-8').read(), extensions=['markdown.extensions.toc'], output_format='xhtml5')
+        html_parser = HtmlAnchorParser()
+        logger.debug("  Parsing out anchors and links")
+        html_parser.feed(md_info[md_file]['html'])
+        md_info[md_file]['anchors'] = html_parser.anchors
+        md_info[md_file]['links'] = html_parser.links
+        num_links += len(html_parser.links)
+
+    # Check links
+    for md_file in md_files:
+        logger.debug("Checking {}".format(md_file))
+        for link in md_info[md_file]['links']:
+            if re.match(r'http', link):
+                # Check using urllib2
+                if not check_link(link):
+                    logger.error("Broken link in {}: {}".format(md_file, link))
+                    num_broken += 1
+            else:
+                # File reference
+                # Split out the anchor in the file, if it exists.
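+                # For example (illustrative): "docs/README.md#overview" splits into
+                # link_only="docs/README.md" and anchor="overview", while "#overview"
+                # alone gives link_only="" and anchor="overview".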
+                matches = re.search(r'^(.*)#(.+)$', link)
+                if matches:
+                    link_only = matches.group(1)
+                    anchor = matches.group(2)
+                else:
+                    link_only = link
+                    anchor = None
+                file_exists = True
+                if len(link_only):
+                    # Link points to a different file
+                    md_file_dir = dirname(md_file)
+                    link_path = os.path.join(md_file_dir, link_only)
+                    # github doesn't resolve paths that contain symbolic links
+                    if contains_link(link_path):
+                        logger.error("Broken link in {}: {}".format(md_file, link))
+                        logger.error("  Link contains a symbolic link.")
+                        num_broken += 1
+                    link_path = os.path.relpath(link_path)
+                    if not os.path.exists(link_path):
+                        logger.error("Broken link in {}: {}".format(md_file, link))
+                        logger.error("  File doesn't exist: {}".format(link_path))
+                        file_exists = False
+                        num_broken += 1
+                else:
+                    # Link is an anchor-only reference within the same file.
+                    link_path = md_file
+                if file_exists and anchor:
+                    # If there is an anchor, check to make sure it is valid
+                    if not link_path in md_info:
+                        logger.error("Broken link in {}: {}".format(md_file, link))
+                        logger.error("  No anchors found for {}".format(link_path))
+                        num_broken += 1
+                    elif not anchor in md_info[link_path]['anchors']:
+                        logger.error("Broken link in {}: {}".format(md_file, link))
+                        logger.error("  Anchor missing in {}".format(link_path))
+                        num_broken += 1
+
+    logger.info("NUM doc files (.md)   : {}".format(len(md_files)))
+    logger.info("NUM links in doc files: {}".format(num_links))
+    logger.info("NUM broken links      : {}".format(num_broken))
+
+    # If there are no broken links the return code is 0; otherwise it's the number of broken links.
+    sys.exit(num_broken)
diff --git a/shared/tests/bin/setup_test_build_sdaccel_env.sh b/shared/tests/bin/setup_test_build_sdaccel_env.sh
new file mode 100644
index 000000000..d09635f8a
--- /dev/null
+++ b/shared/tests/bin/setup_test_build_sdaccel_env.sh
@@ -0,0 +1,40 @@
+# Amazon FPGA Hardware Development Kit
+#
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Amazon Software License (the "License"). You may not use
+# this file except in compliance with the License. A copy of the License is
+# located at
+#
+# http://aws.amazon.com/asl/
+#
+# or in the "license" file accompanying this file. This file is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
+# implied. See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script must be sourced from a bash shell or it will not work.
+# When being sourced, $0 will be the interactive shell and $BASH_SOURCE will contain the script being sourced.
+# When being run, $0 and $_ will be the same.
+
+script=${BASH_SOURCE[0]}
+if [ $script == $0 ]; then
+    echo "ERROR: You must source this script"
+    exit 2
+fi
+
+full_script=$(readlink -f $script)
+script_name=$(basename $full_script)
+script_dir=$(dirname $full_script)
+
+if ! source $script_dir/setup_test_env.sh; then
+    return 1
+fi
+
+if ! source $WORKSPACE/sdaccel_setup.sh; then
+    return 1
+fi
+
+if ! source $XILINX_SDX/settings64.sh; then
+    return 1
+fi
diff --git a/shared/tests/bin/setup_test_env.sh b/shared/tests/bin/setup_test_env.sh
new file mode 100644
index 000000000..2dd471632
--- /dev/null
+++ b/shared/tests/bin/setup_test_env.sh
@@ -0,0 +1,55 @@
+# Amazon FPGA Hardware Development Kit
+#
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Amazon Software License (the "License"). You may not use
+# this file except in compliance with the License. A copy of the License is
+# located at
+#
+# http://aws.amazon.com/asl/
+#
+# or in the "license" file accompanying this file. This file is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
+# implied. See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script must be sourced from a bash shell or it will not work.
+# When being sourced, $0 will be the interactive shell and $BASH_SOURCE will contain the script being sourced.
+# When being run, $0 and $_ will be the same.
+
+script=${BASH_SOURCE[0]}
+if [ $script == $0 ]; then
+    echo "ERROR: You must source this script"
+    exit 2
+fi
+
+full_script=$(readlink -f $script)
+script_name=$(basename $full_script)
+script_dir=$(dirname $full_script)
+
+if ! pip2.7 list --format columns | grep pytest; then
+    if ! sudo pip2.7 install pytest; then
+        echo "error: Install of pytest failed"
+        return 1
+    fi
+fi
+if ! pip2.7 list --format columns | grep GitPython; then
+    if ! sudo pip2.7 install GitPython; then
+        echo "error: Install of GitPython failed"
+        return 1
+    fi
+fi
+if ! pip2.7 list --format columns | grep boto3; then
+    if ! sudo pip2.7 install boto3; then
+        echo "error: Install of boto3 failed"
+        return 1
+    fi
+fi
+
+if [ ":$WORKSPACE" == ":" ]; then
+    export WORKSPACE=$(git rev-parse --show-toplevel)
+fi
+
+export PYTHONPATH=$WORKSPACE/shared/lib:$PYTHONPATH
+
+export AWS_DEFAULT_REGION=us-east-1
diff --git a/shared/tests/bin/setup_test_hdk_env.sh b/shared/tests/bin/setup_test_hdk_env.sh
new file mode 100644
index 000000000..1e1f9852d
--- /dev/null
+++ b/shared/tests/bin/setup_test_hdk_env.sh
@@ -0,0 +1,36 @@
+# Amazon FPGA Hardware Development Kit
+#
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Amazon Software License (the "License"). You may not use
+# this file except in compliance with the License. A copy of the License is
+# located at
+#
+# http://aws.amazon.com/asl/
+#
+# or in the "license" file accompanying this file. This file is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
+# implied. See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script must be sourced from a bash shell or it will not work.
+# When being sourced, $0 will be the interactive shell and $BASH_SOURCE will contain the script being sourced.
+# When being run, $0 and $_ will be the same.
+
+script=${BASH_SOURCE[0]}
+if [ $script == $0 ]; then
+    echo "ERROR: You must source this script"
+    exit 2
+fi
+
+full_script=$(readlink -f $script)
+script_name=$(basename $full_script)
+script_dir=$(dirname $full_script)
+
+if ! source $script_dir/setup_test_env.sh; then
+    return 1
+fi
+
+if ! source $WORKSPACE/hdk_setup.sh; then
+    return 1
+fi
diff --git a/shared/tests/bin/setup_test_sdk_env.sh b/shared/tests/bin/setup_test_sdk_env.sh
new file mode 100644
index 000000000..828f8f314
--- /dev/null
+++ b/shared/tests/bin/setup_test_sdk_env.sh
@@ -0,0 +1,36 @@
+# Amazon FPGA Hardware Development Kit
+#
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Amazon Software License (the "License"). You may not use
+# this file except in compliance with the License. A copy of the License is
+# located at
+#
+# http://aws.amazon.com/asl/
+#
+# or in the "license" file accompanying this file. This file is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
+# implied. See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script must be sourced from a bash shell or it will not work.
+# When being sourced, $0 will be the interactive shell and $BASH_SOURCE will contain the script being sourced.
+# When being run, $0 and $_ will be the same.
+
+script=${BASH_SOURCE[0]}
+if [ $script == $0 ]; then
+    echo "ERROR: You must source this script"
+    exit 2
+fi
+
+full_script=$(readlink -f $script)
+script_name=$(basename $full_script)
+script_dir=$(dirname $full_script)
+
+if ! source $script_dir/setup_test_env.sh; then
+    return 1
+fi
+
+if ! source $WORKSPACE/sdk_setup.sh; then
+    return 1
+fi
diff --git a/shared/tests/test_md_links.py b/shared/tests/test_md_links.py
new file mode 100755
index 000000000..adcdf6d5f
--- /dev/null
+++ b/shared/tests/test_md_links.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python2.7
+
+# Amazon FPGA Hardware Development Kit
+#
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Amazon Software License (the "License"). You may not use
+# this file except in compliance with the License. A copy of the License is
+# located at
+#
+# http://aws.amazon.com/asl/
+#
+# or in the "license" file accompanying this file. This file is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
+# implied. See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''
+Pytest module:
+
+Call using ```pytest test_md_links.py```
+
+See TESTING.md for details.
+'''
+
+import os
+from os.path import dirname, realpath
+import pytest
+import sys
+import traceback
+try:
+    import aws_fpga_test_utils
+    from aws_fpga_test_utils.AwsFpgaTestBase import AwsFpgaTestBase
+    import aws_fpga_utils
+except ImportError as e:
+    traceback.print_tb(sys.exc_info()[2])
+    print "error: {}\nMake sure to source hdk_setup.sh or shared/tests/bin/setup_test_env*.sh".format(sys.exc_info()[1])
+    sys.exit(1)
+
+logger = aws_fpga_utils.get_logger(__name__)
+
+class TestMdLinks(AwsFpgaTestBase):
+    '''
+    Pytest test class.
+
+    NOTE: Cannot have an __init__ method.
+    '''
+
+    @staticmethod
+    def setup_class(cls):
+        '''
+        Do any setup required for tests.
+        '''
+        AwsFpgaTestBase.setup_class(cls, __file__)
+        return
+
+    def test_md_links(self):
+        rc = os.system(self.test_dir + "/bin/check_md_links.py --exclude SDAccel/examples/xilinx")
+        assert rc == 0
+
\ No newline at end of file