Skip to content

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
  • 7 commits
  • 15 files changed
  • 0 commit comments
  • 1 contributor
View
9 CMakeLists.txt
@@ -26,6 +26,15 @@ set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
# Some compiler options used globally
set(CMAKE_C_FLAGS "-Wall -Wextra -Werror -std=gnu99 ${CMAKE_C_FLAGS}")
+if (NOT CMAKE_CROSSCOMPILING) # host CPU detection is meaningless for a cross-compilation target
+ # Detect and use optimised compiler flags for the host architecture,
+ # this is specific to x86 family CPUs.
+ include(cmake/OptimizeForArchitecture.cmake)
+ OptimizeForArchitecture() # collects the accepted flags in Vc_ARCHITECTURE_FLAGS
+ message(STATUS "Adding architecture flags: ${Vc_ARCHITECTURE_FLAGS}")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 ${Vc_ARCHITECTURE_FLAGS}") # -O3 is therefore only added for native builds
+endif ()
+
add_subdirectory(src)
add_subdirectory(docs)
add_subdirectory(tests)
View
51 cmake/AddCompilerFlag.cmake
@@ -0,0 +1,51 @@
+# Originally developed by Matthias Kretz as part of the Vc library.
+# http://gitorious.org/vc/vc/trees/master/cmake.
+#
+# Licensed under the terms of the GNU LGPL
+
+get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH)
+include("${_currentDir}/CheckCCompilerFlag.cmake")
+include("${_currentDir}/CheckCXXCompilerFlag.cmake")
+macro(AddCompilerFlag _flag)
+  # Probe the C and C++ compilers for <_flag> and append it to flag variables
+  # when it is accepted.  Three call forms are understood:
+  #   AddCompilerFlag(<flag>)
+  #       append to CMAKE_C_FLAGS / CMAKE_CXX_FLAGS
+  #   AddCompilerFlag(<flag> <var>)
+  #       as above, and store the C++ check result in <var>
+  #   AddCompilerFlag(<flag> [C_FLAGS <var>] [CXX_FLAGS <var>]
+  #                          [C_RESULT <var>] [CXX_RESULT <var>])
+  #       append only to the named flag variables, report to the named results
+  # This is a macro, so every helper variable below is visible to the caller.
+
+  # Build a suffix that is safe inside a variable name, then run both checks.
+  string(REGEX REPLACE "[-.+/:= ]" "_" _flag_esc "${_flag}")
+  check_c_compiler_flag("${_flag}" check_c_compiler_flag_${_flag_esc})
+  check_cxx_compiler_flag("${_flag}" check_cxx_compiler_flag_${_flag_esc})
+
+  if(${ARGC} GREATER 2)
+    # Keyword form: nothing is appended unless a destination is named.
+    unset(_c_flags)
+    unset(_cxx_flags)
+    set(state 0)
+    foreach(_arg ${ARGN})
+      # A keyword selects what the next plain argument means ...
+      if(_arg STREQUAL "C_FLAGS")
+        set(state 1)
+      elseif(_arg STREQUAL "CXX_FLAGS")
+        set(state 2)
+      elseif(_arg STREQUAL "C_RESULT")
+        set(state 3)
+      elseif(_arg STREQUAL "CXX_RESULT")
+        set(state 4)
+      # ... and a plain argument is consumed according to the last keyword.
+      elseif(state EQUAL 1)
+        set(_c_flags "${_arg}")
+      elseif(state EQUAL 2)
+        set(_cxx_flags "${_arg}")
+      elseif(state EQUAL 3)
+        set(${_arg} ${check_c_compiler_flag_${_flag_esc}})
+      elseif(state EQUAL 4)
+        set(${_arg} ${check_cxx_compiler_flag_${_flag_esc}})
+      else()
+        message(FATAL_ERROR "Syntax error for AddCompilerFlag")
+      endif()
+    endforeach()
+  else()
+    # Short forms: the global flag variables are the destination.
+    set(_c_flags "CMAKE_C_FLAGS")
+    set(_cxx_flags "CMAKE_CXX_FLAGS")
+    if(${ARGC} EQUAL 2)
+      set(${ARGV1} "${check_cxx_compiler_flag_${_flag_esc}}")
+    endif()
+  endif()
+
+  # Append the flag wherever the matching compiler accepted it.
+  if(check_c_compiler_flag_${_flag_esc} AND DEFINED _c_flags)
+    set(${_c_flags} "${${_c_flags}} ${_flag}")
+  endif()
+  if(check_cxx_compiler_flag_${_flag_esc} AND DEFINED _cxx_flags)
+    set(${_cxx_flags} "${${_cxx_flags}} ${_flag}")
+  endif()
+endmacro()
View
45 cmake/CheckCCompilerFlag.cmake
@@ -0,0 +1,45 @@
+# - Check whether the C compiler supports a given flag.
+# CHECK_C_COMPILER_FLAG(<flag> <var>)
+# <flag> - the compiler flag
+# <var> - variable to store the result
+# This internally calls the check_c_source_compiles macro.
+# See help for CheckCSourceCompiles for a listing of variables
+# that can modify the build.
+
+#=============================================================================
+# Copyright 2006-2009 Kitware, Inc.
+# Copyright 2006 Alexander Neundorf <neundorf@kde.org>
+# Copyright 2011 Matthias Kretz <kretz@kde.org>
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#=============================================================================
+# (To distribute this file outside of CMake, substitute the full
+# License text for the above reference.)
+
+INCLUDE(CheckCSourceCompiles)
+
+macro (CHECK_C_COMPILER_FLAG _FLAG _RESULT)
+  # Compile a trivial program with <_FLAG> passed through
+  # CMAKE_REQUIRED_DEFINITIONS and store the outcome in <_RESULT>.
+  # The caller's CMAKE_REQUIRED_DEFINITIONS is saved first and put back last.
+  set(SAFE_CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}")
+  set(CMAKE_REQUIRED_DEFINITIONS "${_FLAG}")
+  check_c_source_compiles("int main() { return 0;}" ${_RESULT}
+    # Compilers that merely warn about a bad flag still exit successfully,
+    # so any of these diagnostics in the output counts as a failure.
+    FAIL_REGEX "error: bad value (.*) for .* switch" # GNU
+    FAIL_REGEX "argument unused during compilation" # clang
+    FAIL_REGEX "is valid for .* but not for C" # GNU
+    FAIL_REGEX "unrecognized .*option" # GNU
+    FAIL_REGEX "ignored for target" # GNU
+    FAIL_REGEX "ignoring unknown option" # MSVC
+    FAIL_REGEX "[Uu]nknown option" # HP
+    FAIL_REGEX "[Ww]arning: [Oo]ption" # SunPro
+    FAIL_REGEX "command option .* is not recognized" # XL
+    FAIL_REGEX "WARNING: unknown flag:" # Open64
+    FAIL_REGEX " #10159: " # ICC
+  )
+  set(CMAKE_REQUIRED_DEFINITIONS "${SAFE_CMAKE_REQUIRED_DEFINITIONS}")
+endmacro ()
+
View
45 cmake/CheckCXXCompilerFlag.cmake
@@ -0,0 +1,45 @@
+# - Check whether the CXX compiler supports a given flag.
+# CHECK_CXX_COMPILER_FLAG(<flag> <var>)
+# <flag> - the compiler flag
+# <var> - variable to store the result
+# This internally calls the check_cxx_source_compiles macro. See help
+# for CheckCXXSourceCompiles for a listing of variables that can
+# modify the build.
+
+#=============================================================================
+# Copyright 2006-2009 Kitware, Inc.
+# Copyright 2006 Alexander Neundorf <neundorf@kde.org>
+# Copyright 2011 Matthias Kretz <kretz@kde.org>
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#=============================================================================
+# (To distribute this file outside of CMake, substitute the full
+# License text for the above reference.)
+
+INCLUDE(CheckCXXSourceCompiles)
+
+macro (CHECK_CXX_COMPILER_FLAG _FLAG _RESULT)
+  # Compile a trivial program with <_FLAG> passed through
+  # CMAKE_REQUIRED_DEFINITIONS and store the outcome in <_RESULT>.
+  # The caller's CMAKE_REQUIRED_DEFINITIONS is saved first and put back last.
+  set(SAFE_CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}")
+  set(CMAKE_REQUIRED_DEFINITIONS "${_FLAG}")
+  check_cxx_source_compiles("int main() { return 0;}" ${_RESULT}
+    # Compilers that merely warn about a bad flag still exit successfully,
+    # so any of these diagnostics in the output counts as a failure.
+    FAIL_REGEX "error: bad value (.*) for .* switch" # GNU
+    FAIL_REGEX "argument unused during compilation" # clang
+    FAIL_REGEX "is valid for .* but not for C\\\\+\\\\+" # GNU
+    FAIL_REGEX "unrecognized .*option" # GNU
+    FAIL_REGEX "ignored for target" # GNU
+    FAIL_REGEX "ignoring unknown option" # MSVC
+    FAIL_REGEX "[Uu]nknown option" # HP
+    FAIL_REGEX "[Ww]arning: [Oo]ption" # SunPro
+    FAIL_REGEX "command option .* is not recognized" # XL
+    FAIL_REGEX "WARNING: unknown flag:" # Open64
+    FAIL_REGEX " #10159: " # ICC
+  )
+  set(CMAKE_REQUIRED_DEFINITIONS "${SAFE_CMAKE_REQUIRED_DEFINITIONS}")
+endmacro ()
+
View
109 cmake/FindSSE.cmake
@@ -0,0 +1,109 @@
+# Originally developed by Matthias Kretz as part of the Vc library.
+# http://gitorious.org/vc/vc/trees/master/cmake.
+#
+# Licensed under the terms of the GNU LGPL
+
+# Check if SSE instructions are available on the machine where
+# the project is compiled.
+
+# Detect which SSE levels the build host supports and publish the answer in
+# the cache variables SSE2_FOUND, SSE3_FOUND, SSSE3_FOUND and SSE4_1_FOUND.
+# Linux is probed through /proc/cpuinfo and Darwin through sysctl; every other
+# system (Windows included, where detection is still a TODO) gets the
+# conservative default of "SSE2 only".
+
+# findsse_cache_feature(<cache_var> <regex> <doc>)
+# Caches <cache_var> as true when the feature text held in CPUINFO matches
+# <regex> and as false otherwise.  FORCE is not used, so a value that is
+# already in the cache (e.g. set by the user) is left untouched.
+# This is a function rather than a macro so that the regex is not re-parsed.
+function(findsse_cache_feature _cache_var _regex _doc)
+  if(CPUINFO MATCHES "${_regex}")
+    set(${_cache_var} true CACHE BOOL "${_doc}")
+  else()
+    set(${_cache_var} false CACHE BOOL "${_doc}")
+  endif()
+endfunction()
+
+# Always defined, so a failed probe reads as "no features" instead of
+# breaking the string handling below.
+set(CPUINFO "")
+
+if(CMAKE_SYSTEM_NAME MATCHES "Linux")
+  # Read the file directly instead of spawning `cat`.
+  if(EXISTS "/proc/cpuinfo")
+    file(READ "/proc/cpuinfo" CPUINFO)
+  endif()
+
+  findsse_cache_feature(SSE2_FOUND "sse2" "SSE2 available on host")
+  # The kernel lists SSE3 under the name "pni".  A literal "sse3", the
+  # "T2300" model string and SSSE3 are accepted too, as before.
+  findsse_cache_feature(SSE3_FOUND "[ \t]pni[ \t\n]|[^s]sse3|T2300|ssse3"
+    "SSE3 available on host")
+  findsse_cache_feature(SSSE3_FOUND "ssse3" "SSSE3 available on host")
+  findsse_cache_feature(SSE4_1_FOUND "sse4_1" "SSE4.1 available on host")
+elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+  execute_process(
+    COMMAND /usr/sbin/sysctl -n machdep.cpu.features
+    OUTPUT_VARIABLE CPUINFO
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET)
+
+  # "[^S]" keeps SSE2/SSE3 from matching inside a longer name such as SSSE3.
+  findsse_cache_feature(SSE2_FOUND "[^S]SSE2" "SSE2 available on host")
+  findsse_cache_feature(SSE3_FOUND "[^S]SSE3" "SSE3 available on host")
+  findsse_cache_feature(SSSE3_FOUND "SSSE3" "SSSE3 available on host")
+  # "[.]" matches a literal dot only.
+  findsse_cache_feature(SSE4_1_FOUND "SSE4[.]1" "SSE4.1 available on host")
+else()
+  # Windows (TODO: real detection) and all other systems: assume SSE2 only.
+  set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
+  set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
+  set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
+  set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
+endif()
+
+if(NOT SSE2_FOUND)
+  message(STATUS "Could not find hardware support for SSE2 on this machine.")
+endif()
+if(NOT SSE3_FOUND)
+  message(STATUS "Could not find hardware support for SSE3 on this machine.")
+endif()
+if(NOT SSSE3_FOUND)
+  message(STATUS "Could not find hardware support for SSSE3 on this machine.")
+endif()
+if(NOT SSE4_1_FOUND)
+  message(STATUS "Could not find hardware support for SSE4.1 on this machine.")
+endif()
+
+mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND)
View
414 cmake/OptimizeForArchitecture.cmake
@@ -0,0 +1,414 @@
+# Originally developed by Matthias Kretz as part of the Vc library.
+# http://gitorious.org/vc/vc/trees/master/cmake.
+#
+# Licensed under the terms of the GNU LGPL
+
+get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH)
+include("${_currentDir}/AddCompilerFlag.cmake")
+include(CheckIncludeFile)
+
+macro(_my_find _list _value _ret)
+  # Membership test: sets <_ret> to TRUE when <_value> is an element of the
+  # list variable named <_list>, and to FALSE otherwise.
+  # list(FIND) yields the element's index, or -1 when it is absent.
+  list(FIND ${_list} "${_value}" _found)
+  if(_found GREATER -1)
+    set(${_ret} TRUE)
+  else()
+    set(${_ret} FALSE)
+  endif()
+endmacro()
+
+# AutodetectHostArchitecture()
+# Identifies the CPU of the machine running CMake and sets TARGET_ARCHITECTURE
+# to the matching name understood by OptimizeForArchitecture(), falling back
+# to "generic".  As a macro it also leaves these variables in the caller's
+# scope: _vendor_id, _cpu_family, _cpu_model and _cpu_flags.  _cpu_flags is
+# the host's complete feature flag list (Linux and Darwin only).  For NetBurst
+# CPUs the caller's _available_vector_units_list is extended directly.
+macro(AutodetectHostArchitecture)
+  set(TARGET_ARCHITECTURE "generic")
+  set(Vc_ARCHITECTURE_FLAGS)
+  set(_vendor_id)
+  set(_cpu_family)
+  set(_cpu_model)
+  set(_cpu_flags)
+  if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    file(READ "/proc/cpuinfo" _cpuinfo)
+    string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}")
+    string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}")
+    string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}")
+    # Capture the whole flags line, not just its first word; otherwise later
+    # checks such as `_cpu_flags MATCHES "sse4_1"` can never succeed.  The
+    # leading "\n" pins the match to a line that starts with "flags".
+    string(REGEX REPLACE ".*\nflags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}")
+  elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+    # execute_process() replaces the deprecated exec_program().
+    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.vendor
+      OUTPUT_VARIABLE _vendor_id OUTPUT_STRIP_TRAILING_WHITESPACE)
+    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.model
+      OUTPUT_VARIABLE _cpu_model OUTPUT_STRIP_TRAILING_WHITESPACE)
+    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.family
+      OUTPUT_VARIABLE _cpu_family OUTPUT_STRIP_TRAILING_WHITESPACE)
+    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.features
+      OUTPUT_VARIABLE _cpu_flags OUTPUT_STRIP_TRAILING_WHITESPACE)
+    # Normalise to the Linux spelling (e.g. "SSE4.1" becomes "sse4_1").
+    string(TOLOWER "${_cpu_flags}" _cpu_flags)
+    string(REPLACE "." "_" _cpu_flags "${_cpu_flags}")
+  elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
+    # The registry holds the vendor string and an identifier of the form
+    # "... Family <n> Model <n> ...".
+    get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE)
+    get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE)
+    mark_as_advanced(_vendor_id _cpu_id)
+    string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}")
+    string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}")
+  endif()
+  if(_vendor_id STREQUAL "GenuineIntel")
+    if(_cpu_family EQUAL 6)
+      # Any recent Intel CPU except NetBurst
+      if(_cpu_model EQUAL 58)
+        set(TARGET_ARCHITECTURE "ivy-bridge")
+      elseif(_cpu_model EQUAL 46) # Xeon 7500 series
+        set(TARGET_ARCHITECTURE "westmere")
+      elseif(_cpu_model EQUAL 45) # Xeon TNG
+        set(TARGET_ARCHITECTURE "sandy-bridge")
+      elseif(_cpu_model EQUAL 44) # Xeon 5600 series
+        set(TARGET_ARCHITECTURE "westmere")
+      elseif(_cpu_model EQUAL 42) # Core TNG
+        set(TARGET_ARCHITECTURE "sandy-bridge")
+      elseif(_cpu_model EQUAL 37) # Core i7/i5/i3
+        set(TARGET_ARCHITECTURE "westmere")
+      elseif(_cpu_model EQUAL 31) # Core i7/i5
+        set(TARGET_ARCHITECTURE "westmere")
+      elseif(_cpu_model EQUAL 30) # Core i7/i5
+        set(TARGET_ARCHITECTURE "westmere")
+      elseif(_cpu_model EQUAL 29)
+        set(TARGET_ARCHITECTURE "penryn")
+      elseif(_cpu_model EQUAL 28)
+        set(TARGET_ARCHITECTURE "atom")
+      elseif(_cpu_model EQUAL 26)
+        set(TARGET_ARCHITECTURE "nehalem")
+      elseif(_cpu_model EQUAL 23)
+        set(TARGET_ARCHITECTURE "penryn")
+      elseif(_cpu_model EQUAL 15)
+        set(TARGET_ARCHITECTURE "merom")
+      elseif(_cpu_model EQUAL 14)
+        set(TARGET_ARCHITECTURE "core")
+      elseif(_cpu_model LESS 14)
+        message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the generic CPU settings with SSE2.")
+        set(TARGET_ARCHITECTURE "generic")
+      else()
+        # Unknown model above 14: fall back to the 65nm Core 2 settings.
+        message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.")
+        set(TARGET_ARCHITECTURE "merom")
+      endif()
+    elseif(_cpu_family EQUAL 7) # Itanium (not supported)
+      message(WARNING "Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not supported by OptimizeForArchitecture.cmake.")
+    elseif(_cpu_family EQUAL 15) # NetBurst
+      # TARGET_ARCHITECTURE stays "generic"; only the vector units are recorded.
+      list(APPEND _available_vector_units_list "sse" "sse2")
+      if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead
+        list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
+      endif()
+    endif()
+  elseif(_vendor_id STREQUAL "AuthenticAMD")
+    if(_cpu_family EQUAL 21) # 15h
+      set(TARGET_ARCHITECTURE "bulldozer")
+    elseif(_cpu_family EQUAL 20) # 14h
+      # No dedicated settings: stays "generic".
+    elseif(_cpu_family EQUAL 18) # 12h
+      # No dedicated settings: stays "generic".
+    elseif(_cpu_family EQUAL 16) # 10h
+      set(TARGET_ARCHITECTURE "barcelona")
+    elseif(_cpu_family EQUAL 15)
+      set(TARGET_ARCHITECTURE "k8")
+      if(_cpu_model GREATER 64) # I don't know the right number to put here. This is just a guess from the hardware I have access to
+        set(TARGET_ARCHITECTURE "k8-sse3")
+      endif()
+    endif()
+  endif()
+endmacro()
+
+# OptimizeForArchitecture()
+# Translates the TARGET_ARCHITECTURE cache setting into compiler flags.
+#   Input:  TARGET_ARCHITECTURE (cache).  "auto" runs
+#           AutodetectHostArchitecture(); "none" disables everything.
+#   Output: Vc_ARCHITECTURE_FLAGS receives the accepted flags.
+#           The USE_SSE2 ... USE_FMA4 cache options record the chosen vector
+#           units and are re-forced whenever TARGET_ARCHITECTURE changes.
+#           With MSVC, a __<UNIT>__ definition is added per enabled unit
+#           through add_definitions().
+# Being a macro, all helper variables are visible to the caller.
+macro(OptimizeForArchitecture)
+  set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"ivy-bridge\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\".")
+  # _force becomes FORCE only when the architecture changed since the last
+  # run, so the USE_* defaults are recomputed then and otherwise left alone.
+  set(_force)
+  if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}")
+    message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"")
+    set(_force FORCE)
+  endif()
+  set(_last_target_arch "${TARGET_ARCHITECTURE}" CACHE STRING "" FORCE)
+  mark_as_advanced(_last_target_arch)
+  string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE)
+
+  set(_march_flag_list)
+  set(_available_vector_units_list)
+
+  if(TARGET_ARCHITECTURE STREQUAL "auto")
+    AutodetectHostArchitecture()
+    message(STATUS "Detected CPU: ${TARGET_ARCHITECTURE}")
+  endif()
+
+  # Per architecture: candidate -march values (most specific first) and the
+  # vector units the CPU provides.
+  if(TARGET_ARCHITECTURE STREQUAL "core")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
+  elseif(TARGET_ARCHITECTURE STREQUAL "merom")
+    list(APPEND _march_flag_list "merom")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
+  elseif(TARGET_ARCHITECTURE STREQUAL "penryn")
+    list(APPEND _march_flag_list "penryn")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
+    message(STATUS "Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.")
+    if(_cpu_flags MATCHES "sse4_1")
+      message(STATUS "SSE4.1: enabled (auto-detected from this computer's CPU flags)")
+      list(APPEND _available_vector_units_list "sse4.1")
+    else()
+      message(STATUS "SSE4.1: disabled (auto-detected from this computer's CPU flags)")
+    endif()
+  elseif(TARGET_ARCHITECTURE STREQUAL "nehalem")
+    list(APPEND _march_flag_list "nehalem")
+    list(APPEND _march_flag_list "corei7")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
+  elseif(TARGET_ARCHITECTURE STREQUAL "westmere")
+    list(APPEND _march_flag_list "westmere")
+    list(APPEND _march_flag_list "corei7")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
+  elseif(TARGET_ARCHITECTURE STREQUAL "ivy-bridge")
+    list(APPEND _march_flag_list "core-avx-i")
+    list(APPEND _march_flag_list "corei7-avx")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx" "rdrnd" "f16c")
+  elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge")
+    list(APPEND _march_flag_list "sandybridge")
+    list(APPEND _march_flag_list "corei7-avx")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx")
+  elseif(TARGET_ARCHITECTURE STREQUAL "atom")
+    list(APPEND _march_flag_list "atom")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
+  elseif(TARGET_ARCHITECTURE STREQUAL "k8")
+    list(APPEND _march_flag_list "k8")
+    list(APPEND _available_vector_units_list "sse" "sse2")
+  elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3")
+    list(APPEND _march_flag_list "k8-sse3")
+    list(APPEND _march_flag_list "k8")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
+  elseif(TARGET_ARCHITECTURE STREQUAL "interlagos")
+    list(APPEND _march_flag_list "bdver1")
+    list(APPEND _march_flag_list "bulldozer")
+    list(APPEND _march_flag_list "barcelona")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
+  elseif(TARGET_ARCHITECTURE STREQUAL "bulldozer")
+    list(APPEND _march_flag_list "bdver1")
+    list(APPEND _march_flag_list "bulldozer")
+    list(APPEND _march_flag_list "barcelona")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
+  elseif(TARGET_ARCHITECTURE STREQUAL "barcelona")
+    list(APPEND _march_flag_list "barcelona")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
+  elseif(TARGET_ARCHITECTURE STREQUAL "istanbul")
+    list(APPEND _march_flag_list "barcelona")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
+  elseif(TARGET_ARCHITECTURE STREQUAL "magny-cours")
+    list(APPEND _march_flag_list "barcelona")
+    list(APPEND _march_flag_list "core2")
+    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
+  elseif(TARGET_ARCHITECTURE STREQUAL "generic")
+    list(APPEND _march_flag_list "generic")
+  elseif(TARGET_ARCHITECTURE STREQUAL "none")
+    # add this clause to remove it from the else clause
+  else()
+    message(FATAL_ERROR "Unknown target architecture: \"${TARGET_ARCHITECTURE}\". Please set TARGET_ARCHITECTURE to a supported value.")
+  endif()
+
+  if(NOT TARGET_ARCHITECTURE STREQUAL "none")
+    set(_disable_vector_unit_list)
+    set(_enable_vector_unit_list)
+    _my_find(_available_vector_units_list "sse2" SSE2_FOUND)
+    _my_find(_available_vector_units_list "sse3" SSE3_FOUND)
+    _my_find(_available_vector_units_list "ssse3" SSSE3_FOUND)
+    _my_find(_available_vector_units_list "sse4.1" SSE4_1_FOUND)
+    _my_find(_available_vector_units_list "sse4.2" SSE4_2_FOUND)
+    _my_find(_available_vector_units_list "sse4a" SSE4a_FOUND)
+    # The Vc_*_INTRINSICS_BROKEN switches come from outside this file.  They
+    # are reported with message(WARNING) because no UserWarning() command is
+    # defined by this module or by anything it includes.
+    if(DEFINED Vc_AVX_INTRINSICS_BROKEN AND Vc_AVX_INTRINSICS_BROKEN)
+      message(WARNING "AVX disabled per default because of old/broken compiler")
+      set(AVX_FOUND false)
+      set(XOP_FOUND false)
+      set(FMA4_FOUND false)
+    else()
+      _my_find(_available_vector_units_list "avx" AVX_FOUND)
+      if(DEFINED Vc_FMA4_INTRINSICS_BROKEN AND Vc_FMA4_INTRINSICS_BROKEN)
+        message(WARNING "FMA4 disabled per default because of old/broken compiler")
+        set(FMA4_FOUND false)
+      else()
+        _my_find(_available_vector_units_list "fma4" FMA4_FOUND)
+      endif()
+      if(DEFINED Vc_XOP_INTRINSICS_BROKEN AND Vc_XOP_INTRINSICS_BROKEN)
+        message(WARNING "XOP disabled per default because of old/broken compiler")
+        set(XOP_FOUND false)
+      else()
+        _my_find(_available_vector_units_list "xop" XOP_FOUND)
+      endif()
+    endif()
+    # User-visible switches, defaulting to what the architecture provides.
+    set(USE_SSE2 ${SSE2_FOUND} CACHE BOOL "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." ${_force})
+    set(USE_SSE3 ${SSE3_FOUND} CACHE BOOL "Use SSE3. If SSE3 instructions are not enabled they will be emulated." ${_force})
+    set(USE_SSSE3 ${SSSE3_FOUND} CACHE BOOL "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." ${_force})
+    set(USE_SSE4_1 ${SSE4_1_FOUND} CACHE BOOL "Use SSE4.1. If SSE4.1 instructions are not enabled they will be emulated." ${_force})
+    set(USE_SSE4_2 ${SSE4_2_FOUND} CACHE BOOL "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." ${_force})
+    set(USE_SSE4a ${SSE4a_FOUND} CACHE BOOL "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." ${_force})
+    set(USE_AVX ${AVX_FOUND} CACHE BOOL "Use AVX. This will double some of the vector sizes relative to SSE." ${_force})
+    set(USE_XOP ${XOP_FOUND} CACHE BOOL "Use XOP." ${_force})
+    set(USE_FMA4 ${FMA4_FOUND} CACHE BOOL "Use FMA4." ${_force})
+    mark_as_advanced(USE_SSE2 USE_SSE3 USE_SSSE3 USE_SSE4_1 USE_SSE4_2 USE_SSE4a USE_AVX USE_XOP USE_FMA4)
+    # Sort every unit into the enable list or the disable list.
+    if(USE_SSE2)
+      list(APPEND _enable_vector_unit_list "sse2")
+    else()
+      list(APPEND _disable_vector_unit_list "sse2")
+    endif()
+    if(USE_SSE3)
+      list(APPEND _enable_vector_unit_list "sse3")
+    else()
+      list(APPEND _disable_vector_unit_list "sse3")
+    endif()
+    if(USE_SSSE3)
+      list(APPEND _enable_vector_unit_list "ssse3")
+    else()
+      list(APPEND _disable_vector_unit_list "ssse3")
+    endif()
+    if(USE_SSE4_1)
+      list(APPEND _enable_vector_unit_list "sse4.1")
+    else()
+      list(APPEND _disable_vector_unit_list "sse4.1")
+    endif()
+    if(USE_SSE4_2)
+      list(APPEND _enable_vector_unit_list "sse4.2")
+    else()
+      list(APPEND _disable_vector_unit_list "sse4.2")
+    endif()
+    if(USE_SSE4a)
+      list(APPEND _enable_vector_unit_list "sse4a")
+    else()
+      list(APPEND _disable_vector_unit_list "sse4a")
+    endif()
+    if(USE_AVX)
+      list(APPEND _enable_vector_unit_list "avx")
+      # we want SSE intrinsics to result in instructions using the VEX prefix.
+      # Otherwise integer ops (which require the older SSE intrinsics) would
+      # always have a large penalty.
+      list(APPEND _enable_vector_unit_list "sse2avx")
+    else()
+      list(APPEND _disable_vector_unit_list "avx")
+    endif()
+    if(USE_XOP)
+      list(APPEND _enable_vector_unit_list "xop")
+    else()
+      list(APPEND _disable_vector_unit_list "xop")
+    endif()
+    if(USE_FMA4)
+      list(APPEND _enable_vector_unit_list "fma4")
+    else()
+      list(APPEND _disable_vector_unit_list "fma4")
+    endif()
+    if(MSVC)
+      # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX)
+      # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010)
+      _my_find(_enable_vector_unit_list "avx" _avx)
+      set(_avx_flag FALSE)
+      if(_avx)
+        AddCompilerFlag("/arch:AVX" C_FLAGS Vc_ARCHITECTURE_FLAGS C_RESULT _avx_flag)
+      endif()
+      if(NOT _avx_flag)
+        _my_find(_enable_vector_unit_list "sse2" _found)
+        if(_found)
+          AddCompilerFlag("/arch:SSE2" C_FLAGS Vc_ARCHITECTURE_FLAGS)
+        endif()
+      endif()
+      # Define a __<UNIT>__ macro per enabled unit, e.g. "sse4.1" gives __SSE4_1__.
+      foreach(_flag ${_enable_vector_unit_list})
+        string(TOUPPER "${_flag}" _flag)
+        string(REPLACE "." "_" _flag "__${_flag}__")
+        add_definitions("-D${_flag}")
+      endforeach()
+    elseif(CMAKE_C_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux)
+      # Use the single -x option of the highest available unit, probing from
+      # the newest unit down to the oldest.
+      _my_find(_available_vector_units_list "avx2" _found)
+      if(_found)
+        AddCompilerFlag("-xCORE-AVX2" C_FLAGS Vc_ARCHITECTURE_FLAGS)
+      else()
+        _my_find(_available_vector_units_list "f16c" _found)
+        if(_found)
+          AddCompilerFlag("-xCORE-AVX-I" C_FLAGS Vc_ARCHITECTURE_FLAGS)
+        else()
+          _my_find(_available_vector_units_list "avx" _found)
+          if(_found)
+            AddCompilerFlag("-xAVX" C_FLAGS Vc_ARCHITECTURE_FLAGS)
+          else()
+            _my_find(_available_vector_units_list "sse4.2" _found)
+            if(_found)
+              AddCompilerFlag("-xSSE4.2" C_FLAGS Vc_ARCHITECTURE_FLAGS)
+            else()
+              _my_find(_available_vector_units_list "sse4.1" _found)
+              if(_found)
+                AddCompilerFlag("-xSSE4.1" C_FLAGS Vc_ARCHITECTURE_FLAGS)
+              else()
+                _my_find(_available_vector_units_list "ssse3" _found)
+                if(_found)
+                  AddCompilerFlag("-xSSSE3" C_FLAGS Vc_ARCHITECTURE_FLAGS)
+                else()
+                  _my_find(_available_vector_units_list "sse3" _found)
+                  if(_found)
+                    # If the target host is an AMD machine then we still want to use -xSSE2 because the binary would refuse to run at all otherwise
+                    _my_find(_march_flag_list "barcelona" _found)
+                    if(NOT _found)
+                      _my_find(_march_flag_list "k8-sse3" _found)
+                    endif()
+                    if(_found)
+                      AddCompilerFlag("-xSSE2" C_FLAGS Vc_ARCHITECTURE_FLAGS)
+                    else()
+                      AddCompilerFlag("-xSSE3" C_FLAGS Vc_ARCHITECTURE_FLAGS)
+                    endif()
+                  else()
+                    _my_find(_available_vector_units_list "sse2" _found)
+                    if(_found)
+                      AddCompilerFlag("-xSSE2" C_FLAGS Vc_ARCHITECTURE_FLAGS)
+                    endif()
+                  endif()
+                endif()
+              endif()
+            endif()
+          endif()
+        endif()
+      endif()
+    else() # not MSVC and not ICC => GCC, Clang, Open64
+      # Use the first -march value the compiler accepts.
+      foreach(_flag ${_march_flag_list})
+        AddCompilerFlag("-march=${_flag}" C_RESULT _good C_FLAGS Vc_ARCHITECTURE_FLAGS)
+        if(_good)
+          break()
+        endif()
+      endforeach()
+      # Enable each unit only when the compiler accepts -m<unit> and, where
+      # the unit has an intrinsics header, that header is usable as well.
+      foreach(_flag ${_enable_vector_unit_list})
+        AddCompilerFlag("-m${_flag}" C_RESULT _result)
+        if(_result)
+          set(_header FALSE)
+          if(_flag STREQUAL "sse3")
+            set(_header "pmmintrin.h")
+          elseif(_flag STREQUAL "ssse3")
+            set(_header "tmmintrin.h")
+          elseif(_flag STREQUAL "sse4.1")
+            set(_header "smmintrin.h")
+          elseif(_flag STREQUAL "sse4.2")
+            set(_header "smmintrin.h")
+          elseif(_flag STREQUAL "sse4a")
+            set(_header "ammintrin.h")
+          elseif(_flag STREQUAL "avx")
+            set(_header "immintrin.h")
+          elseif(_flag STREQUAL "fma4")
+            set(_header "x86intrin.h")
+          elseif(_flag STREQUAL "xop")
+            set(_header "x86intrin.h")
+          endif()
+          set(_resultVar "HAVE_${_header}")
+          string(REPLACE "." "_" _resultVar "${_resultVar}")
+          if(_header)
+            CHECK_INCLUDE_FILE("${_header}" ${_resultVar} "-m${_flag}")
+            if(NOT ${_resultVar})
+              set(_useVar "USE_${_flag}")
+              string(TOUPPER "${_useVar}" _useVar)
+              string(REPLACE "." "_" _useVar "${_useVar}")
+              message(STATUS "disabling ${_useVar} because ${_header} is missing")
+              set(${_useVar} FALSE)
+              list(APPEND _disable_vector_unit_list "${_flag}")
+            endif()
+          endif()
+          if(NOT _header OR ${_resultVar})
+            set(Vc_ARCHITECTURE_FLAGS "${Vc_ARCHITECTURE_FLAGS} -m${_flag}")
+          endif()
+        endif()
+      endforeach()
+      # Explicitly switch off every unit that is not wanted.
+      foreach(_flag ${_disable_vector_unit_list})
+        AddCompilerFlag("-mno-${_flag}" C_FLAGS Vc_ARCHITECTURE_FLAGS)
+      endforeach()
+    endif()
+  endif()
+endmacro()
View
10 cmake/Toolchain-gcc-arm-embedded.cmake
@@ -1,5 +1,15 @@
# CMake Toolchain file for the gcc-arm-embedded toolchain.
# https://launchpad.net/gcc-arm-embedded
+#
+# Copyright (c) 2013 Swift Navigation Inc.
+# Contact: Fergus Noble <fergus@swift-nav.com>
+#
+# This source is subject to the license found in the file 'LICENSE' which must
+# be distributed together with this source. All other rights reserved.
+#
+# THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
+# EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
include(CMakeForceCompiler)
View
4 docs/diagrams/2nd_order_loop_filter.tex → docs/diagrams/1st_order_loop_filter.tex
@@ -22,15 +22,15 @@
\node [sum, right of=split2] (sum2) {};
\node [coordinate, right of=sum2, node distance=1.5cm] (output) {};
- \draw [-] (input) -- (split);
+ \draw [-] (input) edge node {$\varepsilon_k$} (split);
\draw [->] (split) -- (ki);
\draw [->] (split) |- (kp);
\draw [->] (ki) -- (sum1);
\draw [-] (sum1) -- (split2);
\draw [->] (split2) -- (z);
\draw [->] (z) -| (sum1);
\draw [->] (split2) -- (sum2);
- \draw [->] (sum2) -- (output);
+ \draw [->] (sum2) edge node {$y_k$} (output);
\draw [->] (kp) -| (sum2);
\draw [-,thin] (sum1.north) -- (sum1.south);
View
4 docs/diagrams/CMakeLists.txt
@@ -29,7 +29,7 @@ function(build_diagrams out_var)
COMMENT "Building LaTeX diagram ${in_f}.tex"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${PDFLATEX_COMPILER}
- -interaction=batchmode
+ #-interaction=batchmode
-output-directory ${CMAKE_CURRENT_BINARY_DIR}
${in_f}.tex
COMMAND ${IMAGEMAGICK_CONVERT}
@@ -48,7 +48,7 @@ if (BUILD_DIAGRAMS)
build_diagrams(DIAGRAM_PNGS
costas_loop
2nd_order_dpll
- 2nd_order_loop_filter
+ 1st_order_loop_filter
wgsecef2azel
)
View
27 include/correlate.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2013 Swift Navigation Inc.
+ * Contact: Fergus Noble <fergus@swift-nav.com>
+ *
+ * This source is subject to the license found in the file 'LICENSE' which must
+ * be distributed together with this source. All other rights reserved.
+ *
+ * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
+ * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef LIBSWIFTNAV_CORRELATE_H
+#define LIBSWIFTNAV_CORRELATE_H
+
+#include "common.h"
+
+void track_correlate(s8* samples, s8* code,
+ double* init_code_phase, double code_step,
+ double* init_carr_phase, double carr_step,
+ double* I_E, double* Q_E,
+ double* I_P, double* Q_P,
+ double* I_L, double* Q_L,
+ u32* num_samples);
+
+#endif /* LIBSWIFTNAV_CORRELATE_H */
+
View
42 include/track.h
@@ -16,10 +16,32 @@
#include "common.h"
#include "ephemeris.h"
+/** \addtogroup track
+ * \{ */
+
+/** \addtogroup track_loop
+ * \{ */
+
+/** State structure for the simple loop filter.
+ * Should be initialised with simple_lf_init().
+ */
typedef struct {
- double I, Q;
+ double pgain; /**< Proportional gain. */
+ double igain; /**< Integral gain. */
+ double prev_error; /**< Previous error. */
+ double y; /**< Output variable. */
+} simple_lf_state_t;
+
+/** \} */
+
+/** Structure representing a complex valued correlation. */
+typedef struct {
+ double I; /**< In-phase correlation. */
+ double Q; /**< Quadrature correlation. */
} correlation_t;
+/** \} */
+
typedef struct {
u8 prn;
double code_phase_chips;
@@ -39,8 +61,14 @@ typedef struct {
double sat_vel[3];
} navigation_measurement_t;
-void calc_loop_coeff(double BW, double zeta, double k, double *tau1,
- double *tau2);
+void calc_loop_gains(double bw, double zeta, double k, double sample_freq,
+ double *pgain, double *igain);
+double costas_discriminator(double I, double Q);
+double dll_discriminator(correlation_t cs[3]);
+
+void simple_lf_init(simple_lf_state_t *s, double y0,
+ double pgain, double igain);
+double simple_lf_update(simple_lf_state_t *s, double error);
void calc_navigation_measurement(u8 n_channels, channel_measurement_t meas[],
navigation_measurement_t nav_meas[],
@@ -49,13 +77,5 @@ void calc_navigation_measurement_(u8 n_channels, channel_measurement_t* meas[],
navigation_measurement_t* nav_meas[],
double nav_time, ephemeris_t* ephemerides[]);
-void track_correlate(s8* samples, s8* code,
- double* init_code_phase, double code_step,
- double* init_carr_phase, double carr_step,
- double* I_E, double* Q_E,
- double* I_P, double* Q_P,
- double* I_L, double* Q_L,
- u32* num_samples);
-
#endif /* LIBSWIFTNAV_TRACK_H */
View
1 src/CMakeLists.txt
@@ -12,6 +12,7 @@ set(libswiftnav_SRCS
pvt.c
tropo.c
track.c
+ correlate.c
coord_system.c
linear_algebra.c
prns.c
View
159 src/correlate.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2013 Swift Navigation Inc.
+ * Contact: Fergus Noble <fergus@swift-nav.com>
+ *
+ * This source is subject to the license found in the file 'LICENSE' which must
+ * be distributed together with this source. All other rights reserved.
+ *
+ * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
+ * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <math.h>
+
+#ifdef __SSSE3__
+#include <tmmintrin.h>
+#endif
+
+#include "correlate.h"
+
+/** \defgroup corr Correlation
+ * Correlators used for tracking.
+ * \{ */
+
+#ifndef __SSSE3__
+
+void track_correlate(s8* samples, s8* code,
+ double* init_code_phase, double code_step, double* init_carr_phase, double carr_step,
+ double* I_E, double* Q_E, double* I_P, double* Q_P, double* I_L, double* Q_L, u32* num_samples)
+{
+ double code_phase = *init_code_phase;
+ double carr_phase = *init_carr_phase;
+
+ double carr_sin = sin(carr_phase);
+ double carr_cos = cos(carr_phase);
+ double sin_delta = sin(carr_step);
+ double cos_delta = cos(carr_step);
+
+ *I_E = *Q_E = *I_P = *Q_P = *I_L = *Q_L = 0;
+
+ double code_E, code_P, code_L;
+ double baseband_Q, baseband_I;
+
+ *num_samples = (int)ceil((1023.0 - code_phase) / code_step);
+
+ for (u32 i=0; i<*num_samples; i++) {
+ /*code_E = get_chip(code, (int)ceil(code_phase-0.5));*/
+ /*code_P = get_chip(code, (int)ceil(code_phase));*/
+ /*code_L = get_chip(code, (int)ceil(code_phase+0.5));*/
+ /*code_E = code[(int)ceil(code_phase-0.5)];*/
+ /*code_P = code[(int)ceil(code_phase)];*/
+ /*code_L = code[(int)ceil(code_phase+0.5)];*/
+ code_E = code[(int)(code_phase+0.5)];
+ code_P = code[(int)(code_phase+1.0)];
+ code_L = code[(int)(code_phase+1.5)];
+
+ baseband_Q = carr_cos * samples[i];
+ baseband_I = carr_sin * samples[i];
+
+ double carr_sin_ = carr_sin*cos_delta + carr_cos*sin_delta;
+ double carr_cos_ = carr_cos*cos_delta - carr_sin*sin_delta;
+ double i_mag = (3.0 - carr_sin_*carr_sin_ - carr_cos_*carr_cos_) / 2.0;
+ carr_sin = carr_sin_ * i_mag;
+ carr_cos = carr_cos_ * i_mag;
+
+ *I_E += code_E * baseband_I;
+ *Q_E += code_E * baseband_Q;
+ *I_P += code_P * baseband_I;
+ *Q_P += code_P * baseband_Q;
+ *I_L += code_L * baseband_I;
+ *Q_L += code_L * baseband_Q;
+
+ code_phase += code_step;
+ carr_phase += carr_step;
+ }
+ *init_code_phase = code_phase - 1023;
+ *init_carr_phase = fmod(carr_phase, 2*M_PI);
+}
+
+#else
+
+void track_correlate(s8* samples, s8* code,
+ double* init_code_phase, double code_step, double* init_carr_phase, double carr_step,
+ double* I_E, double* Q_E, double* I_P, double* Q_P, double* I_L, double* Q_L, u32* num_samples)
+{
+ double code_phase = *init_code_phase;
+
+ float carr_sin = sin(*init_carr_phase);
+ float carr_cos = cos(*init_carr_phase);
+ float sin_delta = sin(carr_step);
+ float cos_delta = cos(carr_step);
+
+ *num_samples = (int)ceil((1023.0 - code_phase) / code_step);
+
+ __m128 IE_QE_IP_QP;
+ __m128 CE_CE_CP_CP;
+ __m128 IL_QL_X_X;
+ __m128 CL_CL_X_X;
+ __m128 S_C_S_C;
+ __m128 BI_BQ_BI_BQ;
+ __m128 dC_dS_dS_dC;
+ __m128 a1, a2, a3;
+
+ IE_QE_IP_QP = _mm_set_ps(0, 0, 0, 0);
+ IL_QL_X_X = _mm_set_ps(0, 0, 0, 0);
+ S_C_S_C = _mm_set_ps(carr_sin, carr_cos, carr_sin, carr_cos);
+ dC_dS_dS_dC = _mm_set_ps(cos_delta, sin_delta, sin_delta, cos_delta);
+
+ for (u32 i=0; i<*num_samples; i++) {
+ CE_CE_CP_CP = _mm_set_ps(code[(int)(code_phase+0.5)],
+ code[(int)(code_phase+0.5)],
+ code[(int)(code_phase+1.0)],
+ code[(int)(code_phase+1.0)]);
+ CL_CL_X_X = _mm_set_ps(code[(int)(code_phase+1.5)],
+ code[(int)(code_phase+1.5)],
+ 0, 0);
+
+ /* Load sample and multiply by sin/cos carrier to mix down to baseband. */
+ a1 = _mm_set1_ps((float)samples[i]); // S, S, S, S
+ BI_BQ_BI_BQ = _mm_mul_ps(a1, S_C_S_C);
+
+ /* Update carrier sin/cos values by multiplying by the constant rotation
+ * matrix corresponding to carr_step. */
+ a1 = _mm_mul_ps(S_C_S_C, dC_dS_dS_dC); // SdC, CdS, SdS, CdC
+ a2 = _mm_shuffle_ps(a1, a1, _MM_SHUFFLE(3, 0, 3, 0)); // SdC_CdC_SdC_CdC
+ a3 = _mm_shuffle_ps(a1, a1, _MM_SHUFFLE(2, 1, 2, 1)); // CdS_SdS_CdS_SdS
+ // S = SdC + CdS, C = CdC - SdS
+ S_C_S_C = _mm_addsub_ps(a2, a3); // C_S_C_S
+
+ /* Multiply code and baseband signal. */
+ a1 = _mm_mul_ps(CE_CE_CP_CP, BI_BQ_BI_BQ);
+ a2 = _mm_mul_ps(CL_CL_X_X, BI_BQ_BI_BQ);
+
+ /* Increment accumulators. */
+ IE_QE_IP_QP = _mm_add_ps(IE_QE_IP_QP, a1);
+ IL_QL_X_X = _mm_add_ps(IL_QL_X_X, a2);
+
+ code_phase += code_step;
+ }
+ *init_code_phase = code_phase - 1023;
+ *init_carr_phase = fmod(*init_carr_phase + *num_samples*carr_step, 2*M_PI);
+
+ float res[8];
+ _mm_storeu_ps(res, IE_QE_IP_QP);
+ _mm_storeu_ps(res+4, IL_QL_X_X);
+
+ *I_E = res[3];
+ *Q_E = res[2];
+ *I_P = res[1];
+ *Q_P = res[0];
+ *I_L = res[7];
+ *Q_L = res[6];
+}
+
+#endif /* !__SSSE3__ */
+
+/** \} */
+
+
View
235 src/track.c
@@ -11,7 +11,6 @@
*/
#include <math.h>
-/*#include <tmmintrin.h>*/
#include "pvt.h"
#include "prns.h"
@@ -28,15 +27,6 @@
* Functions used by the tracking loops.
* \{ */
-void calc_loop_coeff(double BW, double zeta, double k, double *tau1,
- double *tau2) {
- /* Solve for the natural frequency. */
- double omega_n = BW*8*zeta / (4*zeta*zeta + 1);
-
- *tau1 = k / (omega_n*omega_n);
- *tau2 = 2*zeta/omega_n;
-}
-
/** Calculate coefficients for a 2nd order digital PLL / DLL loop filter.
*
* A second order digital PLL consists of a first-order filter and a
@@ -55,7 +45,7 @@ void calc_loop_coeff(double BW, double zeta, double k, double *tau1,
*
* The first-order loop filter is shown below:
*
- * \image html 2nd_order_loop_filter.png Digital loop filter block diagram.
+ * \image html 1st_order_loop_filter.png Digital loop filter block diagram.
*
* and has transfer function:
*
@@ -96,6 +86,9 @@ void calc_loop_coeff(double BW, double zeta, double k, double *tau1,
* B-Y. Chung, C. Chien, H. Samueli, and R. Jain.
* IEEE Journal on Selected Areas in Communications, 11:1096–1107, 1993.
*
+ * \todo This math is all wrong, these slides show the analysis we want:
+ * http://www.compdsp.com/presentations/Jacobsen/abineau_dpll_analysis.pdf
+ *
* \param bw The loop noise bandwidth, \f$B_L\f$.
* \param zeta The damping ratio, \f$\zeta\f$.
* \param k The loop gain, \f$k\f$.
@@ -105,16 +98,21 @@ void calc_loop_coeff(double BW, double zeta, double k, double *tau1,
* \param igain Where to store the calculated integral gain, \f$k_i\f$.
*/
void calc_loop_gains(double bw, double zeta, double k, double sample_freq,
- double *pgain, double *igain) {
+ double *pgain, double *igain)
+{
/* Find the natural frequency. */
double omega_n = bw*8*zeta / (4*zeta*zeta + 1);
/* Some intermediate values. */
+/*
double T = 1. / sample_freq;
double denominator = k*(4 + 4*zeta*omega_n*T + omega_n*omega_n*T*T);
*pgain = 8*zeta*omega_n*T / denominator;
*igain = 4*omega_n*omega_n*T*T / denominator;
+*/
+ *igain = omega_n * omega_n / (k * sample_freq);
+ *pgain = 2.0 * zeta * omega_n / k;
}
/** Phase discriminator for a Costas loop.
@@ -127,19 +125,94 @@ void calc_loop_gains(double bw, double zeta, double k, double sample_freq,
* \varepsilon_k = \frac{1}{2\pi} \tan^{-1} \left(\frac{Q_k}{I_k}\right)
* \f]
*
+ * References:
+ * -# Understanding GPS: Principles and Applications.
+ * Elliott D. Kaplan. Artech House, 1996.
+ *
+ * \todo Fix potential divide by zero if I is zero.
+ *
* \param I The prompt in-phase correlation, \f$I_k\f$.
* \param Q The prompt quadrature correlation, \f$Q_k\f$.
* \return The discriminator value, \f$\varepsilon_k\f$.
*/
-double costas_discriminator(double I, double Q) {
- return atan(I/Q)/(2*M_PI);
+double costas_discriminator(double I, double Q)
+{
+ return atan(Q / I) / (2*M_PI);
}
-double dll_discriminator(correlation_t cs[3]) {
+/** Normalised non-coherent early-minus-late envelope discriminator.
+ *
+ * Implements the normalised non-coherent early-minus-late envelope DLL
+ * discriminator.
+ *
+ * \f[
+ * \varepsilon_k = \frac{1}{2} \frac{E - L}{E + L}
+ * \f]
+ *
+ * where:
+ *
+ * \f[
+ * E = \sqrt{I^2_E + Q^2_E}
+ * \f]
+ * \f[
+ * L = \sqrt{I^2_L + Q^2_L}
+ * \f]
+ *
+ * References:
+ * -# Understanding GPS: Principles and Applications.
+ * Elliott D. Kaplan. Artech House, 1996.
+ *
+ * \param cs Array of three correlations; cs[0] is used as the early
+ * correlation and cs[2] as the late correlation (cs[1] is not read).
+ * \return The discriminator value, \f$\varepsilon_k\f$.
+ */
+double dll_discriminator(correlation_t cs[3])
+{
double early_mag = sqrt((double)cs[0].I*cs[0].I + (double)cs[0].Q*cs[0].Q);
double late_mag = sqrt((double)cs[2].I*cs[2].I + (double)cs[2].Q*cs[2].Q);
- return (early_mag - late_mag) / (early_mag + late_mag);
+ return 0.5 * (early_mag - late_mag) / (early_mag + late_mag);
+}
+
+/** Initialise a simple first-order loop filter.
+ * The gains can be calculated using calc_loop_gains().
+ *
+ * \param s The loop filter state struct to initialise.
+ * \param y0 The initial value of the output variable, \f$y_0\f$.
+ * \param pgain The proportional gain, \f$k_p\f$.
+ * \param igain The integral gain, \f$k_i\f$.
+ */
+void simple_lf_init(simple_lf_state_t *s, double y0,
+ double pgain, double igain)
+{
+ s->y = y0;
+ s->prev_error = 0;
+ s->pgain = pgain;
+ s->igain = igain;
+}
+
+/** Update step for the simple first-order loop filter.
+ *
+ * Implements the first-order loop filter as shown below:
+ *
+ * \image html 1st_order_loop_filter.png Digital loop filter block diagram.
+ *
+ * with transfer function:
+ *
+ * \f[
+ * F[z] = \frac{(k_p+k_i) - k_p z^{-1}}{1 - z^{-1}}
+ * \f]
+ *
+ * \param s The loop filter state struct.
+ * \param error The error output from the discriminator, \f$\varepsilon_k\f$.
+ * \return The updated output variable, \f$y_k\f$.
+ */
+double simple_lf_update(simple_lf_state_t *s, double error)
+{
+ s->y += s->pgain * (error - s->prev_error) + \
+ s->igain * error;
+ s->prev_error = error;
+
+ return s->y;
}
/** \} */
@@ -196,135 +269,5 @@ void calc_navigation_measurement_(u8 n_channels, channel_measurement_t* meas[],
}
}
-void track_correlate(s8* samples, s8* code,
- double* init_code_phase, double code_step, double* init_carr_phase, double carr_step,
- double* I_E, double* Q_E, double* I_P, double* Q_P, double* I_L, double* Q_L, u32* num_samples)
-{
- double code_phase = *init_code_phase;
- double carr_phase = *init_carr_phase;
-
- double carr_sin = sin(carr_phase);
- double carr_cos = cos(carr_phase);
- double sin_delta = sin(carr_step);
- double cos_delta = cos(carr_step);
-
- *I_E = *Q_E = *I_P = *Q_P = *I_L = *Q_L = 0;
-
- double code_E, code_P, code_L;
- double baseband_Q, baseband_I;
-
- *num_samples = (int)ceil((1023.0 - code_phase) / code_step);
-
- for (u32 i=0; i<*num_samples; i++) {
- /*code_E = get_chip(code, (int)ceil(code_phase-0.5));*/
- /*code_P = get_chip(code, (int)ceil(code_phase));*/
- /*code_L = get_chip(code, (int)ceil(code_phase+0.5));*/
- /*code_E = code[(int)ceil(code_phase-0.5)];*/
- /*code_P = code[(int)ceil(code_phase)];*/
- /*code_L = code[(int)ceil(code_phase+0.5)];*/
- code_E = code[(int)(code_phase+0.5)];
- code_P = code[(int)(code_phase+1.0)];
- code_L = code[(int)(code_phase+1.5)];
-
- baseband_Q = carr_cos * samples[i];
- baseband_I = carr_sin * samples[i];
-
- double carr_sin_ = carr_sin*cos_delta + carr_cos*sin_delta;
- double carr_cos_ = carr_cos*cos_delta - carr_sin*sin_delta;
- double i_mag = (3.0 - carr_sin_*carr_sin_ - carr_cos_*carr_cos_) / 2.0;
- carr_sin = carr_sin_ * i_mag;
- carr_cos = carr_cos_ * i_mag;
-
- *I_E += code_E * baseband_I;
- *Q_E += code_E * baseband_Q;
- *I_P += code_P * baseband_I;
- *Q_P += code_P * baseband_Q;
- *I_L += code_L * baseband_I;
- *Q_L += code_L * baseband_Q;
-
- code_phase += code_step;
- carr_phase += carr_step;
- }
- *init_code_phase = code_phase - 1023;
- *init_carr_phase = fmod(carr_phase, 2*M_PI);
-}
-
-#if 0
-
-void track_correlate_sse(s8* samples, s8* code,
- double* init_code_phase, double code_step, double* init_carr_phase, double carr_step,
- double* I_E, double* Q_E, double* I_P, double* Q_P, double* I_L, double* Q_L, u32* num_samples)
-{
- double code_phase = *init_code_phase;
-
- float carr_sin = sin(*init_carr_phase);
- float carr_cos = cos(*init_carr_phase);
- float sin_delta = sin(carr_step);
- float cos_delta = cos(carr_step);
-
- *num_samples = (int)ceil((1023.0 - code_phase) / code_step);
-
- __m128 IE_QE_IP_QP;
- __m128 CE_CE_CP_CP;
- __m128 IL_QL_X_X;
- __m128 CL_CL_X_X;
- __m128 S_C_S_C;
- __m128 BI_BQ_BI_BQ;
- __m128 dC_dS_dS_dC;
- __m128 a1, a2, a3;
-
- IE_QE_IP_QP = _mm_set_ps(0, 0, 0, 0);
- IL_QL_X_X = _mm_set_ps(0, 0, 0, 0);
- S_C_S_C = _mm_set_ps(carr_sin, carr_cos, carr_sin, carr_cos);
- dC_dS_dS_dC = _mm_set_ps(cos_delta, sin_delta, sin_delta, cos_delta);
-
- for (u32 i=0; i<*num_samples; i++) {
- CE_CE_CP_CP = _mm_set_ps(code[(int)(code_phase+0.5)],
- code[(int)(code_phase+0.5)],
- code[(int)(code_phase+1.0)],
- code[(int)(code_phase+1.0)]);
- CL_CL_X_X = _mm_set_ps(code[(int)(code_phase+1.5)],
- code[(int)(code_phase+1.5)],
- 0, 0);
-
- /* Load sample and multiply by sin/cos carrier to mix down to baseband. */
- a1 = _mm_set1_ps((float)samples[i]); // S, S, S, S
- BI_BQ_BI_BQ = _mm_mul_ps(a1, S_C_S_C);
-
- /* Update carrier sin/cos values by multiplying by the constant rotation
- * matrix corresponding to carr_step. */
- a1 = _mm_mul_ps(S_C_S_C, dC_dS_dS_dC); // SdC, CdS, SdS, CdC
- a2 = _mm_shuffle_ps(a1, a1, _MM_SHUFFLE(3, 0, 3, 0)); // SdC_CdC_SdC_CdC
- a3 = _mm_shuffle_ps(a1, a1, _MM_SHUFFLE(2, 1, 2, 1)); // CdS_SdS_CdS_SdS
- // S = SdC + CdS, C = CdC - SdS
- S_C_S_C = _mm_addsub_ps(a2, a3); // C_S_C_S
-
- /* Multiply code and baseband signal. */
- a1 = _mm_mul_ps(CE_CE_CP_CP, BI_BQ_BI_BQ);
- a2 = _mm_mul_ps(CL_CL_X_X, BI_BQ_BI_BQ);
-
- /* Increment accumulators. */
- IE_QE_IP_QP = _mm_add_ps(IE_QE_IP_QP, a1);
- IL_QL_X_X = _mm_add_ps(IL_QL_X_X, a2);
-
- code_phase += code_step;
- }
- *init_code_phase = code_phase - 1023;
- *init_carr_phase = fmod(*init_carr_phase + *num_samples*carr_step, 2*M_PI);
-
- float res[8];
- _mm_storeu_ps(res, IE_QE_IP_QP);
- _mm_storeu_ps(res+4, IL_QL_X_X);
-
- *I_E = res[3];
- *Q_E = res[2];
- *I_P = res[1];
- *Q_P = res[0];
- *I_L = res[7];
- *Q_L = res[6];
-}
-
-#endif
-
/** \} */
View
2 tests/CMakeLists.txt
@@ -8,7 +8,7 @@ else (CMAKE_CROSSCOMPILING)
else (NOT CHECK_FOUND)
include_directories(${CHECK_INCLUDE_DIRS})
- set(TEST_LIBS ${TEST_LIBS} ${CHECK_LIBRARIES} swiftnav)
+ set(TEST_LIBS ${TEST_LIBS} ${CHECK_LIBRARIES} m swiftnav)
include_directories("${PROJECT_SOURCE_DIR}/include")

No commit comments for this range

Something went wrong with that request. Please try again.