Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 79 additions & 29 deletions .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ jobs:
macos_version: "14"
platform_id: "macosx_arm64"

- name: "windows-latest"
platform: "windows"
platform_id: "win_amd64"

steps:
- name: Checkout
uses: actions/checkout@v4
Expand All @@ -50,13 +54,20 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Install poetry
run: pipx install poetry==1.8.3 --python $(which python3)
shell: bash

- name: Test poetry
- name: Install Poetry
run: python -m pip install poetry==1.8.3

# Windows only (platform_id == 'win_amd64'): extend PATH and export the CPython tag.
# - Appends the MSYS2 directories (usr\bin, mingw64\bin), the runneradmin .local\bin
#   directory and C:\nasm to the file named by GITHUB_PATH.
# - Derives "cpXYZ" from matrix.python-version by removing the dots and writes it to
#   GITHUB_ENV as python_cp_version.
- name: Set up custom PATH and set py version to cpXYZ [windows]
if: ${{matrix.os.platform_id == 'win_amd64'}}
shell: pwsh
run: |
poetry run python --version
echo "C:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\msys64\mingw64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\Users\runneradmin\.local\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\nasm" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
$version = "${{ matrix.python-version }}"
$cp_version = "cp$($version -replace '\.', '')"
Add-Content -Path $env:GITHUB_ENV -Value "python_cp_version=$cp_version"

- name: Setup pypi for poetry [for releases only]
if: ${{ startsWith(github.ref, 'refs/tags/') }}
Expand All @@ -65,6 +76,7 @@ jobs:
poetry config pypi-token.pypi ${{ secrets.PYPI_TOKEN }}

- name: Convert python version to cpXYZ
if: ${{matrix.os.platform_id != 'win_amd64'}}
run: |
version=${{ matrix.python-version }}
cp_version="cp${version//.}"
Expand Down Expand Up @@ -147,20 +159,6 @@ jobs:
mkdir ./dist
cp wheelhouse/*.whl ./dist/

- name: publish wheels (dry run) [macos]
if: matrix.os.platform == 'macos'
run: |
ls -l ./
ls -l ./dist
poetry publish --dry-run --no-interaction -vvv

- name: publish wheels (on publishing) [macos]
if: ${{ matrix.os.platform == 'macos' && startsWith(github.ref, 'refs/tags/') }}
run: |
ls -l ./
ls -l ./dist
poetry publish --no-interaction -vvv

- name: Set up QEMU [linux]
if: matrix.os.name == 'ubuntu-latest'
uses: docker/setup-qemu-action@v3
Expand Down Expand Up @@ -191,17 +189,69 @@ jobs:
mkdir ./dist
cp wheelhouse/*.whl ./dist/

- name: publish wheels (dry run) [linux]
if: matrix.os.platform == 'linux'
# Windows only: install MSYS2 through msys2/setup-msys2 (with `update: true`) together
# with the MinGW-w64 x86_64 and i686 toolchain packages.
# NOTE(review): the Windows build step in this workflow sets CIBW_ARCHS to AMD64 only —
# confirm the 32-bit (i686) toolchain is actually needed.
- name: Set up MSYS2 [windows]
if: ${{matrix.os.platform_id == 'win_amd64'}}
uses: msys2/setup-msys2@v2
with:
update: true
install: >
mingw-w64-x86_64-toolchain
mingw-w64-i686-toolchain

# Windows only: download the prebuilt qpdf external-libs archive (release-2024-06-07)
# into C:\windows-libs, unpack it in place, and list the extracted tree in the job log.
- name: Set up QPDF external-libs [windows]
if: ${{matrix.os.platform_id == 'win_amd64'}}
shell: pwsh
run: |
New-Item -Path 'C:\windows-libs' -ItemType Directory -Force
Invoke-WebRequest -Uri 'https://github.com/qpdf/external-libs/releases/download/release-2024-06-07/qpdf-external-libs-bin.zip' -OutFile 'C:\windows-libs\qpdf-external-libs-bin.zip'
Expand-Archive -Path 'C:\windows-libs\qpdf-external-libs-bin.zip' -DestinationPath 'C:\windows-libs' -Force
Get-ChildItem -Path 'C:\windows-libs' -Recurse

# Windows only: download nasm.exe (2.16.03, win64) into C:\nasm and print its version
# as a smoke test.
# NOTE(review): `nasm -v` is resolved through PATH; C:\nasm is appended to GITHUB_PATH by
# the "Set up custom PATH ..." step of this workflow — confirm that step runs first.
# NOTE(review): the executable is fetched from fossies.org without any checksum check.
- name: Set up NASM assembler [windows]
if: ${{matrix.os.platform_id == 'win_amd64'}}
shell: pwsh
run: |
New-Item -Path 'C:\nasm' -ItemType Directory -Force
Invoke-WebRequest -Uri 'https://fossies.org/windows/misc/nasm-2.16.03-win64.zip/nasm-2.16.03/nasm.exe' -OutFile 'C:\nasm\nasm.exe'
nasm -v

# Windows only: build the wheel with cibuildwheel against the MSYS2/MinGW toolchain.
# The env block points pkg-config and CMake at the MSYS2 tree and at the qpdf
# external-libs unpacked under C:/windows-libs, selects the "MSYS Makefiles" generator
# and the NASM binary in C:/nasm. The resulting wheels are listed, their contents are
# printed, and they are copied from .\wheelhouse to .\dist.
- name: Build wheels
  if: ${{matrix.os.platform_id == 'win_amd64'}}
  env:
    CIBW_PLATFORM: windows
    CIBW_BUILD_VERBOSITY: 3
    CIBW_ARCHS: AMD64
    CIBW_PROJECT_REQUIRES_PYTHON: "~=${{ matrix.python-version }}.0"
    PKG_CONFIG_PATH: "C:/msys64/mingw64/lib/pkgconfig"
    PKG_CONFIG_EXECUTABLE: "C:/msys64/usr/bin/pkg-config.exe"
    CMAKE_PREFIX_PATH: "C:/msys64/mingw64;C:/windows-libs/external-libs"
    CMAKE_LIBRARY_PATH: "C:/msys64/mingw64/lib;C:/windows-libs/external-libs/lib-mingw64"
    CMAKE_INCLUDE_PATH: "C:/msys64/mingw64/include;C:/windows-libs/external-libs/include"
    CMAKE_GENERATOR: "MSYS Makefiles"
    BUILD_THREADS: 1
    ASM_NASM: "C:/nasm/nasm.exe"
  shell: pwsh
  run: |
    # Remove the Strawberry directory only when the runner image ships it: with the
    # pwsh shell a Remove-Item on a missing path would abort the whole step.
    # NOTE(review): presumably removed so its bundled tools do not shadow MSYS2 — confirm.
    if (Test-Path -Path 'C:\Strawberry') {
      Remove-Item -Recurse -Force 'C:\Strawberry'
    }
    # Pin zlib/libjpeg to the static MinGW builds from the qpdf external-libs archive.
    $env:CMAKE_ARGS = "-DZLIB_LIBRARY=C:/windows-libs/external-libs/lib-mingw64/libz.a -DZLIB_INCLUDE_DIR=C:/windows-libs/external-libs/include -DJPEG_LIBRARY=C:/windows-libs/external-libs/lib-mingw64/libjpeg.a -DJPEG_INCLUDE_DIR=C:/windows-libs/external-libs/include"
    poetry install --no-interaction --no-root --only=build
    poetry run python -m cibuildwheel --output-dir .\wheelhouse
    Get-ChildItem -Path .\wheelhouse -Filter *.whl | Format-List
    Get-ChildItem -Path .\wheelhouse -Filter *.whl | ForEach-Object {
      $file = $_.FullName
      Write-Output "Inspecting $file"
      poetry run python -m zipfile --list "$file"
    }
    if (-not (Test-Path -Path .\dist)) {
      New-Item -Path .\dist -ItemType Directory
    }
    Copy-Item -Path .\wheelhouse\*.whl -Destination .\dist\

# No `if:` guard on this step: list the workspace and ./dist, then run
# `poetry publish` in --dry-run mode with verbose output.
- name: publish wheels (dry run)
run: |
ls -l ./
ls -l ./dist
poetry publish --dry-run --no-interaction -vvv

- name: publish wheels (on publishing) [linux]
if: ${{ matrix.os.platform == 'linux' && startsWith(github.ref, 'refs/tags/') }}
# Runs only when github.ref starts with 'refs/tags/': list the workspace and ./dist,
# then publish the built wheels with `poetry publish`.
- name: publish wheels (on publishing) [for releases only]
if: ${{ startsWith(github.ref, 'refs/tags/') }}
run: |
ls -l ./
ls -l ./dist
poetry publish --no-interaction -vvv

13 changes: 10 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,15 @@ set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-sign-compare -g3 -DROOT_PATH='\"${TOPLEVEL_PREFIX_PATH}\"' ${ENV_ARCHFLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-sign-compare -O3 -DROOT_PATH='\"${TOPLEVEL_PREFIX_PATH}\"' ${ENV_ARCHFLAGS}")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-sign-compare -O3 -DROOT_PATH='\"${TOPLEVEL_PREFIX_PATH}\"' ${ENV_ARCHFLAGS}")

# ROOT_PATH is handed to the compiler differently per platform: on WIN32 it is
# registered through add_definitions() with backslash-escaped double quotes, elsewhere
# it is embedded directly in CMAKE_CXX_FLAGS wrapped in single quotes.
# NOTE(review): presumably the single-quoted form does not survive the Windows
# generator/shell — confirm.
if (WIN32)
# NOTE(review): TEST_PATH is not referenced in the visible part of this file — confirm
# whether it is still needed.
set(TEST_PATH "\\\"${TOPLEVEL_PREFIX_PATH}\\\"")
add_definitions(-DROOT_PATH="\\\"${TOPLEVEL_PREFIX_PATH}\\\"")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-sign-compare -O3 ${ENV_ARCHFLAGS}")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-sign-compare -O3 -DROOT_PATH='\"${TOPLEVEL_PREFIX_PATH}\"' ${ENV_ARCHFLAGS}")
endif()

message(STATUS "cxx-compiler: " ${CMAKE_CXX_COMPILER})
message(STATUS "cxx-flags : " ${CMAKE_CXX_FLAGS})
Expand Down Expand Up @@ -147,7 +155,7 @@ include(FetchContent)
FetchContent_Declare(pybind11
FETCHCONTENT_BASE_DIR extlib_pybind11
GIT_REPOSITORY https://github.com/pybind/pybind11.git
GIT_TAG v2.10.0
GIT_TAG v2.13.5
GIT_SHALLOW TRUE
)
FetchContent_MakeAvailable(pybind11)
Expand Down Expand Up @@ -176,4 +184,3 @@ install(TARGETS docling_parse DESTINATION "${TOPLEVEL_PREFIX_PATH}/docling_parse

# do a bunch of result based tests
# do_test(Tutorial 4 "4 is 2")

4 changes: 2 additions & 2 deletions cmake/extlib_pybind11.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ include(ExternalProject)
include(CMakeParseArguments)

set(PYBIND11_URL https://github.com/pybind/pybind11.git)
set(PYBIND11_TAG v2.10.0)
set(PYBIND11_TAG v2.13.5)

ExternalProject_Add(extlib_pybind11
PREFIX extlib_pybind11
Expand All @@ -25,4 +25,4 @@ ExternalProject_Add(extlib_pybind11

add_library(pybind11 INTERFACE)
add_custom_target(install_extlib_pybind11 DEPENDS extlib_pybind11)
add_dependencies(pybind11 install_extlib_pybind11)
add_dependencies(pybind11 install_extlib_pybind11)
4 changes: 2 additions & 2 deletions cmake/os_opts.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ message(STATUS "entering in 'os_opts.cmake'")
if(WIN32)
message(STATUS "compiling on windows")

set(LIB_LINK qpdf jpeg utf8 z)

elseif(APPLE)
message(STATUS "compiling on mac-osx")

Expand All @@ -27,5 +29,3 @@ elseif(UNIX)
list(APPEND LIB_LINK dl m pthread rt resolv)

endif()


1 change: 1 addition & 0 deletions src/include_libs.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
#include <assert.h>
#include <filesystem>
#include <iostream>
#include <string>
3 changes: 2 additions & 1 deletion src/proj_folders/pdf_interface/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,8 @@ namespace pdf_lib

// FIXME
//std::string font_data_dir = PDF_DATA_DIR;
std::string font_data_dir = resource_utils::get_resources_dir(true);
//std::string font_data_dir = resource_utils::get_resources_dir(true);
std::string font_data_dir = resource_utils::get_resources_dir(true).string();

std::vector<std::string> font_data_key = {"font-data", "path"};
if (input.has(font_data_key))
Expand Down
29 changes: 20 additions & 9 deletions src/proj_folders/pdf_library/core/tools/writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,11 @@ namespace pdf_lib
std::pair<scalar_type, scalar_type> page_r0,
std::pair<scalar_type, scalar_type> page_dim);

// Converts a fixed-size std::array into a std::vector with the same elements.
// N is declared as std::size_t so that it has exactly the type of std::array's size
// parameter; `long unsigned int` is a different type on 64-bit Windows, where N
// could then not be deduced from the argument.
template<typename scalar_type, std::size_t N>
std::vector<scalar_type> to_vector(std::array<scalar_type, N> arr);

template<typename scalar_type>
std::vector<scalar_type> get_bvec(std::array<scalar_type, 4> rhs);

Expand Down Expand Up @@ -308,14 +310,24 @@ namespace pdf_lib
}
}

/**
 * Copy the elements of a fixed-size array into a newly created vector.
 *
 * N is a std::size_t (not `long unsigned int`) so that it matches the type of
 * std::array's size parameter on every platform, including 64-bit Windows.
 *
 * @param arr  array to copy, taken by value.
 * @return vector holding arr[0] .. arr[N-1] in order; empty when N is 0.
 */
template<typename scalar_type, std::size_t N>
std::vector<scalar_type> writer::to_vector(std::array<scalar_type, N> arr)
{
  // The range constructor sizes and fills the vector in one step, which avoids
  // the index loop and its signed/unsigned comparison.
  return std::vector<scalar_type>(arr.begin(), arr.end());
}

Expand Down Expand Up @@ -745,5 +757,4 @@ namespace pdf_lib

}

#endif

#endif
3 changes: 2 additions & 1 deletion src/proj_folders/pdf_library/qpdf/parser/cid_cmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ namespace pdf_lib

// FIXME
//directory = PDF_DATA_DIR;
directory = resource_utils::get_resources_dir(true);
//directory = resource_utils::get_resources_dir(true);
directory = resource_utils::get_resources_dir(true).string();

directory += directory.back()!='/'? "/":"";

Expand Down
3 changes: 2 additions & 1 deletion src/proj_folders/pdf_library/qpdf/parser/cid_to_utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ namespace pdf_lib
{
// FIXME
//directory = PDF_DATA_DIR;
directory = resource_utils::get_resources_dir(true);
//directory = resource_utils::get_resources_dir(true);
directory = resource_utils::get_resources_dir(true).string();

directory += directory.back()!='/'? "/":"";

Expand Down
3 changes: 2 additions & 1 deletion src/pybind/docling_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ namespace docling
docling_resources(),
interface()
{
std::string font_data_dir = resource_utils::get_resources_dir(true);
// std::string font_data_dir = resource_utils::get_resources_dir(true);
std::string font_data_dir = resource_utils::get_resources_dir(true).string();

pdf_lib::core::object<pdf_lib::core::FONT>::initialize(font_data_dir);
}
Expand Down
3 changes: 2 additions & 1 deletion src/pybind/docling_resources.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ namespace docling

/**
 * Return the resources directory as a std::string.
 *
 * resource_utils::get_resources_dir() yields an object exposing .string()
 * (presumably a std::filesystem::path — confirm in resource_utils). The explicit
 * conversion is needed because such a path does not convert implicitly to
 * std::string on Windows.
 */
std::string docling_resources::get_resources_path()
{
  return resource_utils::get_resources_dir(true).string();
}

bool docling_resources::set_resources_path()
Expand Down
51 changes: 37 additions & 14 deletions src/utils/logging_library/logger.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,27 +186,50 @@ namespace logging_lib {
return Logger(domain);
}

// std::function<void(std::string, logging_level_type)> Logger::to_file(std::string filename, bool append)
// {
// static std::map<std::string, std::ofstream> streams;
// static char buffer[32];

// if(not streams.count(filename))
// streams[filename].open(filename, append ? std::ios::app : std::ios::out);

// std::ofstream & stream = streams[filename];
// return [&](std::string string, logging_level_type type) {

// auto time = std::chrono::system_clock::now();
// std::time_t t = std::chrono::system_clock::to_time_t(time);

// std::tm timeinfo;
// localtime_r(&t, &timeinfo);
// strftime(buffer, 32, "%Y-%m-%d %H:%M:%S", &timeinfo);

// stream << buffer << " [" << std::setw(7) << to_string(type) << "] " << string << std::endl;
// };
// }

/**
 * Build a logging sink that writes timestamped records to a file.
 *
 * The stream for each file name is opened on first use and kept in a
 * function-local static map, so repeated calls with the same name share one
 * std::ofstream.
 *
 * @param filename  path of the log file.
 * @param append    true opens the file with std::ios::app, false with std::ios::out.
 * @return callable taking (message, level) that writes
 *         "<YYYY-MM-DD HH:MM:SS> [<level>] <message>" and ends the record with std::endl.
 */
std::function<void(std::string, logging_level_type)> Logger::to_file(std::string filename, bool append)
{
  static std::map<std::string, std::ofstream> streams;

  if(not streams.count(filename))
    streams[filename].open(filename, append ? std::ios::app : std::ios::out);

  // std::map does not relocate its elements, so this pointer stays valid for as long
  // as the static map lives. It is captured by value: the returned callable outlives
  // this call, so capturing the by-value `filename` parameter by reference would dangle.
  std::ofstream* stream = &streams[filename];

  return [stream](std::string string, logging_level_type type) {
    const std::time_t t = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());

    // Use the re-entrant local-time conversion available on each platform.
    std::tm timeinfo{};
#ifdef _WIN32
    localtime_s(&timeinfo, &t);
#else
    localtime_r(&t, &timeinfo);
#endif

    // 24-hour ISO-style stamp, spelled out field by field: a 12-hour "%I" without
    // "%p" is ambiguous, and the "%F %T" shorthands may not be available in every
    // Windows C runtime.
    char buffer[32];
    strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", &timeinfo);

    // std::endl both terminates the record and flushes it to the file.
    (*stream) << buffer << " [" << std::setw(7) << to_string(type) << "] " << string << std::endl;
  };
}

void Logger::warn(std::string str, std::string domain)
{
Expand Down
Loading