Skip to content

Commit

Permalink
Partial utf8proc porting progress
Browse files Browse the repository at this point in the history
* Data generator from JuliaStrings/utf8proc#258
* Partial porting progress of utf8proc functions to Julia
* Some APIs from Base.Unicode and Unicode stdlib
  • Loading branch information
c42f committed Dec 21, 2023
1 parent 29404aa commit 0dfdb8f
Show file tree
Hide file tree
Showing 7 changed files with 18,955 additions and 1 deletion.
51 changes: 51 additions & 0 deletions data/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Unicode data generation rules. Apart from the rules that download the test
# data files, most users will never need this Makefile: it exists mainly to
# re-generate data.jl (the rule below writes its output to data.new.jl) when a
# new Unicode version or new charwidth data becomes available.

# programs (each can be overridden on the make command line, e.g. JULIA=...)
CURL=curl
JULIA=julia
# --retry 5   : retry transient download failures
# --location  : follow HTTP redirects
# --fail      : exit non-zero on an HTTP error status instead of saving the
#               server's error page under the data file's name; without it a
#               404 would look like a successful download to make.
CURLFLAGS = --retry 5 --location --fail

# `clean` and `rawdata` are command names, not files this Makefile produces;
# declaring them phony keeps a stray file of the same name from masking them.
.PHONY: clean rawdata

# If a recipe exits with an error, delete its target so a partially written
# file is not mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# Unicode data version (must also update utf8proc_unicode_version function)
UNICODE_VERSION=15.1.0

# Raw Unicode data files that the generator rule depends on. Kept in one list
# so the generator rule and the `rawdata` convenience target cannot drift apart.
RAWDATA = UnicodeData.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt EastAsianWidth.txt emoji-data.txt

# Generated output. This is the first rule in the file and therefore the
# default goal. It depends on the real data files rather than on the phony
# `rawdata` target: a phony prerequisite is always considered out of date and
# would force the generator to re-run on every invocation of make.
data.new.jl: data_generator.jl $(RAWDATA)
	$(JULIA) --project=. data_generator.jl Julia > $@

# Convenience target: make sure all raw data files are present.
rawdata: $(RAWDATA)

# Download UnicodeData.txt from the ucd/ directory of the selected Unicode
# version. The URL carries the same $(URLCACHE) prefix as every other download
# rule in this file, so a caller-supplied cache prefix applies to this file
# too. URLCACHE is not assigned in the visible part of this Makefile, so it
# expands to nothing unless the caller provides it.
UnicodeData.txt:
	$(CURL) $(CURLFLAGS) -o $@ $(URLCACHE)https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt

# Download EastAsianWidth.txt from the ucd/ directory of the selected Unicode
# version; the remote file name is the same as the target name ($@).
EastAsianWidth.txt:
	$(CURL) $(CURLFLAGS) -o $@ \
		$(URLCACHE)https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$@

# Download GraphemeBreakProperty.txt from the ucd/auxiliary/ directory of the
# selected Unicode version; the remote file name equals the target name ($@).
GraphemeBreakProperty.txt:
	$(CURL) $(CURLFLAGS) -o $@ \
		$(URLCACHE)https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/$@

# Download DerivedCoreProperties.txt from the ucd/ directory of the selected
# Unicode version; the remote file name equals the target name ($@).
DerivedCoreProperties.txt:
	$(CURL) $(CURLFLAGS) -o $@ \
		$(URLCACHE)https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$@

# Download CompositionExclusions.txt from the ucd/ directory of the selected
# Unicode version; the remote file name equals the target name ($@).
CompositionExclusions.txt:
	$(CURL) $(CURLFLAGS) -o $@ \
		$(URLCACHE)https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$@

# Download CaseFolding.txt from the ucd/ directory of the selected Unicode
# version; the remote file name equals the target name ($@).
CaseFolding.txt:
	$(CURL) $(CURLFLAGS) -o $@ \
		$(URLCACHE)https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$@

# Download NormalizationTest.txt from the ucd/ directory of the selected
# Unicode version. This file is not among the `rawdata` prerequisites, so it
# is only fetched when requested explicitly.
NormalizationTest.txt:
	$(CURL) $(CURLFLAGS) -o $@ \
		$(URLCACHE)https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$@

# Download GraphemeBreakTest.txt from the ucd/auxiliary/ directory of the
# selected Unicode version. This file is not among the `rawdata`
# prerequisites, so it is only fetched when requested explicitly.
GraphemeBreakTest.txt:
	$(CURL) $(CURLFLAGS) -o $@ \
		$(URLCACHE)https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/$@

# Download emoji-data.txt from the ucd/emoji/ directory of the selected
# Unicode version. Uses the www.unicode.org host like every other download
# rule in this file, so all fetches share one host (and one cache key layout
# when a URLCACHE prefix is supplied) instead of relying on a redirect from
# the bare domain.
emoji-data.txt:
	$(CURL) $(CURLFLAGS) -o $@ $(URLCACHE)https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/emoji/emoji-data.txt

# Remove every file this Makefile can download, plus the generated output.
# `rm -f` does not complain about files that are already absent.
clean:
	rm -f \
		UnicodeData.txt \
		EastAsianWidth.txt \
		GraphemeBreakProperty.txt \
		DerivedCoreProperties.txt \
		CompositionExclusions.txt \
		CaseFolding.txt \
		NormalizationTest.txt \
		GraphemeBreakTest.txt \
		emoji-data.txt \
		data.new.jl
69 changes: 69 additions & 0 deletions data/Manifest.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# This file is machine-generated - editing it directly is not advised

julia_version = "1.9.3"
manifest_format = "2.0"
project_hash = "bc0740aa2247b17bd49ba693fb87f41bbbddead6"

[[deps.Adapt]]
deps = ["LinearAlgebra", "Requires"]
git-tree-sha1 = "cde29ddf7e5726c9fb511f340244ea3481267608"
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
version = "3.7.2"

[deps.Adapt.extensions]
AdaptStaticArraysExt = "StaticArrays"

[deps.Adapt.weakdeps]
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[[deps.Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"

[[deps.CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
version = "1.0.5+0"

[[deps.Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

[[deps.LinearAlgebra]]
deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

[[deps.OffsetArrays]]
deps = ["Adapt"]
git-tree-sha1 = "2ac17d29c523ce1cd38e27785a7d23024853a4bb"
uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
version = "1.12.10"

[[deps.OpenBLAS_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
version = "0.3.21+4"

[[deps.Random]]
deps = ["SHA", "Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[[deps.Requires]]
deps = ["UUIDs"]
git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
version = "1.3.0"

[[deps.SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
version = "0.7.0"

[[deps.Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"

[[deps.UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[[deps.libblastrampoline_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
version = "5.8.0+0"
2 changes: 2 additions & 0 deletions data/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Package environment for the data/ directory. The Makefile alongside this
# file runs data_generator.jl with `--project=.`, which selects this project.
[deps]
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
Loading

0 comments on commit 0dfdb8f

Please sign in to comment.