Skip to content

Commit

Permalink
Manual: Remove html_entity_table and switch to UTF-8 encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
oschuett committed Oct 20, 2023
1 parent 73d2c19 commit 4e51c62
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 191 deletions.
191 changes: 25 additions & 166 deletions src/common/string_utilities.F
Original file line number Diff line number Diff line change
Expand Up @@ -28,49 +28,6 @@ MODULE string_utilities
CHARACTER(LEN=1), PARAMETER :: question = '?'
CHARACTER(LEN=1), PARAMETER :: newline = ACHAR(10)

INTEGER, PARAMETER :: maxlen_entity_name = 10
CHARACTER(LEN=maxlen_entity_name), DIMENSION(252), PARAMETER :: html_entity_table = &
(/"" ", "" ", "& ", "& ", "' ", "' ", "< ", "< ", &
"> ", "> ", "  ", "  ", "© ", "© ", "° ", "° ", &
"± ", "± ", "² ", "² ", "³ ", "³ ", "· ", "· ", &
"¹ ", "¹ ", "Ä ", "Ä ", "Å ", "Å ", "Ç ", "Ç ", &
"È ", "È ", "É ", "É ", "Ê ", "Ê ", "Ò ", "Ò ", &
"Ó ", "Ó ", "Ô ", "Ô ", "Ö ", "Ö ", "× ", "× ", &
"Ü ", "Ü ", "à ", "à ", "á ", "á ", "â ", "â ", &
"ä ", "ä ", "å ", "å ", "ç ", "ç ", "è ", "è ", &
"é ", "é ", "ê ", "ê ", "ò ", "ò ", "ó ", "ó ", &
"ô ", "ô ", "ö ", "ö ", "ù ", "ù ", "ú ", "ú ", &
"û ", "û ", "ü ", "ü ", "Γ ", "Γ ", "Δ ", "Δ ", &
"Θ ", "Θ ", "Λ ", "Λ ", "Ξ ", "Ξ ", "Π ", "Π ", &
"Σ ", "Σ ", "Φ ", "Φ ", "Ψ ", "Ψ ", "Ω ", "Ω ", &
"α ", "α ", "β ", "β ", "γ ", "γ ", "δ ", "δ ", &
"ε ", "ε ", "ζ ", "ζ ", "η ", "η ", "θ ", "θ ", &
"ι ", "ι ", "κ ", "κ ", "λ ", "λ ", "μ ", "μ ", &
"ν ", "ν ", "ξ ", "ξ ", "ο ", "ο ", "π ", "π ", &
"ρ ", "ρ ", "ς ", "ς ", "σ ", "σ ", "τ ", "τ ", &
"υ ", "υ ", "φ ", "φ ", "χ ", "χ ", "ψ ", "ψ ", &
"ω ", "ω ", "ϑ", "ϑ ", "– ", "– ", "— ", "— ", &
"† ", "† ", "‡ ", "‡ ", "• ", "… ", "… ", "… ", &
"‰ ", "‰ ", "← ", "← ", "↑ ", "↑ ", "→ ", "→ ", &
"↓ ", "↓ ", "↔ ", "↔ ", "⇐ ", "⇐ ", "⇑ ", "⇑ ", &
"⇒ ", "⇒ ", "⇓ ", "⇓ ", "⇔ ", "⇔ ", "∀ ", "∀ ", &
"∂ ", "∂ ", "∇ ", "∇ ", "∈ ", "∈ ", "∉ ", "∉ ", &
"∋ ", "∋ ", "∝ ", "∝ ", "∞ ", "∞ ", "∠ ", "∠ ", &
"∧ ", "∧ ", "∨ ", "∨ ", "∩ ", "∩ ", "∪ ", "∪ ", &
"∼ ", "∼ ", "≅ ", "≅ ", "≈ ", "≈ ", "≠ ", "≠ ", &
"≡ ", "≡ ", "≤ ", "≤ ", "≥ ", "≥ ", "⊂ ", "⊂ ", &
"⊃ ", "⊃ ", "⊄ ", "⊄ ", "⊆ ", "⊆ ", "⊇ ", "⊇ ", &
"⊕ ", "⊕ ", "⊗ ", "⊗ ", "⊥ ", "⊥ ", "⋅ ", "⋅ ", &
"⟨ ", "〈 ", "⟩ ", "〉 "/)

INTEGER, PARAMETER :: maxlen_tag_name = 10
CHARACTER(LEN=maxlen_tag_name), DIMENSION(38), PARAMETER :: html_tag_table = &
(/"a ", "b ", "big ", "blockquote", "br/ ", "code ", "dd ", "del ", &
"div ", "dl ", "dt ", "em ", "h1 ", "h2 ", "h3 ", "h4 ", &
"h5 ", "h6 ", "hr ", "i ", "ins ", "li ", "ol ", "p ", &
"span ", "sub ", "sup ", "table ", "tbody ", "td ", "tfoot ", "th ", &
"thead ", "tr ", "tt ", "u ", "ul ", "pre "/)

PUBLIC :: ascii_to_string, &
compress, &
integer_to_string, &
Expand All @@ -83,12 +40,10 @@ MODULE string_utilities
substitute_special_xml_tokens, &
typo_match, &
uppercase, &
write_html_tables, &
xstring, &
strlcpy_c2f

PUBLIC :: html_entity_table, &
newline
PUBLIC :: newline

INTERFACE s2a
MODULE PROCEDURE s2a_1, s2a_2, s2a_3, s2a_4, s2a_5, s2a_6, s2a_7, s2a_8, s2a_9, &
Expand Down Expand Up @@ -3334,139 +3289,45 @@ SUBROUTINE remove_word(string)
END SUBROUTINE remove_word

! **************************************************************************************************
!> \brief Substitute special XML tokens like "<" or ">" in inp_string.
!> Optionally convert also all lowercase characters to uppercase, if
!> ltu is true.
!> \brief Substitutes the five predefined XML entities: &amp;, &lt;, &gt;, &apos;, and &quot;.
!> \param inp_string ...
!> \param ltu ...
!> \return ...
!> \date 10.03.2005
!> \par History
!> - Enable the use of HTML entity names (06.03.13,MK)
!> \author Matthias Krack (MK)
!> \version 1.1
!> !> \author Ole Schuett
! **************************************************************************************************
FUNCTION substitute_special_xml_tokens(inp_string, ltu) RESULT(out_string)
FUNCTION substitute_special_xml_tokens(inp_string) RESULT(out_string)

CHARACTER(LEN=*), INTENT(IN) :: inp_string
LOGICAL, INTENT(IN), OPTIONAL :: ltu
CHARACTER(LEN=2*LEN(inp_string)) :: out_string

CHARACTER(LEN=LEN(inp_string)) :: string
CHARACTER(LEN=maxlen_entity_name) :: entity_name
CHARACTER(LEN=maxlen_tag_name) :: tag_name
INTEGER :: i, ientry, ilen, j, k
INTEGER :: i, j

string = inp_string
out_string = ""

IF (PRESENT(ltu)) THEN
IF (ltu) CALL uppercase(string)
END IF

i = 0
j = 1
string_loop: DO
i = i + 1
IF (i > LEN_TRIM(string)) EXIT string_loop
IF (string(i:i) == "<") THEN
! Detect valid HTML tags and keep them
ientry = 0
ilen = INDEX(string(i:), ">")
IF ((ilen > 2) .AND. (ilen <= maxlen_tag_name + 3)) THEN
IF (string(i + 1:i + 1) == "/") THEN
tag_name(1:) = string(i + 2:i + ilen - 2)
ELSE
tag_name(1:) = string(i + 1:i + ilen - 2)
END IF
CALL lowercase(tag_name)
tag_loop: DO k = 1, SIZE(html_tag_table)
IF (tag_name == html_tag_table(k)) THEN
ientry = k
EXIT tag_loop
END IF
END DO tag_loop
IF (ientry > 0) THEN ! HTML tag found in table
IF (string(i + 1:i + 1) == "/") THEN
out_string(j:j + ilen + 7) = "&#60;/"//TRIM(tag_name)//"&#62;"
ELSE
out_string(j:j + ilen + 7) = "&#60;"//TRIM(tag_name)//"&#62;"
END IF
i = i + ilen - 1
j = j + ilen + 8
END IF
END IF
! HTML tag not found in table
IF (ientry == 0) THEN
out_string(j:j + 4) = "&#60;"
j = j + 5
END IF
ELSE IF (string(i:i) == ">") THEN
out_string(j:j + 4) = "&#62;"
DO i = 1, LEN_TRIM(inp_string)
SELECT CASE (inp_string(i:i))
CASE ("<")
out_string(j:j + 3) = "&lt;"
j = j + 4
CASE (">")
out_string(j:j + 3) = "&gt;"
j = j + 4
CASE ("&")
out_string(j:j + 4) = "&amp;"
j = j + 5
ELSE IF (string(i:i) == "&") THEN
! Substitute HTML entity names by the corresponding entity number
ientry = 0
ilen = INDEX(string(i:), ";")
IF ((ilen > 2) .AND. (ilen <= maxlen_entity_name)) THEN
entity_name(1:) = string(i:i + ilen - 1)
entity_loop: DO k = 1, SIZE(html_entity_table), 2
IF (entity_name == html_entity_table(k)) THEN
ientry = k + 1
EXIT entity_loop
END IF
END DO entity_loop
i = i + ilen - 1
IF (ientry > 0) THEN ! HTML entity found in table
ilen = LEN_TRIM(html_entity_table(ientry))
out_string(j:j + ilen - 1) = TRIM(ADJUSTL(html_entity_table(ientry)))
j = j + ilen
END IF
END IF
IF (ientry == 0) THEN
out_string(j:j + 4) = "&#38;"
j = j + 5
END IF
ELSE
out_string(j:j) = string(i:i)
CASE ("'")
out_string(j:j + 5) = "&apos;"
j = j + 6
CASE ('"')
out_string(j:j + 5) = "&quot;"
j = j + 6
CASE DEFAULT
out_string(j:j) = inp_string(i:i)
j = j + 1
END IF
END DO string_loop

END FUNCTION substitute_special_xml_tokens

! **************************************************************************************************
!> \brief Write the HTML entity and HTML tag table to unit output_unit
!> \param output_unit ...
!> \date 14.08.2013
!> \author Matthias Krack (MK)
!> \version 1.0
! **************************************************************************************************
SUBROUTINE write_html_tables(output_unit)

INTEGER, INTENT(IN) :: output_unit

INTEGER :: i

WRITE (UNIT=output_unit, FMT="(T2,A)") "<CP2K_HTML>"

DO i = 1, SIZE(html_tag_table)
WRITE (UNIT=output_unit, FMT="(T3,A)") &
"<TAG>", &
" <NAME>&#60;"//TRIM(html_tag_table(i))//"&#62;</NAME>", &
"</TAG>"
END SELECT
END DO
DO i = 1, SIZE(html_entity_table), 2
WRITE (UNIT=output_unit, FMT="(T3,A)") &
"<ENTITY>", &
" <NAME>&#38;"//TRIM(html_entity_table(i) (2:))//"</NAME>", &
" <CODE>&#38;"//TRIM(html_entity_table(i + 1) (2:))//"</CODE>", &
"</ENTITY>"
END DO

WRITE (UNIT=output_unit, FMT="(T2,A)") "</CP2K_HTML>"

END SUBROUTINE write_html_tables
END FUNCTION substitute_special_xml_tokens

! **************************************************************************************************
!> \brief Convert all upper case characters in a string to lower case.
Expand Down Expand Up @@ -3510,7 +3371,6 @@ ELEMENTAL SUBROUTINE uppercase(string)

END SUBROUTINE uppercase

! **************************************************************************************************
! **************************************************************************************************
!> \brief ...
!> \param string ...
Expand All @@ -3532,7 +3392,6 @@ ELEMENTAL SUBROUTINE xstring(string, ia, ib)

END SUBROUTINE xstring

! **************************************************************************************************
! **************************************************************************************************
!> \brief ...
!> \param str1 ...
Expand Down
28 changes: 3 additions & 25 deletions src/start/cp2k_runs.F
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,6 @@ MODULE cp2k_runs
USE rt_propagation, ONLY: rt_prop_setup
USE sirius_interface, ONLY: cp_sirius_finalize,&
cp_sirius_init
USE string_utilities, ONLY: html_entity_table,&
write_html_tables
USE swarm, ONLY: run_swarm
USE tamc_run, ONLY: qs_tamc
USE tmc_setup, ONLY: do_analyze_files,&
Expand Down Expand Up @@ -886,7 +884,7 @@ END SUBROUTINE farming_run
! **************************************************************************************************
SUBROUTINE write_xml_file()
INTEGER :: i, ie, is, unit_number
INTEGER :: i, unit_number
TYPE(section_type), POINTER :: root_section
NULLIFY (root_section)
Expand All @@ -897,26 +895,7 @@ SUBROUTINE write_xml_file()
file_action="WRITE", &
file_status="REPLACE")
WRITE (UNIT=unit_number, FMT="(A)") &
"<?xml version=""1.0"" encoding=""ISO-8859-1""?>", &
"<?xml-stylesheet type=""text/xsl"" href=""cp2k_input.xsl""?>"
!MK Write a HTML translation table
!MK In principle this is only required for non-standard HTML entities
is = 0
ie = 0
WRITE (UNIT=unit_number, FMT="(A)") &
"<!DOCTYPE documentElement["
DO i = 1, SIZE(html_entity_table), 2
is = INDEX(html_entity_table(i), "&") + 1
CPASSERT(is > 0)
ie = INDEX(html_entity_table(i), ";") - 1
CPASSERT(ie >= is)
WRITE (UNIT=unit_number, FMT="(A)") &
"<!ENTITY "//html_entity_table(i) (is:ie)//" """//TRIM(html_entity_table(i + 1))//""">"
END DO
WRITE (UNIT=unit_number, FMT="(A)") &
"]>"
WRITE (UNIT=unit_number, FMT="(A)") '<?xml version="1.0" encoding="utf-8"?>'
!MK CP2K input structure
WRITE (UNIT=unit_number, FMT="(A)") &
Expand All @@ -928,8 +907,7 @@ SUBROUTINE write_xml_file()
DO i = 1, root_section%n_subsections
CALL write_section_xml(root_section%subsections(i)%section, 1, unit_number)
END DO
! Append HTML entity and tag tables
CALL write_html_tables(unit_number)
WRITE (UNIT=unit_number, FMT="(A)") "</CP2K_INPUT>"
CALL close_file(unit_number=unit_number)
CALL section_release(root_section)
Expand Down

0 comments on commit 4e51c62

Please sign in to comment.