From 5ef1f3ad23fc2f1aed8639c0aecff9a692bd1c67 Mon Sep 17 00:00:00 2001 From: Robert Sipka Date: Mon, 27 Feb 2017 11:30:03 +0100 Subject: [PATCH] Refactor the generator scripts for unicode tables Extract the source code generator methods into a separated `unicode_c_source.py` script. Fix the generator scripts to make them compatible with both Python2 and Python3. Remove pylint warnings. JerryScript-DCO-1.0-Signed-off-by: Robert Sipka rsipka.uszeged@partner.samsung.com --- jerry-core/lit/lit-unicode-conversions.inc.h | 8 +- jerry-core/lit/lit-unicode-ranges.inc.h | 7 +- tools/unicode_c_source.py | 75 ++ tools/unicode_case_conversion.py | 886 +++++++++---------- tools/unicode_ranges.py | 350 +++----- 5 files changed, 617 insertions(+), 709 deletions(-) create mode 100644 tools/unicode_c_source.py mode change 100644 => 100755 tools/unicode_ranges.py diff --git a/jerry-core/lit/lit-unicode-conversions.inc.h b/jerry-core/lit/lit-unicode-conversions.inc.h index 0e7725733a..1efc1104f6 100644 --- a/jerry-core/lit/lit-unicode-conversions.inc.h +++ b/jerry-core/lit/lit-unicode-conversions.inc.h @@ -11,10 +11,11 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * - * This file is automatically generated by the unicode_case_conversion.py script. Do not edit! */ +/* This file is automatically generated by the unicode_case_conversion.py script + * from UnicodeData-9.0.0.txt and SpecialCasing-9.0.0.txt files. Do not edit! */ + /* Contains start points of character case ranges (these are bidirectional conversions). */ static const uint16_t jerry_character_case_ranges[] JERRY_CONST_DATA = { @@ -154,9 +155,8 @@ static const uint16_t jerry_upper_case_conversions[] JERRY_CONST_DATA = 0x0046, 0x004c }; -/* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. 
*/ +/* Number of one-to-one, one-to-two, and one-to-three uppercase conversions. */ static const uint8_t jerry_upper_case_conversion_counters[] JERRY_CONST_DATA = { 0x001c, 0x002c, 0x0010 }; - diff --git a/jerry-core/lit/lit-unicode-ranges.inc.h b/jerry-core/lit/lit-unicode-ranges.inc.h index b322357ac7..b050f249ff 100644 --- a/jerry-core/lit/lit-unicode-ranges.inc.h +++ b/jerry-core/lit/lit-unicode-ranges.inc.h @@ -11,11 +11,11 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * - * This file is automatically generated by the unicode_ranges.py script - * from UnicodeData-3.0.0.txt. Do not edit! */ +/* This file is automatically generated by the unicode_ranges.py script + * from UnicodeData-3.0.0.txt. Do not edit! */ + /** * Character interval starting points for the unicode letters. * @@ -180,4 +180,3 @@ static const uint16_t jerry_unicode_separator_chars[] JERRY_CONST_DATA = { 0x1680, 0x180e, 0x202f, 0x205f, 0x3000 }; - diff --git a/tools/unicode_c_source.py b/tools/unicode_c_source.py new file mode 100644 index 0000000000..7c5d66ba9f --- /dev/null +++ b/tools/unicode_c_source.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python + +# Copyright JS Foundation and other contributors, http://js.foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +LICENSE = """/* Copyright JS Foundation and other contributors, http://js.foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */""" + + +class Source(object): + def __init__(self, filepath): + self.__filepath = filepath + self.__header = [LICENSE, ""] + self.__data = [] + + def complete_header(self, completion): + self.__header.append(completion) + self.__header.append("") # for an extra empty line + + def add_table(self, table, table_name, table_type, table_descr): + self.__data.append(table_descr) + self.__data.append("static const %s jerry_%s[] JERRY_CONST_DATA =" % (table_type, table_name)) + self.__data.append("{") + self.__data.append(format_code(table, 1)) + self.__data.append("};") + self.__data.append("") # for an extra empty line + + def generate(self): + with open(self.__filepath, 'w') as genereted_source: + genereted_source.write("\n".join(self.__header)) + genereted_source.write("\n".join(self.__data)) + + +def regroup(list_to_group, num): + return [list_to_group[i:i+num] for i in range(0, len(list_to_group), num)] + + +def hex_format(char): + if isinstance(char, str): + char = ord(char) + + return "0x{:04x}".format(char) + + +def format_code(code, indent): + lines = [] + # convert all characters to hex format + converted_code = [hex_format(char) for char in code] + # 10 hex number per line + for line in regroup(", ".join(converted_code), 10 * 8): + lines.append((' ' * indent) + line.strip()) + return "\n".join(lines) diff --git 
a/tools/unicode_case_conversion.py b/tools/unicode_case_conversion.py index 473953c588..9f90846ac7 100755 --- a/tools/unicode_case_conversion.py +++ b/tools/unicode_case_conversion.py @@ -14,6 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import print_function +from settings import PROJECT_DIR +from unicode_c_source import Source + import argparse import csv import itertools @@ -21,14 +25,181 @@ import sys import warnings -try: - unichr -except NameError: - unichr = chr +CONVERSIONS_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-conversions.inc.h') + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument('--unicode-data', + metavar='FILE', + action='store', + required=True, + help='specify the unicode data file') + + parser.add_argument('--special-casing', + metavar='FILE', + action='store', + required=True, + help='specify the special casing file') + + parser.add_argument('--c-source', + metavar='FILE', + action='store', + default=CONVERSIONS_C_SOURCE, + help='specify the output c source for the conversion tables (default: %(default)s)') + + script_args = parser.parse_args() + + if not os.path.isfile(script_args.unicode_data) or not os.access(script_args.unicode_data, os.R_OK): + print('The %s file is missing or not readable!' % script_args.unicode_data) + sys.exit(1) + + if not os.path.isfile(script_args.special_casing) or not os.access(script_args.special_casing, os.R_OK): + print('The %s file is missing or not readable!' 
% script_args.special_casing) + sys.exit(1) + + conv_tables = ConversionTables(script_args.unicode_data, script_args.special_casing) + + character_case_ranges = conv_tables.get_character_case_ranges() + character_pair_ranges = conv_tables.get_character_pair_ranges() + character_pairs = conv_tables.get_character_pairs() + upper_case_special_ranges = conv_tables.get_upper_case_special_ranges() + lower_case_ranges = conv_tables.get_lower_case_ranges() + lower_case_conversions = conv_tables.get_lower_case_conversions() + upper_case_conversions = conv_tables.get_upper_case_conversions() + + c_source = Source(script_args.c_source) + + unicode_file = os.path.basename(script_args.unicode_data) + spec_casing_file = os.path.basename(script_args.special_casing) + + header_completion = ["/* This file is automatically generated by the %s script" % os.path.basename(__file__), + " * from %s and %s files. Do not edit! */" % (unicode_file, spec_casing_file), + ""] + + c_source.complete_header("\n".join(header_completion)) + + c_source.add_table(character_case_ranges[0], + "character_case_ranges", + "uint16_t", + ("/* Contains start points of character case ranges " + "(these are bidirectional conversions). */")) + + c_source.add_table(character_case_ranges[1], + "character_case_range_lengths", + "uint8_t", + "/* Interval lengths of start points in `character_case_ranges` table. */") + + c_source.add_table(character_pair_ranges[0], + "character_pair_ranges", + "uint16_t", + "/* Contains the start points of bidirectional conversion ranges. */") + + c_source.add_table(character_pair_ranges[1], + "character_pair_range_lengths", + "uint8_t", + "/* Interval lengths of start points in `character_pair_ranges` table. */") + + c_source.add_table(character_pairs, + "character_pairs", + "uint16_t", + "/* Contains lower/upper case bidirectional conversion pairs. 
*/") + + c_source.add_table(upper_case_special_ranges[0], + "upper_case_special_ranges", + "uint16_t", + ("/* Contains start points of one-to-two uppercase ranges where the second character\n" + " * is always the same.\n" + " */")) + + c_source.add_table(upper_case_special_ranges[1], + "upper_case_special_range_lengths", + "uint8_t", + "/* Interval lengths for start points in `upper_case_special_ranges` table. */") + + c_source.add_table(lower_case_ranges[0], + "lower_case_ranges", + "uint16_t", + "/* Contains start points of lowercase ranges. */") + + c_source.add_table(lower_case_ranges[1], + "lower_case_range_lengths", + "uint8_t", + "/* Interval lengths for start points in `lower_case_ranges` table. */") + + c_source.add_table(lower_case_conversions[0], + "lower_case_conversions", + "uint16_t", + ("/* The remaining lowercase conversions. The lowercase variant can " + "be one-to-three character long. */")) + + c_source.add_table(lower_case_conversions[1], + "lower_case_conversion_counters", + "uint8_t", + "/* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. */") + + c_source.add_table(upper_case_conversions[0], + "upper_case_conversions", + "uint16_t", + ("/* The remaining uppercase conversions. The uppercase variant can " + "be one-to-three character long. */")) + + c_source.add_table(upper_case_conversions[1], + "upper_case_conversion_counters", + "uint8_t", + "/* Number of one-to-one, one-to-two, and one-to-three uppercase conversions. */") + + c_source.generate() + + +class ConversionTables(object): + def __init__(self, unicode_data_file, special_casing_file): + """ + Read the corresponding unicode values of lower and upper case letters and store these in tables + + :param unicode_data_file: Contains the default case mappings (one-to-one mappings). + :param special_casing_file: Contains additional informative case mappings that are either not one-to-one + or which are context-sensitive. 
+ """ + + case_mappings = read_case_mappings(unicode_data_file, special_casing_file) + lower_case = case_mappings[0] + upper_case = case_mappings[1] + + self.__character_case_ranges = extract_ranges(lower_case, upper_case) + self.__character_pair_ranges = extract_character_pair_ranges(lower_case, upper_case) + self.__character_pairs = extract_character_pairs(lower_case, upper_case) + self.__upper_case_special_ranges = extract_special_ranges(upper_case) + self.__lower_case_ranges = extract_ranges(lower_case) + self.__lower_case_conversions = extract_conversions(lower_case) + self.__upper_case_conversions = extract_conversions(upper_case) + + if lower_case: + warnings.warn('Not all elements extracted from the lowercase table!') + if upper_case: + warnings.warn('Not all elements extracted from the uppercase table!') + + def get_character_case_ranges(self): + return self.__character_case_ranges + + def get_character_pair_ranges(self): + return self.__character_pair_ranges + + def get_character_pairs(self): + return self.__character_pairs + + def get_upper_case_special_ranges(self): + return self.__upper_case_special_ranges + + def get_lower_case_ranges(self): + return self.__lower_case_ranges + + def get_lower_case_conversions(self): + return self.__lower_case_conversions -TOOLS_DIR = os.path.dirname(os.path.abspath(__file__)) -PROJECT_DIR = os.path.normpath(os.path.join(TOOLS_DIR, '..')) -C_SOURCE_FILE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-conversions.inc.h') + def get_upper_case_conversions(self): + return self.__upper_case_conversions def parse_unicode_sequence(raw_data): @@ -46,7 +217,12 @@ def parse_unicode_sequence(raw_data): continue # Convert it to unicode code point (from hex value without 0x prefix) - result += unichr(int(unicode_char, 16)) + hex_val = int(unicode_char, 16) + try: + result += unichr(hex_val) + except NameError: + result += chr(hex_val) + return result @@ -60,8 +236,8 @@ def read_case_mappings(unicode_data_file, 
special_casing_file): :return: Upper and lower case mappings. """ - lower_case_mapping = CaseMapping() - upper_case_mapping = CaseMapping() + lower_case_mapping = {} + upper_case_mapping = {} # Add one-to-one mappings with open(unicode_data_file) as unicode_data: @@ -78,10 +254,10 @@ def read_case_mappings(unicode_data_file, special_casing_file): small_letter = line[13] if capital_letter: - upper_case_mapping.add(letter_id, parse_unicode_sequence(capital_letter)) + upper_case_mapping[letter_id] = parse_unicode_sequence(capital_letter) if small_letter: - lower_case_mapping.add(letter_id, parse_unicode_sequence(small_letter)) + lower_case_mapping[letter_id] = parse_unicode_sequence(small_letter) # Update the conversion tables with the special cases with open(special_casing_file) as special_casing: @@ -107,575 +283,303 @@ def read_case_mappings(unicode_data_file, special_casing_file): small_letter = parse_unicode_sequence(line[1]) capital_letter = parse_unicode_sequence(line[3]) - lower_case_mapping.add(letter_id, small_letter) - upper_case_mapping.add(letter_id, capital_letter) + lower_case_mapping[letter_id] = small_letter + upper_case_mapping[letter_id] = capital_letter return lower_case_mapping, upper_case_mapping -class CaseMapping(dict): - """Class defines an informative, default mapping.""" - - def __init__(self): - """Initialize the case mapping table.""" - self._conversion_table = {} - - def add(self, letter_id, mapped_value): - """ - Add mapped value of the unicode letter. - - :param letter_id: An integer, representing the unicode code point of the character. - :param mapped_value: Corresponding character of the case type. - """ - self._conversion_table[letter_id] = mapped_value - - def remove(self, letter_id): - """ - Remove mapping from the conversion table. - - :param letter_id: An integer, representing the unicode code point of the character. 
- """ - del self._conversion_table[letter_id] - - def get_value(self, letter_id): - """ - Get the mapped value of the given unicode character. - - :param letter_id: An integer, representing the unicode code point of the character. - :return: The mapped value of the character. - """ - - if self.contains(letter_id): - return self._conversion_table[letter_id] - - return None - - def get_conversion_distance(self, letter_id): - """ - Calculate the distance between the unicode character and its mapped value - (only needs and works with one-to-one mappings). - - :param letter_id: An integer, representing the unicode code point of the character. - :return: The conversion distance. - """ - - mapped_value = self.get_value(letter_id) - - if mapped_value and len(mapped_value) == 1: - return ord(mapped_value) - letter_id +def extract_ranges(letter_case, reverse_letter_case=None): + """ + Extract ranges from case mappings + (the second param is optional, if it's not empty, a range will contains bidirectional conversions only). - return None + :param letter_id: An integer, representing the unicode code point of the character. + :param letter_case: case mappings dictionary which contains the conversions. + :param reverse_letter_case: Comparable case mapping table which contains the return direction of the conversion. + :return: A table with the start points and their mapped value, and another table with the lengths of the ranges. + """ - def is_bidirectional_conversion(self, letter_id, other_case_mapping): - """ - Check that two unicode value are also a mapping value of each other. + in_range = False + range_position = -1 + ranges = [] + range_lengths = [] - :param letter_id: An integer, representing the unicode code point of the character. - :param other_case_mapping: Comparable case mapping table which possible contains - the return direction of the conversion. - :return: True, if it's a reverible conversion, false otherwise. 
- """ + for letter_id in sorted(letter_case.keys()): + prev_letter_id = letter_id - 1 - if not self.contains(letter_id): - return False + # One-way conversions + if reverse_letter_case is None: + if len(letter_case[letter_id]) > 1: + in_range = False + continue - # Check one-to-one mapping - mapped_value = self.get_value(letter_id) - if len(mapped_value) > 1: - return False + if prev_letter_id not in letter_case or len(letter_case[prev_letter_id]) > 1: + in_range = False + continue - # Check two way conversions - mapped_value_id = ord(mapped_value) - if other_case_mapping.get_value(mapped_value_id) != unichr(letter_id): - return False + # Two way conversions + else: + if not is_bidirectional_conversion(letter_id, letter_case, reverse_letter_case): + in_range = False + continue - return True + if not is_bidirectional_conversion(prev_letter_id, letter_case, reverse_letter_case): + in_range = False + continue - def contains(self, letter_id): - """ - Check that a unicode character is in the conversion table. + conv_distance = calculate_conversion_distance(letter_case, letter_id) + prev_conv_distance = calculate_conversion_distance(letter_case, prev_letter_id) - :param letter_id: An integer, representing the unicode code point of the character. - :return: True, if it contains the character, false otherwise. - """ - if letter_id in self._conversion_table: - return True + if conv_distance != prev_conv_distance: + in_range = False + continue - return False + if in_range: + range_lengths[range_position] += 1 + else: + in_range = True + range_position += 1 - def get_table(self): - return self._conversion_table + # Add the start point of the range and its mapped value + ranges.extend([prev_letter_id, ord(letter_case[prev_letter_id])]) + range_lengths.append(2) - def extract_ranges(self, other_case_mapping=None): - """ - Extract ranges from case mappings - (the second param is optional, if it's not empty, a range will contains bidirectional conversions only). 
+ # Remove all ranges from the case mapping table. + for idx in range(0, len(ranges), 2): + range_length = range_lengths[idx // 2] - :param letter_id: An integer, representing the unicode code point of the character. - :param other_case_mapping: Comparable case mapping table which contains the return direction of the conversion. - :return: A table with the start points and their mapped value, and another table with the lengths of the ranges. - """ + for incr in range(range_length): + del letter_case[ranges[idx] + incr] + if reverse_letter_case is not None: + del reverse_letter_case[ranges[idx + 1] + incr] - in_range = False - range_position = -1 - ranges = [] - range_lengths = [] + return ranges, range_lengths - for letter_id in sorted(self._conversion_table.keys()): - prev_letter_id = letter_id - 1 - # One-way conversions - if other_case_mapping is None: - if len(self.get_value(letter_id)) > 1: - in_range = False - continue +def extract_character_pair_ranges(letter_case, reverse_letter_case): + """ + Extract two or more character pairs from the case mapping tables. - if not self.contains(prev_letter_id) or len(self.get_value(prev_letter_id)) > 1: - in_range = False - continue + :param letter_case: case mappings dictionary which contains the conversions. + :param reverse_letter_case: Comparable case mapping table which contains the return direction of the conversion. + :return: A table with the start points, and another table with the lengths of the ranges. 
+ """ - # Two way conversions - else: - if not self.is_bidirectional_conversion(letter_id, other_case_mapping): - in_range = False - continue + start_points = [] + lengths = [] + in_range = False + element_counter = -1 - if not self.is_bidirectional_conversion(prev_letter_id, other_case_mapping): - in_range = False - continue + for letter_id in sorted(letter_case.keys()): + # Only extract character pairs + if not is_bidirectional_conversion(letter_id, letter_case, reverse_letter_case): + in_range = False + continue - conv_distance = self.get_conversion_distance(letter_id) - prev_conv_distance = self.get_conversion_distance(prev_letter_id) + if ord(letter_case[letter_id]) == letter_id + 1: + prev_letter_id = letter_id - 2 - if (conv_distance != prev_conv_distance): + if not is_bidirectional_conversion(prev_letter_id, letter_case, reverse_letter_case): in_range = False - continue if in_range: - range_lengths[range_position] += 1 + lengths[element_counter] += 2 else: + element_counter += 1 + start_points.append(letter_id) + lengths.append(2) in_range = True - range_position += 1 - - # Add the start point of the range and its mapped value - ranges.extend([prev_letter_id, ord(self.get_value(prev_letter_id))]) - range_lengths.append(2) - - # Remove all ranges from the case mapping table. - index = 0 - while index != len(ranges): - range_length = range_lengths[index // 2] - - for incr in range(range_length): - self.remove(ranges[index] + incr) - if other_case_mapping is not None: - other_case_mapping.remove(ranges[index + 1] + incr) - - index += 2 - - return ranges, range_lengths - - def extract_character_pair_ranges(self, other_case_mapping): - """ - Extract two or more character pairs from the case mapping tables. - - :param other_case_mapping: Comparable case mapping table which contains the return direction of the conversion. - :return: A table with the start points, and another table with the lengths of the ranges. 
- """ - - start_points = [] - lengths = [] - in_range = False - element_counter = -1 - for letter_id in sorted(self._conversion_table.keys()): - # Only extract character pairs - if not self.is_bidirectional_conversion(letter_id, other_case_mapping): - in_range = False - continue - - if self.get_value(letter_id) == unichr(letter_id + 1): - prev_letter_id = letter_id - 2 - - if not self.is_bidirectional_conversion(prev_letter_id, other_case_mapping): - in_range = False - - if in_range: - lengths[element_counter] += 2 - else: - element_counter += 1 - start_points.append(letter_id) - lengths.append(2) - in_range = True - - else: - in_range = False - - # Remove all founded case mapping from the conversion tables after the scanning method - idx = 0 - while idx != len(start_points): - letter_id = start_points[idx] - conv_length = lengths[idx] + else: + in_range = False - for incr in range(0, conv_length, 2): - self.remove(letter_id + incr) - other_case_mapping.remove(letter_id + 1 + incr) + # Remove all founded case mapping from the conversion tables after the scanning method + for idx in range(len(start_points)): + letter_id = start_points[idx] + conv_length = lengths[idx] - idx += 1 + for incr in range(0, conv_length, 2): + del letter_case[letter_id + incr] + del reverse_letter_case[letter_id + 1 + incr] - return start_points, lengths + return start_points, lengths - def extract_character_pairs(self, other_case_mapping): - """ - Extract character pairs. Check that two unicode value are also a mapping value of each other. - - :param other_case_mapping: Comparable case mapping table which contains the return direction of the conversion. - :return: A table with character pairs. - """ - character_pairs = [] +def extract_character_pairs(letter_case, reverse_letter_case): + """ + Extract character pairs. Check that two unicode value are also a mapping value of each other. 
- for letter_id in sorted(self._conversion_table.keys()): - if self.is_bidirectional_conversion(letter_id, other_case_mapping): - mapped_value = self.get_value(letter_id) - character_pairs.extend([letter_id, ord(mapped_value)]) + :param letter_case: case mappings dictionary which contains the conversions. + :param reverse_letter_case: Comparable case mapping table which contains the return direction of the conversion. + :return: A table with character pairs. + """ - # Remove character pairs from case mapping tables - self.remove(letter_id) - other_case_mapping.remove(ord(mapped_value)) + character_pairs = [] - return character_pairs + for letter_id in sorted(letter_case.keys()): + if is_bidirectional_conversion(letter_id, letter_case, reverse_letter_case): + mapped_value = letter_case[letter_id] + character_pairs.extend([letter_id, ord(mapped_value)]) - def extract_special_ranges(self): - """ - Extract special ranges. It contains that ranges of one-to-two mappings where the second character - of the mapped values are equals and the other characters are following each other. - eg.: \u1f80 and \u1f81 will be in one range becase their upper-case values are \u1f08\u0399 and \u1f09\u0399 + # Remove character pairs from case mapping tables + del letter_case[letter_id] + del reverse_letter_case[ord(mapped_value)] - :return: A table with the start points and their mapped values, and a table with the lengths of the ranges. - """ + return character_pairs - special_ranges = [] - special_range_lengths = [] - range_position = -1 +def extract_special_ranges(letter_case): + """ + Extract special ranges. It contains start points of one-to-two letter case ranges + where the second character is always the same. - for letter_id in sorted(self._conversion_table.keys()): - mapped_value = self.get_value(letter_id) + :param letter_case: case mappings dictionary which contains the conversions. 
- if len(mapped_value) != 2: - continue + :return: A table with the start points and their mapped values, and a table with the lengths of the ranges. + """ - prev_letter_id = letter_id - 1 + special_ranges = [] + special_range_lengths = [] - if not self.contains(prev_letter_id): - in_range = False - continue + range_position = -1 - prev_mapped_value = self.get_value(prev_letter_id) + for letter_id in sorted(letter_case.keys()): + mapped_value = letter_case[letter_id] - if len(prev_mapped_value) != 2: - continue + if len(mapped_value) != 2: + continue - if prev_mapped_value[1] != mapped_value[1]: - continue + prev_letter_id = letter_id - 1 - if (ord(prev_mapped_value[0]) - prev_letter_id) != (ord(mapped_value[0]) - letter_id): - in_range = False - continue + if prev_letter_id not in letter_case: + in_range = False + continue - if in_range: - special_range_lengths[range_position] += 1 - else: - range_position += 1 - in_range = True + prev_mapped_value = letter_case[prev_letter_id] - special_ranges.extend([prev_letter_id, ord(prev_mapped_value[0]), ord(prev_mapped_value[1])]) - special_range_lengths.append(1) + if len(prev_mapped_value) != 2: + continue - # Remove special ranges from the conversion table - idx = 0 - while idx != len(special_ranges): - range_length = special_range_lengths[idx // 3] - letter_id = special_ranges[idx] + if prev_mapped_value[1] != mapped_value[1]: + continue - for incr in range(range_length): - self.remove(special_ranges[idx] + incr) + if (ord(prev_mapped_value[0]) - prev_letter_id) != (ord(mapped_value[0]) - letter_id): + in_range = False + continue - idx += 3 + if in_range: + special_range_lengths[range_position] += 1 + else: + range_position += 1 + in_range = True - return special_ranges, special_range_lengths + special_ranges.extend([prev_letter_id, ord(prev_mapped_value[0]), ord(prev_mapped_value[1])]) + special_range_lengths.append(1) - def extract_conversions(self): - """ - Extract conversions. 
It provide the full (or remained) case mappings from the table. - The counter table contains the information of how much one-to-one, one-to-two or one-to-three mappings - exists successively in the conversion table. + # Remove special ranges from the conversion table + for idx in range(0, len(special_ranges), 3): + range_length = special_range_lengths[idx // 3] + letter_id = special_ranges[idx] - :return: A table with conversions, and a table with counters. - """ + for incr in range(range_length): + del letter_case[special_ranges[idx] + incr] - unicodes = [[], [], []] - unicode_lengths = [0, 0, 0] + return special_ranges, special_range_lengths - # 1 to 1 byte - for letter_id in sorted(self._conversion_table.keys()): - mapped_value = self.get_value(letter_id) - if len(mapped_value) != 1: - continue +def extract_conversions(letter_case): + """ + Extract conversions. It provide the full (or remained) case mappings from the table. + The counter table contains the information of how much one-to-one, one-to-two or one-to-three mappings + exists successively in the conversion table. - unicodes[0].extend([letter_id, ord(mapped_value)]) - self.remove(letter_id) + :return: A table with conversions, and a table with counters. 
+ """ - # 1 to 2 bytes - for letter_id in sorted(self._conversion_table.keys()): - mapped_value = self.get_value(letter_id) + unicodes = [[], [], []] + unicode_lengths = [0, 0, 0] - if len(mapped_value) != 2: - continue + # 1 to 1 byte + for letter_id in sorted(letter_case.keys()): + mapped_value = letter_case[letter_id] - unicodes[1].extend([letter_id, ord(mapped_value[0]), ord(mapped_value[1])]) - self.remove(letter_id) + if len(mapped_value) != 1: + continue - # 1 to 3 bytes - for letter_id in sorted(self._conversion_table.keys()): - mapped_value = self.get_value(letter_id) + unicodes[0].extend([letter_id, ord(mapped_value)]) + del letter_case[letter_id] - if len(mapped_value) != 3: - continue + # 1 to 2 bytes + for letter_id in sorted(letter_case.keys()): + mapped_value = letter_case[letter_id] - unicodes[2].extend([letter_id, ord(mapped_value[0]), ord(mapped_value[1]), ord(mapped_value[2])]) - self.remove(letter_id) - - unicode_lengths = [int(len(unicodes[0]) / 2), int(len(unicodes[1]) / 3), int(len(unicodes[2]) / 4)] - - return list(itertools.chain.from_iterable(unicodes)), unicode_lengths - - -def regroup(l, n): - return [l[i:i+n] for i in range(0, len(l), n)] - - -def hex_format(ch): - if isinstance(ch, str): - ch = ord(ch) - - return "0x{:04x}".format(ch) - - -def format_code(code, indent): - lines = [] - # convert all characters to hex format - converted_code = map(hex_format, code) - # 10 hex number per line - for line in regroup(", ".join(converted_code), 10 * 8): - lines.append((' ' * indent) + line.strip()) - return "\n".join(lines) - - -def create_c_format_table(type_name, array_name, table, description=""): - return """{DESC} -static const {TYPE} jerry_{NAME}[] JERRY_CONST_DATA = -{{ -{TABLE} -}}; - -""".format(DESC=description, TYPE=type_name, NAME=array_name, TABLE=format_code(table, 1)) - - -def copy_tables_to_c_source(gen_tables, c_source): - data = [] - - header = """/* Copyright JS Foundation and other contributors, http://js.foundation - * - 
* Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * This file is automatically generated by the {SCRIPT} script. Do not edit! - */ - -""".format(SCRIPT=os.path.basename(__file__)) - - data.append(header) - - character_case_ranges = gen_tables.get_character_case_ranges() - character_pair_ranges = gen_tables.get_character_pair_ranges() - character_pairs = gen_tables.get_character_pairs() - upper_case_special_ranges = gen_tables.get_upper_case_special_ranges() - lower_case_ranges = gen_tables.get_lower_case_ranges() - lower_case_conversions = gen_tables.get_lower_case_conversions() - upper_case_conversions = gen_tables.get_upper_case_conversions() - - description = "/* Contains start points of character case ranges (these are bidirectional conversions). */" - data.append(create_c_format_table('uint16_t', 'character_case_ranges', - character_case_ranges[0], - description)) - - description = "/* Interval lengths of start points in `character_case_ranges` table. */" - data.append(create_c_format_table('uint8_t', - 'character_case_range_lengths', - character_case_ranges[1], - description)) - - description = "/* Contains the start points of bidirectional conversion ranges. */" - data.append(create_c_format_table('uint16_t', - 'character_pair_ranges', - character_pair_ranges[0], - description)) - - description = "/* Interval lengths of start points in `character_pair_ranges` table. 
*/" - data.append(create_c_format_table('uint8_t', - 'character_pair_range_lengths', - character_pair_ranges[1], - description)) - - description = "/* Contains lower/upper case bidirectional conversion pairs. */" - data.append(create_c_format_table('uint16_t', - 'character_pairs', - character_pairs, - description)) - - description = """/* Contains start points of one-to-two uppercase ranges where the second character - * is always the same. - */""" - data.append(create_c_format_table('uint16_t', - 'upper_case_special_ranges', - upper_case_special_ranges[0], - description)) - - description = "/* Interval lengths for start points in `upper_case_special_ranges` table. */" - data.append(create_c_format_table('uint8_t', - 'upper_case_special_range_lengths', - upper_case_special_ranges[1], - description)) - - description = "/* Contains start points of lowercase ranges. */" - data.append(create_c_format_table('uint16_t', 'lower_case_ranges', lower_case_ranges[0], description)) - - description = "/* Interval lengths for start points in `lower_case_ranges` table. */" - data.append(create_c_format_table('uint8_t', 'lower_case_range_lengths', lower_case_ranges[1], description)) - - description = "/* The remaining lowercase conversions. The lowercase variant can be one-to-three character long. */" - data.append(create_c_format_table('uint16_t', - 'lower_case_conversions', - lower_case_conversions[0], - description)) - - description = "/* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. */" - - data.append(create_c_format_table('uint8_t', - 'lower_case_conversion_counters', - lower_case_conversions[1], - description)) - - description = "/* The remaining uppercase conversions. The uppercase variant can be one-to-three character long. */" - data.append(create_c_format_table('uint16_t', - 'upper_case_conversions', - upper_case_conversions[0], - description)) - - description = "/* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. 
*/" - data.append(create_c_format_table('uint8_t', - 'upper_case_conversion_counters', - upper_case_conversions[1], - description)) - - with open(c_source, 'w') as genereted_source: - genereted_source.write(''.join(data)) - - -class GenTables(object): - """Class defines an informative, default generated tables.""" - - def __init__(self, lower_case_table, upper_case_table): - """ - Generate the extracted tables from the given case mapping tables. + if len(mapped_value) != 2: + continue - :param lower_case_table: Lower-case mappings. - :param upper_case_table: Upper-case mappings. - """ + unicodes[1].extend([letter_id, ord(mapped_value[0]), ord(mapped_value[1])]) + del letter_case[letter_id] - self._character_case_ranges = lower_case_table.extract_ranges(upper_case_table) - self._character_pair_ranges = lower_case_table.extract_character_pair_ranges(upper_case_table) - self._character_pairs = lower_case_table.extract_character_pairs(upper_case_table) - self._upper_case_special_ranges = upper_case_table.extract_special_ranges() - self._lower_case_ranges = lower_case_table.extract_ranges() - self._lower_case_conversions = lower_case_table.extract_conversions() - self._upper_case_conversions = upper_case_table.extract_conversions() + # 1 to 3 bytes + for letter_id in sorted(letter_case.keys()): + mapped_value = letter_case[letter_id] - if lower_case_table.get_table(): - warnings.warn('Not all elements extracted from the lowercase conversion table!') - if upper_case_table.get_table(): - warnings.warn('Not all elements extracted from the uppercase conversion table!') + if len(mapped_value) != 3: + continue - def get_character_case_ranges(self): - return self._character_case_ranges + unicodes[2].extend([letter_id, ord(mapped_value[0]), ord(mapped_value[1]), ord(mapped_value[2])]) + del letter_case[letter_id] - def get_character_pair_ranges(self): - return self._character_pair_ranges + unicode_lengths = [int(len(unicodes[0]) / 2), int(len(unicodes[1]) / 3), 
int(len(unicodes[2]) / 4)] - def get_character_pairs(self): - return self._character_pairs + return list(itertools.chain.from_iterable(unicodes)), unicode_lengths - def get_upper_case_special_ranges(self): - return self._upper_case_special_ranges - def get_lower_case_ranges(self): - return self._lower_case_ranges +def is_bidirectional_conversion(letter_id, letter_case, reverse_letter_case): + """ + Check that two unicode values are also mapping values of each other. - def get_lower_case_conversions(self): - return self._lower_case_conversions + :param letter_id: An integer, representing the unicode code point of the character. + :param reverse_letter_case: Comparable case mapping table which possibly contains + the return direction of the conversion. + :return: True, if it's a reversible conversion, false otherwise. + """ - def get_upper_case_conversions(self): - return self._upper_case_conversions + if letter_id not in letter_case: + return False + # Check one-to-one mapping + mapped_value = letter_case[letter_id] + if len(mapped_value) > 1: + return False -def main(): - parser = argparse.ArgumentParser() + # Check two way conversions + mapped_value_id = ord(mapped_value) - parser.add_argument('--unicode-data', - metavar='FILE', - action='store', - required=True, - help='specify the unicode data file') + if mapped_value_id not in reverse_letter_case or len(reverse_letter_case[mapped_value_id]) > 1: + return False - parser.add_argument('--special-casing', - metavar='FILE', - action='store', - required=True, - help='specify the special casing file') + if ord(reverse_letter_case[mapped_value_id]) != letter_id: + return False - parser.add_argument('--c-source', - metavar='FILE', - action='store', - default=C_SOURCE_FILE, - help='specify the output c source (default: %(default)s)') + return True - script_args = parser.parse_args() - if not os.path.isfile(script_args.unicode_data) or not os.access(script_args.unicode_data, os.R_OK): - print('The %s file is missing or not
readable!' % script_args.unicode_data) - sys.exit(1) +def calculate_conversion_distance(letter_case, letter_id): + """ + Calculate the distance between the unicode character and its mapped value + (only needs and works with one-to-one mappings). - if not os.path.isfile(script_args.special_casing) or not os.access(script_args.special_casing, os.R_OK): - print('The %s file is missing or not readable!' % script_args.special_casing) - sys.exit(1) + :param letter_case: case mappings dictionary which contains the conversions. + :param letter_id: An integer, representing the unicode code point of the character. + :return: The conversion distance. + """ - lower_case_table, upper_case_table = read_case_mappings(script_args.unicode_data, script_args.special_casing) + if letter_id not in letter_case or len(letter_case[letter_id]) > 1: + return None - gen_tables = GenTables(lower_case_table, upper_case_table) + return ord(letter_case[letter_id]) - letter_id - copy_tables_to_c_source(gen_tables, script_args.c_source) if __name__ == "__main__": main() diff --git a/tools/unicode_ranges.py b/tools/unicode_ranges.py old mode 100644 new mode 100755 index 0b9f9f72b7..cfce04ab1a --- a/tools/unicode_ranges.py +++ b/tools/unicode_ranges.py @@ -26,152 +26,146 @@ # connector punctuation: Pc # separators: Zs +from __future__ import print_function +from settings import PROJECT_DIR +from unicode_c_source import Source + import argparse import bisect import csv import itertools import os +import sys -TOOLS_DIR = os.path.dirname(os.path.abspath(__file__)) -PROJECT_DIR = os.path.normpath(os.path.join(TOOLS_DIR, '..')) -C_SOURCE_FILE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-ranges.inc.h') - -parser = argparse.ArgumentParser() +RANGES_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-ranges.inc.h') -parser.add_argument('unicode_data', - metavar='FILE', - action='store', - help='specify the unicode data file') +def main(): + parser = argparse.ArgumentParser() 
-parser.add_argument('--c-source', - metavar='FILE', - action='store', - default=C_SOURCE_FILE, - help='specify the output c source (default: %(default)s)') + parser.add_argument('unicode_data', + metavar='FILE', + action='store', + help='specify the unicode data file') -script_args = parser.parse_args() + parser.add_argument('--c-source', + metavar='FILE', + action='store', + default=RANGES_C_SOURCE, + help='specify the output c source (default: %(default)s)') + script_args = parser.parse_args() -def main(): if not os.path.isfile(script_args.unicode_data) or not os.access(script_args.unicode_data, os.R_OK): print('The %s file is missing or not readable!' % script_args.unicode_data) sys.exit(1) - letters, non_letters, separators = read_categories() - - letters_list = list(ranges(letters)) - letter_interval_sps, letter_interval_lengths, letter_chars = split_list(letters_list) - - non_letters_list = list(ranges(non_letters)) - non_letter_interval_sps, non_letter_interval_lengths, non_letter_chars = split_list(non_letters_list) - - separator_list = list(ranges(separators)) - separator_interval_sps, separator_interval_lengths, separator_chars = split_list(separator_list) - - source = GenSource() - - letter_interval_sps_desc = """/** - * Character interval starting points for the unicode letters. - * - * The characters covered by these intervals are from - * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl - */""" - source.add_table("uint16_t", - "unicode_letter_interval_sps", - letter_interval_sps, - letter_interval_sps_desc) - - letter_interval_lengths_desc = """/** - * Character lengths for the unicode letters. 
- * - * The characters covered by these intervals are from - * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl - */""" - source.add_table("uint8_t", - "unicode_letter_interval_lengths", - letter_interval_lengths, - letter_interval_lengths_desc) - - letter_chars_desc = """/** - * Those unicode letter characters that are not inside any of - * the intervals specified in jerry_unicode_letter_interval_sps array. - * - * The characters are from the following Unicode categories: - * Lu, Ll, Lt, Lm, Lo, Nl - */""" - source.add_table("uint16_t", - "unicode_letter_chars", - letter_chars, - letter_chars_desc) - - non_letter_interval_sps_desc = """/** - * Character interval starting points for non-letter character - * that can be used as a non-first character of an identifier. - * - * The characters covered by these intervals are from - * the following Unicode categories: Nd, Mn, Mc, Pc - */""" - source.add_table("uint16_t", - "unicode_non_letter_ident_part_interval_sps", - non_letter_interval_sps, - non_letter_interval_sps_desc) - - non_letter_interval_lengths_desc = """/** - * Character interval lengths for non-letter character - * that can be used as a non-first character of an identifier. - * - * The characters covered by these intervals are from - * the following Unicode categories: Nd, Mn, Mc, Pc - */""" - source.add_table("uint8_t", - "unicode_non_letter_ident_part_interval_lengths", - non_letter_interval_lengths, - non_letter_interval_lengths_desc) - - non_letter_chars_desc = """/** - * Those non-letter characters that can be used as a non-first - * character of an identifier and not included in any of the intervals - * specified in jerry_unicode_non_letter_ident_part_interval_sps array. 
- * - * The characters are from the following Unicode categories: - * Nd, Mn, Mc, Pc - */""" - source.add_table("uint16_t", - "unicode_non_letter_ident_part_chars", - non_letter_chars, - non_letter_chars_desc) - - separator_interval_sps_desc = """/** - * Unicode separator character interval starting points from Unicode category: Zs - */""" - source.add_table("uint16_t", - "unicode_separator_char_interval_sps", - separator_interval_sps, - separator_interval_sps_desc) - - separator_interval_lengths_desc = """/** - * Unicode separator character interval lengths from Unicode category: Zs - */""" - source.add_table("uint8_t", - "unicode_separator_char_interval_lengths", - separator_interval_lengths, - separator_interval_lengths_desc) - - separator_chars_desc = """/** - * Unicode separator characters that are not in the - * jerry_unicode_separator_char_intervals array. - * - * Unicode category: Zs - */""" - source.add_table("uint16_t", - "unicode_separator_chars", - separator_chars, - separator_chars_desc) - - source.write_source() - - -def read_categories(): + letters, non_letters, separators = read_categories(script_args.unicode_data) + + letter_tables = split_list(list(ranges(letters))) + non_letter_tables = split_list(list(ranges(non_letters))) + separator_tables = split_list(list(ranges(separators))) + + c_source = Source(script_args.c_source) + + header_completion = ["/* This file is automatically generated by the %s script" % os.path.basename(__file__), + " * from %s. Do not edit! 
*/" % os.path.basename(script_args.unicode_data), + ""] + + c_source.complete_header("\n".join(header_completion)) + + c_source.add_table(letter_tables[0], + "unicode_letter_interval_sps", + "uint16_t", + ("/**\n" + " * Character interval starting points for the unicode letters.\n" + " *\n" + " * The characters covered by these intervals are from\n" + " * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl\n" + " */")) + + c_source.add_table(letter_tables[1], + "unicode_letter_interval_lengths", + "uint8_t", + ("/**\n" + " * Character lengths for the unicode letters.\n" + " *\n" + " * The characters covered by these intervals are from\n" + " * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl\n" + " */")) + + c_source.add_table(letter_tables[2], + "unicode_letter_chars", + "uint16_t", + ("/**\n" + " * Those unicode letter characters that are not inside any of\n" + " * the intervals specified in jerry_unicode_letter_interval_sps array.\n" + " *\n" + " * The characters are from the following Unicode categories:\n" + " * Lu, Ll, Lt, Lm, Lo, Nl\n" + " */")) + + c_source.add_table(non_letter_tables[0], + "unicode_non_letter_ident_part_interval_sps", + "uint16_t", + ("/**\n" + " * Character interval starting points for non-letter character\n" + " * that can be used as a non-first character of an identifier.\n" + " *\n" + " * The characters covered by these intervals are from\n" + " * the following Unicode categories: Nd, Mn, Mc, Pc\n" + " */")) + + c_source.add_table(non_letter_tables[1], + "unicode_non_letter_ident_part_interval_lengths", + "uint8_t", + ("/**\n" + " * Character interval lengths for non-letter character\n" + " * that can be used as a non-first character of an identifier.\n" + " *\n" + " * The characters covered by these intervals are from\n" + " * the following Unicode categories: Nd, Mn, Mc, Pc\n" + " */")) + + c_source.add_table(non_letter_tables[2], + "unicode_non_letter_ident_part_chars", + "uint16_t", + ("/**\n" + " * Those non-letter 
characters that can be used as a non-first\n" + " * character of an identifier and not included in any of the intervals\n" + " * specified in jerry_unicode_non_letter_ident_part_interval_sps array.\n" + " *\n" + " * The characters are from the following Unicode categories:\n" + " * Nd, Mn, Mc, Pc\n" + " */")) + + c_source.add_table(separator_tables[0], + "unicode_separator_char_interval_sps", + "uint16_t", + ("/**\n" + " * Unicode separator character interval starting points from Unicode category: Zs\n" + " */")) + + c_source.add_table(separator_tables[1], + "unicode_separator_char_interval_lengths", + "uint8_t", + ("/**\n" + " * Unicode separator character interval lengths from Unicode category: Zs\n" + " */")) + + c_source.add_table(separator_tables[2], + "unicode_separator_chars", + "uint16_t", + ("/**\n" + " * Unicode separator characters that are not in the\n" + " * jerry_unicode_separator_char_intervals array.\n" + " *\n" + " * Unicode category: Zs\n" + " */")) + + c_source.generate() + + +def read_categories(unicode_data_file): """ Read the corresponding unicode values and store them in category lists. @@ -186,7 +180,7 @@ def read_categories(): non_letters = [] separators = [] - with open(script_args.unicode_data) as unicode_data: + with open(unicode_data_file) as unicode_data: unicode_data_reader = csv.reader(unicode_data, delimiter=';') for line in unicode_data_reader: @@ -228,10 +222,9 @@ def ranges(i): :return: List of ranges. 
""" - - for a, b in itertools.groupby(enumerate(i), lambda (x, y): y - x): - b = list(b) - yield b[0][1], b[-1][1] + for _, group in itertools.groupby(enumerate(i), lambda q: (q[1] - q[0])): + group = list(group) + yield group[0][1], group[-1][1] def split_list(category_list): @@ -241,87 +234,24 @@ def split_list(category_list): :return: List of interval starting points, interval lengths and single chars """ - unicode_category_interval_sps = [] - unicode_category_interval_lengths = [] - unicode_category_chars = [] + interval_sps = [] + interval_lengths = [] + chars = [] for element in category_list: interval_length = element[1] - element[0] if interval_length == 0: - unicode_category_chars.append(element[0]) - - elif (interval_length > 255): + chars.append(element[0]) + elif interval_length > 255: for i in range(element[0], element[1], 256): length = 255 if (element[1] - i > 255) else (element[1] - i) - unicode_category_interval_sps.append(i) - unicode_category_interval_lengths.append(length) + interval_sps.append(i) + interval_lengths.append(length) else: - unicode_category_interval_sps.append(element[0]) - unicode_category_interval_lengths.append(element[1] - element[0]) - - return unicode_category_interval_sps, unicode_category_interval_lengths, unicode_category_chars - - -class GenSource(object): - """Class defines a default generated c source.""" - - def __init__(self): - self._data = [] - - header = """/* Copyright JS Foundation and other contributors, http://js.foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * - * This file is automatically generated by the {SCRIPT} script - * from {UNICODES}. Do not edit! - */ - -""".format(SCRIPT=os.path.basename(__file__), UNICODES=os.path.basename(script_args.unicode_data)) - - self._data.append(header) - - def _regroup(self, l, n): - return [l[i:i+n] for i in range(0, len(l), n)] - - def _hex_format(self, ch): - if isinstance(ch, str): - ch = ord(ch) - - return "0x{:04x}".format(ch) - - def _format_code(self, code, indent): - lines = [] - # convert all characters to hex format - converted_code = map(self._hex_format, code) - # 10 hex number per line - for line in self._regroup(", ".join(converted_code), 10 * 8): - lines.append((' ' * indent) + line.strip()) - return "\n".join(lines) - - def add_table(self, type_name, array_name, table, description=""): - table_str = """{DESC} -static const {TYPE} jerry_{NAME}[] JERRY_CONST_DATA = -{{ -{TABLE} -}}; - -""".format(DESC=description, TYPE=type_name, NAME=array_name, TABLE=self._format_code(table, 1)) - - self._data.append(table_str) - - def write_source(self): - with open(script_args.c_source, 'w') as genereted_source: - genereted_source.write(''.join(self._data)) + interval_sps.append(element[0]) + interval_lengths.append(element[1] - element[0]) + + return interval_sps, interval_lengths, chars if __name__ == "__main__":