In [2]:
import json
import re
from collections import Counter


class ValidationError(Exception):
    pass


class IPA_CHAR:
    """
    Class to interact with a JSON dictionary containing IPA characters and their hex codes & characters.
    """
    _data = None

    ranking_dictionary = {
        'AFFRICATE': 1, 'DIPHTONG': 1, 'CONSONANT': 1,
        'VOWEL': 1, 'DIACRITIC': 0, 'SUPRASEGMENTAL': 0, 'TONE-ACCENT': 0
    }

    @classmethod
    def load_data(cls, file_path):
        if cls._data is None:
            try:
                with open(file_path, 'r') as f:
                    cls._data = json.load(f)
            except FileNotFoundError:
                raise ValidationError(f"Error: File {file_path} not found.")
            except json.JSONDecodeError:
                raise ValidationError(f"Error: File {file_path} is not a valid JSON.")

    @classmethod
    def _char_data(cls, char):
        char = char.strip()
        if not char:
            raise ValidationError("Error: Input character is empty or whitespace only.")

        char_code = format(ord(char), '04x')
        for category, symbols in cls._data.items():
            for name, value in symbols.items():
                if value["code"] == char_code:
                    return category, name, value["code"]
        raise ValidationError(f"Error: Symbol '{char}' does not exist in the dictionary.")

    @classmethod
    def rank(cls, char):
        category, _, _ = cls._char_data(char)
        return cls.ranking_dictionary.get(category)

    @classmethod
    def name(cls, char):
        _, name, _ = cls._char_data(char)
        return name

    @classmethod
    def category(cls, char):
        category, _, _ = cls._char_data(char)
        return category

    @classmethod
    def code(cls, char):
        _, _, code = cls._char_data(char)
        return code

    @classmethod
    def is_valid_char(cls, char):
        try:
            cls._char_data(char)
            return True
        except ValidationError:
            return False
    
    
        
        
        
# You only load the data once, outside of the class methods. It populates the _data class variable.
IPA_CHAR.load_data('/Users/yanlashchev/Desktop/Secret_project/IPA_Table.json')

# After loading the data once, you can use the methods directly without instantiating the class.
print(IPA_CHAR.rank('a'))
print(IPA_CHAR.name('a'))
print(IPA_CHAR.category('a'))
print(IPA_CHAR.code('a'))


1
OPEN FRONT UNROUNDED VOWEL
VOWEL
0061


In [35]:
class CREATE_IPA:
    """
    Class to define and manage custom diphthongs and affricates.
    """
    
    def __init__(self):
        # Dictionaries to store custom diphthongs and affricates
        self.custom_diphthongs = {}
        self.custom_affricates = {}
    
    
    def add_diphthong(self, diphthong, name, rank):
        """
        Define a custom diphthong with its name and rank.
        """
        self.custom_diphthongs[diphthong] = {"name": name, "rank": rank}

    def add_affricate(self, affricate, name, rank):
        """
        Define a custom affricate with its name and rank.
        """
        self.custom_affricates[affricate] = {"name": name, "rank": rank}
    
    def remove_diphthong(self, diphthong):
        """
        Remove a custom diphthong definition.
        """
        if diphthong in self.custom_diphthongs:
            del self.custom_diphthongs[diphthong]

    def remove_affricate(self, affricate):
        """
        Remove a custom affricate definition.
        """
        if affricate in self.custom_affricates:
            del self.custom_affricates[affricate]

    def get_all_custom(self):
        """
        Return all custom definitions.
        """
        return {
            "DIPHTONGS": self.custom_diphthongs,
            "AFFRICATES": self.custom_affricates
        }

    def rank(self, segment):
        if segment in self.custom_diphthongs:
            return self.custom_diphthongs[segment]["rank"]
        elif segment in self.custom_affricates:
            return self.custom_affricates[segment]["rank"]
        else:
            return None

    def name(self, segment):
        if segment in self.custom_diphthongs:
            return self.custom_diphthongs[segment]["name"]
        elif segment in self.custom_affricates:
            return self.custom_affricates[segment]["name"]
        else:
            return None


In [33]:

class IPAString:
    
    custom_definitions = CREATE_IPA()
     
    def __init__(self, string="", handle_geminate=True):
        self.string = string.strip()
        self.handle_geminate = handle_geminate
        self._validate_string()

    def _validate_string(self):
        errors = [f"{idx+1}. Invalid character '{char}' at position {idx}"
                  for idx, char in enumerate(self.string) if not IPA_CHAR.is_valid_char(char)]

        if errors:
            formatted_errors = "\n".join(errors)
            error_message = f"\n{'-' * 17}\n{formatted_errors}\n{'-' * 17}"
            raise ValidationError(error_message)
        
    def _effective_representation(self):
        if not self.handle_geminate:
            return self.string

        def replace_geminate_if_consonant(match):
            char = match.group(0)[0]
            if IPA_CHAR.category(char) == "CONSONANT":
                return char
            return match.group(0)

        return re.sub(r'(\w)\1+', replace_geminate_if_consonant, self.string)

    def get_maximal_munch_matches(self, segment):
        matches = []
        i = 0
        while i < len(segment):
            found_match = False
            for j in range(len(segment), i, -1):
                if self.custom_definitions.name(segment[i:j]):
                    matches.append(segment[i:j])
                    i = j
                    found_match = True
                    break
            if not found_match:
                matches.append(segment[i])
                i += 1
        return matches

    def name(self, segment=None):
        if not segment:
            segment = self._effective_representation()
        matches = self.get_maximal_munch_matches(segment)
        return [self.custom_definitions.name(match) or IPA_CHAR.name(match) for match in matches]

    def rank(self, segment=None):
        if not segment:
            segment = self._effective_representation()
        matches = self.get_maximal_munch_matches(segment)
        return [self.custom_definitions.rank(match) or IPA_CHAR.rank(match) for match in matches]

    @property
    def unicode_string(self):
        return ''.join(['\\u{:04x}'.format(ord(char)) for char in self._effective_representation()])

    @property
    def syllables(self):
        effective_string = self._effective_representation()
        syllable_break = "."  # IPA symbol for syllable break
        return effective_string.split(syllable_break)

    @property
    def segment_type(self):
        effective_string = self._effective_representation()
        matches = self.get_maximal_munch_matches(effective_string)
        return [self.custom_definitions.category(match) or IPA_CHAR.category(match) for match in matches]

    @property
    def segment_count(self):
        effective_string = self._effective_representation()
        matches = self.get_maximal_munch_matches(effective_string)
        categories = [self.custom_definitions.category(match) or IPA_CHAR.category(match) for match in matches]
        count = Counter(categories)
        return {'vwl': count['VOWEL'], 'cnst': count['CONSONANT']}

    @property
    def stress(self):
        effective_string = self._effective_representation()
        return [("STRESSED" if self.is_stressed(syllable) else "UNSTRESSED") for syllable in effective_string.split('.')]

    def total_length(self, segment=None):
        if segment:
            self.string = segment
        effective_string = self._effective_representation()
        return sum([self.custom_definitions.rank(char) or IPA_CHAR.rank(char) for char in effective_string if IPA_CHAR.rank(char) is not None])

    def toggle_geminate_handling(self, value=None):
        """Toggle the geminate handling on or off. If a value is provided (True/False), set accordingly."""
        if value is None:
            self.handle_geminate = not self.handle_geminate
        else:
            self.handle_geminate = value
           

# Additional setup and utility code may be needed depending on how IPA_CHAR and CREATE_IPA are defined.

  






TypeError: IPAString.total_length() got an unexpected keyword argument 'geminate'

In [36]:
test = ['ap.ˈʃatʼ.ɬa.kʼj','ap.ˈʃatʼ.ɬa.kʼj','ap.ˈʃatʼ.ɬa.kʼj']

for word in test:
    print(word)
    word = IPAString()
    print(word.total_length('baie'))
    print(word.segment_count)
    print(word.segment_type)
    print(word.syllables)
    print(word.stress)
    print(word.unicode_string)
    print("______________________________")
    
    
    


ap.ˈʃatʼ.ɬa.kʼj
4


AttributeError: 'CREATE_IPA' object has no attribute 'category'

In [21]:
custom_IPA = CREATE_IPA()
custom_IPA.add_diphthong("ai", "diphthong", 1)
IPAString.custom_definitions = custom_IPA

word = IPAString()

word.total_length('ai')

2

In [38]:
# First, load the basic IPA data (assuming the necessary initial setup is done).
IPA_CHAR.load_data('/Users/yanlashchev/Desktop/Secret_project/IPA_Table.json')

# Create an instance of IPAString with geminate handling ON (default behavior)
word1 = IPAString("ba.bb.bcc")

# Displaying results
print("When handle_geminate is ON:")
print("Segment Names:", word1.name())  # It should print the segment names of the given string
print("Segment Ranks:", word1.rank())  # It should print the ranks for the segments
print("Total Length:", word1.total_length())
print("Segment Count:", word1.segment_count)
print("Segment Types:", word1.segment_type)
print("Syllables:", word1.syllables)
print("Stress:", word1.stress)
print("Unicode String:", word1.unicode_string)

# Toggle geminate handling OFF
word1.toggle_geminate_handling(False)

# Displaying results after toggling
print("\nWhen handle_geminate is OFF:")
print("Segment Names:", word1.name())  # It should print the segment names of the given string
print("Segment Ranks:", word1.rank())  # It should print the ranks for the segments
print("Total Length:", word1.total_length())
print("Segment Count:", word1.segment_count)
print("Segment Types:", word1.segment_type)
print("Syllables:", word1.syllables)
print("Stress:", word1.stress)
print("Unicode String:", word1.unicode_string)


When handle_geminate is ON:
Segment Names: ['VOICED BILABIAL PLOSIVE', 'OPEN FRONT UNROUNDED VOWEL', 'SYLLABLE BREAK', 'VOICED BILABIAL PLOSIVE', 'SYLLABLE BREAK', 'VOICED BILABIAL PLOSIVE', 'VOICELESS PALATAL PLOSIVE']
Segment Ranks: [1, 1, 0, 1, 0, 1, 1]
Total Length: 5


AttributeError: 'CREATE_IPA' object has no attribute 'category'