In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
def print_malayalam_charset(start: int = 0x0D00, end: int = 0x0D7F) -> None:
    """Print a table of the printable characters in a Unicode code-point range.

    With the default arguments the range is the Malayalam block
    (U+0D00-U+0D7F). Each row shows the character, its code point in
    decimal, and its code point in ``U+XXXX`` hexadecimal notation.

    Args:
        start: First code point of the range (inclusive).
        end: Last code point of the range (inclusive).

    Returns:
        None. The table is written to standard output.
    """
    print("Character | Unicode | Hex")
    print("----------|----------|--------")

    for code_point in range(start, end + 1):
        char = chr(code_point)
        # str.isprintable() is False for the Unicode "Other" and "Separator"
        # categories, which includes unassigned code points (e.g. U+0D0D and
        # U+0D11 inside the Malayalam block). Those are not characters, so
        # they are left out of the table.
        if not char.isprintable():
            continue
        print(f"{char:^9} | {code_point:^8} | U+{code_point:04X}")

# Generate the table: with no arguments this prints the rows for the
# Malayalam block (U+0D00-U+0D7F) to the cell output.
print_malayalam_charset()

In [None]:
def print_comprehensive_malayalam_charset():
    """Print the Malayalam block with Unicode names, then a list of conjuncts.

    Section 1 lists every code point in U+0D00-U+0D7F whose general category
    is not in the "Other" (``C*``) group, together with its decimal value,
    ``U+XXXX`` notation and Unicode name ("N/A" when the character has no
    name). Section 2 lists a fixed set of common combined characters and the
    code points each one is made of.

    The ``unicodedata`` module must already be imported at module level when
    this function is called.

    Returns:
        None. Both tables are written to standard output.
    """
    # The Malayalam block; the chillu letters (U+0D7A-U+0D7F) are part of it.
    basic_range = range(0x0D00, 0x0D7F + 1)

    # Some common combinations (examples)
    combinations = [
        "ക്ക", "ങ്ങ", "ച്ച", "ഞ്ഞ", "ട്ട", "ണ്ണ", "ത്ത", "ന്ന", "പ്പ", "മ്മ",
        "യ്യ", "ല്ല", "വ്വ", "ശ്ശ", "സ്സ", "ള്ള", "റ്റ"
    ]

    print("1. Basic Characters:")
    print("Character | Unicode | Hex | Name")
    print("-" * 50)
    for code_point in basic_range:
        char = chr(code_point)
        # Categories starting with 'C' are control, format, surrogate,
        # private-use and unassigned (Cn); inside this block that filters
        # out the unassigned code points.
        if unicodedata.category(char).startswith('C'):
            continue
        # The second argument is returned instead of raising ValueError
        # when the character has no name.
        name = unicodedata.name(char, "N/A")
        print(f"{char:^9} | {code_point:^8} | U+{code_point:04X} | {name}")

    print("\n2. Common Combined Characters (Samyuktaksharangal):")
    print("Character | Components")
    print("-" * 30)
    for combo in combinations:
        # One U+XXXX entry per code point that makes up the combination.
        components = [f"U+{ord(c):04X}" for c in combo]
        print(f"{combo:^9} | {' + '.join(components)}")

# The function defined above refers to `unicodedata` by name, and that name is
# only looked up when the function runs, so importing it here (after the def
# but before the call) is sufficient.
import unicodedata
print_comprehensive_malayalam_charset()

In [None]:
import unicodedata

def find_malayalam_characters():
    """Print all characters in U+0000-U+FFFF whose Unicode name contains 'MALAYALAM'.

    The scan is driven by character names rather than by a fixed block range.
    Matching characters are printed as a table (character, decimal code
    point, ``U+XXXX`` notation, name), followed by the number of matches.

    Returns:
        None. Everything is written to standard output.
    """
    hits = []

    for cp in range(0x10000):
        glyph = chr(cp)
        # Code points without a name yield the empty default, which can
        # never contain the search word.
        label = unicodedata.name(glyph, "")
        if 'MALAYALAM' not in label:
            continue
        hits.append((glyph, cp, label))

    # Header, rule, then one row per match.
    rows = ["Character | Unicode | Hex    | Name", "-" * 80]
    rows.extend(
        f"{glyph:^9} | {cp:^8} | U+{cp:04X} | {label}"
        for glyph, cp, label in hits
    )
    for row in rows:
        print(row)

    print(f"\nTotal characters found: {len(hits)}")

# Scan U+0000-U+FFFF by character name and print every match plus the total.
find_malayalam_characters()

In [None]:
import unicodedata
import pandas as pd

def create_malayalam_charset_csv(output_path: str = 'malayalam_unicode_chars.csv') -> pd.DataFrame:
    """Collect all Malayalam-named characters and write them to a CSV file.

    Scans code points U+0000-U+FFFF and keeps every character whose Unicode
    name contains 'MALAYALAM'. For each one the character, decimal code
    point, ``U+XXXX`` notation, Unicode name and general category are
    recorded.

    Args:
        output_path: Path of the CSV file to write. Defaults to
            'malayalam_unicode_chars.csv' in the current working directory.

    Returns:
        A DataFrame with one row per matching character and the columns
        ``Character``, ``Unicode_Decimal``, ``Unicode_Hex``, ``Name`` and
        ``Category``. The same data is written to ``output_path``.
    """
    # One dict per matching character; turned into a DataFrame in one go.
    malayalam_chars = []

    for code_point in range(0x0000, 0x10000):
        char = chr(code_point)
        # Unnamed code points yield the empty default instead of raising
        # ValueError, and can never match the search word.
        name = unicodedata.name(char, '')
        if 'MALAYALAM' not in name:
            continue
        malayalam_chars.append({
            'Character': char,
            'Unicode_Decimal': code_point,
            'Unicode_Hex': f"U+{code_point:04X}",
            'Name': name,
            'Category': unicodedata.category(char)
        })

    df = pd.DataFrame(malayalam_chars)

    # Save with UTF-8 encoding to properly handle Malayalam characters
    df.to_csv(output_path, index=False, encoding='utf-8')

    print(f"Total characters saved: {len(malayalam_chars)}")
    print(f"Data saved to '{output_path}'")

    # Hand the full table back to the caller (not just the first rows).
    return df

# Create the CSV and keep the returned DataFrame; the bare `df` on the last
# line makes it the cell's displayed result.
df = create_malayalam_charset_csv()
df