In [None]:
%load_ext autoreload
%autoreload 2

In [1]:
def print_malayalam_charset():
    """Print a table of every code point in the Malayalam block (U+0D00-U+0D7F).

    Each row shows the character itself, its decimal code point and its
    ``U+XXXX`` hexadecimal form. No filtering is applied, so code points
    that are unassigned in the block are printed as well.

    Returns:
        None. The table is written to standard output.
    """
    block_first, block_last = 0x0D00, 0x0D7F

    table = [
        "Character | Unicode | Hex",
        "----------|----------|--------",
    ]
    # One row per code point; the character is centred in a 9-wide column
    # and the decimal value in an 8-wide column.
    table.extend(
        f"{chr(cp):^9} | {cp:^8} | U+{cp:04X}"
        for cp in range(block_first, block_last + 1)
    )
    print("\n".join(table))

# Print the character / decimal / hex table for the whole U+0D00-U+0D7F range.
print_malayalam_charset()

Character | Unicode | Hex
----------|----------|--------
    ഀ     |   3328   | U+0D00
    ഁ     |   3329   | U+0D01
    ം     |   3330   | U+0D02
    ഃ     |   3331   | U+0D03
    ഄ     |   3332   | U+0D04
    അ     |   3333   | U+0D05
    ആ     |   3334   | U+0D06
    ഇ     |   3335   | U+0D07
    ഈ     |   3336   | U+0D08
    ഉ     |   3337   | U+0D09
    ഊ     |   3338   | U+0D0A
    ഋ     |   3339   | U+0D0B
    ഌ     |   3340   | U+0D0C
    ഍     |   3341   | U+0D0D
    എ     |   3342   | U+0D0E
    ഏ     |   3343   | U+0D0F
    ഐ     |   3344   | U+0D10
    ഑     |   3345   | U+0D11
    ഒ     |   3346   | U+0D12
    ഓ     |   3347   | U+0D13
    ഔ     |   3348   | U+0D14
    ക     |   3349   | U+0D15
    ഖ     |   3350   | U+0D16
    ഗ     |   3351   | U+0D17
    ഘ     |   3352   | U+0D18
    ങ     |   3353   | U+0D19
    ച     |   3354   | U+0D1A
    ഛ     |   3355   | U+0D1B
    ജ     |   3356   | U+0D1C
    ഝ     |   3357   | U+0D1D
    ഞ     |   3358   | U+0D1E
    ട     |  

In [2]:
def print_comprehensive_malayalam_charset():
    """Print the named characters of the Malayalam block and some common conjuncts.

    Section 1 lists each code point in U+0D00-U+0D7F whose Unicode general
    category does not start with ``'C'``, together with its decimal value,
    hex value and Unicode name. Section 2 lists a fixed set of doubled
    consonant clusters and the code points each one is made of.

    Relies on the ``unicodedata`` module being available as a global when
    the function is called.

    Returns:
        None. Both tables are written to standard output.
    """
    # Basic Malayalam block
    basic_range = range(0x0D00, 0x0D7F + 1)

    # Some common combinations (examples)
    combinations = [
        "ക്ക", "ങ്ങ", "ച്ച", "ഞ്ഞ", "ട്ട", "ണ്ണ", "ത്ത", "ന്ന", "പ്പ", "മ്മ",
        "യ്യ", "ല്ല", "വ്വ", "ശ്ശ", "സ്സ", "ള്ള", "റ്റ"
    ]

    print("1. Basic Characters:")
    print("Character | Unicode | Hex | Name")
    print("-" * 50)
    for code_point in basic_range:
        char = chr(code_point)
        # Category 'C*' covers control, format, surrogate, private-use and
        # unassigned code points; inside this block it is the unassigned
        # slots (e.g. U+0D0D) that get dropped here.
        if unicodedata.category(char)[0] == 'C':
            continue
        # The default argument replaces a try/except ValueError for
        # characters that have no name in the Unicode database.
        name = unicodedata.name(char, "N/A")
        print(f"{char:^9} | {code_point:^8} | U+{code_point:04X} | {name}")

    print("\n2. Common Combined Characters (Samyuktaksharangal):")
    print("Character | Components")
    print("-" * 30)
    for combo in combinations:
        # Spell out every code point that makes up the cluster.
        components = [f"U+{ord(c):04X}" for c in combo]
        print(f"{combo:^9} | {' + '.join(components)}")

# The function above looks up `unicodedata` as a global at call time, so the
# import only has to run before the call below, not before the definition.
import unicodedata
print_comprehensive_malayalam_charset()

1. Basic Characters:
Character | Unicode | Hex | Name
--------------------------------------------------
    ഀ     |   3328   | U+0D00 | MALAYALAM SIGN COMBINING ANUSVARA ABOVE
    ഁ     |   3329   | U+0D01 | MALAYALAM SIGN CANDRABINDU
    ം     |   3330   | U+0D02 | MALAYALAM SIGN ANUSVARA
    ഃ     |   3331   | U+0D03 | MALAYALAM SIGN VISARGA
    ഄ     |   3332   | U+0D04 | MALAYALAM LETTER VEDIC ANUSVARA
    അ     |   3333   | U+0D05 | MALAYALAM LETTER A
    ആ     |   3334   | U+0D06 | MALAYALAM LETTER AA
    ഇ     |   3335   | U+0D07 | MALAYALAM LETTER I
    ഈ     |   3336   | U+0D08 | MALAYALAM LETTER II
    ഉ     |   3337   | U+0D09 | MALAYALAM LETTER U
    ഊ     |   3338   | U+0D0A | MALAYALAM LETTER UU
    ഋ     |   3339   | U+0D0B | MALAYALAM LETTER VOCALIC R
    ഌ     |   3340   | U+0D0C | MALAYALAM LETTER VOCALIC L
    എ     |   3342   | U+0D0E | MALAYALAM LETTER E
    ഏ     |   3343   | U+0D0F | MALAYALAM LETTER EE
    ഐ     |   3344   | U+0D10 | MALAYALAM LETTER AI
    ഒ  

In [4]:
import unicodedata

def find_malayalam_characters():
    """Print every character in U+0000-U+FFFF whose Unicode name contains 'MALAYALAM'.

    The scan covers the whole range rather than only the Malayalam block,
    so characters in other blocks whose names mention Malayalam are listed
    too. A final line reports how many characters matched.

    Returns:
        None. The table and the total are written to standard output.
    """
    # Pair each code point with its name; unnamed code points get an empty
    # name, which can never contain the search word.
    named_points = (
        (cp, unicodedata.name(chr(cp), "")) for cp in range(0x0000, 0x10000)
    )
    hits = [
        (chr(cp), cp, label)
        for cp, label in named_points
        if 'MALAYALAM' in label
    ]

    # Header, rule, then one row per match.
    print("Character | Unicode | Hex    | Name")
    print("-" * 80)
    for glyph, cp, label in hits:
        print(f"{glyph:^9} | {cp:^8} | U+{cp:04X} | {label}")

    print(f"\nTotal characters found: {len(hits)}")

# Scan U+0000-U+FFFF and print every character whose name mentions MALAYALAM.
find_malayalam_characters()

Character | Unicode | Hex    | Name
--------------------------------------------------------------------------------
    ࡠ     |   2144   | U+0860 | SYRIAC LETTER MALAYALAM NGA
    ࡡ     |   2145   | U+0861 | SYRIAC LETTER MALAYALAM JA
    ࡢ     |   2146   | U+0862 | SYRIAC LETTER MALAYALAM NYA
    ࡣ     |   2147   | U+0863 | SYRIAC LETTER MALAYALAM TTA
    ࡤ     |   2148   | U+0864 | SYRIAC LETTER MALAYALAM NNA
    ࡥ     |   2149   | U+0865 | SYRIAC LETTER MALAYALAM NNNA
    ࡦ     |   2150   | U+0866 | SYRIAC LETTER MALAYALAM BHA
    ࡧ     |   2151   | U+0867 | SYRIAC LETTER MALAYALAM RA
    ࡨ     |   2152   | U+0868 | SYRIAC LETTER MALAYALAM LLA
    ࡩ     |   2153   | U+0869 | SYRIAC LETTER MALAYALAM LLLA
    ࡪ     |   2154   | U+086A | SYRIAC LETTER MALAYALAM SSA
    ഀ     |   3328   | U+0D00 | MALAYALAM SIGN COMBINING ANUSVARA ABOVE
    ഁ     |   3329   | U+0D01 | MALAYALAM SIGN CANDRABINDU
    ം     |   3330   | U+0D02 | MALAYALAM SIGN ANUSVARA
    ഃ     |   3331   | U+0D03 | MALA

In [5]:
import unicodedata
import pandas as pd

def create_malayalam_charset_csv(output_path='malayalam_unicode_chars.csv'):
    """Collect characters whose Unicode name contains 'MALAYALAM' and save them as CSV.

    Scans U+0000-U+FFFF, records one row per matching character, writes the
    rows to ``output_path`` as UTF-8 CSV (without the index column) and
    prints a short summary.

    Args:
        output_path: Destination of the CSV file. Defaults to
            ``'malayalam_unicode_chars.csv'`` in the current working
            directory, which is the location the function always used before
            this parameter existed.

    Returns:
        pandas.DataFrame: One row per character with the columns
        ``Character``, ``Unicode_Decimal``, ``Unicode_Hex``, ``Name`` and
        ``Category``.
    """
    columns = ['Character', 'Unicode_Decimal', 'Unicode_Hex', 'Name', 'Category']

    # List to store character data
    malayalam_chars = []

    # Find all characters with 'MALAYALAM' in their Unicode name
    for code_point in range(0x0000, 0x10000):
        char = chr(code_point)
        # Unnamed code points yield '' instead of raising ValueError, so
        # they simply fail the substring test below.
        name = unicodedata.name(char, '')
        if 'MALAYALAM' not in name:
            continue
        malayalam_chars.append({
            'Character': char,
            'Unicode_Decimal': code_point,
            'Unicode_Hex': f"U+{code_point:04X}",
            'Name': name,
            'Category': unicodedata.category(char)
        })

    # Passing the column list keeps the schema even if nothing matched.
    df = pd.DataFrame(malayalam_chars, columns=columns)

    # Save with UTF-8 encoding to properly handle Malayalam characters
    df.to_csv(output_path, index=False, encoding='utf-8')

    print(f"Total characters saved: {len(malayalam_chars)}")
    print(f"Data saved to '{output_path}'")

    # Hand back the complete frame; the caller decides how much to display.
    return df

# Create the CSV and show the data
df = create_malayalam_charset_csv()
df

Total characters saved: 129
Data saved to 'malayalam_unicode_chars.csv'


Unnamed: 0,Character,Unicode_Decimal,Unicode_Hex,Name,Category
0,ࡠ,2144,U+0860,SYRIAC LETTER MALAYALAM NGA,Lo
1,ࡡ,2145,U+0861,SYRIAC LETTER MALAYALAM JA,Lo
2,ࡢ,2146,U+0862,SYRIAC LETTER MALAYALAM NYA,Lo
3,ࡣ,2147,U+0863,SYRIAC LETTER MALAYALAM TTA,Lo
4,ࡤ,2148,U+0864,SYRIAC LETTER MALAYALAM NNA,Lo
...,...,...,...,...,...
124,ൻ,3451,U+0D7B,MALAYALAM LETTER CHILLU N,Lo
125,ർ,3452,U+0D7C,MALAYALAM LETTER CHILLU RR,Lo
126,ൽ,3453,U+0D7D,MALAYALAM LETTER CHILLU L,Lo
127,ൾ,3454,U+0D7E,MALAYALAM LETTER CHILLU LL,Lo
