# Unicode characters in Python

In [1]:
s = "abcde"

In [2]:
# I want to know the Unicode code points (numbers) of the characters in s

for letter in s:
    print(ord(letter))

97
98
99
100
101


In [3]:
ord("$")

36

In [4]:
ord("{")

123

In [5]:
ord("(")

40

In [6]:
ord("Z")

90

## Inserting unicode character into a string

### I. chr function

In [7]:
# How can I insert characters from other languages into my string?

# (1) -- using chr
chr(97)

'a'

In [9]:
chr(20013)  # chinese character

'中'

In [10]:
chr(1513)  # hebrew character

'ש'

In [11]:
s = f"{chr(1513)}{chr(1003)}{chr(1493)}{chr(1501)}"

s

'שϫום'

In [14]:
# A nicer way to do it is with escape sequences, but they need hexadecimal code points.
# Hexadecimal numbers use the digits 0-9 and a-f.

s = "abcde"

for letter in s:
    print(hex(ord(letter)))

0x61
0x62
0x63
0x64
0x65


### II. \x

In [15]:
print("\x61")

a


In [17]:
# \xHH == the character whose code point is the two hex digits HH (U+0000 to U+00FF)

print("\x61\x62\x63\x64\x65")

abcde


In [18]:
s = "שϫום"

for letter in s:
    print(hex(ord(letter)))

0x5e9
0x3eb
0x5d5
0x5dd


In [19]:
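# But \x consumes exactly two hex digits, so the third digit of each
# code point is printed as a literal character (e.g. "\x5e9" is "^" + "9")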

print("\x5e9\x3eb\x5d5\x5dd")

^9>b]5]d


### III. \u

In [21]:
# \x only works if the code point fits in 2 hex digits; if the code point
# fits in 4 hex digits, use \u followed by exactly 4 hex digits

print("\u05e9\u03eb\u05d5\u05dd")

שϫום


In [23]:
# Can I use \u with two-digit hex codes? Yes! Pad them with two leading zeros.

print("\u0061\u0062\u0063\u0064\u0065")

abcde


In [24]:
s = "üî•"
ord(s)

128293

In [25]:
hex(ord(s))

'0x1f525'

### IV. \U

In [26]:
# If the code point needs more than 4 hex digits, use \U with exactly 8 hex digits (zero-padded)

print("I am on \U0001f525")

I am on üî•


### V. unicode names

In [27]:
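# We can use a character's name (\N{...}) instead of \x, \u and \U.
# Almost every Unicode character has a name (control characters such as "\n"
# do not, and unicodedata.name raises ValueError for them)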

import unicodedata

s = "abcde"

for letter in s:
    print(unicodedata.name(letter))

LATIN SMALL LETTER A
LATIN SMALL LETTER B
LATIN SMALL LETTER C
LATIN SMALL LETTER D
LATIN SMALL LETTER E


In [28]:
print("\N{LATIN SMALL LETTER A}")

a


In [29]:
s = "שϫום"

for letter in s:
    print(unicodedata.name(letter))

HEBREW LETTER SHIN
COPTIC SMALL LETTER GANGIA
HEBREW LETTER VAV
HEBREW LETTER FINAL MEM


In [30]:
print("\N{HEBREW LETTER SHIN}")

ש


In [31]:
# chinese character

print("\N{CJK UNIFIED IDEOGRAPH-4E2D}")

中


In [32]:
print("I am on \N{FIRE}!")

I am on üî•!
