# Builtin Python conversions between unicode char string, bytes, hex and binary string and integer formats 

I find myself often needing to convert strings from hex to binary, or perform bitwise operations such as XOR. 
I saw a lot of different methods on the web, many of which required custom coding or external libraries. I wanted to gather the cleanest methods in one place.

All the methods here rely only on Python builtins. I prefer clean, succinct and explicit solutions, even if they take a few more nanoseconds per call (which they often do not). I use Python 3.6+ f-strings liberally, but you could easily use the format() builtin instead if you prefer.

If anyone has an easier way to convert between char and binary string representations, I'd love to hear about it.

It may seem strange to convert a unicode char string into an integer. I do this because Python's bitwise operators are defined for numeric types but undefined for other types. See the end of the notebook for how to XOR two integers of unequal length.

## Setup

In [1]:
# Reference values: one 13-byte UTF-8 payload written in every representation
# that the cells below convert between.

# text (str)
S = '¡Hola Mundo!'
# immutable UTF-8 bytes
BY = b'\xc2\xa1Hola Mundo!'
# mutable UTF-8 bytes
BA = bytearray(BY)
# hex digits, two per byte
H = 'c2a1486f6c61204d756e646f21'
# decimal digits of the big-endian integer value
D = '15420178183189456939991737593633'
# binary digits of the big-endian integer value
B = '11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001'
# the integer itself (underscores are only visual digit grouping)
I = 15_420_178_183_189_456_939_991_737_593_633

## character string conversions

In [2]:
# charstr -> bytes: encode the text as UTF-8
by = S.encode('utf-8')
print(repr((S, by)))
assert BY == by

('¡Hola Mundo!', b'\xc2\xa1Hola Mundo!')


In [3]:
# charstr -> bytearray: encode as UTF-8, then copy into a mutable buffer
ba = bytearray(S.encode('utf-8'))
print(repr((S, ba)))
assert BA == ba

('¡Hola Mundo!', bytearray(b'\xc2\xa1Hola Mundo!'))


In [4]:
# charstr -> bytes -> hexstr (two lowercase hex digits per UTF-8 byte)
h = S.encode('utf-8').hex()
print(repr((S, h)))
assert H == h

('¡Hola Mundo!', 'c2a1486f6c61204d756e646f21')


In [5]:
# charstr -> bytes -> int -> decstr
# int.from_bytes reads the UTF-8 bytes as one big-endian unsigned integer, so
# no hex-string detour is needed, and an empty string gives '0' instead of the
# ValueError raised by int('', base=16).
d = f'{int.from_bytes(S.encode("utf-8"), "big"):d}'
print(repr((S, d)))
assert d == D

('¡Hola Mundo!', '15420178183189456939991737593633')


In [6]:
# charstr -> bytes -> int -> binstr
# int.from_bytes skips the intermediate hex string and also accepts an empty
# string (result '0'), where int('', base=16) would raise ValueError.
# The 'b' format has no fixed width, so leading zero bits are not emitted.
b = f'{int.from_bytes(S.encode("utf-8"), "big"):b}'
print(repr((S, b)))
assert b == B

('¡Hola Mundo!', '11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001')


In [7]:
# charstr -> bytes -> int
# int.from_bytes interprets the UTF-8 bytes directly as a big-endian unsigned
# integer; unlike int(x.hex(), base=16) it also handles an empty string (-> 0).
i = int.from_bytes(S.encode('utf-8'), 'big')
print(repr((S, i)))
assert i == I

('¡Hola Mundo!', 15420178183189456939991737593633)


## bytes conversions

In [8]:
# bytes -> charstr: decode as UTF-8 (the same codec bytes.decode() defaults to)
s = str(BY, encoding='utf-8')
print(repr((BY, s)))
assert S == s

(b'\xc2\xa1Hola Mundo!', '¡Hola Mundo!')


In [9]:
# bytes -> bytearray: copy the immutable bytes into a mutable buffer
ba = bytearray(BY)
print(f'{(BY, ba)!r}')
assert BA == ba

(b'\xc2\xa1Hola Mundo!', bytearray(b'\xc2\xa1Hola Mundo!'))


In [10]:
# bytes -> hexstr using the bytes.hex() method; no binascii.hexlify() needed
h = BY.hex()
print(f'{(BY, h)!r}')
assert H == h

(b'\xc2\xa1Hola Mundo!', 'c2a1486f6c61204d756e646f21')


In [11]:
# bytes -> int -> binstr
# int.from_bytes reads the bytes as a big-endian unsigned integer without the
# hex-string detour, and maps b'' to '0' where int('', base=16) would raise.
# The 'b' format has no fixed width, so leading zero bits are not emitted.
b = f'{int.from_bytes(BY, "big"):b}'
print(repr((BY, b)))
assert b == B

(b'\xc2\xa1Hola Mundo!', '11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001')


In [12]:
# bytes -> int -> decstr
# int.from_bytes reads the bytes as a big-endian unsigned integer without the
# hex-string detour, and maps b'' to '0' where int('', base=16) would raise.
d = f'{int.from_bytes(BY, "big"):d}'
print(repr((BY, d)))
assert d == D

(b'\xc2\xa1Hola Mundo!', '15420178183189456939991737593633')


In [13]:
# bytes -> int
# int.from_bytes is the direct builtin for this: big-endian, unsigned, and
# b'' becomes 0 (int(BY.hex(), base=16) raises ValueError on empty input).
i = int.from_bytes(BY, 'big')
print(repr((BY, i)))
assert i == I

(b'\xc2\xa1Hola Mundo!', 15420178183189456939991737593633)


## bytearray conversions

In [14]:
# bytearray -> charstr: decode as UTF-8 (the codec bytearray.decode() defaults to)
s = str(BA, encoding='utf-8')
print(repr((BA, s)))
assert S == s

(bytearray(b'\xc2\xa1Hola Mundo!'), '¡Hola Mundo!')


In [15]:
# bytearray -> bytes: take an immutable copy of the buffer
by = bytes(BA)
print(f'{(BA, by)!r}')
assert BY == by

(bytearray(b'\xc2\xa1Hola Mundo!'), b'\xc2\xa1Hola Mundo!')


In [16]:
# bytearray -> hexstr using the bytearray.hex() method; no binascii.hexlify() needed
h = BA.hex()
print(f'{(BA, h)!r}')
assert H == h

(bytearray(b'\xc2\xa1Hola Mundo!'), 'c2a1486f6c61204d756e646f21')


In [17]:
# bytearray -> int -> binstr
# int.from_bytes accepts any bytes-like object, avoids the hex-string detour,
# and maps an empty buffer to '0' where int('', base=16) would raise.
# The 'b' format has no fixed width, so leading zero bits are not emitted.
b = f'{int.from_bytes(BA, "big"):b}'
print(repr((BA, b)))
assert b == B

(bytearray(b'\xc2\xa1Hola Mundo!'), '11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001')


In [18]:
# bytearray -> int -> decstr
# int.from_bytes accepts any bytes-like object, avoids the hex-string detour,
# and maps an empty buffer to '0' where int('', base=16) would raise.
d = f'{int.from_bytes(BA, "big"):d}'
print(repr((BA, d)))
assert d == D

(bytearray(b'\xc2\xa1Hola Mundo!'), '15420178183189456939991737593633')


In [19]:
# bytearray -> int
# int.from_bytes is the direct builtin for this: big-endian, unsigned, and an
# empty buffer becomes 0 (int(BA.hex(), base=16) raises ValueError on it).
i = int.from_bytes(BA, 'big')
print(repr((BA, i)))
assert i == I

(bytearray(b'\xc2\xa1Hola Mundo!'), 15420178183189456939991737593633)


## hexstr conversions

In [20]:
# hexstr -> bytes -> charstr (UTF-8 decode)
s = str(bytes.fromhex(H), encoding='utf-8')
print(repr((H, s)))
assert S == s

('c2a1486f6c61204d756e646f21', '¡Hola Mundo!')


In [21]:
# hexstr -> bytes: every pair of hex digits becomes one byte
by = bytes.fromhex(H)
print(f'{(H, by)!r}')
assert BY == by

('c2a1486f6c61204d756e646f21', b'\xc2\xa1Hola Mundo!')


In [22]:
# hexstr -> bytearray: every pair of hex digits becomes one byte
ba = bytearray.fromhex(H)
print(f'{(H, ba)!r}')
assert BA == ba

('c2a1486f6c61204d756e646f21', bytearray(b'\xc2\xa1Hola Mundo!'))


In [23]:
# hexstr -> int -> binstr
b = format(int(H, base=16), 'b')
print(repr((H, b)))
assert B == b

('c2a1486f6c61204d756e646f21', '11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001')


In [24]:
# hexstr -> int -> decstr
d = format(int(H, base=16), 'd')
print(repr((H, d)))
assert D == d

('c2a1486f6c61204d756e646f21', '15420178183189456939991737593633')


In [25]:
# hexstr -> int: parse the digits in base 16
i = int(H, 16)
print(repr((H, i)))
assert I == i

('c2a1486f6c61204d756e646f21', 15420178183189456939991737593633)


## binstr conversions

In [26]:
# binstr -> int -> bytes -> charstr
# int.to_bytes sizes the result from the bit length, so it also works when the
# value's hex form has an odd number of digits (e.g. '1010' -> 'a'), where
# bytes.fromhex(f'{n:x}') raises ValueError. Leading zero bits in the input
# string are not preserved, because an int does not record them.
n = int(B, base=2)
s = n.to_bytes((n.bit_length() + 7) // 8, 'big').decode()
print(repr((B, s)))
assert s == S

('11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001', '¡Hola Mundo!')


In [27]:
# binstr -> int -> bytes
# int.to_bytes sizes the result from the bit length, so it also works when the
# value's hex form has an odd number of digits (e.g. '1010' -> 'a'), where
# bytes.fromhex(f'{n:x}') raises ValueError.
n = int(B, base=2)
by = n.to_bytes((n.bit_length() + 7) // 8, 'big')
print(repr((B, by)))
assert by == BY

('11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001', b'\xc2\xa1Hola Mundo!')


In [28]:
# binstr -> int -> bytes -> bytearray
# int.to_bytes sizes the result from the bit length, so it also works when the
# value's hex form has an odd number of digits (e.g. '1010' -> 'a'), where
# bytearray.fromhex(f'{n:x}') raises ValueError.
n = int(B, base=2)
ba = bytearray(n.to_bytes((n.bit_length() + 7) // 8, 'big'))
print(repr((B, ba)))
assert ba == BA

('11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001', bytearray(b'\xc2\xa1Hola Mundo!'))


In [29]:
# binstr -> int -> hexstr
h = format(int(B, base=2), 'x')
print(repr((B, h)))
assert H == h

('11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001', 'c2a1486f6c61204d756e646f21')


In [30]:
# binstr -> int -> decstr
d = format(int(B, base=2), 'd')
print(repr((B, d)))
assert D == d

('11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001', '15420178183189456939991737593633')


In [31]:
# binstr -> int: parse the digits in base 2
i = int(B, 2)
print(repr((B, i)))
assert I == i

('11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001', 15420178183189456939991737593633)


## decstr conversions

In [32]:
# decstr -> int -> bytes -> charstr
# int.to_bytes sizes the result from the bit length, so it also works when the
# value's hex form has an odd number of digits (e.g. '10' -> 'a'), where
# bytes.fromhex(f'{n:x}') raises ValueError.
n = int(D, base=10)
s = n.to_bytes((n.bit_length() + 7) // 8, 'big').decode()
print(repr((D, s)))
assert s == S

('15420178183189456939991737593633', '¡Hola Mundo!')


In [33]:
# decstr -> int -> bytes
# int.to_bytes sizes the result from the bit length, so it also works when the
# value's hex form has an odd number of digits (e.g. '10' -> 'a'), where
# bytes.fromhex(f'{n:x}') raises ValueError.
n = int(D, base=10)
by = n.to_bytes((n.bit_length() + 7) // 8, 'big')
print(repr((D, by)))
assert by == BY

('15420178183189456939991737593633', b'\xc2\xa1Hola Mundo!')


In [34]:
# decstr -> int -> bytes -> bytearray
# int.to_bytes sizes the result from the bit length, so it also works when the
# value's hex form has an odd number of digits (e.g. '10' -> 'a'), where
# bytearray.fromhex(f'{n:x}') raises ValueError.
n = int(D, base=10)
ba = bytearray(n.to_bytes((n.bit_length() + 7) // 8, 'big'))
print(repr((D, ba)))
assert ba == BA

('15420178183189456939991737593633', bytearray(b'\xc2\xa1Hola Mundo!'))


In [35]:
# decstr -> int -> hexstr
h = format(int(D, base=10), 'x')
print(repr((D, h)))
assert H == h

('15420178183189456939991737593633', 'c2a1486f6c61204d756e646f21')


In [36]:
# decstr -> int -> binstr
b = format(int(D, base=10), 'b')
print(repr((D, b)))
assert B == b

('15420178183189456939991737593633', '11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001')


In [37]:
# decstr -> int: base 10 is int()'s default for strings
i = int(D)
print(repr((D, i)))
assert I == i

('15420178183189456939991737593633', 15420178183189456939991737593633)


## int conversions

In [38]:
# int -> bytes -> charstr
# int.to_bytes sizes the result from the bit length, so it also works when the
# int's hex form has an odd number of digits (e.g. 10 -> 'a'), where
# bytes.fromhex(f'{I:x}') raises ValueError.
s = I.to_bytes((I.bit_length() + 7) // 8, 'big').decode()
print(repr((I, s)))
assert s == S

(15420178183189456939991737593633, '¡Hola Mundo!')


In [39]:
# int -> bytes
# (bit_length + 7) // 8 is the smallest byte count that holds the value.
# Prefer this over bytes.fromhex(f'{I:x}'): fromhex needs an even number of hex
# digits and raises ValueError for ints such as 10 ('a') or 0x123.
by = I.to_bytes((I.bit_length() + 7) // 8, 'big')
print(repr((I, by)))
assert by == BY

(15420178183189456939991737593633, b'\xc2\xa1Hola Mundo!')


In [40]:
# int -> bytes -> bytearray
# (bit_length + 7) // 8 is the smallest byte count that holds the value.
# Prefer this over bytearray.fromhex(f'{I:x}'): fromhex needs an even number of
# hex digits and raises ValueError for ints such as 10 ('a') or 0x123.
ba = bytearray(I.to_bytes((I.bit_length() + 7) // 8, 'big'))
print(repr((I, ba)))
assert ba == BA

(15420178183189456939991737593633, bytearray(b'\xc2\xa1Hola Mundo!'))


In [41]:
# int -> hexstr (lowercase, no '0x' prefix)
h = format(I, 'x')
print(repr((I, h)))
assert H == h

(15420178183189456939991737593633, 'c2a1486f6c61204d756e646f21')


In [42]:
# int -> binstr (no '0b' prefix)
b = format(I, 'b')
print(repr((I, b)))
assert B == b

(15420178183189456939991737593633, '11000010101000010100100001101111011011000110000100100000010011010111010101101110011001000110111100100001')


In [43]:
# int -> decstr; str(I) gives the same result
d = format(I, 'd')
print(repr((I, d)))
assert D == d

(15420178183189456939991737593633, '15420178183189456939991737593633')


# XOR of two integers of unequal length
Useful for crypto

Python's XOR on integers effectively right-aligns the operands, so their least-significant bits line up. Typically we want to left-align them instead, so we first left-shift the shorter one until it has the same bit length as the longer one.

In [48]:
def xor(i, j):
    """Return the XOR of two ints after left-aligning their highest set bits.

    The operand with the smaller ``bit_length()`` is shifted left until both
    have the same bit length, and the two are then XOR-ed. The argument order
    does not matter: ``xor(a, b) == xor(b, a)``.

    Args:
        i: First integer operand.
        j: Second integer operand.

    Returns:
        The XOR of the two aligned values, as an int.
    """
    d = i.bit_length() - j.bit_length()
    # d < 0 means i is the shorter operand. Shift counts must be non-negative
    # (i << d with a negative d raises ValueError), so shift by -d instead.
    if d < 0:
        return (i << -d) ^ j
    return i ^ (j << d)

# Sanity check: a value XOR-ed with itself cancels to zero.
print(repr(xor(I, I)))
assert xor(I, I) == 0

# An 8-bit value against a 4-bit value; the result is printed as 8 binary digits.
i1 = 0b11001001
i2 = 0b1010
r = xor(i1, i2)
print(format(r, '08b'))

0
01101001
