# bytes
- a bit has two possible values: on/off, True/False, 1/0, etc.
- a 'byte' consists of 8 bits
- on most computers, memory and files hold a sequence of bytes
- networks transmit and receive bytes, or "byte streams"
- a byte stream may represent many different things
    - jpeg image
    - mp3 audio
    - mp4 video
    - Ascii text 
- file extensions like '.jpg' and '.mp3' indicate how to interpret
  a byte stream
    

# Character encoding scheme
- is a mapping between integers and characters
    - Ascii
    - Unicode

# Ascii 
- each character is represented by seven bits in an eight bit byte; the eighth bit is not used
    - there are 128 Ascii characters
        - first 32 are 'non-printing control characters' (NL, CR, TAB, ESC...)
        - rest are 'printing characters' like 'a', 'Z', '!' (except the last one, 127 del, which is also a control character)
- to see the Ascii encoding (like the table below) on Mac or Linux, run this in a shell window:
  - `man ascii`

# Ascii character encoding in decimal

```
  0 nul   1 soh   2 stx   3 etx   4 eot   5 enq   6 ack   7 bel 
  8 bs    9 ht   10 nl   11 vt   12 np   13 cr   14 so   15 si  
 16 dle  17 dc1  18 dc2  19 dc3  20 dc4  21 nak  22 syn  23 etb 
 24 can  25 em   26 sub  27 esc  28 fs   29 gs   30 rs   31 us  
 32 sp   33 !    34 "    35 #    36 $    37 %    38 &    39 '   
 40 (    41 )    42 *    43 +    44 ,    45 -    46 .    47 /   
 48 0    49 1    50 2    51 3    52 4    53 5    54 6    55 7   
 56 8    57 9    58 :    59 ;    60 <    61 =    62 >    63 ?   
 64 @    65 A    66 B    67 C    68 D    69 E    70 F    71 G   
 72 H    73 I    74 J    75 K    76 L    77 M    78 N    79 O   
 80 P    81 Q    82 R    83 S    84 T    85 U    86 V    87 W   
 88 X    89 Y    90 Z    91 [    92 \    93 ]    94 ^    95 _   
 96 `    97 a    98 b    99 c   100 d   101 e   102 f   103 g   
104 h   105 i   106 j   107 k   108 l   109 m   110 n   111 o   
112 p   113 q   114 r   115 s   116 t   117 u   118 v   119 w   
120 x   121 y   122 z   123 {   124 |   125 }   126 ~   127 del 
```

# Python 'bytes' data type
- type 'bytes' stores a sequence of 8 bit bytes
- Python uses Ascii to input and display 'bytes' objects; bytes with no printable Ascii character are displayed as `\x` hex escapes
- bytes are immutable, like strings
- [doc](https://docs.python.org/3.7/library/stdtypes.html#binaryseq)
- numpy can also store 8 bit bytes, but Python uses 'bytes' for certain system functions, like networking

In [1]:
# a bytes literal is typed like a string literal;
# the leading 'b' is what makes it bytes instead of str

b = b'foobar'

# the object, its length, two items, and the types involved
(
    b,
    len(b),
    b[3],
    b[-1],
    type(b),
    type(b[0]),
)

(b'foobar', 6, 98, 114, bytes, int)

In [2]:
# a bytes object holds 8-bit (one byte) unsigned ints, NOT characters:
# converting to a list or indexing yields the int codes

(
    list(b),
    b[3],
    b[-1],
)

([102, 111, 111, 98, 97, 114], 98, 114)

In [3]:
# like 'str', a 'bytes' object is not mutable: item assignment raises
# TypeError. The error is the point of this cell, so catch it and print
# the message; an uncaught exception would stop 'Restart & Run All' here.

try:
    b[3] = 33
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: 'bytes' object does not support item assignment

In [4]:
# bytes() can also be built from ints:
# a range of codes, or an explicit list of codes

(
    bytes(range(65, 77)),
    bytes([65, 66, 77]),
)

(b'ABCDEFGHIJKL', b'ABM')

# hex numbers
- hex (hexadecimal) numbers are base 16 numbers
- a hex digit represents 4 bits
- 2 hex digits represent 8 bits, 1 byte

In [5]:
# each value 0..15 next to its hex form: one hex digit covers 4 bits
[[value, hex(value)] for value in range(16)]

[[0, '0x0'],
 [1, '0x1'],
 [2, '0x2'],
 [3, '0x3'],
 [4, '0x4'],
 [5, '0x5'],
 [6, '0x6'],
 [7, '0x7'],
 [8, '0x8'],
 [9, '0x9'],
 [10, '0xa'],
 [11, '0xb'],
 [12, '0xc'],
 [13, '0xd'],
 [14, '0xe'],
 [15, '0xf']]

# Ascii character encoding in hex
```
x00 nul x01 soh x02 stx x03 etx x04 eot x05 enq x06 ack x07 bel 
x08 bs  x09 ht  x0a nl  x0b vt  x0c np  x0d cr  x0e so  x0f si  
x10 dle x11 dc1 x12 dc2 x13 dc3 x14 dc4 x15 nak x16 syn x17 etb 
x18 can x19 em  x1a sub x1b esc x1c fs  x1d gs  x1e rs  x1f us  
x20 sp  x21 !   x22 "   x23 #   x24 $   x25 %   x26 &   x27 '   
x28 (   x29 )   x2a *   x2b +   x2c ,   x2d -   x2e .   x2f /   
x30 0   x31 1   x32 2   x33 3   x34 4   x35 5   x36 6   x37 7   
x38 8   x39 9   x3a :   x3b ;   x3c <   x3d =   x3e >   x3f ?   
x40 @   x41 A   x42 B   x43 C   x44 D   x45 E   x46 F   x47 G   
x48 H   x49 I   x4a J   x4b K   x4c L   x4d M   x4e N   x4f O   
x50 P   x51 Q   x52 R   x53 S   x54 T   x55 U   x56 V   x57 W   
x58 X   x59 Y   x5a Z   x5b [   x5c \   x5d ]   x5e ^   x5f _   
x60 `   x61 a   x62 b   x63 c   x64 d   x65 e   x66 f   x67 g   
x68 h   x69 i   x6a j   x6b k   x6c l   x6d m   x6e n   x6f o   
x70 p   x71 q   x72 r   x73 s   x74 t   x75 u   x76 v   x77 w   
x78 x   x79 y   x7a z   x7b {   x7c |   x7d }   x7e ~   x7f del 
```

In [6]:
# a \x escape followed by two hex digits enters one byte by its value;
# 0x48, 0x65, 0x78 are the Ascii codes of 'H', 'e', 'x' in the hex table

b'\x48\x65\x78'

b'Hex'

In [7]:
# int() parses a hex string when given base 16;
# each result equals the hand computation 16 * high digit + low digit

(
    4 * 16 + 14, int('4e', 16),
    5 * 16 + 10, int('5a', 16),
)

(78, 78, 90, 90)

In [8]:
# 'bytes' offers much the same methods as the 'str' type
# we have been using; list its public names (dunder names filtered out)

[name for name in dir(bytes) if not name.startswith('__')]

['capitalize',
 'center',
 'count',
 'decode',
 'endswith',
 'expandtabs',
 'find',
 'fromhex',
 'hex',
 'index',
 'isalnum',
 'isalpha',
 'isascii',
 'isdigit',
 'islower',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
 'title',
 'translate',
 'upper',
 'zfill']

# Python data type 'bytearray'
- mutable version of 'bytes'
- [doc](https://docs.python.org/3.7/library/stdtypes.html#binaryseq)


In [9]:
# bytearray can be given a list of ints, but each must fit in one byte:
# range(257) ends with 256, which is too big, so the constructor raises
# ValueError. The error is the point of this cell, so catch it and print
# the message; an uncaught exception would stop 'Restart & Run All' here.

try:
    bytearray(list(range(257)))
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: byte must be in range(0, 256)

In [10]:
# a bytearray can be created from an existing bytes object;
# length and indexing work the same way, and items are still ints
ba = bytearray(b)
(
    ba,
    len(ba),
    ba[-1],
    type(ba),
    type(ba[0]),
)

(bytearray(b'foobar'), 6, 114, bytearray, int)

In [11]:
# a bytearray is mutable: start from an empty one and grow it in place,
# appending the int code of each lowercase letter from 'a' through 'z'

ba = bytearray()
for code in range(ord('a'), ord('z') + 1):
    ba.append(code)
ba


bytearray(b'abcdefghijklmnopqrstuvwxyz')

In [12]:
# unlike 'bytes', a bytearray supports item assignment;
# items are ints, so the int code ord('F') is assigned

ba[0] = ord('F')
ba

bytearray(b'Fbcdefghijklmnopqrstuvwxyz')

In [13]:
# like bytes, a bytearray stores ints, NOT characters:
# indexing returns the int code rather than a one-character string

[ba[0], type(ba[0])]

[70, int]

In [14]:
# public method names of bytearray (dunder names filtered out):
# the bytes methods plus mutating ones such as append, extend, insert, pop

[name for name in dir(bytearray) if not name.startswith('__')]

['append',
 'capitalize',
 'center',
 'clear',
 'copy',
 'count',
 'decode',
 'endswith',
 'expandtabs',
 'extend',
 'find',
 'fromhex',
 'hex',
 'index',
 'insert',
 'isalnum',
 'isalpha',
 'isascii',
 'isdigit',
 'islower',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'pop',
 'remove',
 'replace',
 'reverse',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
 'title',
 'translate',
 'upper',
 'zfill']