In [21]:
import numpy as np

### Fixed bytes string
- each letter is coded in a single byte (so only ASCII letters fit here)
- The size of each element of the array is determined in advance, and string are truncated if needed

In [22]:
# Example 1: fixed-length string array
fixed_animals = np.array(['cat', 'dog', 'bird'], dtype='S3')
print(fixed_animals.dtype)  # |S3 meaning little endian S3 strings
print(fixed_animals)        # Note that some strings are truncated
print('element size: ', fixed_animals.itemsize)   # fixed-size-of-string x 1 (one byte per letter)
print('total array size: ', fixed_animals.nbytes)   # num-of-elements x max-element-size x 1 (one byte per letter)

|S3
[b'cat' b'dog' b'bir']
element size:  3
total array size:  9


In [23]:
# Example 2: fixed-length string array - but with different letters (Chinese)
try:
    fixed_animals = np.array(['cat', 'dog', 'bird', '你好世'], dtype='S3')
except UnicodeEncodeError:
    print('Cannot put this unicode into S3 encoded array')
    

Cannot put this unicode into S3 encoded array


### Fixed Unicode string
- each letter is coded in a 4 bits (UTF-32) 
- The size of each element of the array is determined in advance, and string are truncated if needed

In [24]:
# Example 3: fixed unicode strings (just ASCII letters)
letters = np.array(['ABC', 'DEFG'], dtype='U3')
print(letters.dtype)  # <U3 meaning little endian U3 strings
print(letters)        # Note that some strings are truncated
print('element size: ', letters.itemsize)   # 4-bytes-per-letter x 3-letters-in-each-item
print('total array size: ', letters.nbytes)   # item-size x num-of-elements

<U3
['ABC' 'DEF']
element size:  12
total array size:  24


In [25]:
# Example 4: fixed unicode strings - with Chinese characters and other icons
chinese_letters = np.array(['ABC', '你好世', '🌍🌎🌏'], dtype='U3') # These are 3 Chinese letters
print(letters.dtype)  # <U3 meaning little endian U3 strings
print(letters)        # Note that some strings are truncated
print('element size: ', letters.itemsize)   # 4-bytes-per-letter x 3-letters-in-each-item
print('total array size: ', letters.nbytes)   # item-size x num-of-elements

<U3
['ABC' 'DEF']
element size:  12
total array size:  24
