# Strings in Python

In [None]:
# the example string reused by the cells below
word = "silencio"
# a bare expression on the last line of a cell is displayed as the cell output
word

## Unicode, encoding and UTF-8 

[doc (aka the gory details)](https://docs.python.org/3/howto/unicode.html)  
[`ord()` doc](https://docs.python.org/3/library/functions.html#ord)  
[`chr()` doc](https://docs.python.org/3/library/functions.html#chr)  
[`bin()` doc](https://docs.python.org/3/library/functions.html#bin)  
[`int()` doc](https://docs.python.org/3/library/functions.html#int)

- `unicode`: worldwide standard assigning one **number** to one **character**
- encoding: the way you **implement** this in computers (how to organise the 0s and 1s, binary representation, so as to get the computer to handle text properly)
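- `utf-8` (Unicode Transformation Format – 8-bit): one specific encoding, built from 8-bit units (bytes); each character takes between one and four bytes (one byte for ASCII characters such as `a`, three bytes for `龙`, see below)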


In [None]:
# embed a YouTube video (looked up by its id) in the notebook output;
# it is shown because the call is the last expression of the cell
from IPython.display import YouTubeVideo
YouTubeVideo("MijmeoH9LT4", width=853, height=480) # video: "Characters, Symbols and the Unicode Miracle" - Computerphile

In [None]:
# ord() takes a single character and returns its Unicode code point (an int)
print("the unicode point for 'a' is:", ord("a"))

In [None]:
# chr() is the inverse of ord(): from a code point (int) back to the character
print("the character for unicode point 97:", chr(97))

In [None]:
# bin() returns a *string* holding the base-2 digits, prefixed with '0b'
print("binary representation of 97 is:", bin(97))

In [None]:
# int(text, base) parses `text` as a number written in the given base:
# '10' read in base 2 is two, '10' read in base 10 is ten
print("converting from binary to integer (base 10):", int('10', 2)) # try '0', '1', '10', '11' ...
print("'converting' from base 10 to integer (also base 10):", int('10', 10)) 

In [None]:
# bin(97) returns the *string* '0b1100001' (a binary representation, not a
# `bytes` object); int(..., 2) parses that base-2 string back into the integer
# (with base 2, int() accepts the leading '0b' prefix)
print("converting binary string for 97 back to 97:", int(bin(97), 2))

In [None]:
# full round trip: binary string (a `str`, not `bytes`) -> integer code point
# -> character
print("converting binary string for 97 back to 'a':", chr(int(bin(97), 2)))

In [None]:
# Going from text to its binary form takes two steps:
#   1. 'encode' the string, which yields a `bytes` object;
#   2. render every byte (iterating over bytes gives ints) with bin().
# The '0b' at the start of each result only marks it as a binary string
# (see the `int()` doc for details).
byte_string = "a".encode(encoding="utf8")  # try adding more letters
list_of_binary_strings = list(map(bin, byte_string))
print(list_of_binary_strings)

In [None]:
# A single Chinese character is encoded as several bytes in UTF-8:
# the list printed below has more than one entry for just one character!
byte_string = "龙".encode(encoding="utf8")
list_of_binary_strings = list(map(bin, byte_string))
print(list_of_binary_strings)

## Indexing

In [None]:
# slice syntax is word[start:end]: start is included, end is excluded,
# so this returns the characters at indices 0, 1 and 2
word[0:3] # JavaScript has no such slice syntax: there you would call word.slice(0, 3)

In [None]:
# with negative numbers, count from the end:
# -1 is the last character, -2 the one before it, and so on
word[-1] # last character

In [None]:
# the full slice syntax is word[start:end:step]
# take 'every n', here every 2
# start and end are left empty, meaning: from 0 to the end, every 2 chars
word[::2] # every second letter, starting with the first one

In [None]:
# hidden text! a word is interleaved with filler characters
# (the next cell extracts it with a stepped slice)
word_hidden = "5sgi7lfefnmcvicoo"
print(word_hidden)

In [None]:
# start at index 1 and keep every second character: this skips the filler
print(word_hidden[1::2])

## Concatenation, splitting, joining

In [None]:
# `+` concatenates strings; nothing is inserted automatically,
# so the separating space has to be added explicitly
two_silencios = word + " " + word
print(two_silencios)

In [None]:
# beware! 1) the splitting char(s) disappear from the result,
# 2) when the separator sits at the very end of the string, you still get
#    an empty string ("after" the split), e.g. "a b ".split(" ") -> ['a', 'b', '']
#    (not the case here: there is no trailing space, so we get exactly two words)
silencios_split1 = two_silencios.split(" ")
print(silencios_split1)

In [None]:
# any substring can act as separator: splitting on "i" removes every "i"
# and the space stays inside one of the pieces
# -> ['s', 'lenc', 'o s', 'lenc', 'o']
silencios_split2 = two_silencios.split("i")
print(silencios_split2)

In [None]:
# how to reverse a split?
# use "separator".join(iterable): the separator string is inserted
# between the items, giving back the original string
print(silencios_split1)
print(" ".join(silencios_split1))

## String constants

[docs](https://docs.python.org/3/library/string.html#string-constants)

In [None]:
import string

In [None]:
# a plain string constant containing the ASCII punctuation characters
string.punctuation # very useful!, can be used to remove all punctuation

## String Methods

[docs](https://docs.python.org/3/library/stdtypes.html#string-methods)

### Search, replace, translate

In [None]:
# find() returns the index of the first occurrence of the substring
# (0 here, as "s" is the first character), or -1 when it is not found
word.find("s")

In [None]:
# replace() returns a *new* string in which every "i" became a space;
# strings are immutable, so `word` itself is left untouched
word.replace("i", " ")

In [None]:
# translate expects a table (here a dict) mapping Unicode code points (ints)
# to their replacement: another code point, a string, or None (to delete);
# hence the ord() calls on both sides
word.translate(
    {
        ord("i"): ord("1"),
        ord("e"): ord("3"),
        ord("c"): ord("<"),
        ord("o"): ord("0"),
    }
)

In [None]:
# str.maketrans builds the same kind of table from two strings of equal
# length: the n-th character of the first is mapped to the n-th of the second
word.translate(
    str.maketrans("ieco", "13<0")
)

In [None]:
# maketrans can take a third argument:
# the first two arguments are empty ("" to ""), so no character is mapped
# to another one, i.e. everything else is kept as is
# and the last argument is for *everything that needs to be removed*
# (equivalent to do `ord(","): None` for all punctuation characters)
# the printed table is a dict {code point: None, ...}
print(str.maketrans("", "", string.punctuation))

In [None]:
# now, we can remove all punctuation from text:
# every character listed in string.punctuation is mapped to None, i.e. deleted,
# while letters and spaces are left alone
print("Hello there! How are you? Yes, you...".translate(
    str.maketrans("", "", string.punctuation)
))

### Working with case

In [None]:
# first character in upper case, all the others in lower case -> 'Silencio'
word.capitalize()

In [None]:
# every letter in upper case -> 'SILENCIO'
word.upper()

In [None]:
# string methods return new strings, so calls can be chained:
# upper case first, then back to lower case -> 'silencio'
word.upper().lower()

In [None]:
# swapcase() inverts the case of every letter: 'Silencio' -> 'sILENCIO'
word.capitalize().swapcase()

## F-strings (formatted string literals)

[tutorial](https://docs.python.org/3/tutorial/inputoutput.html#fancier-output-formatting)  
[f-strings docs](https://docs.python.org/3/library/stdtypes.html#formatted-string-literals-f-strings)  
[`str.format()` doc](https://docs.python.org/3/library/stdtypes.html#str.format)

In [None]:
# the expression between the curly braces is evaluated and inserted in the string
print(f"What is the word? {word}.") # note the leading f

In [None]:
# what follows the colon is the format spec: `<` aligns to the left and
# 80 is the width of the field (the value is padded with spaces)
print(f"What is the word? {word:<80}.") # left aligned, specify the width

In [None]:
# `>` pushes the value to the right end of the 80-character field
print(f"What is the word? {word:>80}.") # right aligned

In [None]:
# `^` centres the value, with padding on both sides
print(f"What is the word? {word:^80}.") # centered

In [None]:
# the format spec can itself contain {}: the inner braces are filled in
# first, so `^{width}` becomes `^80`
width = 80
print(f"What is the word? {word:^{width}}.") # use a variable in the parameter!