62. Unicode - Coding

In [11]:
# Basics: ord() gives the integer code point of a character, hex() shows it in base 16
print(ord('A'))
for symbol in ('α', '🐍'):
    code_point = ord(symbol)
    print(f'{symbol} -> {code_point} {hex(code_point)}')

65
α -> 945 0x3b1
🐍 -> 128013 0x1f40d


In [12]:
# Convert a hexadecimal string back to a decimal integer
print(int('0x1f40d', base=16))

128013


In [13]:
# Unicode characters that are letters or numbers are allowed in variable names

α = 'alpha'
print(f'{α}')

alpha


In [14]:
# But we can not use all Unicode symbols. Snake for example can not be used.
# The assignment is compiled from a string so that the SyntaxError can be caught
# and displayed instead of aborting the notebook on "Restart & Run All".

snake_assignment = "🐍 = 'snake'"
snake_error = None
try:
    compile(snake_assignment, '<cell>', 'exec')
except SyntaxError as err:
    # The emoji is not a letter or a number, so it is rejected as an identifier.
    snake_error = f'SyntaxError: {err}'
    print(snake_error)

SyntaxError: invalid character '🐍' (U+1F40D) (4218023384.py, line 3)

In [20]:
# Writing Unicode characters that are not available on the keyboard

# By name: \N{...} takes the character's Unicode name
hand = '\N{Raised Hand}'

# By numeric code:
#   \u (lower case) is followed by 4 hex digits
#   \U (capital)    is followed by 8 hex digits
foot = '\U0001F9B6'

print(hand, foot, sep='\n')

✋
🦶


64. Common String Methods - Coding

In [24]:
# Basics: the three case-conversion methods

message = 'Never give up and make the next step'

# upper -> all capitals, lower -> all small letters, title -> every word capitalised
for convert in (str.upper, str.lower, str.title):
    print(convert(message))


NEVER GIVE UP AND MAKE THE NEXT STEP
never give up and make the next step
Never Give Up And Make The Next Step


In [30]:
# Casefolding

street = 'stra\N{LATIN SMALL LETTER SHARP S}e'
print(street)

# Upper-casing expands the sharp s into 'SS'
street_upper = street.upper()
print(street_upper)

# Comparing the lower-cased forms returns False, comparing the casefolded forms returns True
upper_lowered, street_lowered = street_upper.lower(), street.lower()
same_after_lower = upper_lowered == street_lowered
same_after_casefold = street_upper.casefold() == street.casefold()
print(upper_lowered, street_lowered, same_after_lower, same_after_casefold)

straße
STRASSE
strasse straße False True


In [34]:
# Stripping trailing characters (the quotes make the removed space visible)

name1 = 'Henry '
print(f"'{name1}'", f"'{name1.rstrip(' ')}'")

'Henry ' 'Henry'


In [36]:
# Stripping both sides

# Without an argument strip() removes whitespace (here tabs and spaces)
name2 = '\t \tAAA\t \t'
print(f"'{name2}'", f"'{name2.strip()}'")

# With an argument it removes any of the given characters from both ends
name3 = 'aaabbCCCCCababba'
trimmed_name3 = name3.strip('ab')
print(trimmed_name3)

'	 	AAA	 	' 'AAA'
CCCCC


In [40]:
# Splitting a string on a separator

field = '1, Sofia, 1000'

field_in_list = field.split(',')
print(field_in_list)

# Unpacking (named record_id rather than id so the builtin id() is not shadowed
# for the rest of the kernel session)
record_id, city, value = field_in_list
print(record_id, city, value)

# The pieces still carry the space that followed each comma - strip them all
record_id, city, value = (part.strip() for part in (record_id, city, value))
print(record_id, city, value)

# join back
fixed_list = [record_id, city, value]
field2 = ',---'.join(fixed_list)
print(field2)

['1', ' Sofia', ' 1000']
1  Sofia  1000
1 Sofia 1000
1,---Sofia,---1000


In [42]:
# Join on string: a string is a sequence of characters, so join() works on it too

plus_signs = '+' * 20
a = '*'.join(plus_signs)
print(a)

# Joining the result again puts two spaces between every character
a = '  '.join(a)
print(a)

+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+
+  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +  *  +


In [47]:
# Containment test with the `in` operator

b = 'aBdecsecdefeddses'
needle = 'bdec'

print('cde' in b)
# The test is case sensitive ...
print(needle in b)
# ... unless both sides are casefolded first
print(needle.casefold() in b.casefold())

True
False
True


In [48]:
# `in` works with any sequence, not only strings

numbers = [1, 2, 3]
print(1 in numbers)

True


In [50]:
# Containment tests anchored to the start or the end of a string

country, town = 'Bulgaria', 'Plovdiv'
print(country.startswith('Bul'))
# Casefold both sides to make the suffix test case insensitive
print(town.casefold().endswith('DIV'.casefold()))

True
True


In [52]:
# Find the position of a substring

# Choice 1: index - returns the index of the first match

c = "If you've nothing nice to say, say nothing."
first_match = c.index('nothing')
print(first_match)


10


In [53]:
# Getting help
# NOTE: the leading `?` is IPython/Jupyter syntax that displays the object's docstring;
# it is not valid in a plain Python script, where help(str.index) is the equivalent.

?str.index

Docstring:
S.index(sub[, start[, end]]) -> int

Return the lowest index in S where substring sub is found,
such that sub is contained within S[start:end].  Optional
arguments start and end are interpreted as in slice notation.

Raises ValueError when the substring is not found.
Type:      method_descriptor

In [60]:
from timeit import timeit

# Time three ways of looking for 'say' in c: the `in` operator, index() and find().
# Every statement is executed 100 million times, so this cell takes a while to run.
statements = ("'say' in c", "c.index('say')", "c.find('say')")
c1, c2, c3 = (
    timeit(statement, globals=globals(), number=100_000_000)
    for statement in statements
)

print(c1, c2, c3)

1.855871011968702 7.792377354984637 6.266531792993192


66. String Interpolation - Coding

In [63]:
# Basics of str.format()

width, high, dept = 70, 30, 10

# Positional placeholders - values are filled in strictly in the order they are passed
positional_template = 'The object is {} width, {} high, {} in dept'
print(positional_template.format(width, high, dept))

# Named placeholders - values are matched by name, so their order does not matter
named_values = {'d': dept, 'w': width, 'h': high}
print('The object is {w} width, {h} high, {d} in dept'.format(**named_values))

The object is 70 width, 30 high, 10 in dept
The object is 70 width, 30 high, 10 in dept


In [62]:
# f string: expressions inside {} are evaluated in place

city, population = 'Sofia', 1_500_000

summary = f'The city {city} has population of {population} as per the last year.'
print(summary)

The city Sofia has population of 1500000 as per the last year.


In [64]:
# formatting: ':.Nf' after the placeholder name fixes the number of decimals to N

width, high, dept = 70.1437, 30.002121, 10.23
measurements = (width, high, dept)

# The same format spec works with positional placeholders, named placeholders and f strings
print('The object is {:.4f} width, {:.2f} high, {:.3f} in dept'.format(*measurements))
print('The object is {w:.4f} width, {h:.2f} high, {d:.3f} in dept'.format(w=width, h=high, d=dept))
print(f'The object is {width:.4f} width, {high:.2f} high, {dept:.3f} in dept')

The object is 70.1437 width, 30.00 high, 10.230 in dept
The object is 70.1437 width, 30.00 high, 10.230 in dept
The object is 70.1437 width, 30.00 high, 10.230 in dept


# Exercises

Given this string of comma separated characters, create three new variables containing the unicode codepoint (in hex), uppercase and lower case versions of each character (also comma delimited).

For example, if the string was `'a, b, c'` you should generate three lists that look like:
* `['0x61', '0x62', '0x63']`
* `['a', 'b', 'c']`
* `['A', 'B', 'C']`

[You should use the `split()` and `strip()` functions, amongst others, to help you solve this.]

In [65]:
# Input for the first exercise: Greek letters separated by a comma and a space
s = 'Π, ύ, θ, ω, ν'

In [79]:
from copy import deepcopy

# Split on the commas and strip the whitespace around every piece. Comprehensions
# handle any number of characters, not just exactly five.
s_list = [char.strip() for char in s.split(',')]

print(s_list)

# Build each requested list directly from the cleaned characters:
# hex code point, lower-case form and upper-case form.
s_list_hex = [hex(ord(char)) for char in s_list]
s_list_lower = [char.lower() for char in s_list]
s_list_upper = [char.upper() for char in s_list]

print(s_list_hex)
print(s_list_lower)
print(s_list_upper)

['Π', 'ύ', 'θ', 'ω', 'ν']
['0x3a0', '0x3cd', '0x3b8', '0x3c9', '0x3bd']
['π', 'ύ', 'θ', 'ω', 'ν']
['Π', 'Ύ', 'Θ', 'Ω', 'Ν']


Using two types of string interpolation, and given the variable a that contains an integer, print out the following string for a:

The number ...value of a... is (or is not) even

For example, if a is 42, then your code should print:

'The number 42 is even'

But if a is 31, then the same code should print:

'The number 31 is not even'



In [84]:
val = 42

# Pick the wording in one expression instead of assigning a default and overriding it
stat = 'is' if val % 2 == 0 else 'is not'

# Interpolation type 1: str.format()
print('The number {} {} even'.format(val, stat))
# Interpolation type 2: f string
print(f'The number {val} {stat} even')
# The expression can also live inside the f string. The outer quotes are double so
# the inner single-quoted strings also parse on Python versions before 3.12.
print(f"The number {val} {'is' if val % 2 == 0 else 'is not'} even")

The number 42 is even
The number 42 is even
The number 42 is even


You are given two variables `a` and `b` (with `b` non-zero), and you need to generate a string that reads something like this:

```
'a / b = (result)'
```

But you want your string to be nicely formatted for display purposes, so you want to limit the number of digits displayed after the decimal point to 4 in all your values.

In [88]:
a, b = 21, 5

# ':.4f' shows the quotient with four digits after the decimal point
print('{} / {} = ({:.4f})'.format(a, b, a / b))

21 / 5 = (4.2000)
