### General String Operations

In [46]:
s = "Education is what remains after one has forgotten everything he learned in school."

In [2]:
s.upper()

'EDUCATION IS WHAT REMAINS AFTER ONE HAS FORGOTTEN EVERYTHING HE LEARNED IN SCHOOL.'

In [3]:
s.lower()

'education is what remains after one has forgotten everything he learned in school.'

In [4]:
s.replace("he", "she")

'Education is what remains after one has forgotten everything she learned in school.'

In [5]:
s.capitalize()

'Education is what remains after one has forgotten everything he learned in school.'

In [6]:
s.title()

'Education Is What Remains After One Has Forgotten Everything He Learned In School.'

In [7]:
s.isalpha()

False

In [8]:
s.isnumeric()

False

In [9]:
"123".isnumeric(), "123.45".isnumeric()

(True, False)

In [10]:
def is_numeric(s):
    """Return True if ``s`` can be converted by ``float()``, otherwise False.

    Unlike ``str.isnumeric``, this accepts signs, decimal points and
    exponents (e.g. "-1.5", "1e3"). Anything ``float()`` accepts counts as
    numeric, which includes strings such as "nan" and "inf".

    Parameters
    ----------
    s : object
        Value to test, typically a string.

    Returns
    -------
    bool
        True when ``float(s)`` succeeds, False when the conversion fails.
    """
    try:
        float(s)
    except (TypeError, ValueError, OverflowError):
        # ValueError: unparsable text; TypeError: unsupported type such as None;
        # OverflowError: an int too large to be represented as a float.
        # Other exceptions (e.g. KeyboardInterrupt) are deliberately not caught.
        return False
    return True
is_numeric("123.45"), is_numeric("four")

(True, False)

In [11]:
s.find("forgotten")

40

In [12]:
"forgotten" in s

True

In [13]:
s.split(" ")

['Education',
 'is',
 'what',
 'remains',
 'after',
 'one',
 'has',
 'forgotten',
 'everything',
 'he',
 'learned',
 'in',
 'school.']

In [14]:
parts = ["hello", "world"]
":".join(parts)

'hello:world'

In [47]:
s.title().replace(" ", "|")

'Education|Is|What|Remains|After|One|Has|Forgotten|Everything|He|Learned|In|School.'

In [48]:
"|".join(s.title().split(" "))

'Education|Is|What|Remains|After|One|Has|Forgotten|Everything|He|Learned|In|School.'

In [17]:
import string

In [18]:
string.ascii_letters

'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [19]:
string.digits

'0123456789'

In [49]:
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [20]:
import random

In [21]:
# Use a dedicated name for the character pool so the notebook-wide sentence
# variable `s` is not overwritten (other cells still call s.title(), s.split(), ...).
alphabet = string.ascii_letters + string.digits + "-_"
"".join(random.choices(alphabet, k = 50))

'QhRxGZ7PeQJuQpjORLftkZT5IFUVyOuPlZI0l-zKKlznW4wxyM'

In [90]:
random.choice(string.ascii_uppercase)

'U'

In [108]:
random.choices(string.ascii_letters + string.digits + string.punctuation, k = 5)

['H', '}', '^', '>', '!']

In [110]:
index = random.randint(1, 9)
index

3

In [111]:
random.choice(string.digits)

'0'

In [119]:
s1 = set(string.punctuation)
s1.intersection("hello world#")

{'#'}

In [161]:
def generate_password(n = None):
    """Generate a random password of length ``n``.

    The password always starts with an uppercase ASCII letter and contains at
    least one digit and at least one punctuation character. The remaining
    characters are drawn from ASCII letters, digits and punctuation.

    Parameters
    ----------
    n : int, optional
        Desired length; must be greater than 8. When omitted, a fresh length
        between 9 and 50 is drawn on every call. (A ``random.randint(...)``
        expression placed directly in the signature is evaluated only once,
        when the function is defined.)

    Returns
    -------
    str
        The generated password.

    Raises
    ------
    ValueError
        If ``n`` is not greater than 8.
    """
    # OS-backed randomness: the module-level Mersenne Twister generator is
    # predictable and should not be used for secrets such as passwords.
    rng = random.SystemRandom()
    if n is None:
        n = rng.randint(9, 50)
    if n <= 8:
        raise ValueError("password length must be greater than 8")

    pool = string.ascii_letters + string.digits + string.punctuation
    punctuation = set(string.punctuation)
    while True:
        chars = [rng.choice(string.ascii_uppercase)]
        chars += rng.choices(pool, k = n-1)
        # Overwrite one non-leading position with a digit so that at least
        # one digit is always present.
        chars[rng.randint(1, n-1)] = rng.choice(string.digits)
        # The digit may have replaced the only punctuation character, so the
        # check comes last; redraw in a loop (no recursion) when none is left.
        if punctuation.intersection(chars):
            return "".join(chars)
generate_password()

'BXeZ#=vv;D8]JYT";kj1%^q8FWBY^&}.LtFyC5v3@A4-,'

### Hashing (one-way, not encryption)

In [22]:
import hashlib

In [23]:
text = "Hello World"
# SHA-256 is a one-way cryptographic hash, not encryption: the digest is a
# fixed-length fingerprint and cannot be decrypted back into the input.
digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
digest

'a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e'

### Cryptographic Encryption

In [51]:
from cryptography.fernet import Fernet

In [53]:
key = Fernet.generate_key()
key

b'iyKwj52uSn1_Z_z94sJyCf2er7TAszWJwZe1di5dWM0='

In [55]:
"""
Do not loose the key. Otherwise you will not be able to decrypt.
"""
open("/tmp/fernet.key", "wb").write(key)

44

In [57]:
message = "my secrets".encode("utf-8")
message

b'my secrets'

In [59]:
fernet = Fernet(key)

In [73]:
encrypted = fernet.encrypt(message)
encrypted

b'gAAAAABdQR5Bb6ZAqQ08XIfjETmEHUhTiyrDhVmwyvSXDd4cfnN71sFtpdvnOSVBdF6_EgkEvZjIevrSkpvXM46LNJs3CGqn0g=='

In [74]:
fernet.decrypt(encrypted)

b'my secrets'

### String formatting

In [24]:
"Price of %s stock is %.2f" % ("GE", 10.52)

'Price of GE stock is 10.52'

In [25]:
d = dict(id = "KIAL", location = "Bangalore Intl Airport", max_temp = 29, min_temp = 21, precipitation = 0.2)

In [26]:
"{id:s}  : {location:s} : {max_temp:d}/{min_temp:d} {precipitation:f}".format(**d)

'KIAL  : Bangalore Intl Airport : 29/21 0.200000'

In [27]:
"{id:s}: {location:s} : {max_temp:d}/{min_temp:d} {precipitation:0.2f}".format(**d)

'KIAL: Bangalore Intl Airport : 29/21 0.20'

In [28]:
"{id:s}: {location:s} : {max_temp:d}/{min_temp:d} {precipitation:0.2f}".format_map(d)

'KIAL: Bangalore Intl Airport : 29/21 0.20'

### String parsing

In [29]:
ingredient = "Kumquat: 2 cups"

In [30]:
import re

In [31]:
pattern_text = r'(?P<ingredient>\w+):\s+(?P<amount>\d+)\s+(?P<unit>\w+)'

In [32]:
pattern = re.compile(pattern_text)

In [33]:
match = pattern.match(ingredient)

In [34]:
match is None

False

In [35]:
match.groups()

('Kumquat', '2', 'cups')

In [36]:
match.group('ingredient')

'Kumquat'

In [37]:
match.group('amount')

'2'

In [38]:
match.group('unit')

'cups'

```
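\w matches any word character: letters, digits and the underscore (for str patterns this includes Unicode letters and digits, not only a to z, A to Z, 0 to 9)
\d matches any decimal digit
\s matches any whitespace character (space, tab, newline, ...)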

The uppercase forms invert the match:
\W matches any character that's not a word character (not a letter, a digit or an underscore)
\D matches any character that's not a digit
\S matches any character that's not whitespace


+ as a suffix means to match one or more of the preceding pattern. For example \d+ matches one or more digits. To match an ordinary +, we need to use \+.
* as a suffix matches zero or more of the preceding pattern. \w* matches zero or more word characters. To match a literal *, we need to use \*.
? as a suffix matches zero or one of the preceding expression. This character is also used in other places with a different meaning: we saw it in (?P<name>...), where it appears just inside the ( to mark the group as a named group.
The . matches any single character (except a newline, by default). To match a . specifically, we need to use \.
```

### Examples of parsing HTML

Find all phone numbers from this page https://www.tatamotors.com/contact-us/
    

In [39]:
import requests

In [40]:
# A timeout keeps the cell from hanging forever if the server does not answer,
# and raise_for_status() stops here on an HTTP error instead of parsing an error page.
response = requests.get("https://www.tatamotors.com/contact-us/", timeout=10)
response.raise_for_status()
content = response.text

In [41]:
# Show only the beginning of the page; the full HTML would flood the notebook output.
content[:500]

'<!doctype html>\r\n<html>\r\n<head>\r\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\r\n<meta name="viewport" content="width=device-width; initial-scale=1.0" />\r\n<title>Contact Us - Tata Motors Limited</title>\r\n<link rel="stylesheet" type="text/css" href="https://www.tatamotors.com/wp-content/themes/tatamotors_2019/css/style.css" />\r\n<link rel="stylesheet" type="text/css" href="https://www.tatamotors.com/wp-content/themes/tatamotors_2019/css/nav.css" />\r\n<!--<link rel="stylesheet" type="text/css" href="css/jpreloader.css" />-->\r\n<link rel="stylesheet" type="text/css" href="https://www.tatamotors.com/wp-content/themes/tatamotors_2019/css/slick.css">\r\n<link rel="stylesheet" type="text/css" href="https://www.tatamotors.com/wp-content/themes/tatamotors_2019/css/colorbox.css" />\r\n<link rel="stylesheet" type="text/css" href="https://www.tatamotors.com/wp-content/themes/tatamotors_2019/css/inside.css" />\r\n<link rel="stylesheet" href="https://www.tatamo

In [42]:
# A 3-4 digit area code, an optional hyphen with at most one whitespace
# character on each side, then a 7-8 digit number. The lookarounds keep the
# pattern from matching a slice of a longer run of digits.
pattern = re.compile(r"((?<!\d)\d{3,4}\s?-?\s?\d{7,8}(?!\d))")
pattern.findall(content)

['0124-2828904',
 '0124-2828900',
 '033-66027502',
 '022-67927055',
 '080-66373598',
 '022 - 62407101',
 '022 - 67927272',
 '020 - 67168700',
 '079 - 67772712',
 '0124-2828900',
 '0522-6668600',
 '044 - 66500900',
 '0484 - 6601400',
 '033 - 66027400',
 '0651 - 6560119',
 '0361 - 2237756',
 '0674 - 6626100',
 '080- 25320321',
 '080-25580019',
 '0657 - 2426616',
 '0657 - 2426937',
 '033 - 22883087',
 '033 - 22883062',
 '011 - 23271805',
 '011 - 23271802']

In [43]:
# The dot before "com" is escaped so that it matches a literal "."; unescaped
# it matches any character (e.g. "https://welcom" inside "https://welcome").
pattern = re.compile(r"(http[s]?://[\w.]+\.com)")
pattern.findall(content)

['https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'http://www.tata.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'http://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'http://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tatamotors.com',
 'https://www.tat

In [44]:
# The dot before "com" is escaped so that it matches a literal "."; unescaped
# it matches any character (e.g. "user@telecom" would be accepted).
pattern = re.compile(r"([\w.]+@[\w.]+\.com)")
pattern.findall(content)

['itclcomplianceofficer@vistra.com',
 'itclcomplianceofficer@vistra.com',
 'example@example.com',
 'example@example.com']