# Chapter 8: Strings: A Deeper Look
## Jarrod Sims
## 8.2 Formatting Strings

### 8.2.1 Presentation Types

In [1]:
f'{17.489:.2f}'

'17.49'

In [2]:
f'{10:d}'

'10'

In [3]:
f'{65:c} {97:c}'

'A a'

In [4]:
f'{"hello":s} {7}'

'hello 7'

In [6]:
from decimal import Decimal
f' {Decimal("100000000000000000000000000.0"):.3f}'

' 100000000000000000000000000.000'

In [7]:
f' {Decimal("100000000000000000000000000.0"):.3e}'

' 1.000e+26'

In [8]:
print(f'{58:c}{45:c}{41:c}')

:-)


### 8.2.2 Field Widths and Alignment      

In [10]:
f'[{27:10d}]'

'[        27]'

In [11]:
f'[{3.5:10f}]'

'[  3.500000]'

In [12]:
f'[{"hello":10}]'

'[hello     ]'

In [13]:
f'[{27:<15d}]'

'[27             ]'

In [14]:
f'[{3.5:<15f}]'

'[3.500000       ]'

In [15]:
f'[{"hello":>15}]'

'[          hello]'

In [16]:
f'[{27:^7d}]'

'[  27   ]'

In [17]:
f'[{3.5:^7.1f}]'

'[  3.5  ]'

In [18]:
f'[{"hello":7}]'

'[hello  ]'

In [20]:
print(f'[{"Amanda":>10}\n{"Amanda":^10}\n[{"Amanda":<10}]')

[    Amanda
  Amanda  
[Amanda    ]


### 8.2.3 Numeric Formatting

In [21]:
f'[{27:+10d}]'

'[       +27]'

In [22]:
f'[{27:+010d}]'

'[+000000027]'

In [24]:
print(f'{27:d}\n{27: d}\n{-27: d}')

27
 27
-27


In [25]:
f'{12345678:,d}'

'12,345,678'

In [26]:
f'{123456.78:,.2f}'

'123,456.78'

In [27]:
print(f'{10240.473:+10,.2f}\n{-3210.9521:+10,.2f}')

+10,240.47
 -3,210.95


In [28]:
'{:.2f}'.format(17.489)

'17.49'

In [29]:
'{} {}'.format('Amanda', 'Cyan')

'Amanda Cyan'

In [31]:
'{0} {0} {1}'.format('Happy', 'Birthday')

'Happy Happy Birthday'

In [33]:
'{first} {last}'.format(first='Amanda', last='Cyan')

'Amanda Cyan'

In [34]:
'{last} {first}'.format(first='Amanda', last='Cyan')

'Cyan Amanda'

### 8.3 Concatenating and Repeating Strings

In [38]:
s1 = 'happy'
s2 = 'birthday'
s1 += ' ' + s2
s1

'happy birthday'

In [40]:
symbol = '>'
symbol *= 5
symbol

'>>>>>'

In [42]:
name = 'Jarrod'
name += ' Sims'
bar = '*'
bar *= len(name)
print(f'{bar}\n{name}\n{bar}')

***********
Jarrod Sims
***********


### 8.4 Stripping Whitespace from Strings

In [44]:
sentence = '\t \n This is a test string. \t\t \n'
sentence.strip()

'This is a test string.'

In [45]:
sentence.lstrip()

'This is a test string. \t\t \n'

In [46]:
sentence.rstrip()

'\t \n This is a test string.'

In [47]:
name = '      Margo Magenta       '
name.strip()

'Margo Magenta'

In [48]:
name.lstrip()

'Margo Magenta       '

In [49]:
name.rstrip()

'      Margo Magenta'

### 8.5 Changing Character Case

In [51]:
'happy birthday'.capitalize()

'Happy birthday'

In [53]:
'strings: a deeper look'.title()

'Strings: A Deeper Look'

In [56]:
test_string = 'happy new year'
test_string.capitalize()

'Happy new year'

In [57]:
test_string.title()

'Happy New Year'

### 8.6 Comparison Operators for Strings

In [59]:
print(f'A: {ord("A")}; a: {ord("a")}')

A: 65; a: 97


In [60]:
'Orange' == 'orange'

False

In [61]:
'Orange' != 'orange'

True

In [62]:
'Orange' < 'orange'

True

In [63]:
'Orange' <= 'orange'

True

In [64]:
'Orange' > 'orange'

False

In [65]:
'Orange' >= 'orange'

False

### 8.7 Searching for Substrings

In [66]:
sentence = 'to be or not to be that is the question'
sentence.count('to')

2

In [67]:
sentence.count('to' , 12)

1

In [68]:
sentence.count('that', 12, 25)

1

In [69]:
sentence.index('be')

3

In [70]:
sentence.rindex('be')

16

In [71]:
'that' in sentence

True

In [72]:
'THAT' in sentence

False

In [73]:
'THAT' not in sentence

True

In [74]:
sentence.startswith('to')

True

In [75]:
sentence.startswith('be')

False

In [76]:
sentence.endswith('quest')

False

In [77]:
for word in 'to be or not to be that is the question'.split():
    if word.startswith('t'):
        print(word, end= ' ')

to to that the 

### 8.8 Replacing Substrings

In [80]:
values = '1\t2\t3\t4\t5'
values.replace('\t', ',')

'1,2,3,4,5'

In [81]:
'1 2 3 4 5'.replace(' ',' --> ')

'1 --> 2 --> 3 --> 4 --> 5'

### 8.9 Splitting and Joining Strings

In [82]:
letters = 'A, B, C, D'
letters.split(', ')

['A', 'B', 'C', 'D']

In [83]:
letters.split(', ', 2)

['A', 'B', 'C, D']

In [84]:
letters_list = ['A', 'B', 'C', 'D']
','.join(letters_list)

'A,B,C,D'

In [85]:
','.join([str(i) for i in range(10)])

'0,1,2,3,4,5,6,7,8,9'

In [86]:
'Amanda: 89, 97, 92'.partition(': ')

('Amanda', ': ', '89, 97, 92')

In [88]:
url = 'https://nwmissouri.instructure.com/courses/47563/assignments/728305'
rest_of_url, separator, document = url.rpartition('/')
document

'728305'

In [89]:
rest_of_url

'https://nwmissouri.instructure.com/courses/47563/assignments'

In [90]:
lines = """This is line1
This is line 2
This is line 3"""
lines

'This is line1\nThis is line 2\nThis is line 3'

In [91]:
lines.splitlines()

['This is line1', 'This is line 2', 'This is line 3']

In [92]:
lines.splitlines(True)

['This is line1\n', 'This is line 2\n', 'This is line 3']

In [94]:
', '.join(reversed('Pamela White'.split()))

'White, Pamela'

### 8.10 Characters and Character-Testing Methods

In [95]:
'-27'.isdigit()

False

In [96]:
'27'.isdigit()

True

In [97]:
'A9876'.isalnum()

True

In [98]:
'123 Main Street'.isalnum()

False

In [100]:
file_path = 'C:\\Users\\jarro\\OneDrive\\Documents\\44-608\\608-mod5'
file_path

'C:\\Users\\jarro\\OneDrive\\Documents\\44-608\\608-mod5'

In [101]:
file_path = r'C:\Users\jarro\OneDrive\Documents\44-608\608-mod5'
file_path

'C:\\Users\\jarro\\OneDrive\\Documents\\44-608\\608-mod5'

### 8.12 Introduction to Regular Expressions

In [2]:
import re
pattern = '02215'
'Match' if re.fullmatch(pattern, '02215') else 'No match'

'Match'

In [3]:
'Match' if re.fullmatch(pattern, '51220') else 'No match'

'No match'

In [5]:
'Valid' if re.fullmatch(r'\d{5}', '02215') else 'Invalid'

'Valid'

In [6]:
'Valid' if re.fullmatch(r'\d{5}', '9876') else 'Invalid'

'Invalid'

In [7]:
'Valid' if re.fullmatch('[A-Z][a-z]*', 'Wally') else 'Invalid'

'Valid'

In [8]:
'Valid' if re.fullmatch('[A-Z][a-z]*', 'eva') else 'Invalid'

'Invalid'

In [9]:
'Match' if re.fullmatch('[^a-z]', 'A') else 'No match'

'Match'

In [10]:
'Match' if re.fullmatch('[^a-z]', 'a') else 'No match'

'No match'

In [11]:
'Match' if re.fullmatch('[*+$]', '*') else 'No match'

'Match'

In [12]:
'Match' if re.fullmatch('[*+$]', '!') else 'No match'

'No match'

In [13]:
'Valid' if re.fullmatch('[A-Z][a-z]+', 'Wally') else 'Invalid'

'Valid'

In [14]:
'Valid' if re.fullmatch('[A-Z][a-z]+', 'E') else 'Invalid'

'Invalid'

In [15]:
'Match' if re.fullmatch('labell?ed', 'labelled') else 'No match'

'Match'

In [16]:
'Match' if re.fullmatch('labell?ed', 'labeled') else 'No match'

'Match'

In [17]:
'Match' if re.fullmatch('labell?ed', 'labellled') else 'No match'

'No match'

In [19]:
'Match' if re.fullmatch(r'\d{3,}', '123') else 'No match'

'Match'

In [20]:
'Match' if re.fullmatch(r'\d{3,}', '1234567890') else 'No match'

'Match'

In [21]:
'Match' if re.fullmatch(r'\d{3,}', '12') else 'No match'

'No match'

In [22]:
'Match' if re.fullmatch(r'\d{3,6}', '123') else 'No match'

'Match'

In [23]:
'Match' if re.fullmatch(r'\d{3,6}', '123456') else 'No match'

'Match'

In [24]:
'Match' if re.fullmatch(r'\d{3,6}', '1234567') else 'No match'

'No match'

In [25]:
'Match' if re.fullmatch(r'\d{3,6}', '12') else 'No match'

'No match'

In [27]:
street = r'\d+ [A-Z][a-z]* [A-Z][a-z]*'
'Match' if re.fullmatch(street, '123 Main Street') else 'No match'

'Match'

In [28]:
'Match' if re.fullmatch(street, 'Main Street') else 'No match'

'No match'

In [33]:
re.sub(r'\t', ', ', '1\t2\t3\t4')

'1, 2, 3, 4'

In [34]:
re.sub(r'\t', ', ', '1\t2\t3\t4', count=2)

'1, 2, 3\t4'

In [37]:
re.split(r',\s*', '1  2,  3,4,   5,6,7,8')

['1  2', '3', '4', '5', '6', '7', '8']

In [38]:
re.split(r',\s*', '1  2,  3,4,   5,6,7,8', maxsplit=3)

['1  2', '3', '4', '5,6,7,8']

In [39]:
result = re.search('Python', 'Python is fun')
result.group() if result else 'not found'

'Python'

In [42]:
result2 = re.search('fun!', 'Python is fun')
result2.group() if result2 else 'not found'

'not found'

In [44]:
result3 = re.search('Sam', 'SAM WHITE', flags=re.IGNORECASE)
result3.group() if result3 else 'not found'

'SAM'

In [47]:
result = re.search('^Python', 'Python is fun')
result.group() if result else 'not found'

'Python'

In [48]:
result = re.search('^fun', 'Python is fun')
result.group() if result else 'not found'

'not found'

In [50]:
result = re.search('Python$', 'Python is fun')
result.group() if result else 'not found'

'not found'

In [51]:
result = re.search('fun$', 'Python is fun')
result.group() if result else 'not found'

'fun'

In [54]:
contact = 'Wally White, Home: 555-555-1234, Work: 555-555-4321'
re.findall(r'\d{3}-\d{3}-\d{4}', contact)

['555-555-1234', '555-555-4321']

In [55]:
for phone in re.finditer(r'\d{3}-\d{3}-\d{4}', contact):
    print(phone.group())

555-555-1234
555-555-4321


In [58]:
text = 'Charley Cyan, email: demo1@deitel.com'
pattern = r'([A-Z][a-z]+ [A-Z][a-z]+), email: (\w+@\w+\.\w{3})'
result = re.search(pattern, text)

In [59]:
result.groups()

('Charley Cyan', 'demo1@deitel.com')

In [60]:
result.group()

'Charley Cyan, email: demo1@deitel.com'

In [61]:
result.group(1)

'Charley Cyan'

In [62]:
result.group(2)

'demo1@deitel.com'

In [66]:
result = re.search(r'(\d+) ([-+*/]) (\d+)', '10 + 5')
result.groups()

('10', '+', '5')

In [67]:
result.group(1)

'10'

In [68]:
result.group(2)

'+'

In [69]:
result.group(3)

'5'

### 8.13 Intro to Data Science: Pandas, Regular Expressions and Data Mining

In [70]:
print('Jarrod Sims')

Jarrod Sims


### Data Validation

In [71]:
import pandas as pd
zips = pd.Series({'Boston': '02215', 'Miami': '3310'})
zips

Boston    02215
Miami      3310
dtype: object

In [72]:
zips.str.match(r'\d{5}')

Boston     True
Miami     False
dtype: bool

In [73]:
print('Jarrod Sims')

Jarrod Sims


In [74]:
cities = pd.Series(['Boston, MA 02215', 'Miami, FL 33101'])
cities

0    Boston, MA 02215
1     Miami, FL 33101
dtype: object

In [75]:
cities.str.contains(r' [A-Z]{2} ')

0    True
1    True
dtype: bool

In [76]:
cities.str.match(r' [A-Z]{2} ')

0    False
1    False
dtype: bool

### Reformatting Your Data

In [86]:
print('Jarrod Sims')

Jarrod Sims


In [79]:
contacts = [['Mike Green', 'demo1@deitel.com', '5555555555'],
            ['Sue Brown', 'demo2@deitel.com', '5555551234']]
contactsdf = pd.DataFrame(contacts,
                          columns=['Name', 'Email', 'Phone'])
contactsdf

Unnamed: 0,Name,Email,Phone
0,Mike Green,demo1@deitel.com,5555555555
1,Sue Brown,demo2@deitel.com,5555551234


In [82]:
import re
def get_formatted_phone(value):
    result = re.fullmatch(r'(\d{3})(\d{3})(\d{4})', value)
    return '-'.join(result.groups()) if result else value

In [84]:
formatted_phone = contactsdf['Phone'].map(get_formatted_phone)
formatted_phone

0    555-555-5555
1    555-555-1234
Name: Phone, dtype: object

In [85]:
contactsdf['Phone'] = formatted_phone
contactsdf

Unnamed: 0,Name,Email,Phone
0,Mike Green,demo1@deitel.com,555-555-5555
1,Sue Brown,demo2@deitel.com,555-555-1234


In [None]:
Jarrod Sims