# 4. Input and Output

## 4.1 Output Formatting

### Basic usage of str.format()

In [None]:
# Empty {} fields are filled by the positional arguments, left to right.
greeting = 'We are the {} who say "{}!"'.format('knights', 'Ni')
print(greeting)

In [None]:
# Numbered fields select format() arguments by position; {0} is the first.
in_order = '{0} and {1}'.format('spam', 'eggs')
print(in_order)

In [None]:
# Swapping the indices swaps the output order without touching the arguments.
swapped = '{1} and {0}'.format('spam', 'eggs')
print(swapped)

In [None]:
# Positional indices and keyword names can be mixed in one template.
story = 'The story of {0}, {1}, and {other}.'.format(
    'Bill', 'Manfred', other='Georg'
)
print(story)

In [None]:
import math
# Unformatted value of pi, shown with print()'s default float representation.
pi_value = math.pi
print(pi_value)

### An optional ':' and format specifier can follow the field name. This allows greater control over how the value is formatted.

In [None]:
# ':.3f' after the field index formats the value with three digits after the
# decimal point.
approx = 'The value of PI is approximately {0:.3f}.'.format(math.pi)
print(approx)

In [None]:
table = {'Sjoerd': 4127, 'Jack': 4098, 'Dcab': 7678}
# ':10' pads the name to a minimum width of 10; ':10d' formats the number as
# an integer in a field of width 10.
row_template = '{0:10} ==> {1:10d}'
for name, phone in table.items():
    print(row_template.format(name, phone))

## 4.2 Reading and Writing Files

### open() returns a file object, and is most commonly used with two arguments: open(filename, mode). Mode can be 'r' when the file will only be read, or 'w' when it will only be written.

In [None]:
# Mode 'w' opens text.txt for writing only.
f = open('text.txt', mode='w')

### A file should be closed when you finish using it.

In [None]:
f.close()
# NOTE: f was closed on the previous line, so this read() raises
# "ValueError: I/O operation on closed file" -- a closed file object can no
# longer be used.
f.read()

In [None]:
# write()/writelines() do not add line endings, so each string carries its own '\n'.
lines = [
    "This is first line\n",
    "This is second line\n",
    "This is third line\n",
]
f = open('text.txt', 'w')
f.writelines(lines)
f.close()

In [None]:
f = open('text.txt', 'r')
# Each line keeps its trailing '\n' and print() appends another line ending,
# so the output is double-spaced.
for line in f.readlines():
    print(line)
f.close()

### Use strip() to trim the newline character

In [None]:
f = open('text.txt', 'r')
for line in f:
    # strip() removes leading and trailing whitespace, including the trailing
    # newline, so print() emits a single line break per line.
    trimmed = line.strip()
    print(trimmed)
f.close()

### Use the with statement when opening a file.

In [None]:
# The with block closes f on exit, so no explicit close() call is needed.
with open('text.txt', 'r') as f:
    for stripped in map(str.strip, f):
        print(stripped)

## 4.3 Regular Expressions

### The Python "re" module provides regular expression support.

In [None]:
import re

In [None]:
s = 'an example word:cat!!'
# Look for the literal 'word:' followed by exactly three word characters.
match = re.search(r'word:\w\w\w', s)
if match is None:
    print('did not find')
else:
    print('found', match.group())  # prints: found word:cat

### Basic Patterns
* a, X, 9, < -- ordinary characters just match themselves exactly. The meta-characters which do not match themselves because they have special meanings are: ```. ^ $ * + ? { [ ] \ | ( )``` (details below)
* . (a period) -- matches any single character except newline '\n'
* \w -- (lowercase w) matches a "word" character: a letter or digit or underbar [a-zA-Z0-9_]. Note that although "word" is the mnemonic for this, it only matches a single word char, not a whole word. \W (upper case W) matches any non-word character.
* \b -- boundary between word and non-word
* \s -- (lowercase s) matches a single whitespace character -- space, newline, return, tab, form feed [ \n\r\t\f]. \S (upper case S) matches any non-whitespace character.
* \t, \n, \r -- tab, newline, return
* \d -- decimal digit [0-9] (some older regex utilities do not support \d, but they all support \w and \s)
* ^ = start, $ = end -- match the start or end of the string
* \ -- inhibit the "specialness" of a character. So, for example, use \. to match a period or \\ to match a backslash. If you are unsure if a character has special meaning, such as '@', you can put a backslash in front of it, \@, to make sure it is treated just as a character.


In [None]:
def match_result(match):
    """Report the outcome of a regex search.

    Prints 'found <matched text>' when `match` is a match object, and
    'did not find' when it is falsy (re.search returns None on failure).
    """
    if not match:
        print('did not find')
        return
    print('found', match.group())
        
# (pattern, text) pairs searched in order; the trailing comment on each pair
# is the line it prints.
basic_examples = [
    (r'iii', 'piiig'),        # found iii
    (r'igs', 'piiig'),        # did not find
    (r'..g', 'piiig'),        # found iig
    (r'\d\d\d', 'p123g'),     # found 123
    (r'\w\w\w', '@@abcd!!'),  # found abc
]
for pattern, text in basic_examples:
    match_result(re.search(pattern, text))

### Repetition
* '+' -> 1 or more occurrences of the pattern to its left, e.g. 'i+' = one or more i's
* '*' -> 0 or more occurrences of the pattern to its left
* '?' -> match 0 or 1 occurrences of the pattern to its left

In [None]:
# (pattern, text) pairs searched in order; the trailing comment on each pair
# is the line it prints.
repetition_examples = [
    (r'pi+', 'piiig'),                   # found piii
    (r'i+', 'piigiiii'),                 # found ii (leftmost match wins)
    (r'\d\s*\d\s*\d', 'xx1 2   3xx'),    # found 1 2   3
    (r'\d\s*\d\s*\d', 'xx12  3xx'),      # found 12  3
    (r'\d\s*\d\s*\d', 'xx123xx'),        # found 123
    (r'^b\w+', 'foobar'),                # did not find ('^' anchors at the start)
    (r'b\w+', 'foobar'),                 # found bar
]
for pattern, text in repetition_examples:
    match_result(re.search(pattern, text))

### Square Brackets
Square brackets can be used to indicate a set of chars, so [abc] matches 'a' or 'b' or 'c'.

In [None]:
s = 'abc-123@gmail.com'
# [\w.-]+ matches a run of word characters, dots and dashes on either side
# of the '@'.
match = re.search(r'[\w.-]+@[\w.-]+', s)
if match is not None:
    print(match.group())  # prints: abc-123@gmail.com

### Group Extraction
The "group" feature of a regular expression allows you to pick out parts of the matching text.

In [None]:
s = 'purple abc-123@gmail.com monkey dishwasher'
# The pattern is a raw string so that the backslash in \w reaches the regex
# engine untouched; in a plain string literal '\w' is an invalid escape
# sequence that Python warns about.
# Each parenthesised part of the pattern is a group that can be read back
# separately from the match object.
match = re.search(r'([\w.-]+)@([\w.-]+)', s)
if match:
    print(match.group())   # whole match: abc-123@gmail.com
    print(match.group(1))  # first group (before the '@'): abc-123
    print(match.group(2))  # second group (after the '@'): gmail.com

### Findall
findall() finds **all** the matches and returns them as a list of strings

In [None]:
s = 'purple abc-123@gmail.com, blah monkey 1234-abc@gmail.com blah dishwasher'
# The pattern has no groups, so findall() returns the full text of every
# non-overlapping match as a list of strings.
email_pattern = re.compile(r'[\w\.-]+@[\w\.-]+')
emails = email_pattern.findall(s)
for address in emails:
    print(address)

## Exercise 4.1 Format the area of the triangle from the exercise in notebook1 to three decimal places

In [None]:
# Side lengths of the triangle.
a, b, c = 5, 6, 7
# Heron's formula: s is the semi-perimeter, and the area is the square root
# of s * (s - a) * (s - b) * (s - c).
s = (a + b + c) / 2
product = s * (s - a) * (s - b) * (s - c)
area = product ** 0.5
message = "The area is {:.3f}".format(area)
print(message)

## Exercise 4.2 Use output formatting for the multiplication table exercise in notebook2

In [None]:
def multiplication_table(a, b):
    """Print the 1-9 multiplication table for every integer from a to b inclusive.

    Each table is preceded by print('\\n'), i.e. a newline character plus
    print()'s own line ending. Nothing is printed when a > b.
    """
    row = "{} x {} = {}"
    for multiplicand in range(a, b + 1):
        print('\n')
        rows = [
            row.format(multiplicand, factor, multiplicand * factor)
            for factor in range(1, 10)
        ]
        # The list always holds nine rows, so one joined print emits exactly
        # the same text as printing the rows one by one.
        print("\n".join(rows))


multiplication_table(2, 9)

## Exercise 4.3 Counting jobNo and action from csv
Hints: use split() to split the column

In [None]:
# Occurrence counts keyed by the second and third '|'-separated column.
actions = {}
jobnos = {}
with open('../data/user_log.csv') as f:
    for line in f:
        # Lines containing 'action' are skipped -- presumably the header row;
        # verify against the data file.
        if 'action' in line:
            continue
        fields = line.strip().split('|')

        action = fields[1]
        actions[action] = actions.get(action, 0) + 1

        jobno = fields[2]
        jobnos[jobno] = jobnos.get(jobno, 0) + 1
print(actions)
print(jobnos)

In [None]:
# IPython shell escape: runs `cat` to display the raw contents of the CSV file.
!cat ../data/user_log.csv

## Exercise 4.4 Find top 10 word count
Hints: You may need split( ), strip( ), lower(), regular expressions

In [None]:
d = {'d': 2, 'a': 1, 'b': 3, 'c': 0}
d.items()          # view of (key, value) pairs
sorted(d.items())  # pairs ordered by key
# Pair each value with its key as (value, key) so that sorting orders by
# value first.
l = list(zip(d.values(), d.keys()))
sorted(l, reverse=True)  # largest value first

In [None]:
# Occurrence count for each lower-cased, whitespace-separated token.
word_cnt = {}
with open('../data/alice.txt') as f:
    for line in f:
        for word in line.strip().lower().split():
            word_cnt[word] = word_cnt.get(word, 0) + 1
# (count, word) tuples sort by count first; reverse=True puts the most
# frequent words at the front.
result = sorted(
    ((count, token) for token, count in word_cnt.items()),
    reverse=True,
)
result[:10]