In [2]:
# 3.3 Project using the CSV module to read data
import csv

total = 0.0
with open('Data/portfolio2.csv', 'r') as f:
    rows = csv.reader(f)
    headers = next(rows)    # pull the header line off so the loop only sees data rows
    for row in rows:
        # Convert the shares and price fields in place, then add their product.
        row[2] = int(row[2])
        row[3] = float(row[3])
        shares, price = row[2], row[3]
        total = total + shares * price

print('Total cost: ', total)

Total cost:  44671.15


In [4]:
# 4.2 Project moving a script into a function
import csv

def portfolio_cost(filename):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data

    filename: path of the CSV file. The first line is treated as a header and
              skipped; row[2] (shares) is read as int and row[3] (price) as float.
    Returns the float sum of shares*price over the data rows; 0.0 when the
    file is empty or holds only a header line.
    Raises ValueError when a shares or price field cannot be converted, and
    IndexError when a row has fewer than four fields.
    '''

    total = 0.0
    # newline='' lets the csv module do its own newline handling, as its docs advise.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)    # skip the header; the default avoids StopIteration on an empty file
        for row in rows:
            total += int(row[2]) * float(row[3])
    return total

# Report the cost of two of the sample portfolios, one line each.
for path in ('Data/portfolio.csv', 'Data/portfolio3.csv'):
    print('Total cost: ', portfolio_cost(path))

Total cost:  44671.15
Total cost:  39405.75


In [17]:
# 4.2 Project moving a script into a function
# Using glob to generate a list of files
import csv

def portfolio_cost(filename):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data

    filename: path of the CSV file. Its first line is a header and is skipped;
              row[2] is converted with int() and row[3] with float().
    Returns the accumulated shares*price as a float. A file with no data rows
    (including a completely empty file) gives 0.0.
    Raises ValueError for a shares/price field that does not convert, and
    IndexError for a row with fewer than four fields.
    '''

    total = 0.0
    # The csv docs ask for newline='' on files passed to csv.reader.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)    # drop the header; no StopIteration if the file is empty
        for row in rows:
            total += int(row[2]) * float(row[3])
    return total

import glob
# glob does not promise any particular ordering, so sort for a reproducible listing.
files = sorted(glob.glob('Data/portfolio*.csv'))
for file in files:
    print(file, 'total cost:', portfolio_cost(file) )


Data\portfolio.csv total cost: 44671.15
Data\portfolio2.csv total cost: 44671.15
Data\portfolio3.csv total cost: 39405.75


In [19]:
# 4.3 Handling bad data and exception handling
import csv

def portfolio_cost(filename):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data

    filename: path of the CSV file. The first line is a header and is skipped;
              row[2] (shares) is read as int and row[3] (price) as float.
    Rows whose shares or price cannot be converted, or that have fewer than
    four fields (for example a blank line), are printed together with the
    reason and left out of the total. The row is printed as it was read.
    Returns the float sum of shares*price over the remaining rows; 0.0 for an
    empty or header-only file.
    '''

    total = 0.0
    # newline='' lets the csv module do its own newline handling, as its docs advise.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)    # skip the header; the default avoids StopIteration on an empty file
        for row in rows:
            try:
                shares = int(row[2])
                price = float(row[3])
            except (ValueError, IndexError) as err:    # bad field or too few fields
                print('Bad row:', row)
                print('Reason:', err)
                continue    # skip to the next row
            total += shares * price
    return total

# Run against the sample file that contains rows with missing share counts.
missing_cost = portfolio_cost('Data/missing.csv')
print('Total cost: ', missing_cost)

Bad row: ['CAT', '2006-09-23', '', '83.44']
Reason: invalid literal for int() with base 10: ''
Bad row: ['MSFT', '2006-10-31', 'N/A', '65.10']
Reason: invalid literal for int() with base 10: 'N/A'
Total cost:  28900.15


In [20]:
# 4.3 Handling bad data and exception handling
# Introduce a rowno to help track where the error is in the data
import csv

def portfolio_cost(filename):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data

    filename: path of the CSV file. The first line is a header and is skipped;
              row[2] (shares) is read as int and row[3] (price) as float.
    Rows whose shares or price cannot be converted, or that have fewer than
    four fields (for example a blank line), are printed with their row number
    and the reason, then left out of the total. Row numbers count data rows
    from 1; the header is not counted.
    Returns the float sum of shares*price over the remaining rows; 0.0 for an
    empty or header-only file.
    '''

    total = 0.0
    # newline='' lets the csv module do its own newline handling, as its docs advise.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)    # skip the header; the default avoids StopIteration on an empty file
        rowno = 0
        for row in rows:
            rowno += 1
            try:
                shares = int(row[2])
                price = float(row[3])
            except (ValueError, IndexError) as err:    # bad field or too few fields
                print('Row:', rowno, 'bad row:', row)
                print('Reason:', err)
                continue    # skip to the next row
            total += shares * price
    return total

# Same sample file as before; bad rows are now reported with their row number.
missing_cost = portfolio_cost('Data/missing.csv')
print('Total cost: ', missing_cost)

Row: 3 bad row: ['CAT', '2006-09-23', '', '83.44']
Reason: invalid literal for int() with base 10: ''
Row: 6 bad row: ['MSFT', '2006-10-31', 'N/A', '65.10']
Reason: invalid literal for int() with base 10: 'N/A'
Total cost:  28900.15


In [20]:
# 4.3 Handling bad data and exception handling
# Introduce a rowno to help track where the error is in the data
import csv

def portfolio_cost(filename):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data

    filename: path of the CSV file. Its first line is a header and is skipped;
              row[2] is converted with int() and row[3] with float().
    A row that fails conversion, or that has fewer than four fields (such as a
    blank line), is printed with its row number and the reason and does not
    contribute to the total. Row numbers start at 1 on the first data row.
    Returns the accumulated shares*price as a float; 0.0 when the file is
    empty or has only a header.
    '''

    total = 0.0
    # The csv docs ask for newline='' on files passed to csv.reader.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)    # drop the header; no StopIteration if the file is empty
        rowno = 0
        for row in rows:
            rowno += 1
            try:
                shares = int(row[2])
                price = float(row[3])
            except (ValueError, IndexError) as err:    # bad field or too few fields
                print('Row:', rowno, 'bad row:', row)
                print('Reason:', err)
                continue    # skip to the next row
            total += shares * price
    return total

# Compute first (this prints any bad-row reports), then show the total.
missing_cost = portfolio_cost('Data/missing.csv')
print('Total cost: ', missing_cost)

Row: 3 bad row: ['CAT', '2006-09-23', '', '83.44']
Reason: invalid literal for int() with base 10: ''
Row: 6 bad row: ['MSFT', '2006-10-31', 'N/A', '65.10']
Reason: invalid literal for int() with base 10: 'N/A'
Total cost:  28900.15


In [30]:
# Play with enumerate
seasons = ['Spring', 'Summer', 'Fall', 'Winter']
# Only the last expression of a cell is displayed, so the next two lines
# produce no visible output here.
enumerate(seasons)                  # an enumerate object, not a list
list(enumerate(seasons))            # (index, item) pairs with the default start
list(enumerate(seasons, start=1))   # pairs numbered from 1 -- this is the output shown

[(1, 'Spring'), (2, 'Summer'), (3, 'Fall'), (4, 'Winter')]

In [29]:
# 4.3 Handling bad data and exception handling
# Introduce enumerate as a counter in the for loop
# to track where the error is in the data
import csv

def portfolio_cost(filename):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data

    filename: path of the CSV file. The first line is a header and is skipped;
              row[2] (shares) is read as int and row[3] (price) as float.
    Rows whose shares or price cannot be converted, or that have fewer than
    four fields (for example a blank line), are printed with their row number
    and the reason, then left out of the total. enumerate(..., start=1)
    numbers the data rows from 1; the header is not counted.
    Returns the float sum of shares*price over the remaining rows; 0.0 for an
    empty or header-only file.
    '''

    total = 0.0
    # newline='' lets the csv module do its own newline handling, as its docs advise.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)    # skip the header; the default avoids StopIteration on an empty file
        for (rowno, row) in enumerate(rows, start=1):
            try:
                shares = int(row[2])
                price = float(row[3])
            except (ValueError, IndexError) as err:    # bad field or too few fields
                print('Row:', rowno, 'bad row:', row)
                print('Reason:', err)
                continue    # skip to the next row
            total += shares * price
    return total

# The enumerate-based version is exercised on the same sample file.
missing_cost = portfolio_cost('Data/missing.csv')
print('Total cost: ', missing_cost)

Row: 3 bad row: ['CAT', '2006-09-23', '', '83.44']
Reason: invalid literal for int() with base 10: ''
Row: 6 bad row: ['MSFT', '2006-10-31', 'N/A', '65.10']
Reason: invalid literal for int() with base 10: 'N/A'
Total cost:  28900.15


## 4.4 Function Design Consideration
### Anti-pattern: Diaper
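Catching every exception with one generic handler is dangerous because it can hide a programming error. In the cell below, the misspelled `flot` raises `NameError` on every row whose share count is valid, yet each one is reported as a bad row and the total silently comes out as 0.0.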

In [2]:
import csv

def portfolio_cost(filename):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data

    This version demonstrates the catch-all ("diaper") anti-pattern: every
    Exception raised while converting a row is reported as a bad row and the
    row is skipped, so a programming error inside the try block is reported
    the same way as bad data.
    '''

    total = 0.0
    with open(filename, 'r') as f:
        rows = csv.reader(f)
        headers = next(rows) # skip the first line
        for (rowno, row) in enumerate(rows, start=1):
            try:
                row[2] = int(row[2])
                # NOTE(review): flot is not defined in this cell -- presumably a deliberate
                # misspelling of float for the demonstration. It raises NameError on every
                # row whose shares field converted, after row[2] was already replaced by an int.
                row[3] = flot(row[3])
            except Exception as err:    # Catching all exceptions (!!! dangerous !!!)
                # The NameError from the line above lands here too, so it is printed
                # as a "bad row" instead of stopping the program.
                print('Row:', rowno, 'bad row:', row)
                print('Reason:', err)
                continue    # skip to the next row
            total += row[2] * row[3]
    return total

# Every row ends up reported as bad here, so watch the total that is printed.
missing_cost = portfolio_cost('Data/missing.csv')
print('Total cost: ', missing_cost)

Row: 1 bad row: ['AA', '2007-06-11', 100, '32.20']
Reason: name 'flot' is not defined
Row: 2 bad row: ['IBM', '2007-05-13', 50, '91.10']
Reason: name 'flot' is not defined
Row: 3 bad row: ['CAT', '2006-09-23', '', '83.44']
Reason: invalid literal for int() with base 10: ''
Row: 4 bad row: ['MSFT', '2007-05-17', 200, '51.23']
Reason: name 'flot' is not defined
Row: 5 bad row: ['GE', '2006-02-01', 95, '40.37']
Reason: name 'flot' is not defined
Row: 6 bad row: ['MSFT', '2006-10-31', 'N/A', '65.10']
Reason: invalid literal for int() with base 10: 'N/A'
Row: 7 bad row: ['IBM', '2006-07-09', 100, '70.44']
Reason: name 'flot' is not defined
Total cost:  0.0


In [1]:
import csv

def portfolio_cost(filename, errors='warn'):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data

    filename: path of the CSV file. The first line is a header and is skipped;
              row[2] (shares) is read as int and row[3] (price) as float.
    errors:   'warn' (the default) prints each bad row with its row number and
              the reason; any other value skips bad rows without printing.
    A bad row is one whose shares or price cannot be converted, or that has
    fewer than four fields (for example a blank line). Bad rows never
    contribute to the total. Row numbers count data rows from 1.
    Returns the float sum of shares*price over the good rows; 0.0 for an
    empty or header-only file.
    '''

    total = 0.0
    # newline='' lets the csv module do its own newline handling, as its docs advise.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)    # skip the header; the default avoids StopIteration on an empty file
        for (rowno, row) in enumerate(rows, start=1):
            try:
                shares = int(row[2])
                price = float(row[3])
            except (ValueError, IndexError) as err:    # only bad data; other errors still propagate
                if errors == 'warn':
                    print('Row:', rowno, 'bad row:', row)
                    print('Reason:', err)
                continue    # skip to the next row
            total += shares * price
    return total

# errors is left at its default here, so bad rows are reported before the total.
missing_cost = portfolio_cost('Data/missing.csv')
print('Total cost: ', missing_cost)

Row: 3 bad row: ['CAT', '2006-09-23', '', '83.44']
Reason: invalid literal for int() with base 10: ''
Row: 6 bad row: ['MSFT', '2006-10-31', 'N/A', '65.10']
Reason: invalid literal for int() with base 10: 'N/A'
Total cost:  28900.15


## Calling `portfolio_cost` and explicitly silencing errors
```python
# cryptic use of positional argument
print('Total cost: ', portfolio_cost('Data/missing.csv', 'silent'))
# keyword argument looks much more meaningful
print('Total cost: ', portfolio_cost('Data/missing.csv', errors = 'silent'))
```
### We can force `errors` to be passed as a keyword argument by adding `*` to the parameter list, like this
```python
def portfolio_cost(filename, *, errors='warn'):
```

In [7]:
import csv

def portfolio_cost(filename, *, errors='warn'):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data

    filename: path of the CSV file. The first line is a header and is skipped;
              row[2] (shares) is read as int and row[3] (price) as float.
    errors:   keyword-only. 'warn' (the default) prints each bad row with its
              row number and the reason; any other value skips bad rows
              without printing.
    A bad row is one whose shares or price cannot be converted, or that has
    fewer than four fields (for example a blank line). Bad rows never
    contribute to the total. Row numbers count data rows from 1.
    Returns the float sum of shares*price over the good rows; 0.0 for an
    empty or header-only file.
    '''

    total = 0.0
    # newline='' lets the csv module do its own newline handling, as its docs advise.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)    # skip the header; the default avoids StopIteration on an empty file
        for (rowno, row) in enumerate(rows, start=1):
            try:
                shares = int(row[2])
                price = float(row[3])
            except (ValueError, IndexError) as err:    # only bad data; other errors still propagate
                if errors == 'warn':
                    print('Row:', rowno, 'bad row:', row)
                    print('Reason:', err)
                continue    # skip to the next row
            total += shares * price
    return total
# Passing 'silent' positionally now fails with a TypeError
# (... takes 1 positional argument but 2 were given):
# print('Total cost: ', portfolio_cost('Data/missing.csv', 'silent'))
# errors is keyword-only, so it has to be named in the call.
silent_cost = portfolio_cost('Data/missing.csv', errors = 'silent')
print('Total cost: ', silent_cost)

Total cost:  28900.15


### Add more sophistication: restrict `errors` to a set of valid values

In [12]:
import csv

def portfolio_cost(filename, *, errors='warn'):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data

    filename: path of the CSV file. The first line is a header and is skipped;
              row[2] (shares) is read as int and row[3] (price) as float.
    errors:   keyword-only policy for bad rows, checked before the file is opened:
              'warn'   (default) print the row number, the row and the reason, then skip it
              'silent' skip the row without printing
              'raise'  re-raise the exception that the bad row caused
    A bad row is one whose shares or price cannot be converted (ValueError) or
    that has fewer than four fields, such as a blank line (IndexError).
    Row numbers count data rows from 1.
    Returns the float sum of shares*price over the good rows; 0.0 for an
    empty or header-only file.
    Raises ValueError if errors is not one of the three values above.
    '''

    if errors not in ('warn', 'silent', 'raise'):
        raise ValueError("errors must be one of 'warn', 'silent', 'raise'")
    total = 0.0
    # newline='' lets the csv module do its own newline handling, as its docs advise.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)    # skip the header; the default avoids StopIteration on an empty file
        for (rowno, row) in enumerate(rows, start=1):
            try:
                shares = int(row[2])
                price = float(row[3])
            except (ValueError, IndexError) as err:    # only bad data; other errors still propagate
                if errors == 'raise':
                    raise    # Re-raise the exception being handled, unchanged
                if errors == 'warn':
                    print('Row:', rowno, 'bad row:', row)
                    print('Reason:', err)
                continue    # 'warn' and 'silent' both skip to the next row
            total += shares * price
    return total
# Passing the mode positionally fails with a TypeError
# (... takes 1 positional argument but 2 were given):
# print('Total cost: ', portfolio_cost('Data/missing.csv', 'silent'))
# errors is keyword-only, so it has to be named in the call.
warn_cost = portfolio_cost('Data/missing.csv', errors = 'warn')
print('Total cost: ', warn_cost)

Row: 3 bad row: ['CAT', '2006-09-23', '', '83.44']
Reason: invalid literal for int() with base 10: ''
Row: 6 bad row: ['MSFT', '2006-10-31', 'N/A', '65.10']
Reason: invalid literal for int() with base 10: 'N/A'
Total cost:  28900.15
