In [1]:
!cat port.py

# port.py

import csv

def portfolio_cost(filename):
    '''
    Add up shares * price across every data row of a CSV file.

    The first row is read and discarded as a header. In each remaining row
    the third field (index 2) is parsed with int() as the share count and
    the fourth field (index 3) with float() as the price. No errors are
    handled here: a field that cannot be converted raises ValueError.

    Returns the accumulated total as a float.
    '''
    cost = 0.0
    with open(filename) as infile:
        reader = csv.reader(infile)
        next(reader)                # Discard the header row
        for record in reader:
            shares = int(record[2])
            price = float(record[3])
            cost += shares * price
    return cost

# Script entry point: total up Data/portfolio.csv and print the result.
total = portfolio_cost('Data/portfolio.csv')
print('Total cost:', total)


In [2]:
!python port.py

Total cost: 44671.15


## Break the code by running it on missing.csv

In [3]:
!cat port.py

# port.py

import csv

def portfolio_cost(filename):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data; fields that fail conversion raise ValueError.
    '''

    total = 0.0
    with open(filename, 'r') as f:
        rows = csv.reader(f)
        headers = next(rows)        # Skip the header row
        for row in rows:            # No error handling: the first unconvertible field aborts the loop
            row[2] = int(row[2])
            row[3] = float(row[3])  # Price is parsed as a float
            total += row[2]*row[3]
    return total

total = portfolio_cost('Data/missing.csv')
print('Total cost:', total)     # Not reached if portfolio_cost raises


In [4]:
!python port.py

Traceback (most recent call last):
  File "port.py", line 20, in <module>
    total = portfolio_cost('Data/missing.csv')
  File "port.py", line 15, in portfolio_cost
    row[2] = int(row[2])
ValueError: invalid literal for int() with base 10: ''


In [5]:
!cat Data/missing.csv

name,date,shares,price
"AA","2007-06-11",100,32.20
"IBM","2007-05-13",50,91.10
"CAT","2006-09-23",,83.44
"MSFT","2007-05-17",200,51.23
"GE","2006-02-01",95,40.37
"MSFT","2006-10-31",N/A,65.10
"IBM","2006-07-09",100,70.44


## Catch the ValueError and report the bad row

In [7]:
!cat port.py

# port.py

import csv

def portfolio_cost(filename):
    '''
    Sum shares * price over the data rows of a CSV file (name, date, shares, price).

    The header row is skipped. A row whose shares or price field raises
    ValueError during conversion is printed with a 'Bad row:' prefix and
    adds nothing to the total. Returns the total as a float.
    '''
    cost = 0.0
    with open(filename) as infile:
        reader = csv.reader(infile)
        next(reader)                # Discard the header row
        for fields in reader:
            try:
                # Converted in place, so a row reported as bad shows whatever
                # had already been converted when the failure happened.
                fields[2] = int(fields[2])
                fields[3] = float(fields[3])
            except ValueError:
                print('Bad row:', fields)
                continue
            cost += fields[2] * fields[3]
    return cost

# Script entry point: total up Data/missing.csv and print the result.
total = portfolio_cost('Data/missing.csv')
print('Total cost:', total)


In [8]:
!python port.py

Bad row: ['CAT', '2006-09-23', '', '83.44']
Bad row: ['MSFT', '2006-10-31', 'N/A', '65.10']
Total cost: 28900.15


## Why is the row bad?

In [9]:
!cat port.py

# port.py

import csv

def portfolio_cost(filename):
    '''
    Sum shares * price over the data rows of a CSV file (name, date, shares, price).

    The header row is skipped. When converting a row's shares or price
    raises ValueError, the row is printed after 'Bad row:' and the exception
    message after 'Reason:'; that row adds nothing to the total.
    Returns the total as a float.
    '''
    cost = 0.0
    with open(filename) as infile:
        reader = csv.reader(infile)
        next(reader)                # Discard the header row
        for fields in reader:
            try:
                # Converted in place, so a row reported as bad shows whatever
                # had already been converted when the failure happened.
                fields[2] = int(fields[2])
                fields[3] = float(fields[3])
            except ValueError as exc:
                print('Bad row:', fields)
                print('Reason:', exc)
                continue
            cost += fields[2] * fields[3]
    return cost

# Script entry point: total up Data/missing.csv and print the result.
total = portfolio_cost('Data/missing.csv')
print('Total cost:', total)


In [10]:
!python port.py

Bad row: ['CAT', '2006-09-23', '', '83.44']
Reason: invalid literal for int() with base 10: ''
Bad row: ['MSFT', '2006-10-31', 'N/A', '65.10']
Reason: invalid literal for int() with base 10: 'N/A'
Total cost: 28900.15


## Which row of the file is bad?

In [11]:
!cat port.py

# port.py

import csv

def portfolio_cost(filename):
    '''
    Sum shares * price over the data rows of a CSV file (name, date, shares, price).

    The header row is skipped and data rows are numbered from 1 with a
    hand-maintained counter. When converting a row's shares or price raises
    ValueError, two lines are printed, each starting with 'Row:' and the row
    number: one with the row itself, one with the exception message. Such a
    row adds nothing to the total. Returns the total as a float.
    '''
    cost = 0.0
    rowno = 0                       # Data rows seen so far; the header is not counted
    with open(filename) as infile:
        reader = csv.reader(infile)
        next(reader)                # Discard the header row
        for fields in reader:
            rowno += 1
            try:
                # Converted in place, so a row reported as bad shows whatever
                # had already been converted when the failure happened.
                fields[2] = int(fields[2])
                fields[3] = float(fields[3])
            except ValueError as exc:
                print('Row:', rowno, 'Bad row:', fields)
                print('Row:', rowno, 'Reason:', exc)
                continue
            cost += fields[2] * fields[3]
    return cost

# Script entry point: total up Data/missing.csv and print the result.
total = portfolio_cost('Data/missing.csv')
print('Total cost:', total)


In [12]:
!python port.py

Row: 3 Bad row: ['CAT', '2006-09-23', '', '83.44']
Row: 3 Reason: invalid literal for int() with base 10: ''
Row: 6 Bad row: ['MSFT', '2006-10-31', 'N/A', '65.10']
Row: 6 Reason: invalid literal for int() with base 10: 'N/A'
Total cost: 28900.15


## Use enumerate() instead of maintaining the rowno counter by hand

In [15]:
!cat port.py

# port.py

import csv

def portfolio_cost(filename):
    '''
    Computes total shares*price for a CSV file with name, date, shares, price data.

    The first row is treated as a header and skipped; an empty file yields a
    total of 0.0. Data rows are numbered from 1. A row is reported on stdout
    (the row as read, then the reason) and left out of the total when:

      * its shares field is rejected by int() or its price field by float()
        (ValueError), or
      * it has fewer than four fields, including a blank line (IndexError).

    Returns the total as a float.
    '''

    total = 0.0
    # newline='' lets the csv module do its own line-ending handling, as its
    # documentation recommends for files passed to csv.reader.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)            # Skip the header row; the default keeps an empty file from raising StopIteration
        for rowno, row in enumerate(rows, start=1):
            try:
                # Convert into locals rather than back into the row, so a row
                # reported as bad is printed exactly as it was read.
                shares = int(row[2])
                price = float(row[3])
            except (ValueError, IndexError) as err:
                print('Row:', rowno, 'Bad row:', row)
                print('Row:', rowno, 'Reason:', err)
                continue
            total += shares * price
    return total

# Script entry point: total up Data/missing.csv and print the result.
total = portfolio_cost('Data/missing.csv')
print('Total cost:', total)


In [16]:
!python port.py

Row: 3 Bad row: ['CAT', '2006-09-23', '', '83.44']
Row: 3 Reason: invalid literal for int() with base 10: ''
Row: 6 Bad row: ['MSFT', '2006-10-31', 'N/A', '65.10']
Row: 6 Reason: invalid literal for int() with base 10: 'N/A'
Total cost: 28900.15
