## 5.1 Basic Materials: tuples, lists, sets and dicts
### tuples is immutable
Given a tuple t is defined as
```python
t = ('AA', '2011-06-07', 100, 32.2)
```
An assignment to an element within a tuple will raise a TypeError such as
```python
t[2] = 50 # raise a TypeError: 'tuple' does not support item assignment
```

In [8]:
t = ('AA', '2011-06-07', 100, 32.2)
print(len(t))
print(t[0], t[1], t[2]*t[3])

name, date, shares, price = t
print(name, date, shares, price)

# tuple is immutable
print(t[2])
# TypeError: 'tuple' does not support item assignment
t[2] = 50

4
AA 2011-06-07 3220.0000000000005
AA 2011-06-07 100 32.2
100


TypeError: 'tuple' object does not support item assignment

In [12]:
names = ['IBM', 'YHOO', 'AA', 'CAT']
names.append('IBM')
names.insert(1, 'FB')
print(names)
names[2] = 'HPE'
print(names)

nums = [45, 13, 20, 17]
print(nums)


['IBM', 'FB', 'YHOO', 'AA', 'CAT', 'IBM']
['IBM', 'FB', 'HPE', 'AA', 'CAT', 'IBM']
[45, 13, 20, 17]


### Set can be used to dedup redundant items in list
We use curly brackets ('{}') to enclose items in set

In [18]:
distinct_names = {'YHOO', 'IBM', 'MSFT', 'IBM', 'YHOO', 'AA', 'CAT', 'IBM'}
print(distinct_names)

# We can use set to dedup names
print(names)
print(set(names))

print('AA' in names)
print('AA' in set(names))

{'IBM', 'AA', 'YHOO', 'MSFT', 'CAT'}
['IBM', 'FB', 'HPE', 'AA', 'CAT', 'IBM']
{'FB', 'IBM', 'HPE', 'AA', 'CAT'}
True
True


In [22]:
prices = {
    'IBM': 91.2,
    'MSFT': 45.23,
    'AA': 32.4,
    'YHOO': 9.23
}
print(prices['IBM'], prices['AA'])

prices['IBM'] = 87.23
print(prices)
print('IBM' in prices, 87.23 in prices)

91.2 32.4
{'IBM': 87.23, 'MSFT': 45.23, 'AA': 32.4, 'YHOO': 9.23}
True False


## 5.2 Project: Building a Data Structure from a File

1. Change a function name from portfolio_cost to read_portfolio
2. Return a list of holdings instead of portfolio cost

Initially, holding is modelled as a tuple of (quote, date, shares, price)

In [4]:
import csv

def read_portfolio(filename, *, errors='warn'):
    '''
    Read a CSV file with name, date, shares, price data into a list
    '''

    if errors not in ['warn', 'silent', 'raise']:
        raise ValueError("errors must be one of 'warn', 'silent', 'raise'")
    
    portfolio = []    # List of records
    with open(filename, 'r') as f:
        rows = csv.reader(f)
        headers = next(rows) # skip the first line
        for (rowno, row) in enumerate(rows, start=1):
            try:
                row[2] = int(row[2])
                row[3] = float(row[3])
            except ValueError as err:    # Catching all exception (!!! dangerous !!!)
                if errors == 'warn':
                    print('Row:', rowno, 'bad row:', row)
                    print('Reason:', err)
                elif errors == 'raise':
                    raise    # Re-raise the last exception
                else:
                    pass    # Silence
                continue    # skip to the next row
            # record is a row consisting of all columns
            record = (row[0], row[1], row[2], row[3])
            portfolio.append(record)
    return portfolio

portfolio = read_portfolio('Data/portfolio.csv')
print("{0!r}".format(portfolio))

[('AA', '2007-06-11', 100, 32.2), ('IBM', '2007-05-13"', 50, 91.1), ('CAT', '2006-09-23', 150, 83.44), ('MSFT', '2007-05-17', 200, 51.23), ('GE', '2006-02-01', 95, 40.37), ('MSFT', '2006-10-31', 50, 65.1), ('IBM', '2006-07-09', 100, 70.44)]


Further simplifation of record generation by converting an entire row to a tuple instead of packing all individual columns.

In [5]:
import csv

def read_portfolio(filename, *, errors='warn'):
    '''
    Read a CSV file with name, date, shares, price data into a list
    '''

    if errors not in ['warn', 'silent', 'raise']:
        raise ValueError("errors must be one of 'warn', 'silent', 'raise'")
    
    portfolio = []    # List of records
    with open(filename, 'r') as f:
        rows = csv.reader(f)
        headers = next(rows) # skip the first line
        for (rowno, row) in enumerate(rows, start=1):
            try:
                row[2] = int(row[2])
                row[3] = float(row[3])
            except ValueError as err:    # Catching all exception (!!! dangerous !!!)
                if errors == 'warn':
                    print('Row:', rowno, 'bad row:', row)
                    print('Reason:', err)
                elif errors == 'raise':
                    raise    # Re-raise the last exception
                else:
                    pass    # Silence
                continue    # skip to the next row
            # record is a row consisting of all columns
            record = tuple(row)
            portfolio.append(record)
    return portfolio

portfolio = read_portfolio('Data/portfolio.csv')
print("{0!r}".format(portfolio))

[('AA', '2007-06-11', 100, 32.2), ('IBM', '2007-05-13"', 50, 91.1), ('CAT', '2006-09-23', 150, 83.44), ('MSFT', '2007-05-17', 200, 51.23), ('GE', '2006-02-01', 95, 40.37), ('MSFT', '2006-10-31', 50, 65.1), ('IBM', '2006-07-09', 100, 70.44)]


Now calculate total (portfolio cost).

In [10]:
import csv

def read_portfolio(filename, *, errors='warn'):
    '''
    Read a CSV file with name, date, shares, price data into a list
    '''

    if errors not in ['warn', 'silent', 'raise']:
        raise ValueError("errors must be one of 'warn', 'silent', 'raise'")
    
    portfolio = []    # List of records
    with open(filename, 'r') as f:
        rows = csv.reader(f)
        headers = next(rows) # skip the first line
        for (rowno, row) in enumerate(rows, start=1):
            try:
                row[2] = int(row[2])
                row[3] = float(row[3])
            except ValueError as err:    # Catching all exception (!!! dangerous !!!)
                if errors == 'warn':
                    print('Row:', rowno, 'bad row:', row)
                    print('Reason:', err)
                elif errors == 'raise':
                    raise    # Re-raise the last exception
                else:
                    pass    # Silence
                continue    # skip to the next row
            # record is a row consisting of all columns
            record = tuple(row)
            portfolio.append(record)
    return portfolio

portfolio = read_portfolio('Data/portfolio.csv')
total = 0.0
for holding in portfolio:
    total += holding[2] * holding[3]    # shares * price
print(total)

44671.15


Unpack holding into variable names to make code more readable i.e. referring to shares, price instead of holding[2], holding[3].

In [13]:
import csv

def read_portfolio(filename, *, errors='warn'):
    '''
    Read a CSV file with name, date, shares, price data into a list
    '''

    if errors not in ['warn', 'silent', 'raise']:
        raise ValueError("errors must be one of 'warn', 'silent', 'raise'")
    
    portfolio = []    # List of records
    with open(filename, 'r') as f:
        rows = csv.reader(f)
        headers = next(rows) # skip the first line
        for (rowno, row) in enumerate(rows, start=1):
            try:
                row[2] = int(row[2])
                row[3] = float(row[3])
            except ValueError as err:    # Catching all exception (!!! dangerous !!!)
                if errors == 'warn':
                    print('Row:', rowno, 'bad row:', row)
                    print('Reason:', err)
                elif errors == 'raise':
                    raise    # Re-raise the last exception
                else:
                    pass    # Silence
                continue    # skip to the next row
            # record is a row consisting of all columns
            record = tuple(row)
            portfolio.append(record)
    return portfolio

portfolio = read_portfolio('Data/portfolio.csv')
total = 0.0
for name, date, shares, price in portfolio:
    total += shares * price
print(total)

44671.15


Switch to represent holding as a dictionary instead of tuple.
This is useful when there are many columns in a tuple. Because we don't have to tediously unpack all columns. Additionally, a change of order within a tuple will not affect calculation as we refer to them by name instead of its position.

In [4]:
import csv

def read_portfolio(filename, *, errors='warn'):
    '''
    Read a CSV file with name, date, shares, price data into a list
    '''

    if errors not in ['warn', 'silent', 'raise']:
        raise ValueError("errors must be one of 'warn', 'silent', 'raise'")
    
    portfolio = []    # List of records
    with open(filename, 'r') as f:
        rows = csv.reader(f)
        headers = next(rows) # skip the first line
        for (rowno, row) in enumerate(rows, start=1):
            try:
                row[2] = int(row[2])
                row[3] = float(row[3])
            except ValueError as err:    # Catching all exception (!!! dangerous !!!)
                if errors == 'warn':
                    print('Row:', rowno, 'bad row:', row)
                    print('Reason:', err)
                elif errors == 'raise':
                    raise    # Re-raise the last exception
                else:
                    pass    # Silence
                continue    # skip to the next row
            # record is a row consisting of all columns
            record = {
                'name': row[0],
                'date': row[1],
                'shares': row[2],
                'price': row[3]
                     }
            portfolio.append(record)
    return portfolio

portfolio = read_portfolio('Data/portfolio.csv')
total = 0.0
for holding in portfolio:
    total += holding['shares'] * holding['price']
print(total)

44671.15


### playing with json
#### as json string

In [15]:
import json
# export to other programs
data = json.dumps(portfolio)

# import from other programs
port = json.loads(data)

print("{!r}".format(port))

[{'name': 'AA', 'date': '2007-06-11', 'shares': 100, 'price': 32.2}, {'name': 'IBM', 'date': '2007-05-13"', 'shares': 50, 'price': 91.1}, {'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44}, {'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}, {'name': 'GE', 'date': '2006-02-01', 'shares': 95, 'price': 40.37}, {'name': 'MSFT', 'date': '2006-10-31', 'shares': 50, 'price': 65.1}, {'name': 'IBM', 'date': '2006-07-09', 'shares': 100, 'price': 70.44}]


#### as json file

In [16]:
import json

with open('Data/myport.json', 'w') as myport:
    json.dump(portfolio, myport)

# read the json file
with open('Data/myport.json', 'r') as myport:
    port = json.load(myport)

print("{!r}".format(port))

[{'name': 'AA', 'date': '2007-06-11', 'shares': 100, 'price': 32.2}, {'name': 'IBM', 'date': '2007-05-13"', 'shares': 50, 'price': 91.1}, {'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44}, {'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}, {'name': 'GE', 'date': '2006-02-01', 'shares': 95, 'price': 40.37}, {'name': 'MSFT', 'date': '2006-10-31', 'shares': 50, 'price': 65.1}, {'name': 'IBM', 'date': '2006-07-09', 'shares': 100, 'price': 70.44}]


## 5.3 Data Manipulation

In [5]:
portfolio = read_portfolio('Data/portfolio.csv')

print(portfolio[0])
print(len(portfolio))

for holding in portfolio:
    print(holding)

{'name': 'AA', 'date': '2007-06-11', 'shares': 100, 'price': 32.2}
7
{'name': 'AA', 'date': '2007-06-11', 'shares': 100, 'price': 32.2}
{'name': 'IBM', 'date': '2007-05-13"', 'shares': 50, 'price': 91.1}
{'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44}
{'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}
{'name': 'GE', 'date': '2006-02-01', 'shares': 95, 'price': 40.37}
{'name': 'MSFT', 'date': '2006-10-31', 'shares': 50, 'price': 65.1}
{'name': 'IBM', 'date': '2006-07-09', 'shares': 100, 'price': 70.44}


In [6]:
portfolio = read_portfolio('Data/portfolio.csv')
total = 0.0
for holding in portfolio:
    total += holding['shares'] * holding['price']

print(total)

names = []
for holding in portfolio:
    names.append(holding['name'])

print(names)

more100 = []
for holding in portfolio:
    if holding['shares'] > 100:
        more100.append(holding)
print(more100)

44671.15
['AA', 'IBM', 'CAT', 'MSFT', 'GE', 'MSFT', 'IBM']
[{'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44}, {'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}]


### List comprehension

In [12]:
portfolio = read_portfolio('Data/portfolio.csv')

total = sum([holding['shares'] * holding['price'] for holding in portfolio])
print(total)

names = [holding['name'] for holding in portfolio]
print(names)

more100 = [holding for holding in portfolio if holding['shares'] > 100]
print(more100)

44671.15
['AA', 'IBM', 'CAT', 'MSFT', 'GE', 'MSFT', 'IBM']
[{'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44}, {'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}]


### Set comprehension

In [15]:
unique_names = set(names)
print(unique_names)

# or 
unique_names = {holding['name'] for holding in portfolio}
print(unique_names)

namestr = ','.join(unique_names)
print(namestr)

import urllib.request
u = urllib.request.urlopen('http://finance.yahoo.com/d/quotes.csv?s={}&f=l1'.format(namestr))
data = u.read()
data

{'AA', 'CAT', 'GE', 'MSFT', 'IBM'}
{'AA', 'CAT', 'GE', 'MSFT', 'IBM'}
AA,CAT,GE,MSFT,IBM


URLError: <urlopen error [Errno 11001] getaddrinfo failed>

### Mocking data from finance.yahoo.com (got Exception gaierror)
and move on with
### Dict comprehension
```python
# making a dict of name and stock price from an iterable of names and prices
prices = {name: float(price) for name, price in zip(unique_names, price_data)}
```

In [28]:
unique_names = {holding['name'] for holding in portfolio}
print(unique_names)

namestr = ','.join(unique_names)
print(namestr)

# import urllib.request
# u = urllib.request.urlopen('http://finance.yahoo.com/d/quotes.csv?s={}&f=l1'.format(namestr))
# data = u.read()
data = b'72.51\n9.27\n30.23\n153.74\n53.00\n'

price_data = data.split()

for name, price in zip(unique_names, price_data):
    print(name, '=', price)

prices = dict(zip(unique_names, price_data))

# Making price float instead of a sequence of byte by making use of dict comprehension
prices = {name: float(price) for name, price in zip(unique_names, price_data)}
print(prices['CAT'])

current_value = 0.0
for holding in portfolio:
    current_value += holding['shares'] * prices[holding['name']]

# Or
# Further calculate current_value using list comprenhension
current_value = sum([holding['shares'] * prices[holding['name']] for holding in portfolio])

print(current_value)

{'AA', 'CAT', 'GE', 'MSFT', 'IBM'}
AA,CAT,GE,MSFT,IBM
AA = b'72.51'
CAT = b'9.27'
GE = b'30.23'
MSFT = b'153.74'
IBM = b'53.00'
9.27
57898.35


## 5.4 Example sorting and grouping
We can use named function as a key such as function holding_name below
```python
def holding_name(holding):
    return holding['name']
```

Note: Also observe how to print a list member line by line. This is accomplished by unpacking the list with *operator. See https://docs.python.org/3/tutorial/controlflow.html#unpacking-argument-lists for more info

In [60]:
portfolio = read_portfolio('Data/portfolio.csv')
print(*portfolio, sep='\n') # print('\n'.join(portfolio))
print('{:*^60s}'.format(' sorted '))
def holding_name(holding):
    return holding['name']
portfolio.sort(key=holding_name)
print(*portfolio, sep='\n')

{'name': 'AA', 'date': '2007-06-11', 'shares': 100, 'price': 32.2}
{'name': 'IBM', 'date': '2007-05-13"', 'shares': 50, 'price': 91.1}
{'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44}
{'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}
{'name': 'GE', 'date': '2006-02-01', 'shares': 95, 'price': 40.37}
{'name': 'MSFT', 'date': '2006-10-31', 'shares': 50, 'price': 65.1}
{'name': 'IBM', 'date': '2006-07-09', 'shares': 100, 'price': 70.44}
************************** sorted **************************
{'name': 'AA', 'date': '2007-06-11', 'shares': 100, 'price': 32.2}
{'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44}
{'name': 'GE', 'date': '2006-02-01', 'shares': 95, 'price': 40.37}
{'name': 'IBM', 'date': '2007-05-13"', 'shares': 50, 'price': 91.1}
{'name': 'IBM', 'date': '2006-07-09', 'shares': 100, 'price': 70.44}
{'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}
{'name': 'MSFT', 'date': '2006-10-31', 'shares': 50

### Or lambda such as
```python
portfolio.sort(key=lambda holding: holding['name'])
```

In [63]:
portfolio = read_portfolio('Data/portfolio.csv')
print('{:*^60s}'.format(' unsorted '))
print(*portfolio, sep='\n')

print('{:*^60s}'.format(' sorted '))
portfolio.sort(key=lambda holding: holding['name'])
print(*portfolio, sep='\n')

print('{:*^60s}'.format(' earliest '))
print(min(portfolio, key = lambda holding: holding['date']))
print('{:*^60s}'.format(' latest '))
print(max(portfolio, key = lambda holding: holding['date']))

************************* unsorted *************************
{'name': 'AA', 'date': '2007-06-11', 'shares': 100, 'price': 32.2}
{'name': 'IBM', 'date': '2007-05-13"', 'shares': 50, 'price': 91.1}
{'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44}
{'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}
{'name': 'GE', 'date': '2006-02-01', 'shares': 95, 'price': 40.37}
{'name': 'MSFT', 'date': '2006-10-31', 'shares': 50, 'price': 65.1}
{'name': 'IBM', 'date': '2006-07-09', 'shares': 100, 'price': 70.44}
************************** sorted **************************
{'name': 'AA', 'date': '2007-06-11', 'shares': 100, 'price': 32.2}
{'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44}
{'name': 'GE', 'date': '2006-02-01', 'shares': 95, 'price': 40.37}
{'name': 'IBM', 'date': '2007-05-13"', 'shares': 50, 'price': 91.1}
{'name': 'IBM', 'date': '2006-07-09', 'shares': 100, 'price': 70.44}
{'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price