# Dealing with Reality: Lists, Iteration
## Intro Python

16 July 2022

## Last Session...
* **Strings:** `in`, methods (`.upper()`, `.count()`, `.replace()`, `.endswith()`, type conversions
* **Logic:** `not`, `and`, `or`
* **Conditionals:** Controlling what code runs with `if`, `elif`, `else`
* **Lists:** `[]`, methods (`.append()`, `.extend()`, `.insert()`, `.remove()`, `.pop()`, `.index()`, `.clear()`), 
* **Concepts:** nesting, mutability

## Questions?

In [1]:
provinces = ['ON', 
             'BC', 
             'QC', 
             'AB', 
             'MB', 
             'NL', 
             'PE', 
             'NP', 
             'NS', 
             'SK', 
             'Yukon Territories', 
             'Northwest Territories', 
             'Nunavut']
provinces

['ON',
 'BC',
 'QC',
 'AB',
 'MB',
 'NL',
 'PE',
 'NP',
 'NS',
 'SK',
 'Yukon Territories',
 'Northwest Territories',
 'Nunavut']

In [2]:
provinces.sort()
provinces

['AB',
 'BC',
 'MB',
 'NL',
 'NP',
 'NS',
 'Northwest Territories',
 'Nunavut',
 'ON',
 'PE',
 'QC',
 'SK',
 'Yukon Territories']

In [3]:
fruits = ['kiwi', 'pear', 'grape', 'melon']

In [4]:
sorted_fruits = sorted(fruits)
sorted_fruits

['grape', 'kiwi', 'melon', 'pear']

In [5]:
sorted(provinces, key=len)

['AB',
 'BC',
 'MB',
 'NL',
 'NP',
 'NS',
 'ON',
 'PE',
 'QC',
 'SK',
 'Nunavut',
 'Yukon Territories',
 'Northwest Territories']

In [6]:
mystery_solvers = [
    ['Sherlock', 'Watson'],
    ['Poirot'],
    ['Scooby', 'Shaggy', 'Fred', 'Velma', 'Daphne']
]

In [7]:
sorted(mystery_solvers, key=len)

[['Poirot'],
 ['Sherlock', 'Watson'],
 ['Scooby', 'Shaggy', 'Fred', 'Velma', 'Daphne']]

In [8]:
sorted(mystery_solvers)

[['Poirot'],
 ['Scooby', 'Shaggy', 'Fred', 'Velma', 'Daphne'],
 ['Sherlock', 'Watson']]

## `for` loops

In [9]:
for province in provinces:
    # do stuff
    print(province)

AB
BC
MB
NL
NP
NS
Northwest Territories
Nunavut
ON
PE
QC
SK
Yukon Territories


In [10]:
scores = [90, 90, 85, 78, 88]

In [11]:
count = 0
for score in scores:
    print(score)
    if score >= 90:
        count += 1

print(f'There were {count} 90+ scores')

90
90
85
78
88
There were 2 90+ scores


In [12]:
for score in scores:
    count = 0
    if score >= 90:
        count += 1
        
print(f'There were {count} 90+ scores')

There were 0 90+ scores


In [13]:
times = [0, 1, 2, 3, 4, 5, 6]

for i in range(7):
    print(i, i**2)

0 0
1 1
2 4
3 9
4 16
5 25
6 36


In [14]:
for i in range(0, 12, 3):
    print(i)

0
3
6
9


In [15]:
for i in range(12, 0, -3):
    print(i)

12
9
6
3


In [16]:
input_files = ['data1.csv', 'data2.csv', 'data3.csv']
output_files = []

for file in input_files:
    output_file_name = 'processed_' + file.replace('.csv', '.xlsx')
    output_files.append(output_file_name)
    # do more processing
    
output_files

['processed_data1.xlsx', 'processed_data2.xlsx', 'processed_data3.xlsx']

In [17]:
data1, data2, data3 = input_files
data3

'data3.csv'

In [18]:
stops = ['Sheppard-Yonge', 'Bayview', 'Bessarion', 'Leslie', 'Don Mills']
for index, stop in enumerate(stops):
    print(f'Stop {index + 1} is {stop}.')

Stop 1 is Sheppard-Yonge.
Stop 2 is Bayview.
Stop 3 is Bessarion.
Stop 4 is Leslie.
Stop 5 is Don Mills.


In [19]:
for index, stop in enumerate(stops):
    print(f'This stop is {stop}. The next stop is {stops[index+1]}')

This stop is Sheppard-Yonge. The next stop is Bayview
This stop is Bayview. The next stop is Bessarion
This stop is Bessarion. The next stop is Leslie
This stop is Leslie. The next stop is Don Mills


IndexError: list index out of range

In [20]:
for index in range(len(stops)-1):
    print(f'The next stop is: {stops[index+1]}')

The next stop is: Bayview
The next stop is: Bessarion
The next stop is: Leslie
The next stop is: Don Mills


In [21]:
for index, stop in stops:
    print(index)
    print(stop)

ValueError: too many values to unpack (expected 2)

In [22]:
scores[0]  = 95

In [23]:
for index, score in enumerate(scores):
    scores[index] = score + 2

scores

[97, 92, 87, 80, 90]

In [24]:
cities = ['Toronto', 'Montreal', 'Vancouver', 'Halifax']
lats = [43.65, 45.52, 49.28]
lons = [-79.38, -73.57, -123.13]

for city, lat, lon in zip(cities, lats, lons):
    print(f'{city} is at ({lat}, {lon})')

Toronto is at (43.65, -79.38)
Montreal is at (45.52, -73.57)
Vancouver is at (49.28, -123.13)


In [25]:
if len(cities) == len(lats) == len(lons):
    print('can zip')
else:
    print('different lens')

different lens


## Dictionaries

In [26]:
my_dictionary = {'key': 'value',
                 'key2': 'value2',
                 2000: 123}
my_dictionary

{'key': 'value', 'key2': 'value2', 2000: 123}

In [27]:
cities = {'Toronto': [43.65, -79.38],
         'Montreal': [45.52, -73.57],
         'Vancouver': [49,.28, -123.13]}

In [28]:
cities['Montreal']

[45.52, -73.57]

In [29]:
my_dictionary[2000]

123

In [30]:
olympics_hosts = {2020: 'Tokyo', 2016: 'Rio de Janiero', 2012:'London'}
olympics_hosts

{2020: 'Tokyo', 2016: 'Rio de Janiero', 2012: 'London'}

In [31]:
olympics_hosts[2008] = 'Beijing'
olympics_hosts[2008]

'Beijing'

In [32]:
olympics_hosts[2020] = 'Cancelled'

In [33]:
2004 in olympics_hosts

False

In [34]:
'Tokyo' in olympics_hosts

False

In [35]:
for i in olympics_hosts:
    print(i)

2020
2016
2012
2008


In [36]:
for value in olympics_hosts.values():
    print(value)

Cancelled
Rio de Janiero
London
Beijing


In [37]:
for key in olympics_hosts.keys():
    print(key)

2020
2016
2012
2008


In [38]:
for key, value in olympics_hosts.items():
    print(f'{value} hosted the {key} Olympics')

Cancelled hosted the 2020 Olympics
Rio de Janiero hosted the 2016 Olympics
London hosted the 2012 Olympics
Beijing hosted the 2008 Olympics


In [39]:
olympics_hosts[2008] = olympics_hosts.get(2008, 'Beijing')
olympics_hosts

{2020: 'Cancelled', 2016: 'Rio de Janiero', 2012: 'London', 2008: 'Beijing'}

In [40]:
all_olympics = {'summer': olympics_hosts,
               'winter': {2022: 'Beijing', 2018: 'Pyeongchang'}}
all_olympics

{'summer': {2020: 'Cancelled',
  2016: 'Rio de Janiero',
  2012: 'London',
  2008: 'Beijing'},
 'winter': {2022: 'Beijing', 2018: 'Pyeongchang'}}

In [41]:
all_olympics['winter']

{2022: 'Beijing', 2018: 'Pyeongchang'}

In [42]:
all_olympics['winter'][2018]

'Pyeongchang'

In [43]:
# same result as all_olympics['winter'][2018]
{2022: 'Beijing', 2018: 'Pyeongchang'}[2018]

'Pyeongchang'

In [44]:
all_olympics['summer'][2016]

'Rio de Janiero'

In [45]:
for season, hosts in all_olympics.items():
    print(season)
    for year, city in hosts.items():
        print(f'The {season.title()} {year} Olympics host was {city}')
        
print(city)

summer
The Summer 2020 Olympics host was Cancelled
The Summer 2016 Olympics host was Rio de Janiero
The Summer 2012 Olympics host was London
The Summer 2008 Olympics host was Beijing
winter
The Winter 2022 Olympics host was Beijing
The Winter 2018 Olympics host was Pyeongchang
Pyeongchang


In [46]:
all_olympics

{'summer': {2020: 'Cancelled',
  2016: 'Rio de Janiero',
  2012: 'London',
  2008: 'Beijing'},
 'winter': {2022: 'Beijing', 2018: 'Pyeongchang'}}

In [47]:
olympics_hosts

{2020: 'Cancelled', 2016: 'Rio de Janiero', 2012: 'London', 2008: 'Beijing'}

In [48]:
olympics_hosts[2020] = 'Cancelled'
olympics_hosts

{2020: 'Cancelled', 2016: 'Rio de Janiero', 2012: 'London', 2008: 'Beijing'}

In [49]:
all_olympics

{'summer': {2020: 'Cancelled',
  2016: 'Rio de Janiero',
  2012: 'London',
  2008: 'Beijing'},
 'winter': {2022: 'Beijing', 2018: 'Pyeongchang'}}

In [50]:
cities.update(my_dictionary)
cities

{'Toronto': [43.65, -79.38],
 'Montreal': [45.52, -73.57],
 'Vancouver': [49, 0.28, -123.13],
 'key': 'value',
 'key2': 'value2',
 2000: 123}

In [51]:
cities.update({2000: 'hello'})
cities

{'Toronto': [43.65, -79.38],
 'Montreal': [45.52, -73.57],
 'Vancouver': [49, 0.28, -123.13],
 'key': 'value',
 'key2': 'value2',
 2000: 'hello'}

## `while` loops

In [52]:
for k in cities.keys():
    print(k)

Toronto
Montreal
Vancouver
key
key2
2000


In [53]:
for score in scores:
    print(score > 85)

True
True
True
False
True


In [54]:
countdown = 10
while countdown > 5:
    print(countdown)
    countdown -= 1
    
print('Blastoff')    


10
9
8
7
6
Blastoff


In [55]:
countdown = 10
while True:
    if countdown <= 5:
        break
    print(countdown)
    countdown -= 1

print('blastoff')

10
9
8
7
6
blastoff


In [56]:
denominators = [100, 30, 0, -6]
numerators = [100, 45, 104, 2]

for x, y in zip(numerators, denominators):
    if y == 0:
        continue
        print('divide by zero, moving on')
    print(x/y)

1.0
1.5
-0.3333333333333333


## Tuples

In [57]:
coords = (39.5, -74.3)

In [58]:
tuple()

()

In [59]:
()

()

In [60]:
tuple(denominators)

(100, 30, 0, -6)

In [61]:
coords.append(70)

AttributeError: 'tuple' object has no attribute 'append'

In [62]:
lat, lon = coords
lat

39.5

In [63]:
provinces

['AB',
 'BC',
 'MB',
 'NL',
 'NP',
 'NS',
 'Northwest Territories',
 'Nunavut',
 'ON',
 'PE',
 'QC',
 'SK',
 'Yukon Territories']

In [64]:
provinces.append('SK')
provinces

['AB',
 'BC',
 'MB',
 'NL',
 'NP',
 'NS',
 'Northwest Territories',
 'Nunavut',
 'ON',
 'PE',
 'QC',
 'SK',
 'Yukon Territories',
 'SK']

## Sets

In [65]:
set(provinces)

{'AB',
 'BC',
 'MB',
 'NL',
 'NP',
 'NS',
 'Northwest Territories',
 'Nunavut',
 'ON',
 'PE',
 'QC',
 'SK',
 'Yukon Territories'}

In [66]:
empty_set = set()
mystery_data = {}
type(mystery_data)

dict

In [67]:
things = {'keys', 'wallet', 'phone'}
type(things)

set

In [68]:
things.add('keys')
things

{'keys', 'phone', 'wallet'}

In [69]:
things.add('laptop')
things

{'keys', 'laptop', 'phone', 'wallet'}

In [70]:
things.remove('laptop')
things

{'keys', 'phone', 'wallet'}

In [71]:
rainbow = {'r', 'o', 'y', 'blue'}
olympic_flag = {'r', 'g', 'y', 'blue', 'black'}

In [72]:
rainbow.difference(olympic_flag)

{'o'}

In [73]:
olympic_flag.difference(rainbow)

{'black', 'g'}

In [74]:
rainbow.symmetric_difference(olympic_flag)

{'black', 'g', 'o'}

In [75]:
rainbow.intersection(olympic_flag)

{'blue', 'r', 'y'}

In [76]:
rainbow.union(olympic_flag)

{'black', 'blue', 'g', 'o', 'r', 'y'}

In [77]:
sorted(rainbow)

['blue', 'o', 'r', 'y']

In [78]:
rainbow.sort()

AttributeError: 'set' object has no attribute 'sort'

In [79]:
for colour in rainbow:
    print(colour)

o
blue
y
r


## Collections: a summary

(Adapted from: Table 17, Chapter 11, _Practical Programming: An Introduction to Computer Science Using Python 3.6_)

| Collection | Mutable? | Ordered? | Use when...|
|---|---|---|---|
| `str` | No | Yes | You want to keep track of text. |
| `list` | Yes | Yes | You want to keep track of and update an ordered sequence.|
| `tuple` | No | Yes | You want to build an ordered sequence that you know won't change or that you want to use as a key in a dictionary or as a value in a set. |
| `set` | Yes | No | You want to keep track of values, but order doesn't matter, and you don't want duplicates. The values must be immutable. |
| `dict` | Yes | No | You want to keep a mapping of keys to values. The keys must be immutable. |

## Modules

In [80]:
import math

In [81]:
math.pi

3.141592653589793

In [82]:
math.gcd(45709348, 34908, 2352)

4

In [83]:
pi

NameError: name 'pi' is not defined

In [84]:
gcd(2323, 24509809, 32)

NameError: name 'gcd' is not defined

In [85]:
from datetime import date

In [86]:
date.today()

datetime.date(2022, 7, 16)

## Better pattern matching with regular expressions

Regular expressions are a mini-language of its own! Python's documentation provides a [guide to regular expression syntax](https://docs.python.org/3/library/re.html#regular-expression-syntax).

| Character | Meaning |
|---|---|
| * | Match zero or more of the RE before |
| + | Match one or more of the RE before |
| ? | Match zero or one of the RE before |
| \ | Escape character |
| \d | Match any decimal digit |
| \s | Match any whitespace |
| \w | Match any word character (letters, digits, underscore) |
| [aeiou] | Match any of the characters in brackets |

In [87]:
import re

In [88]:
text = 'My favourite thing to do on Saturday morning is program. Programming is fine as long as there are not too many bugs.'

In [89]:
'favorite' in text

False

In [90]:
re.search('favou?rite', text)

<re.Match object; span=(3, 12), match='favourite'>

In [91]:
if re.search('favou?rite', text):
    print('This is about favourites')
else:
    print('This is about something else')

This is about favourites


In [92]:
re.sub('program+', 'garden', text, flags=re.I)

'My favourite thing to do on Saturday morning is garden. gardening is fine as long as there are not too many bugs.'

## Navigating folders

In [93]:
import os

In [94]:
os.getcwd()

'C:\\Users\\unive\\Documents\\GitHub\\dsi-python-workshop\\05-live-code'

In [95]:
os.listdir()

['.ipynb_checkpoints',
 '0711_fundamentals.ipynb',
 '0714_fundamentals_reality.ipynb',
 '0716_reality_in_out.ipynb',
 'provinces.txt',
 'readme.md']

In [None]:
os.listdir('../')

#os.listdir('sample_data')  # run if you are using Google Colab

In [96]:
os.listdir('C:\\Users\\unive\\Documents\\GitHub\\dsi-python-workshop\\data')

['bicycle-thefts - 4326.csv',
 'bike_thefts_joined.csv',
 'neighbourhood-profiles-2016-140-model.csv',
 'neighbourhoods.csv',
 'ttc-bus-delay-data-2021.xlsx',
 'ttc-streetcar-delay-data-2021.xlsx',
 'ttc-subway-delay-codes.xlsx',
 'ttc-subway-delay-data-2021.xlsx',
 'ttc-subway-delay-data-readme.xlsx']

In [97]:
for file in os.listdir('C:\\Users\\unive\\Documents\\GitHub\\dsi-python-workshop\\data'):
    print(file)

bicycle-thefts - 4326.csv
bike_thefts_joined.csv
neighbourhood-profiles-2016-140-model.csv
neighbourhoods.csv
ttc-bus-delay-data-2021.xlsx
ttc-streetcar-delay-data-2021.xlsx
ttc-subway-delay-codes.xlsx
ttc-subway-delay-data-2021.xlsx
ttc-subway-delay-data-readme.xlsx


In [98]:
os.listdir('../01-slides/sample_data')

['anscombe.json', 'california_housing_test.csv']

## Reading and writing files

In [None]:
# r: read
# w: write
# a: append
with open('../01-slides/sample_data/california_housing_test.csv', 'r') as f:
    # prints all 3000 lines of the file
    print(f.readlines())
    
print('We closed the file')

In [100]:
provinces

['AB',
 'BC',
 'MB',
 'NL',
 'NP',
 'NS',
 'Northwest Territories',
 'Nunavut',
 'ON',
 'PE',
 'QC',
 'SK',
 'Yukon Territories',
 'SK']

In [102]:
with open('provinces.txt', 'w') as outfile:
    #outfile.write(str(provinces))
    outfile.writelines(provinces)

print('We wrote to file')

We wrote to file


In [103]:
with open('provinces.txt', 'r') as province_file:
    print(province_file.read())

ABBCMBNLNPNSNorthwest TerritoriesNunavutONPEQCSKYukon TerritoriesSK


## Object-oriented programming

In [104]:
class BasketballPlayer:
    # jersey number, team, position
    def __init__(self, jersey_number, team, position):
        self.jersey_number = jersey_number
        self.team = team
        self.position = position
        
    # methods: pass, dribble, shoot
    def dribble(self):
        print('Dribbling')
        


In [105]:
player = BasketballPlayer(30, 'Bears', 'centre')

In [106]:
player

<__main__.BasketballPlayer at 0x1f9d7bed5d0>

In [107]:
player.dribble()

Dribbling


In [108]:
player.team

'Bears'