# Opdracht 1.2 - Een CSV-bestand inlezen

In [3]:
import csv

In [4]:
# Path to the CSV data set that the cells below open; being relative, it is
# resolved against the current working directory of the kernel.
filename = '../datasets/ca-500.csv'

In [5]:
# Show the column names of the CSV file.
# newline='' lets the csv module do its own newline handling, as its
# documentation requires; otherwise newlines inside quoted fields can be
# misinterpreted.
with open(filename, newline='') as f:
    reader = csv.DictReader(f)
    # Accessing fieldnames makes DictReader read the header row.
    print(reader.fieldnames)

['first_name', 'last_name', 'company_name', 'address', 'city', 'province', 'postal', 'phone1', 'phone2', 'email', 'web', 'date_of_birth']


### Print alleen de velden first_name, last_name, city en email

In [6]:
# Print first_name, last_name, city and email for every row, in fixed-width
# columns (the first three padded to 20 characters).
columns = ('first_name', 'last_name', 'city', 'email')

# newline='' lets the csv module do its own newline handling, as its
# documentation requires for files passed to a reader.
with open(filename, newline='') as f:
    reader = csv.DictReader(f)

    for row in reader:
        print('{:20} {:20} {:20} {}'.format(*(row[column] for column in columns)))

Francoise            Kohlman              Windsor              f.kohlman@hotmail.com
Kendra               Yandow               Alcida               kendray@cox.net
Lourdes              Beliles              Belleville           l.beliles@gmail.com
Hannah               Helmcamp             Vancouver            hannah.helmcamp@helmcamp.org
Tom                  Sport                LIle-Perrot          tom_sport@aol.com
Queenie              Torchio              Swift Current        queeniet@hotmail.com
Hui                  Moltz                Baker Brook          hui.moltz@yahoo.com
Josefa               Despain              Delhi                josefa.despain@hotmail.com
Lea                  Brzozowski           Bradford             lea.brzozowski@aol.com
Paola                Kardell              Aurora               paola_kardell@aol.com
Hortencia            Arredondo            New Waterford        hortencia.arredondo@arredondo.com
Leanna               Sifford              North York   

### De mensen die in Montreal wonen

In [9]:
# Print the same four columns, but only for rows whose city is exactly
# 'Montreal' (case-sensitive comparison).
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for files passed to a reader.
with open(filename, newline='') as f:
    reader = csv.DictReader(f)

    for row in reader:
        if row['city'] != 'Montreal':
            continue
        print('{:20} {:20} {:20} {}'.format(row['first_name'],
                                            row['last_name'],
                                            row['city'],
                                            row['email']))

Mammie               Masiejczyk           Montreal             mammie.masiejczyk@cox.net
Tesha                Purslow              Montreal             tesha.purslow@purslow.com
Harris               Elmquist             Montreal             helmquist@elmquist.com
Maddie               Tapian               Montreal             maddie.tapian@aol.com
Steffanie            Palenik              Montreal             s.palenik@palenik.com
Val                  Tijerina             Montreal             val.tijerina@cox.net
Denny                Gilliam              Montreal             dennyg@cox.net
Luann                Tibbles              Montreal             luann_tibbles@cox.net
Verona               Staiano              Montreal             
Rikki                Groom                Montreal             r.groom@groom.org
Ricki                Wida                 Montreal             ricki.wida@wida.com
Clemencia            Giglio               Montreal             clemencia_giglio@cox.net


### Het aantal mensen per city

In [8]:
# Count the number of rows (people) per city and print a text histogram.
cities = dict()

# newline='' lets the csv module do its own newline handling, as its
# documentation requires for files passed to a reader.
with open(filename, newline='') as f:
    reader = csv.DictReader(f)

    for row in reader:
        cities[row['city']] = cities.get(row['city'], 0) + 1

# Sort on (count, name) with reverse=True: largest count first, and cities
# with an equal count in descending alphabetical order.
for city, n in sorted(cities.items(), key=lambda item: (item[1], item[0]), reverse=True):
    print(f'{city:25} {n:3} ' + n*'*')

Calgary                    19 *******************
Montreal                   12 ************
Etobicoke                  11 ***********
Vancouver                   9 *********
Surrey                      9 *********
Regina                      9 *********
North York                  9 *********
Edmonton                    9 *********
Quebec                      8 ********
Winnipeg                    7 *******
Toronto                     7 *******
Ottawa                      7 *******
Hamilton                    7 *******
Windsor                     6 ******
North Vancouver             6 ******
Mississauga                 6 ******
London                      6 ******
Trois-Rivieres              5 *****
Scarborough                 5 *****
Sault Ste. Marie            5 *****
Saskatoon                   5 *****
Peterborough                5 *****
Orleans                     5 *****
Oakville                    5 *****
Halifax                     5 *****
Gatineau                    5 *****
Br

### Het aantal mensen per email provider/domain

In [10]:
# Count the number of rows per email provider, i.e. the part of the address
# after the first '@'.
providers = {}

# newline='' lets the csv module do its own newline handling, as its
# documentation requires for files passed to a reader.
with open(filename, newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        # partition() returns an empty separator when there is no '@'.
        # Such values (including empty email fields) are skipped instead of
        # being counted as a provider in their own right.
        _, at_sign, provider = row['email'].partition('@')
        if not at_sign:
            continue
        providers[provider] = providers.get(provider, 0) + 1

# Largest count first; providers with an equal count in descending
# alphabetical order.
for provider, n in sorted(providers.items(), key=lambda item: (item[1], item[0]), reverse=True):
    print(f'{provider:25} {n}')

cox.net                   81
aol.com                   78
gmail.com                 64
hotmail.com               60
yahoo.com                 55
swartzbeck.org            2
zeanah.com                1
zarling.com               1
wrinkle.org               1
wolfgang.org              1
wieand.com                1
wida.com                  1
wetherby.org              1
westervelt.org            1
violett.com               1
vilven.org                1
tutterrow.org             1
tumolillo.com             1
traux.com                 1
thigpen.com               1
tenuta.org                1
tentler.org               1
talty.org                 1
stimac.org                1
stenquist.org             1
steinhorst.com            1
stalma.org                1
sine.com                  1
sifford.org               1
shintaku.org              1
shadle.com                1
sensibaugh.com            1
schuring.com              1
sams.com                  1
sabota.com                1
russi.com      

### Ter illustratie hetzelfde, maar nu zonder gebruik te maken van de csv-bibliotheek

In [11]:
# Hand-rolled parsing of the same file, for illustration only.
# Assumes every field on every line is wrapped in double quotes and that no
# field contains the sequence '","' or a newline - the csv module handles
# those cases, this code does not.
with open(filename) as f:
    # Slice off exactly one quote on each side. strip('"') would also eat the
    # quotes of an empty first or last field and so shift/merge columns.
    headers = f.readline().strip()[1:-1].split('","')
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a trailing empty line at end of file).
            continue
        values = line[1:-1].split('","')
        d = dict(zip(headers, values))
        if d['city'] == 'Montreal':
            print('{:20} {:20} {:20} {}'.format(d['first_name'],
                                                d['last_name'],
                                                d['city'],
                                                d['email']))

Mammie               Masiejczyk           Montreal             mammie.masiejczyk@cox.net
Tesha                Purslow              Montreal             tesha.purslow@purslow.com
Harris               Elmquist             Montreal             helmquist@elmquist.com
Maddie               Tapian               Montreal             maddie.tapian@aol.com
Steffanie            Palenik              Montreal             s.palenik@palenik.com
Val                  Tijerina             Montreal             val.tijerina@cox.net
Denny                Gilliam              Montreal             dennyg@cox.net
Luann                Tibbles              Montreal             luann_tibbles@cox.net
Verona               Staiano              Montreal             
Rikki                Groom                Montreal             r.groom@groom.org
Ricki                Wida                 Montreal             ricki.wida@wida.com
Clemencia            Giglio               Montreal             clemencia_giglio@cox.net
