# Video: Organizing Your Data with Dictionaries and Lists

This video gives an example of how to represent data using lists and dictionaries.

In [None]:
# Shell escape: fetch the CSV from portal.edirepository.org and save it locally
# as penguins.csv (-O sets the output file name).
!wget -O penguins.csv "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.219.3&entityid=002f3893385f710df69eeebe893144ff"

--2024-06-27 16:32:19--  https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.219.3&entityid=002f3893385f710df69eeebe893144ff
Resolving portal.edirepository.org (portal.edirepository.org)... 129.24.240.161
Connecting to portal.edirepository.org (portal.edirepository.org)|129.24.240.161|:443... connected.
HTTP request sent, awaiting response... 200 
Length: 23755 (23K) [text/csv]
Saving to: ‘penguins.csv’


2024-06-27 16:32:20 (656 KB/s) - ‘penguins.csv’ saved [23755/23755]



In [None]:
import csv

# Read the whole CSV into memory as a list of dicts, one per data row, keyed by
# the column names from the header line.
# newline="" is what the csv module asks for when it is given a file object:
# it lets the reader handle line endings itself, so quoted fields that contain
# embedded line breaks are parsed correctly.
with open("penguins.csv", newline="") as fp:
    reader = csv.DictReader(fp)
    # Materialise the rows before the file is closed at the end of the block.
    data = list(reader)

# Show the first row to see which columns are available.
data[0]

{'studyName': 'PAL0708',
 'Sample Number': '1',
 'Species': 'Adelie Penguin (Pygoscelis adeliae)',
 'Region': 'Anvers',
 'Island': 'Torgersen',
 'Stage': 'Adult, 1 Egg Stage',
 'Individual ID': 'N1A1',
 'Clutch Completion': 'Yes',
 'Date Egg': '2007-11-11',
 'Culmen Length (mm)': '39.1',
 'Culmen Depth (mm)': '18.7',
 'Flipper Length (mm)': '181',
 'Body Mass (g)': '3750',
 'Sex': 'MALE',
 'Delta 15 N (o/oo)': '',
 'Delta 13 C (o/oo)': '',
 'Comments': 'Not enough blood for isotopes.'}

In [None]:
# Peek at the first ten rows; each row is a dict keyed by the CSV column names.
data[:10]

[{'studyName': 'PAL0708',
  'Sample Number': '1',
  'Species': 'Adelie Penguin (Pygoscelis adeliae)',
  'Region': 'Anvers',
  'Island': 'Torgersen',
  'Stage': 'Adult, 1 Egg Stage',
  'Individual ID': 'N1A1',
  'Clutch Completion': 'Yes',
  'Date Egg': '2007-11-11',
  'Culmen Length (mm)': '39.1',
  'Culmen Depth (mm)': '18.7',
  'Flipper Length (mm)': '181',
  'Body Mass (g)': '3750',
  'Sex': 'MALE',
  'Delta 15 N (o/oo)': '',
  'Delta 13 C (o/oo)': '',
  'Comments': 'Not enough blood for isotopes.'},
 {'studyName': 'PAL0708',
  'Sample Number': '2',
  'Species': 'Adelie Penguin (Pygoscelis adeliae)',
  'Region': 'Anvers',
  'Island': 'Torgersen',
  'Stage': 'Adult, 1 Egg Stage',
  'Individual ID': 'N1A2',
  'Clutch Completion': 'Yes',
  'Date Egg': '2007-11-11',
  'Culmen Length (mm)': '39.5',
  'Culmen Depth (mm)': '17.4',
  'Flipper Length (mm)': '186',
  'Body Mass (g)': '3800',
  'Sex': 'FEMALE',
  'Delta 15 N (o/oo)': '8.94956',
  'Delta 13 C (o/oo)': '-24.69454',
  'Comments':

In [None]:
# Index 8 is one of the rows whose "Sex" field is an empty string.
data[8]

{'studyName': 'PAL0708',
 'Sample Number': '9',
 'Species': 'Adelie Penguin (Pygoscelis adeliae)',
 'Region': 'Anvers',
 'Island': 'Torgersen',
 'Stage': 'Adult, 1 Egg Stage',
 'Individual ID': 'N5A1',
 'Clutch Completion': 'Yes',
 'Date Egg': '2007-11-09',
 'Culmen Length (mm)': '34.1',
 'Culmen Depth (mm)': '18.1',
 'Flipper Length (mm)': '193',
 'Body Mass (g)': '3475',
 'Sex': '',
 'Delta 15 N (o/oo)': '',
 'Delta 13 C (o/oo)': '',
 'Comments': 'No blood sample obtained.'}

In [None]:
# Every row whose "Sex" field is the empty string.
[row for row in data if row["Sex"] == ""]

[{'studyName': 'PAL0708',
  'Sample Number': '4',
  'Species': 'Adelie Penguin (Pygoscelis adeliae)',
  'Region': 'Anvers',
  'Island': 'Torgersen',
  'Stage': 'Adult, 1 Egg Stage',
  'Individual ID': 'N2A2',
  'Clutch Completion': 'Yes',
  'Date Egg': '2007-11-16',
  'Culmen Length (mm)': '',
  'Culmen Depth (mm)': '',
  'Flipper Length (mm)': '',
  'Body Mass (g)': '',
  'Sex': '',
  'Delta 15 N (o/oo)': '',
  'Delta 13 C (o/oo)': '',
  'Comments': 'Adult not sampled.'},
 {'studyName': 'PAL0708',
  'Sample Number': '9',
  'Species': 'Adelie Penguin (Pygoscelis adeliae)',
  'Region': 'Anvers',
  'Island': 'Torgersen',
  'Stage': 'Adult, 1 Egg Stage',
  'Individual ID': 'N5A1',
  'Clutch Completion': 'Yes',
  'Date Egg': '2007-11-09',
  'Culmen Length (mm)': '34.1',
  'Culmen Depth (mm)': '18.1',
  'Flipper Length (mm)': '193',
  'Body Mass (g)': '3475',
  'Sex': '',
  'Delta 15 N (o/oo)': '',
  'Delta 13 C (o/oo)': '',
  'Comments': 'No blood sample obtained.'},
 {'studyName': 'PAL070

In [None]:
# Just the free-text comment of each row whose "Sex" field is the empty string.
[row["Comments"] for row in data if row["Sex"] == ""]

['Adult not sampled.',
 'No blood sample obtained.',
 'No blood sample obtained for sexing.',
 'No blood sample obtained for sexing.',
 'No blood sample obtained.',
 'Sexing primers did not amplify. Not enough blood for isotopes.']

In [None]:
# Distinct values that occur in the "Sex" column (including the empty string).
{row["Sex"] for row in data}

{'', 'FEMALE', 'MALE'}

In [None]:
# Will map each distinct "Sex" value to the list of rows that carry that value.
data_by_sex = {}

In [None]:
# Group the rows by their "Sex" value. The dict is rebuilt from scratch here so
# that re-running this cell does not append every row a second time.
data_by_sex = {}
for row in data:
    # setdefault creates the empty list the first time a value is seen.
    # The row dict itself (not a copy) is appended, so `data` and the groups
    # share the same objects.
    data_by_sex.setdefault(row["Sex"], []).append(row)

In [None]:
# Display the grouped structure: each "Sex" value maps to a list of row dicts.
data_by_sex

{'MALE': [{'studyName': 'PAL0708',
   'Sample Number': '1',
   'Species': 'Adelie Penguin (Pygoscelis adeliae)',
   'Region': 'Anvers',
   'Island': 'Torgersen',
   'Stage': 'Adult, 1 Egg Stage',
   'Individual ID': 'N1A1',
   'Clutch Completion': 'Yes',
   'Date Egg': '2007-11-11',
   'Culmen Length (mm)': '39.1',
   'Culmen Depth (mm)': '18.7',
   'Flipper Length (mm)': '181',
   'Body Mass (g)': '3750',
   'Sex': 'MALE',
   'Delta 15 N (o/oo)': '',
   'Delta 13 C (o/oo)': '',
   'Comments': 'Not enough blood for isotopes.'},
  {'studyName': 'PAL0708',
   'Sample Number': '6',
   'Species': 'Adelie Penguin (Pygoscelis adeliae)',
   'Region': 'Anvers',
   'Island': 'Torgersen',
   'Stage': 'Adult, 1 Egg Stage',
   'Individual ID': 'N3A2',
   'Clutch Completion': 'Yes',
   'Date Egg': '2007-11-16',
   'Culmen Length (mm)': '39.3',
   'Culmen Depth (mm)': '20.6',
   'Flipper Length (mm)': '190',
   'Body Mass (g)': '3650',
   'Sex': 'MALE',
   'Delta 15 N (o/oo)': '8.66496',
   'Delta 1

In [None]:
# Number of groups, i.e. one per distinct "Sex" value.
len(data_by_sex)

3

In [None]:
# The group labels; dicts keep insertion order, so they appear in the order
# each value was first encountered in the data.
data_by_sex.keys()

dict_keys(['MALE', 'FEMALE', ''])

In [None]:
# Print each group label next to the number of rows in that group. The group
# keyed by the empty string prints as a blank label.
for sex, sex_rows in data_by_sex.items():
    print(sex, len(sex_rows))

MALE 73
FEMALE 73
 6


In [None]:
# Show the first row again as a baseline before it is modified below.
data[0]

{'studyName': 'PAL0708',
 'Sample Number': '1',
 'Species': 'Adelie Penguin (Pygoscelis adeliae)',
 'Region': 'Anvers',
 'Island': 'Torgersen',
 'Stage': 'Adult, 1 Egg Stage',
 'Individual ID': 'N1A1',
 'Clutch Completion': 'Yes',
 'Date Egg': '2007-11-11',
 'Culmen Length (mm)': '39.1',
 'Culmen Depth (mm)': '18.7',
 'Flipper Length (mm)': '181',
 'Body Mass (g)': '3750',
 'Sex': 'MALE',
 'Delta 15 N (o/oo)': '',
 'Delta 13 C (o/oo)': '',
 'Comments': 'Not enough blood for isotopes.'}

In [None]:
# Add a marker key to the first row in place. The lists in data_by_sex hold
# references to these same dict objects (not copies), so the change is visible
# through data_by_sex as well.
data[0]["test"] = True

In [None]:
# The first entry of the "MALE" group is the same dict object as data[0], so it
# also shows the 'test' key that was just added.
data_by_sex["MALE"][0]

{'studyName': 'PAL0708',
 'Sample Number': '1',
 'Species': 'Adelie Penguin (Pygoscelis adeliae)',
 'Region': 'Anvers',
 'Island': 'Torgersen',
 'Stage': 'Adult, 1 Egg Stage',
 'Individual ID': 'N1A1',
 'Clutch Completion': 'Yes',
 'Date Egg': '2007-11-11',
 'Culmen Length (mm)': '39.1',
 'Culmen Depth (mm)': '18.7',
 'Flipper Length (mm)': '181',
 'Body Mass (g)': '3750',
 'Sex': 'MALE',
 'Delta 15 N (o/oo)': '',
 'Delta 13 C (o/oo)': '',
 'Comments': 'Not enough blood for isotopes.',
 'test': True}