In [1]:
# Import libraries
import pandas as pd
import csv

from collections import defaultdict
from pprint import pprint

In [2]:
# Load the baby names CSV into a DataFrame, order the rows by rank and then
# name, and convert the NAME column to title case (e.g. 'CHLOE' -> 'Chloe')
df = (
    pd.read_csv('data/baby_names.csv')
    .sort_values(by=['RANK', 'NAME'])
    .assign(NAME=lambda frame: frame.NAME.str.title())
)
# Preview the first five rows
print(df.head())

       BRITH_YEAR  GENDER            ETHNICTY    NAME  COUNT  RANK
7898         2012  FEMALE      ASIAN AND PACI   Chloe    172     1
11622        2013    MALE  WHITE NON HISPANIC   David    304     1
8619         2012  FEMALE      WHITE NON HISP    Emma    228     1
322          2011  FEMALE  WHITE NON HISPANIC  Esther    224     1
3206         2011  FEMALE  WHITE NON HISPANIC  Esther    224     1


In [3]:
# Show the rows for third-ranked female names with birth year 2013
df.loc[(df['RANK'] == 3) & (df['GENDER'] == 'FEMALE') & (df['BRITH_YEAR'] == 2013)]

Unnamed: 0,BRITH_YEAR,GENDER,ETHNICTY,NAME,COUNT,RANK
10031,2013,FEMALE,BLACK NON HISPANIC,Aaliyah,73,3
10637,2013,FEMALE,WHITE NON HISPANIC,Esther,214,3
10440,2013,FEMALE,HISPANIC,Mia,237,3
9988,2013,FEMALE,ASIAN AND PACIFIC ISLANDER,Olivia,109,3


In [4]:
# Group the 2012 names by the number of rows each name appears in:
# key = occurrence count, value = list of the names with that count
baby_names_2012 = defaultdict(list)
for name, occurrences in df.loc[df.BRITH_YEAR == 2012, 'NAME'].value_counts().items():
    baby_names_2012[occurrences].append(name)

# in:
# baby_names_2012
# out (sample):
# NOTE(review): the sample shows upper-case names, but NAME is title-cased
# when the data is loaded, so a fresh run presumably prints 'Ariel', ... - confirm
# defaultdict(list,
#            {6: ['ARIEL', 'JORDAN', 'AVERY'],
#             5: ['RILEY', 'DYLAN', 'RYAN'],
#             4: ['ISABELLA', 'MIA', 'SEBASTIAN', 'ADAM', ...],
#             3: ['EVA', 'ALICE', 'HAZEL', 'CONNOR', ...],
#             2: ['KELLY', 'ERIK', 'ARMANI', 'AMELIE', ...],
#             1: ['MOISES', 'TIANA', 'CARSON', 'BROOKE', ...]})

In [5]:
# Rank -> name lookup for 2012 girls in the 'WHITE NON HISPANIC' group
# (both spellings of the ethnicity label found in the data are matched).
# Rows sharing a rank collapse to a single entry when converted to a dict.
female_baby_names_2012 = (
    df.loc[
        (df.BRITH_YEAR == 2012)
        & (df.GENDER == 'FEMALE')
        & df.ETHNICTY.isin(['WHITE NON HISP', 'WHITE NON HISPANIC'])
    ]
    .set_index('RANK')
    .sort_index()
    .NAME.str.title()
    .to_dict()
)

In [6]:
# Nested lookup year -> {rank: name} for boys in the 'WHITE NON HISPANIC'
# group (both spellings of the label are matched). 2012 starts out empty;
# 2013 and 2014 use the same filter and differ only in the year.
male_baby_names = {
    2012: {},
    **{
        year: (
            df.loc[
                (df.BRITH_YEAR == year)
                & (df.GENDER == 'MALE')
                & df.ETHNICTY.isin(['WHITE NON HISP', 'WHITE NON HISPANIC'])
            ]
            .set_index('RANK')
            .sort_index()
            .NAME
            .to_dict()
        )
        for year in (2013, 2014)
    },
}

In [7]:
# Rank -> name lookup for 2011 boys in the 'WHITE NON HISPANIC' group
# (both spellings of the ethnicity label found in the data are matched)
male_baby_names_2011 = (
    df.loc[
        (df.BRITH_YEAR == 2011)
        & (df.GENDER == 'MALE')
        & df.ETHNICTY.isin(['WHITE NON HISP', 'WHITE NON HISPANIC'])
    ]
    .set_index('RANK')
    .sort_index()
    .NAME.str.title()
    .to_dict()
)

In [8]:
# Nested lookup year -> {rank: name} for girls in the 'WHITE NON HISPANIC'
# group, restricted to ranks below 11 (the top ten) for each of 2011-2014
female_baby_names = {
    year: (
        df.loc[
            (df.BRITH_YEAR == year)
            & (df.GENDER == 'FEMALE')
            & (df.RANK < 11)
            & df.ETHNICTY.isin(['WHITE NON HISP', 'WHITE NON HISPANIC'])
        ]
        .set_index('RANK')
        .sort_index()
        .NAME
        .to_dict()
    )
    for year in (2011, 2012, 2013, 2014)
}

# 02. Dictionaries - the root of Python

At the root of all things Python is a dictionary. Herein, you'll learn how to use them to safely handle data that can be viewed in a variety of ways to answer even more questions about the New York Baby Names dataset. You'll explore how to loop through data in a dictionary, access nested data, add new data, and come to appreciate all of the wonderful capabilities of Python dictionaries.

## 02.01 Using dictionaries

See the video.

## 02.02 Creating and looping through dictionaries

You'll often encounter the need to loop over some array type data, like in Chapter 1, and provide it some structure so you can find the data you desire quickly.

You start that by creating an empty dictionary and assigning part of your array data as the key and the rest as the value.

Previously, you used __sorted()__ to organize your data in a list. Dictionaries can also be sorted. By default, using __sorted()__ on a dictionary will sort by the keys of the dictionary. You can also reverse the order by passing __reverse=True__ as a keyword argument.

Finally, since sorted returns a list, you can use slice notation to select only part of the list. For example, __[:10]__ will slice the first ten items off a list and return only those items.

**Instructions**

1. Create an empty dictionary called names_by_rank.
2. Loop over female_baby_names_2012.items(), unpacking it into the variables rank and name.
3. Inside the loop, add each name to the names_by_rank dictionary using the rank as the key.
4. Sort the names_by_rank dictionary keys in descending order, select the first ten items. Print each item.

**Results:**<br>
<font color=darkgreen>Brilliant work!</font>

In [9]:
# Start with an empty dictionary: names_by_rank
names_by_rank = {}

# Copy every (rank, name) pair of the girl names into it, keyed by rank
for rank, name in female_baby_names_2012.items():
    names_by_rank[rank] = name

# Sorting a dictionary sorts its keys; reverse=True puts the largest rank
# numbers first
ranks_descending = sorted(names_by_rank.keys(), reverse=True)

# Keep the first ten ranks of that ordering and print the name of each
for rank in ranks_descending[:10]:
    print(names_by_rank[rank])

Yara
Tzivia
Yitta
Tzipora
Shaina
Sima
Sylvia
Yehudis
Vera
Sloane


## 02.03 Safely finding by key

As demonstrated in the video, if you attempt to access a key that isn't present in a dictionary, you'll get a __KeyError__. One option to handle this type of error is to use a __try: except:__ block. You can learn more about error handling in Python Data Science Toolbox (Part 1) (https://learn.datacamp.com/courses/python-data-science-toolbox-part-1), specifically in this video (https://campus.datacamp.com/courses/python-data-science-toolbox-part-1/lambda-functions-and-error-handling?ex=7).

Python provides a faster, more versatile tool to help with this problem in the form of the __.get()__ method. The __.get()__ method allows you to supply the name of a key, and optionally, what you'd like to have returned if the key is not found.

You'll be using the same __names__ dictionary from the previous exercise and will gain practice using the __.get()__ method.

**Instructions**

1. Safely print rank 7 from the names dictionary.
2. Safely print the type of rank 100 from the names dictionary.
3. Safely print rank 105 from the names dictionary or 'Not Found' if 105 is not found.

**Results:**<br>
<font color=darkgreen>Great work. Notice the difference in output between the three print statements. The names dictionary does not have ranks of 100 and 105.</font>

In [10]:
# Safely print rank 7 from the names dictionary
# (.get() returns None instead of raising KeyError when the key is missing)
print(female_baby_names_2012.get(7))

# Safely print the type of rank 100 from the names dictionary
print(type(female_baby_names_2012.get(100)))

# Safely print rank 105 from the names dictionary or 'Not Found'
print(female_baby_names_2012.get(105, 'Not Found'))

Chaya
<class 'NoneType'>
Not Found


## 02.04 Dealing with nested data

A dictionary can contain another dictionary as the value of a key, and this is a very common way to deal with repeating data structures such as yearly, monthly or weekly data. All the same rules apply when creating or accessing the dictionary.

For example, suppose you had a dictionary that recorded my cookie consumption by year and type of cookie. It might look like <code>cookies = {'2017': {'chocolate chip': 483, 'peanut butter': 115}, '2016': {'chocolate chip': 9513, 'peanut butter': 6792}}</code>. I could access how many chocolate chip cookies I ate in 2016 using <code>cookies['2016']['chocolate chip']</code>.

When exploring a new dictionary, it can be helpful to use the <code>.keys()</code> method to get an idea of what data might be available within the dictionary. You can also iterate over a dictionary and it will return each key in the dictionary for you to use inside the loop. Here, a dictionary called __boy_names__ has been loaded into your workspace. It consists of all male names in 2013 and 2014.

**Instructions**

1. Print the keys of the boy_names dictionary.
2. Print the keys of the boy_names dictionary for the year 2013.
3. Loop over the boy_names dictionary.
4. Inside the loop, safely print the year and the third ranked name. Print 'Unknown' if the third ranked name is not found.

**Results:**<br>
<font color=darkgreen>Excellent work! Dealing with nested dictionaries can be tricky. Here, the 2012 key consists of an empty dictionary, and so there is no third ranked name. As a result, 'Unknown' is printed, unlike for 2013 and 2014.</font>

In [11]:
# Print the keys (years) of the boy_names dictionary
print(male_baby_names.keys())

# Print the keys (ranks) of the nested boy_names dictionary for the year 2013
print(male_baby_names[2013].keys())

# Loop over the dictionary; iterating a dict yields its keys (the years)
for year in male_baby_names:
    # Safely print the year and the third ranked name, or 'Unknown' when the
    # nested dictionary for that year has no rank 3 entry
    print(year, male_baby_names[year].get(3, 'Unknown'))

dict_keys([2012, 2013, 2014])
dict_keys([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100])
2012 unknown
2013 Michael
2014 Michael


## 02.05 Altering dictionaries

See the video.

## 02.06 Adding and extending dictionaries

If you have a dictionary and you want to add data to it, you can simply create a new key and assign the data you desire to it. It's important to remember that if it's a nested dictionary, then all the keys in the data path must exist, and each key in the path must be assigned individually.

You can also use the __.update()__ method to update a dictionary with keys and values from another dictionary, tuples or keyword arguments.

Here, you'll combine several techniques used in prior exercises to set up your dictionary in a way that makes it easy to find the least popular baby name for each year.

Your job is to add data for the year 2011 to your dictionary by assignment, 2012 by update, and then find the least popular baby name for each year.

**Instructions**

1. Assign the names_2011 dictionary as the value to the 2011 key of the boy_names dictionary.
2. Update the 2012 key in the boy_names dictionary with the following data in a list of tuples: (1, 'Casey'), (2, 'Aiden').
3. Loop over the boy_names dictionary.
4. Inside the for loop, sort the data for each year of boy_names by descending rank and take the first result which will be the lowest ranked name.
5. Safely print the year and least popular name or 'Not Available' if it is not found. Take advantage of the .get() method.

**Results:**<br>
<font color=darkgreen>Great work! It looks like 'Yonah', 'Yitzchak', and 'Yidel' were the least popular baby names in 2011, 2013, and 2014. The dictionary in the 2012 key of boy_names did not contain any data until you added 'Casey' and 'Aiden', so unsurprisingly, 'Aiden' is the least popular name.</font>

In [12]:
# Assign the names_2011 dictionary as the value to the 2011 key of boy_names
male_baby_names[2011] = male_baby_names_2011

# Update the 2012 key in the boy_names dictionary with a list of
# (rank, name) tuples
male_baby_names[2012].update([(1, 'Casey'), (2, 'Aiden')])

# Loop over the years in the boy_names dictionary
for year in male_baby_names:
    # Sort the ranks for the year in descending order; the first one is the
    # lowest ranked (least popular) entry
    ranks_descending = sorted(male_baby_names[year], reverse=True)
    # A year without any data has no first element; fall back to None so the
    # .get() default below is used instead of raising IndexError
    lowest_ranked = ranks_descending[0] if ranks_descending else None
    # Safely print the year and the least popular name or 'Not Available'
    print(year, male_baby_names[year].get(lowest_ranked, 'Not Available'))

2012 Aiden
2013 Yitzchak
2014 Yidel
2011 Yonah


## 02.07 Popping and deleting from dictionaries

Often, you will want to remove keys and values from a dictionary. You can do so using the __del__ Python instruction. It's important to remember that __del__ will throw a __KeyError__ if the key you are trying to delete does not exist. You cannot use it with the __.get()__ method to safely delete items; however, it can be used with __try: except:__.

If you want to save that deleted data into another variable for further processing, the __.pop()__ dictionary method will do just that. You can supply a default value for __.pop()__ much like you did for __.get()__ to safely deal with missing keys. It's also typical to use __.pop()__ instead of del since it is a safe method.

Here, you'll remove __2011__ and __2015__ to save them for later, and then delete __2012__ from the dictionary.

**Instructions**

1. Remove 2011 from female_names and store it as female_names_2011.
2. Safely remove 2015 from female_names with an empty dictionary as the default and store it as female_names_2015. To do this, pass in an empty dictionary {} as a second argument to .pop().
3. Delete 2012 from female_names.
4. Print female_names.

**Results:**<br>
<font color=darkgreen>Great work! As expected, only the data from the years 2013 and 2014 is retained.</font>

In [13]:
# Remove 2011 from female_names and store it: female_names_2011
# (.pop() without a default raises KeyError when the key is absent, so this
# cell only succeeds while 2011 is still present in the dictionary)
female_names_2011 = female_baby_names.pop(2011)

# Safely remove 2015 from female_names with an empty dictionary as the default: female_names_2015
# (2015 is not a key, so the supplied default {} is returned)
female_names_2015 = female_baby_names.pop(2015, {})

# Delete 2012 from female_names
# (del discards the value and raises KeyError if 2012 is missing)
del female_baby_names[2012]

# Print female_names
pprint(female_baby_names)

{2013: {1: 'Olivia',
        2: 'Emma',
        3: 'Esther',
        4: 'Sophia',
        5: 'Sarah',
        6: 'Leah',
        7: 'Rachel',
        8: 'Chaya',
        9: 'Miriam',
        10: 'Chana'},
 2014: {1: 'Olivia',
        2: 'Esther',
        3: 'Rachel',
        4: 'Leah',
        5: 'Emma',
        6: 'Chaya',
        7: 'Sarah',
        8: 'Sophia',
        9: 'Ava',
        10: 'Miriam'}}


## 02.08 Pythonically using dictionaries

See the video.

## 02.09 Working with dictionaries more pythonically

So far, you've worked a lot with the keys of a dictionary to access data, but in Python, the preferred manner for iterating over items in a dictionary is with the __.items()__ method.

This returns each key and value from the dictionary as a tuple, which you can unpack in a __for__ loop. You'll now get practice doing this.

**Instructions**

1. Iterate over baby_names[2014], unpacking it into rank and name.
2. Print each rank and name.
3. Repeat the process for baby_names[2012].

**Results:**<br>
<font color=darkgreen>Nicely done. Using the .items() method to iterate over dictionaries is something you'll be doing very frequently in Python.</font>

In [14]:
# Take the first ten (rank, name) pairs of the 2014 nested dictionary
first_ten_2014 = list(male_baby_names[2014].items())[:10]

# Unpack each pair and print the rank followed by the name
for rank, name in first_ten_2014:
    print(rank, name)

1 Joseph
2 David
3 Michael
4 Moshe
5 Jacob
6 Benjamin
7 Alexander
8 Daniel
9 Samuel
10 Jack


In [15]:
# Fetch the 2012 nested dictionary, falling back to an empty dictionary when
# the 2012 key is absent so the loop below simply does nothing
names_2012 = female_baby_names.get(2012, {})

# Print each rank and name
for rank, name in names_2012.items():
    print(rank, name)

## 02.10 Checking dictionaries for data

You can check to see if a key exists in a dictionary by using the __in__ expression.

For example, you can check to see if __'cookies'__ is a key in the dictionary by using __if 'cookies' in recipes_dict:__ this allows you to safely react to data being present in the dictionary.

You can also use the __in__ expression to see if data is in the value of a dictionary such as __if 'cookies' in recipes_dict.values()__. Remember you have to handle nested dictionaries differently as illustrated in the video and previous exercises, and use the in expression on each nested dictionary.

**Instructions**

1. Check to see if 2011 is in the baby_names dictionary.
2. Print 'Found 2011' if it is present.
3. Check to see if 1 is in baby_names[2012].
4. Print 'Found Rank 1 in 2012' if found and 'Rank 1 missing from 2012' if not found.
5. Check to see if rank 5 is in baby_names[2013].
6. Print 'Found Rank 5' if it is present.

**Results:**<br>
<font color=darkgreen>Superb! If you explore baby_names[2012] in the IPython Shell, you'll see that it is empty. Similarly, baby_names does not contain 2011.</font>

In [16]:
# Re-create the 2012 key (deleted in exercise 02.07) as an empty dictionary so
# the membership check on female_baby_names[2012] below does not raise KeyError
female_baby_names[2012] = {}

In [17]:
# Report 2011 only when it is a key of baby_names
if 2011 in female_baby_names:
    print('Found 2011')

# Pick the message for rank 1 in the 2012 nested dictionary, then print it
rank_1_message = (
    'Found Rank 1 in 2012'
    if 1 in female_baby_names[2012]
    else 'Rank 1 missing from 2012'
)
print(rank_1_message)

# Report rank 5 only when it is a key of the 2013 nested dictionary
if 5 in female_baby_names[2013]:
    print('Found Rank 5')

Rank 1 missing from 2012
Found Rank 5


## 02.11 Working with CSV files

See the video.

In [18]:
# Relative path of the CSV file opened by the two demonstration cells below
csvfile = 'data/ART_GALLERY.csv'

In [19]:
# Reading from a file using CSV reader
# newline='' hands newline handling to the csv module, as its documentation
# requires, so line breaks inside quoted fields are parsed correctly
with open(csvfile, 'r', newline='') as f:
    # Each row comes back as a list of strings, header row included
    for row in csv.reader(f):
        print(row)

['NAME', 'TEL', 'ADDRESS1', 'ADDRESS2', 'CITY', 'ZIP']
["O'reilly William & Co Ltd", '(212) 396-1822', '52 E 76th St', '', 'New York', '10021']


In [20]:
# Creating a dictionary from a file
# newline='' hands newline handling to the csv module, as its documentation
# requires, so line breaks inside quoted fields are parsed correctly
with open(csvfile, 'r', newline='') as f:
    # DictReader uses the first row as field names and yields one mapping of
    # field name -> value per remaining row
    for row in csv.DictReader(f):
        pprint(row)

{'ADDRESS1': '52 E 76th St',
 'ADDRESS2': '',
 'CITY': 'New York',
 'NAME': "O'reilly William & Co Ltd",
 'TEL': '(212) 396-1822',
 'ZIP': '10021'}


## 02.12 Reading from a file using CSV reader

Python provides a wonderful module called __csv__ to work with CSV files. You can pass the __.reader()__ method of __csv__ a Python file object and use it as you would any other iterable. To create a Python file object, you use the __open()__ function, which accepts a file name and a mode. The mode is typically __'r'__ for read or __'w'__ for write.

Though you won't use it for this exercise, often CSV files will have a header row with field names, and you will need to use slice notation such as __[1:]__ to skip the header row.

You'll now use the __csv__ module to read the __baby_names.csv__ file and fill the __baby_names__ dictionary with data. This __baby_names__ dictionary has already been created for you.

**Instructions**

1. Import the python csv module.
2. Create a Python file object in read mode for baby_names.csv called csvfile with the open function.
3. Use the reader method from the csv module on the file object in a for loop. Inside the loop:
4. Print each row and add the rank (the 6th element of row) as the key and name (the 4th element of row) as the value to the existing dictionary (baby_names).
5. Print the keys of baby_names.

**Results:**<br>
<font color=darkgreen>Fantastic work! CSV files are among the most common methods of storing tabular data, and you'll encounter them in the wild very frequently. As a result, knowing how to leverage the csv module can be a great addition to your data science toolbox.</font>

In [21]:
# Start from an empty dictionary; the next cell fills it from the CSV rows
baby_names = {}
# Path of the baby names CSV (rebinds csvfile, previously the art gallery file)
csvfile = 'data/baby_names.csv'

In [22]:
# Create a python file object in read mode for the baby_names.csv file.
# newline='' hands newline handling to the csv module, as its documentation
# requires.
with open(csvfile, 'r', newline='') as f:
    # Loop over a csv reader on the file object
    for i, row in enumerate(csv.reader(f)):
        # Stop after the first eleven rows (indexes 0 through 10)
        if i > 10:
            break
        # Print each row
        print(row)
        # Add the rank (6th field) as the key and the name (4th field) as the
        # value. The header row is not skipped, so 'RANK' -> 'NAME' is stored
        # as well; a later row with the same rank overwrites an earlier one.
        baby_names[row[5]] = row[3]

# Print the dictionary keys
print(baby_names.keys())

['BRITH_YEAR', 'GENDER', 'ETHNICTY', 'NAME', 'COUNT', 'RANK']
['2011', 'FEMALE', 'HISPANIC', 'GERALDINE', '13', '75']
['2011', 'FEMALE', 'HISPANIC', 'GIA', '21', '67']
['2011', 'FEMALE', 'HISPANIC', 'GIANNA', '49', '42']
['2011', 'FEMALE', 'HISPANIC', 'GISELLE', '38', '51']
['2011', 'FEMALE', 'HISPANIC', 'GRACE', '36', '53']
['2011', 'FEMALE', 'HISPANIC', 'GUADALUPE', '26', '62']
['2011', 'FEMALE', 'HISPANIC', 'HAILEY', '126', '8']
['2011', 'FEMALE', 'HISPANIC', 'HALEY', '14', '74']
['2011', 'FEMALE', 'HISPANIC', 'HANNAH', '17', '71']
['2011', 'FEMALE', 'HISPANIC', 'HAYLEE', '17', '71']
dict_keys(['RANK', '75', '67', '42', '51', '53', '62', '8', '74', '71'])


## 02.13 Creating a dictionary from a file

The __csv__ module also provides a way to directly create a dictionary from a CSV file with the __DictReader__ class. If the file has a header row, that row will automatically be used as the keys for the dictionary. However, if not, you can supply a list of keys to be used. Each row from the file is returned as a dictionary. Using DictReader can make it much easier to read your code and understand what data is being used, especially when compared to the numbered indexes you used in the prior exercise.

Your job in this exercise is to create a dictionary directly from the data file using DictReader. __NOTE:__ The misspellings are from the original data, and this is a very common issue. Again, the baby_names dictionary has already been created for you.

**Instructions**

1. Import the Python csv module.
2. Create a Python file object in read mode for the baby_names.csv called csvfile.
3. Loop over a csv DictReader on csvfile. Inside the loop:
4. Print each row.
5. Add the 'RANK' of each row as the key and 'NAME' of each row as the value to the existing dictionary.
6. Print the dictionary keys.

**Results:**<br>
<font color=darkgreen></font>

In [23]:
# Reset to an empty dictionary so entries from the previous exercise are dropped
baby_names = {}
# Path of the baby names CSV read by the next cell
csvfile = 'data/baby_names.csv'

In [24]:
# Create a python file object in read mode for the `baby_names.csv` file.
# newline='' hands newline handling to the csv module, as its documentation
# requires.
with open(csvfile, 'r', newline='') as f:
    # Loop over a DictReader on the file; the header row supplies the field
    # names, so counting from 1 numbers the data rows
    for i, row in enumerate(csv.DictReader(f), start=1):
        # Stop after the first ten data rows
        if i > 10:
            break
        # Print each row
        print(row)
        # Add the rank and name to the dictionary: baby_names
        # (a later row with the same rank overwrites an earlier one)
        baby_names[row['RANK']] = row['NAME']

# Print the dictionary keys once the file has been closed
print(baby_names.keys())

{'BRITH_YEAR': '2011', 'GENDER': 'FEMALE', 'ETHNICTY': 'HISPANIC', 'NAME': 'GERALDINE', 'COUNT': '13', 'RANK': '75'}
{'BRITH_YEAR': '2011', 'GENDER': 'FEMALE', 'ETHNICTY': 'HISPANIC', 'NAME': 'GIA', 'COUNT': '21', 'RANK': '67'}
{'BRITH_YEAR': '2011', 'GENDER': 'FEMALE', 'ETHNICTY': 'HISPANIC', 'NAME': 'GIANNA', 'COUNT': '49', 'RANK': '42'}
{'BRITH_YEAR': '2011', 'GENDER': 'FEMALE', 'ETHNICTY': 'HISPANIC', 'NAME': 'GISELLE', 'COUNT': '38', 'RANK': '51'}
{'BRITH_YEAR': '2011', 'GENDER': 'FEMALE', 'ETHNICTY': 'HISPANIC', 'NAME': 'GRACE', 'COUNT': '36', 'RANK': '53'}
{'BRITH_YEAR': '2011', 'GENDER': 'FEMALE', 'ETHNICTY': 'HISPANIC', 'NAME': 'GUADALUPE', 'COUNT': '26', 'RANK': '62'}
{'BRITH_YEAR': '2011', 'GENDER': 'FEMALE', 'ETHNICTY': 'HISPANIC', 'NAME': 'HAILEY', 'COUNT': '126', 'RANK': '8'}
{'BRITH_YEAR': '2011', 'GENDER': 'FEMALE', 'ETHNICTY': 'HISPANIC', 'NAME': 'HALEY', 'COUNT': '14', 'RANK': '74'}
{'BRITH_YEAR': '2011', 'GENDER': 'FEMALE', 'ETHNICTY': 'HISPANIC', 'NAME': 'HANNAH', '

# Additional material

- **Datacamp course**: https://learn.datacamp.com/courses/data-types-for-data-science-in-python