In [None]:
# Following are commands to download the data
repo = "https://raw.githubusercontent.com/jakevdp/data-USstates/master"
!cd data && curl -O {repo}/state-population.csv
!cd data && curl -O {repo}/state-areas.csv
!cd data && curl -O {repo}/state-abbrevs.csv

In [None]:
class display(object):
    """Render several expressions side by side, each under its own source text.

    Every positional argument is a string of Python source (for example
    ``'pop.head()'``).  Nothing is evaluated at construction time; each string
    is evaluated when the object is rendered, and its value is shown next to
    the expression text.

    NOTE(review): the strings are passed to ``eval``.  That is fine for the
    hard-coded literals used in a notebook, but never forward untrusted input.
    """
    # One left-floated panel per expression: {0} is the expression text and
    # {1} the HTML of its value.  The <p> and <div> must both be closed,
    # otherwise every later panel nests inside the previous one.
    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""

    def __init__(self, *args):
        """Store the expression strings to be rendered later."""
        self.args = args

    def _repr_html_(self):
        """Return one HTML panel per expression, joined by newlines.

        Each evaluated object must itself provide ``_repr_html_`` (pandas
        DataFrames do); otherwise an AttributeError is raised.
        """
        # Local import so the class works even if the cell holding the
        # notebook's imports has not been run yet.
        from html import escape
        # The label is escaped so expressions containing <, > or & (for
        # example a comparison) are shown literally instead of as markup.
        return '\n'.join(self.template.format(escape(a), eval(a)._repr_html_())
                         for a in self.args)

    def __repr__(self):
        """Return the plain-text fallback: each expression followed by its repr."""
        return '\n\n'.join(a + '\n' + repr(eval(a))
                           for a in self.args)

In [None]:
import numpy as np
import pandas as pd
# import display as ds

In [None]:
# Load the three raw tables from the local data directory, in the order
# population, areas, abbreviations.
pop, areas, abbrevs = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/state-population.csv',
        'data/state-areas.csv',
        'data/state-abbrevs.csv',
    )
)

In [None]:
# Preview the first rows of each raw table; the strings are evaluated by
# the `display` helper when the cell output is rendered.
display(
    'pop.head()',
    'areas.head()',
    'abbrevs.head()',
)

In [None]:
# Goal of this notebook: rank US states and regions by their 2010 population density.
# First step: attach the full state name to every population row.  The outer
# join keeps rows whose code has no match on either side, so nothing is
# silently lost; the duplicate key column from `abbrevs` is then dropped.
population_data = (
    pop.merge(abbrevs, how='outer', left_on='state/region', right_on='abbreviation')
       .drop(columns='abbreviation')
)
population_data.head()

In [None]:
# Per column: does it contain at least one missing value?
population_data.isna().any()

In [None]:
# Preview the rows that have no population figure, to see which
# states/regions (and years) they belong to.
population_data.loc[population_data['population'].isna()].head()

In [None]:
# List the region codes whose full state name is missing after the merge,
# i.e. codes that found no match in the abbreviations table.
population_data.loc[population_data['state'].isna(), 'state/region'].unique()

In [None]:
# Fill in the full names for the two codes that the abbreviations table
# does not cover, then re-check which columns still contain nulls.
for region_code, state_name in (('PR', "Puerto Rico"), ('USA', "United States")):
    population_data.loc[population_data['state/region'] == region_code, 'state'] = state_name
population_data.isna().any()

In [None]:
# Add each state's area; the left join keeps every population row even
# when the areas table has no entry for that state.
state_population_data = population_data.merge(areas, how='left', on='state')
state_population_data.head()

In [None]:
# Per column: did the area merge leave any missing values?
state_population_data.isna().any()

In [None]:
# Names of the states/regions for which no area was found.
state_population_data.loc[state_population_data['area (sq. mi)'].isna(), 'state'].unique()

In [None]:
# Discard every row that still has a missing value in any column.
state_population_data = state_population_data.dropna()
state_population_data.head()

In [None]:
# Keep only the rows for year 2010 whose `ages` value is 'total'.
population_2010 = state_population_data.query(
    "year == 2010 & ages == 'total'"
)
population_2010.head()

In [None]:
# Index the 2010 rows by state name, then compute population per square mile.
# The index is set by guarded reassignment rather than in place: an in-place
# `set_index` removes the 'state' column, so running this cell a second time
# would raise KeyError, and it would also mutate the frame returned by
# `query`.  After the cell, `population_2010` is indexed by state as before.
if population_2010.index.name != 'state':
    population_2010 = population_2010.set_index('state')
population_2010_density = population_2010['population'] / population_2010['area (sq. mi)']

In [None]:
# Order from densest to least dense and show the top of the ranking.
population_2010_density = population_2010_density.sort_values(ascending=False)
population_2010_density.head()

In [None]:
# Last five entries of the density series (the least dense ones once it
# has been sorted in descending order).
population_2010_density.tail(n=5)