In [19]:
import numpy as np
import pandas as pd

# Indexing and column rearrangement
There are circumstances in which it's useful to change the order of your DataFrame columns, or to keep only some of them. We do that now by extracting just three columns from the Pennsylvania election results DataFrame.

In this exercise we will read the CSV file, set the index to the `county` column, and then create a new DataFrame by selecting the list of columns `['winner', 'total', 'voters']`.

In [3]:
# Read the CSV file and use its 'county' column as the row index: election
election = pd.read_csv('pennsylvania2012_turnout.csv', index_col='county')

# Create a separate DataFrame holding only the columns
# ['winner', 'total', 'voters'], in the order listed: results
results = election[['winner', 'total', 'voters']]

In [4]:
# Preview the first rows of the results DataFrame
results.head()

Unnamed: 0_level_0,winner,total,voters
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Adams,Romney,41973,61156
Allegheny,Obama,614671,924351
Armstrong,Romney,28322,42147
Beaver,Romney,80015,115157
Bedford,Romney,21444,32189


# Slicing rows
- Slice the row labels `'Perry'` to `'Potter'` and assign the output to `p_counties`.
- Print the p_counties DataFrame. This has been done for you.
- Slice the row labels `'Potter'` to `'Perry'` in reverse order. To do this for hypothetical row labels `'a'` and `'b'`, you could use a stepsize of `-1` like so: `df.loc['b':'a':-1]`.


In [7]:
# Slice the row labels 'Perry' to 'Potter': p_counties
# Label-based slicing with .loc keeps both endpoints, so the 'Potter' row is included.
p_counties = election.loc['Perry':'Potter']

In [8]:
# Display the p_counties DataFrame
p_counties

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Perry,PA,18240,29.769737,68.591009,Romney,27245,66.948064,38.821272
Philadelphia,PA,653598,85.224251,14.051451,Obama,1099197,59.461407,71.1728
Pike,PA,23164,43.904334,54.882576,Romney,41840,55.363289,10.978242
Potter,PA,7205,26.259542,72.158223,Romney,10913,66.022175,45.898681


In [9]:
# Slice the row labels 'Potter' to 'Perry' in reverse order: p_counties_rev
# With a step of -1 the slice starts at the later label ('Potter') and walks
# back to the earlier one ('Perry'); both endpoints are kept.
p_counties_rev = election.loc['Potter':'Perry':-1]

In [10]:
# Display the p_counties_rev DataFrame
p_counties_rev

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Potter,PA,7205,26.259542,72.158223,Romney,10913,66.022175,45.898681
Pike,PA,23164,43.904334,54.882576,Romney,41840,55.363289,10.978242
Philadelphia,PA,653598,85.224251,14.051451,Obama,1099197,59.461407,71.1728
Perry,PA,18240,29.769737,68.591009,Romney,27245,66.948064,38.821272


# Slicing columns
- Slice the columns from the starting column to `'Obama'` and assign the result to left_columns
- Slice the columns from `'Obama'` to `'winner'` and assign the result to middle_columns
- Slice the columns from `'Romney'` to the end and assign the result to right_columns

In [11]:
# Slice the columns from the starting column to 'Obama': left_columns
# The first ':' selects every row; the open-ended column slice starts at the
# first column and includes 'Obama' itself.
left_columns = election.loc[:, :'Obama']

# Print the output of left_columns.head()
print(left_columns.head())

          state   total      Obama
county                            
Adams        PA   41973  35.482334
Allegheny    PA  614671  56.640219
Armstrong    PA   28322  30.696985
Beaver       PA   80015  46.032619
Bedford      PA   21444  22.057452


In [12]:

# Slice the columns from 'Obama' to 'winner': middle_columns
# Both named columns are included in the result.
middle_columns = election.loc[:, 'Obama':'winner']

# Print the output of middle_columns.head()
print(middle_columns.head())

               Obama     Romney  winner
county                                 
Adams      35.482334  63.112001  Romney
Allegheny  56.640219  42.185820   Obama
Armstrong  30.696985  67.901278  Romney
Beaver     46.032619  52.637630  Romney
Bedford    22.057452  76.986570  Romney


In [13]:
# Slice the columns from 'Romney' to the end: right_columns
# Leaving the stop label empty runs the slice through the last column.
right_columns = election.loc[:, 'Romney':]

# Print the output of right_columns.head()
print(right_columns.head())

              Romney  winner  voters    turnout     margin
county                                                    
Adams      63.112001  Romney   61156  68.632677  27.629667
Allegheny  42.185820   Obama  924351  66.497575  14.454399
Armstrong  67.901278  Romney   42147  67.198140  37.204293
Beaver     52.637630  Romney  115157  69.483401   6.605012
Bedford    76.986570  Romney   32189  66.619031  54.929118


# Subselecting DataFrames with lists
Lists can be used to select specific row and column labels with the `.loc[]` accessor. In this exercise, your job is to select the counties `['Philadelphia', 'Centre', 'Fulton']` and the columns `['winner','Obama','Romney']` from the election DataFrame.

- Create the list of row labels `['Philadelphia', 'Centre', 'Fulton']` and assign it to rows.
- Create the list of column labels `['winner', 'Obama', 'Romney']` and assign it to cols.
- Create a new DataFrame by selecting with rows and cols in `.loc[]` and assign it to three_counties.
- Print the three_counties DataFrame.

In [14]:
# Create the list of row labels: rows
rows = ['Philadelphia', 'Centre', 'Fulton']

# Create the list of column labels: cols
cols = ['winner', 'Obama', 'Romney']

# Create the new DataFrame: three_counties
# Rows and columns come back in the order given in the lists,
# not in the order they appear in election.
three_counties = election.loc[rows,cols]

In [15]:
# Display the three_counties DataFrame
three_counties

Unnamed: 0_level_0,winner,Obama,Romney
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Philadelphia,Obama,85.224251,14.051451
Centre,Romney,48.948416,48.977486
Fulton,Romney,21.096291,77.748861


# Thresholding data
- Create a boolean array of the condition where the `'turnout'` column is greater than 70 and assign it to `high_turnout`.
- Filter the election DataFrame with the `high_turnout` array and assign it to `high_turnout_df`.

In [16]:
# Create the boolean array: high_turnout
# True for each county whose 'turnout' value is strictly greater than 70.
high_turnout = election['turnout'] > 70

# Filter the election DataFrame with the high_turnout array: high_turnout_df
# Only the rows where high_turnout is True are kept.
high_turnout_df = election[high_turnout]

In [18]:
# Display the high_turnout_df DataFrame
high_turnout_df

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bucks,PA,319407,49.96697,48.801686,Obama,435606,73.324748,1.165284
Butler,PA,88924,31.920516,66.816607,Romney,122762,72.436096,34.896091
Chester,PA,248295,49.228539,49.650617,Romney,337822,73.498766,0.422079
Forest,PA,2308,38.734835,59.835355,Romney,3232,71.410891,21.10052
Franklin,PA,62802,30.110506,68.583803,Romney,87406,71.850903,38.473297
Montgomery,PA,401787,56.637223,42.286834,Obama,551105,72.905708,14.35039
Westmoreland,PA,168709,37.567646,61.306154,Romney,238006,70.884347,23.738508


# Filtering columns using other columns
The election results DataFrame has a column labeled 'margin' which expresses the number of extra votes the winner received over the losing candidate. This number is given as a percentage of the total votes cast. It is reasonable to assume that in counties where this margin was less than 1%, the results would be too-close-to-call.

- Create a boolean array for the condition where the `'margin'` column is less than 1 and assign it to too_close.
- Convert the entries in the `'winner'` column where the result was too close to call to `np.nan`.
- Print the output of `election.info()`. 

In [20]:
# Create the boolean array: too_close
# True for each county whose 'margin' value is below 1.
too_close = election['margin'] < 1

# Assign np.nan to the 'winner' column where the results were too close to call
# NOTE: this modifies election in place, so every later cell that reads
# election['winner'] sees NaN for these counties.
election.loc[too_close, 'winner'] = np.nan

In [21]:
# Print the output of election.info()
# The non-null count of 'winner' drops below the number of rows because of
# the NaN values assigned to the too-close-to-call counties.
election.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67 entries, Adams to York
Data columns (total 8 columns):
state      67 non-null object
total      67 non-null int64
Obama      67 non-null float64
Romney     67 non-null float64
winner     64 non-null object
voters     67 non-null int64
turnout    67 non-null float64
margin     67 non-null float64
dtypes: float64(4), int64(2), object(2)
memory usage: 7.2+ KB


# Using apply() to transform a column
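The `.apply()` method can be used on a pandas DataFrame to run an arbitrary Python function over its data. By default the function is called once per column and receives that column as a Series; because the `to_celsius` function defined below uses ordinary arithmetic, which pandas applies element-wise, every value ends up converted. In this exercise you'll work with daily weather data for Pittsburgh in 2013.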

- Apply the to_celsius function over the `['Mean TemperatureF','Mean Dew PointF']` columns of the weather DataFrame.
- Reassign the columns of `df_celsius` to `['Mean TemperatureC','Mean Dew PointC']`.
- Print to see the new DataFrame with the converted units.


In [23]:
# Read the weather data from the CSV file: weather
weather = pd.read_csv('underground_weather.csv')

In [24]:
# Write a function to convert degrees Fahrenheit to degrees Celsius: to_celsius
def to_celsius(F):
    """Convert a temperature from degrees Fahrenheit to degrees Celsius.

    Parameters
    ----------
    F : number, numpy array, pandas Series or pandas DataFrame
        Temperature(s) in degrees Fahrenheit. Only subtraction and
        multiplication by a float are applied, so any object supporting
        those operations is converted element-wise.

    Returns
    -------
    Same kind of object as ``F``
        The temperature(s) in degrees Celsius, as floating-point values.
    """
    # Float literals keep the scale factor at 0.555... even where `/` between
    # two ints is floor division (Python 2), which would turn 5/9 into 0 and
    # silently return zeros for every input.
    return 5.0 / 9.0 * (F - 32)

In [27]:
# Apply the function over 'Mean TemperatureF' and 'Mean Dew PointF': df_celsius
# Only these two columns are selected before applying to_celsius; the result
# is a new DataFrame and weather itself is left unchanged.
df_celsius = weather[['Mean TemperatureF','Mean Dew PointF']].apply(to_celsius)

In [28]:
# Reassign the columns of df_celsius so the labels reflect the converted units
# The new names are matched by position, so they must stay in the same order
# as the Fahrenheit columns that were selected.
df_celsius.columns = ['Mean TemperatureC', 'Mean Dew PointC']

In [29]:
# Preview the first rows of the converted DataFrame
df_celsius.head()

Unnamed: 0,Mean TemperatureC,Mean Dew PointC
0,-2.222222,-2.777778
1,-6.111111,-11.111111
2,-4.444444,-9.444444
3,-2.222222,-7.222222
4,-1.111111,-6.666667


# Using .map() with a dictionary
The `.map()` method is used to transform values according to a Python dictionary look-up. In this exercise we are going to use a dictionary to map the values `'Obama'` and `'Romney'` in the `'winner'` column to the values `'blue'` and `'red'`, and assign the output to the new column `'color'`. Counties whose `'winner'` was set to `np.nan` in the previous section have no matching dictionary key, so their `'color'` is `NaN` as well.

In [30]:
# Create the dictionary: red_vs_blue
# Keys are the values found in the 'winner' column; values are the colors to assign.
red_vs_blue = {'Obama':'blue', 'Romney':'red'}

# Use the dictionary to map the 'winner' column to the new column: election['color']
# This adds the 'color' column to election in place.
election['color'] = election['winner'].map(red_vs_blue)

In [31]:
# Preview the first rows of election, now including the 'color' column
election.head()

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin,color
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Adams,PA,41973,35.482334,63.112001,Romney,61156,68.632677,27.629667,red
Allegheny,PA,614671,56.640219,42.18582,Obama,924351,66.497575,14.454399,blue
Armstrong,PA,28322,30.696985,67.901278,Romney,42147,67.19814,37.204293,red
Beaver,PA,80015,46.032619,52.63763,Romney,115157,69.483401,6.605012,red
Bedford,PA,21444,22.057452,76.98657,Romney,32189,66.619031,54.929118,red
