# Error Handling 

#### The Dataset

In this mission, we'll be working with `legislators.csv`, which records information on every historical member of the U.S. Congress.

In [2]:
# Load the legislators dataset into a DataFrame. The path is relative, so the
# CSV has to sit in the notebook's working directory.
# NOTE(review): the saved output of this cell is a FileNotFoundError and the
# execution counter jumps from 2 to 22 afterwards, yet later cells use
# `legislators` — re-run from a fresh kernel with the file in place so the
# stored outputs match the code.
import pandas as pd
legislators = pd.read_csv("legislators.csv")

FileNotFoundError: File b'legislators.csv' does not exist

In [22]:
# Confirm what kind of object read_csv handed back (a pandas DataFrame).
type(legislators)

pandas.core.frame.DataFrame

In [23]:
# Preview the first five rows of the frame.
legislators.head(5)

Unnamed: 0,last_name,first_name,birthday,gender,type,state,party
0,Bassett,Richard,1745-04-02,M,sen,DE,Anti-Administration
1,Bland,Theodorick,1742-03-21,,rep,VA,
2,Burke,Aedanus,1743-06-16,,rep,SC,
3,Carroll,Daniel,1730-07-22,M,rep,MD,
4,Clymer,George,1739-03-16,M,rep,PA,


In [24]:
# List the distinct values stored in the gender column. Missing entries show
# up as nan alongside the 'M' and 'F' labels.
legislators['gender'].unique()

array(['M', nan, 'F'], dtype=object)

In [25]:
# Collect the distinct values of the party column by converting it to a set.
# NOTE(review): rows without a party (blank in the preview of the frame) are
# presumably NaN and would end up in the set as well — confirm whether they
# should be dropped first.
party = set(legislators['party'])

In [26]:
# Keep the gender column under its own name for the steps that follow.
gender = legislators['gender']

In [27]:
# Label-based lookup: the gender recorded for the row labelled 0.
gender.loc[0]

'M'

In [28]:
# Replace every missing gender entry with "M" and rebind the result.
gender = gender.fillna("M")

In [29]:
# Pull out the birthday column and check its container type (a pandas Series).
birthday = legislators['birthday']
type(birthday)

pandas.core.series.Series

In [30]:
# Parse the birthday values with pd.to_datetime. The container is still a
# Series (as type() shows); the conversion is what allows `.year` to be read
# from each element afterwards.
birthday = pd.to_datetime(birthday)
type(birthday)

pandas.core.series.Series

In [31]:
# Build birth_years: a plain Python list with one entry per legislator,
# holding the `.year` attribute of each parsed birthday.
birth_years = [timestamp.year for timestamp in birthday]

birth_years[:5]

[1745, 1742, 1743, 1730, 1739]

In [32]:
# birth_years is an ordinary Python list, not a pandas object.
type(birth_years)

list

#### The Pass Keyword
Write a for loop that iterates over `birth_years`, using the iterator variable `year`.

* In the `try` block:
    - Convert `year` to an integer using the `int()` function and assign it to `int_year`.
    - Append the converted year value to `converted_years`.
* In the `except` block:
    - Add a `pass` statement.


In [33]:
# Convert every birth year to an int, skipping the entries that cannot be
# converted (for example NaN, which marks a legislator with no recorded
# birthday).
converted_years = []
for year in birth_years:
    try:
        int_year = int(year)
        converted_years.append(int_year)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are ignored here. Catching every Exception
        # would also hide programming mistakes such as a misspelled variable
        # name, leaving converted_years silently empty.
        pass

- Next, add a `birth_year` column to `legislators` so that each row carries the legislator's birth year.

In [34]:
# Attach the extracted years to the frame as a new `birth_year` column,
# then preview the first seven rows.
legislators["birth_year"] = birth_years
legislators.head(7)

Unnamed: 0,last_name,first_name,birthday,gender,type,state,party,birth_year
0,Bassett,Richard,1745-04-02,M,sen,DE,Anti-Administration,1745.0
1,Bland,Theodorick,1742-03-21,,rep,VA,,1742.0
2,Burke,Aedanus,1743-06-16,,rep,SC,,1743.0
3,Carroll,Daniel,1730-07-22,M,rep,MD,,1730.0
4,Clymer,George,1739-03-16,M,rep,PA,,1739.0
5,Contee,Benjamin,,M,rep,MD,,
6,Dalton,Tristram,1738-05-28,,sen,MA,Pro-Administration,1738.0


####  Fill in Years Without a Value

In [35]:
# Check whether the birth_year column contains at least one missing value:
# isnull() flags each NaN, and any() reports whether any flag is set.
legislators['birth_year'].isnull().values.any()

True

In [36]:
# Count the missing entries in birth_year: isnull() marks each NaN as True
# and a single sum() over the column adds those flags up.
legislators['birth_year'].isnull().sum()

555

In [37]:
# Show the first seven birth_year values; row 5 is the one without a birthday.
legislators["birth_year"].head(7)

0    1745.0
1    1742.0
2    1743.0
3    1730.0
4    1739.0
5       NaN
6    1738.0
Name: birth_year, dtype: float64

In [38]:
# Fill the gaps in birth_year by carrying the previous row's value forward.
# ffill() is the direct spelling of fillna(method="pad"); the `method`
# argument of fillna is deprecated in recent pandas releases.
# A NaN in the very first row would stay NaN, since there is no earlier row
# to copy from.
#
# Alternative: legislators['birth_year'].fillna(0) replaces the gaps with
# zeros instead — simpler, but 0 is not a meaningful birth year.
legislators['birth_year'] = legislators['birth_year'].ffill()
legislators['birth_year'][:7]

0    1745.0
1    1742.0
2    1743.0
3    1730.0
4    1739.0
5    1739.0
6    1738.0
Name: birth_year, dtype: float64