# Iterate over two lists simultaneously

In [2]:
# Two parallel lists: the entry at index i of one pairs with index i of the other.
ships = ["Andrea Doria", "Titanic", "Lusitania"]
cars = ["Ford Edsel", "Ford Pinto", "Yugo"]

# enumerate() hands back the position alongside each ship, and that position
# is reused to pick the matching car. Both values go out in one print call,
# separated by a newline, so each still lands on its own line.
for i, ship in enumerate(ships):
    print(ship, cars[i], sep="\n")
    

Andrea Doria
Ford Edsel
Titanic
Ford Pinto
Lusitania
Yugo


# Merge two lists

By iterating over two lists simultaneously, items from one list can be appended to the corresponding sub-lists of another list:

In [3]:
# Each inner list of 'things' receives the tree found at the same position in 'trees'.
trees = ["cedar", "maple", "fig"]
things = [["apple", "monkey"], ["orange", "dog"], ["banana", "cat"]]
print("original 'things':{}\n".format(things))

# The inner lists are extended in place, so 'things' itself reflects the change.
for i, pair in enumerate(things):
    pair += [trees[i]]
print("modified 'things':{}\n".format(things))


original 'things':[['apple', 'monkey'], ['orange', 'dog'], ['banana', 'cat']]

modified 'things':[['apple', 'monkey', 'cedar'], ['orange', 'dog', 'maple'], ['banana', 'cat', 'fig']]



# Practice: Compact Notation for For Loops (List Comprehension) 

In [4]:
apple_prices = [100, 101, 102, 105]

# A list comprehension builds a new list by applying an expression to every
# element of an existing one; the source list is left untouched.
apple_prices_doubled = [2 * p for p in apple_prices]
apple_prices_lowered = [p - 100 for p in apple_prices]

print("apple_prices_doubled is: {}".format(apple_prices_doubled))
print("apple_prices_lowered is: {}".format(apple_prices_lowered))


apple_prices_doubled is: [200, 202, 204, 210]
apple_prices_lowered is: [0, 1, 2, 5]


# Prepare data for next exercises

In [5]:
#------------------------------
# Open, read, and tokenize data
#------------------------------
# Read the whole file into one long string. At this point indexing the
# variable only yields single characters. The context manager closes the file
# handle as soon as the read is done instead of leaving it open.
with open("legislators.csv") as data_file:
    data_string = data_file.read()

# Split the long string on the row separator. Each row becomes one list item,
# so rows can now be indexed.
data_splitted = data_string.split("\n")

# Tokenize each row on the column separator. Every row becomes a sub-list, so
# individual values (columns) can be indexed as well. Blank lines -- such as
# the empty string left behind by a trailing newline -- are skipped, because
# they would tokenize to [''] and make the column lookups below fail.
data_tokenized = []
for item in data_splitted:
    if not item.strip():
        continue
    data_tokenized.append(item.split(","))

# Assign the final version to the data variable.
data = data_tokenized

#-----------------------------------------------------------------------
# Extract year from YYYY-MM-DD format and add it as a column to the end
#-----------------------------------------------------------------------
for row in data:
    # Extract the birth year from the birthday column (index 2).
    birthdate_current = row[2]
    birthdate_current_splitted = birthdate_current.split("-")
    birth_year_current = birthdate_current_splitted[0]

    # Convert the birth year to an integer. int() raises ValueError for text
    # that is not a number (an empty birthday, or the header label), and those
    # rows get the placeholder 0 so the next step can fill them in.
    try:
        birth_year_current_converted = int(birth_year_current)
    except ValueError:
        birth_year_current_converted = 0
    row.append(birth_year_current_converted)

#-----------------------------------------------------------------
# Fill missing birth years with the value carried from the row above
#-----------------------------------------------------------------
# NOTE: this is an alias, not a copy -- the rows of 'data' are modified in place.
data_extrapolated = data

# Index 7 is the birth-year column appended above (the file has seven
# original columns). A placeholder 0 is replaced by the most recent value
# seen; 'last_value' starts at 1, which is what the header row ends up with.
last_value = 1
for row in data_extrapolated:
    if row[7] == 0:
        row[7] = last_value
    last_value = row[7]

print("data is: \n" + str(data[0:10]) + "\n")


data is: 
[['last_name', 'first_name', 'birthday', 'gender', 'type', 'state', 'party', 1], ['Bassett', 'Richard', '1745-04-02', 'M', 'sen', 'DE', 'Anti-Administration', 1745], ['Bland', 'Theodorick', '1742-03-21', '', 'rep', 'VA', '', 1742], ['Burke', 'Aedanus', '1743-06-16', '', 'rep', 'SC', '', 1743], ['Carroll', 'Daniel', '1730-07-22', 'M', 'rep', 'MD', '', 1730], ['Clymer', 'George', '1739-03-16', 'M', 'rep', 'PA', '', 1739], ['Contee', 'Benjamin', '', 'M', 'rep', 'MD', '', 1739], ['Dalton', 'Tristram', '1738-05-28', '', 'sen', 'MA', 'Pro-Administration', 1738], ['Elmer', 'Jonathan', '1745-11-29', 'M', 'sen', 'NJ', 'Pro-Administration', 1745], ['Few', 'William', '1748-06-08', 'M', 'sen', 'GA', 'Anti-Administration', 1748]]



# Step #1: Count occurrences of female names after 1940

In [13]:
# Tally how often each first name (column 1) appears among rows whose gender
# column (3) is "F" and whose birth-year column (7) is later than 1940.
name_counts = {}

for row in data:
    # Skip every row that is not a female legislator born after 1940.
    if row[3] != "F" or row[7] <= 1940:
        continue
    name = row[1]
    # .get() supplies 0 the first time a name is seen.
    name_counts[name] = name_counts.get(name, 0) + 1

name_counts

{'Ann Marie': 1,
 'Anne': 1,
 'Barbara': 1,
 'Betsy': 1,
 'Betty': 1,
 'Blanche': 1,
 'Carolyn': 1,
 'Cynthia': 1,
 'Deborah': 2,
 'Denise': 1,
 'Ellen': 1,
 'Enid': 1,
 'Gabrielle': 1,
 'Heather': 1,
 'Hilda': 1,
 'Hillary': 1,
 'Jane': 1,
 'Jean': 1,
 'Jennifer': 1,
 'Jo Ann': 2,
 'Karen': 2,
 'Katherine': 1,
 'Kathleen': 2,
 'Kay': 1,
 'Laura': 1,
 'Lynn': 1,
 'Marilyn': 1,
 'Mary': 2,
 'Mary Jo': 1,
 'Melissa': 2,
 'Nan': 1,
 'Nancy': 1,
 'Olympia': 1,
 'Sandy': 1,
 'Shelley': 2,
 'Stephanie': 2,
 'Sue': 1,
 'Suzanne': 1,
 'Thelma': 1,
 'Virginia': 1}

# Practice: Ignore 'None' values and make a comparison 

In [14]:
values = [None, 10, 20, 30, None, 50]

# The 'is not None' test runs first, so None is never compared against a
# number; such entries simply evaluate to False.
checks = [value is not None and value > 30 for value in values]
print(checks)

[False, False, False, False, False, True]


# Step #2: Find the most common name in a dictionary by iterating and using 'is not None'
Step 8 in Dataquest

In [30]:
# Start with "no maximum yet" and walk through the counts.
max_value = None

for count in name_counts.values():
    # Keep the current maximum unless this is the first count seen
    # (max_value is still None) or the count is strictly larger.
    if max_value is not None and count <= max_value:
        continue
    max_value = count
print(max_value)

2


# Practice: Print both keys and values using .items method 

In [29]:
plant_types = {"orchid": "flower", "cedar": "tree", "maple": "tree"}

# .items() yields (key, value) pairs, so both can be unpacked in the loop
# header. The key and its value are printed on separate lines.
for key, value in plant_types.items():
    print(key, value, sep="\n")

orchid
flower
cedar
tree
maple
tree


# Step #3: Create a list of most common female names 

In [54]:
# Show the name -> count dictionary again before filtering it in the next cell.
print(name_counts)


{'Enid': 1, 'Lynn': 1, 'Karen': 2, 'Jennifer': 1, 'Denise': 1, 'Katherine': 1, 'Melissa': 2, 'Blanche': 1, 'Cynthia': 1, 'Anne': 1, 'Shelley': 2, 'Nancy': 1, 'Hillary': 1, 'Barbara': 1, 'Jo Ann': 2, 'Thelma': 1, 'Stephanie': 2, 'Marilyn': 1, 'Deborah': 2, 'Heather': 1, 'Virginia': 1, 'Kathleen': 2, 'Mary': 2, 'Carolyn': 1, 'Mary Jo': 1, 'Suzanne': 1, 'Betsy': 1, 'Hilda': 1, 'Ellen': 1, 'Gabrielle': 1, 'Jane': 1, 'Kay': 1, 'Olympia': 1, 'Sandy': 1, 'Ann Marie': 1, 'Nan': 1, 'Sue': 1, 'Laura': 1, 'Jean': 1, 'Betty': 1}


In [60]:
# Derive the highest count from the dictionary itself instead of hard-coding
# it, so the cell stays correct if the data (and therefore the maximum)
# changes. 'default=0' keeps max() from raising on an empty dictionary; the
# result is then simply an empty list.
highest_female_count = max(name_counts.values(), default=0)

# Collect every name whose count equals the maximum (ties are all kept).
top_female_names = []

for key, value in name_counts.items():
    if value == highest_female_count:
        top_female_names.append(key)
print(top_female_names)
        

['Karen', 'Melissa', 'Shelley', 'Jo Ann', 'Stephanie', 'Deborah', 'Kathleen', 'Mary']


# Step #4: Find the most common male name and the number of its occurrences

In [63]:
# Preview the header row and the first two records, followed by an ellipsis line.
print("data is: \n{}\n...\n".format(data[0:3]))

data is: 
[['last_name', 'first_name', 'birthday', 'gender', 'type', 'state', 'party', 1], ['Bassett', 'Richard', '1745-04-02', 'M', 'sen', 'DE', 'Anti-Administration', 1745], ['Bland', 'Theodorick', '1742-03-21', '', 'rep', 'VA', '', 1742]]
...



In [110]:
# Count how often each first name appears among male legislators born after 1940.
male_name_counts = {}
for row in data:
    name = row[1]
    gender = row[3]
    year = row[7]
    # Compare strings with '==' (equality). 'is' tests object identity and
    # only appears to work for short strings because of interpreter caching.
    if gender == "M" and year > 1940:
        male_name_counts[name] = male_name_counts.get(name, 0) + 1

print("male_name_counts is: \n" + str(male_name_counts))

# Find the highest count and every name that reaches it.
highest_male_count = 0
top_male_names = []

for key, value in male_name_counts.items():
    if value > highest_male_count:
        # A new maximum invalidates the names collected so far.
        highest_male_count = value
        top_male_names = [key]
    elif value == highest_male_count:
        # A tie with the current maximum: keep this name as well.
        top_male_names.append(key)

print("\n" + "highest_male_count is: " + str(highest_male_count))
print("\n" + "top_male_names is:  " + str(top_male_names))



male_name_counts is: 
{'Michael': 20, 'David': 24, 'James': 31, 'John': 35, 'Paul': 6, 'Joseph': 5, 'Donald': 4, 'William': 21, 'Thomas': 18, 'Anthony': 3, 'Charles': 17, 'Raymond': 2, 'Fred': 2, 'Robert': 24, 'Kenneth': 1, 'Mark': 9, 'Harold': 3, 'Daniel': 4, 'Patrick': 3, 'Richard': 9, 'George': 6, 'Peter': 9, 'Chester': 1, 'Harry': 2, 'Earl': 3, 'Edward': 3, 'Albert': 3, 'Carl': 1, 'Martin': 3, 'Lawrence': 1, 'Milton': 1, 'Samuel': 2, 'Frederick': 2, 'Timothy': 3, 'Philip': 2, 'Frank': 6, 'Victor': 3, 'Rod': 2, 'Lewis': 1, 'Walter': 3, 'Henry': 2, 'Phil': 2, 'Tim': 3, 'Tom': 2, 'Bob': 9, 'Rick': 4, 'Ron': 3, 'Steven': 3, 'Bill': 2, 'Don': 1, 'Ken': 2, 'Lincoln': 3, 'Gary': 2, 'Jon': 3, 'Greg': 1, 'Tony': 1, 'Brian': 3, 'Steve': 4, 'Dan': 2, 'Joe': 4, 'Dean': 1, 'AnÃƒ\xadbal': 1, 'Wayne': 2, 'Chris': 2, 'Max': 3, 'Brad': 2, 'Norm': 1, 'Larry': 3, 'Calvin': 1, 'Ernest': 2, 'Jonas': 1, 'Christopher': 6, 'Gerald': 2, 'Scott': 3, 'Doug': 1, 'Jack': 1, 'Gordon': 1, 'Evan': 1, 'Jeb': 1, 'E