In [74]:
import pandas as pd
import os

In [75]:
# Directory that load_data() reads the yob<year>.txt files from.
# Resolved against the current working directory, so the notebook must be
# started from the folder that contains "names".
DATA_PATH = os.path.join(os.getcwd(), "names")

In [76]:
def load_data(year):
    """Read the name records for a single year into a DataFrame.

    The file ``yob<year>.txt`` inside ``DATA_PATH`` is parsed as CSV without
    a header row; its columns are labelled ``Name``, ``Gender`` and ``Count``.
    """
    source = os.path.join(DATA_PATH, f'yob{year}.txt')
    with open(source) as handle:
        return pd.read_csv(handle, names=['Name', 'Gender', 'Count'])

In [77]:
def get_merged_names(years):
    """Load every requested year and stack the frames into one DataFrame.

    The result has a two-level index: ``Year`` (the value taken from
    ``years``) and ``Pos`` (the row's index inside that year's frame).
    """
    frames = {year: load_data(year) for year in years}
    return pd.concat(frames, names=['Year', 'Pos'])

In [78]:
def count_top3(years):
    """Return the three names with the highest total count over ``years``.

    Counts are summed per name across all loaded years and across both
    ``Gender`` values. The names come back as a list ordered by descending
    total.
    """
    merged_names = get_merged_names(years)

    # Aggregate only the Count column so the Gender strings are not passed
    # through sum() as well.
    totals = merged_names.groupby('Name')['Count'].sum()
    return totals.sort_values(ascending=False).head(3).index.tolist()

In [79]:
# Check: the three names with the largest total Count in the 1880 file.
count_top3([1880]) == ['John', 'William', 'Mary']

True

In [80]:
# Check: same ranking, with counts pooled across the 1900, 1950 and 2000 files.
count_top3([1900, 1950, 2000]) == ['James', 'John', 'Robert']

True

In [81]:
def count_dynamics(years):
    """Return the total ``Count`` per year, split by gender.

    Returns a dict with keys ``'F'`` and ``'M'``. Each value is a list with
    one summed ``Count`` per loaded year that has rows for that gender, in
    ascending year order. A gender with no rows at all maps to an empty list.
    """
    merged_names = get_merged_names(years)

    # Sum only the Count column; a frame-wide sum() would also aggregate the
    # Name strings of every group. sort=True keeps the (Year, Gender) keys in
    # ascending order, so no separate sort_values() is needed.
    totals = (
        merged_names
        .groupby(['Year', 'Gender'], sort=True)['Count']
        .sum()
        .reset_index()
    )

    # Exact equality rather than a regex substring match on the gender code.
    return {
        gender: totals.loc[totals['Gender'] == gender, 'Count'].tolist()
        for gender in ('F', 'M')
    }

In [82]:
# Summed Count per gender for 1900, 1950 and 2000; each list is in ascending year order.
count_dynamics([1900, 1950, 2000])

{'F': [299800, 1713151, 1815110], 'M': [150483, 1790549, 1962969]}