# Инструменты для оперативной работы с данными




In [153]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Directory holding the yearly name files. read_to_dataframe() builds file
# paths by plain string concatenation (PATH + 'yob<year>.txt'), so the
# trailing slash is required.
PATH = 'names/'

#### 1) Загрузка данных за указанные годы и вывод топ-3 самых популярных имён:

In [154]:
def read_to_dataframe(years):
    """Load the yearly name files for ``years`` into a single DataFrame.

    For every year the file ``PATH + 'yob<year>.txt'`` is read as a
    header-less CSV whose columns are named ``Name``, ``Gender`` and
    ``Count``.

    Args:
        years: Iterable of years (any value ``str()`` can render, e.g. ints).

    Returns:
        A DataFrame with the rows of all requested files stacked in the
        order given by ``years``. Each file keeps its own 0-based row
        index, so index labels repeat across years. An empty DataFrame is
        returned when ``years`` is empty.

    Raises:
        Whatever ``pd.read_csv`` raises for an unreadable or missing file.
    """
    columns = ['Name', 'Gender', 'Count']
    frames = [
        pd.read_csv(PATH + 'yob' + str(year) + '.txt', names=columns)
        for year in years
    ]
    # pd.concat raises ValueError on an empty list, so keep the previous
    # "no years -> empty frame" result explicitly.
    if not frames:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0 and re-copied the
    # accumulated frame on every iteration; one concat replaces the loop.
    return pd.concat(frames)

def count_top3(years):
    """Return the most frequent (name, gender) pairs over ``years``.

    The files for ``years`` are loaded with ``read_to_dataframe`` and the
    ``Count`` column is summed per ``Name``/``Gender`` pair.

    Args:
        years: Iterable of years to include in the totals.

    Returns:
        A DataFrame indexed by (``Name``, ``Gender``) with a single
        ``Count`` column, holding up to three rows with the largest
        totals in descending order.
    """
    names = read_to_dataframe(years)
    totals = names.pivot_table(
        values=['Count'],
        index=['Name', 'Gender'],
        aggfunc='sum',
    )
    ranked = totals.sort_values(by='Count', ascending=False)
    return ranked.iloc[:3]

In [155]:
# Top 3 names summed over five selected years between 1890 and 1940.
top3 = count_top3([1900, 1920, 1930, 1940, 1890])
top3

Unnamed: 0_level_0,Unnamed: 1_level_0,Count
Name,Gender,Unnamed: 2_level_1
Mary,F,220093
John,M,182444
Robert,M,178383


In [156]:
# Same ranking for three years spaced fifty years apart.
top3 = count_top3([1900, 1950, 2000])
top3

Unnamed: 0_level_0,Unnamed: 1_level_0,Count
Name,Gender,Unnamed: 2_level_1
James,M,111489
John,M,109360
Robert,M,101145


#### 2) Динамика изменения количества имён за указанные годы в разрезе пола:

In [157]:
def count_dynamics(years):
    """Total the name counts per gender for each year in ``years``.

    Args:
        years: Iterable of years; each one is loaded on its own through
            ``read_to_dataframe([year])``.

    Returns:
        A DataFrame indexed by ``Gender`` with the columns ``Count`` (sum
        of ``Count`` for that gender in that year) and ``Year``. The
        per-year blocks are stacked in the order of ``years``. An empty
        DataFrame is returned when ``years`` is empty.
    """
    yearly_totals = []
    for year in years:
        totals = read_to_dataframe([year]).pivot_table(
            values=['Count'], index=['Gender'], aggfunc='sum')
        # Tag each row with its year so the blocks stay distinguishable
        # once they are stacked into one frame.
        totals.insert(loc=1, column='Year', value=year)
        yearly_totals.append(totals)
    # pd.concat raises ValueError on an empty list, so keep the previous
    # "no years -> empty frame" result explicitly.
    if not yearly_totals:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    # of growing the result frame inside the loop.
    return pd.concat(yearly_totals)

In [158]:
# Per-gender totals for five sample years.
res = count_dynamics([1900, 1920, 1950, 1980, 2000])

# Sort on the Gender index so each gender's rows are listed together.
res = res.sort_index()
res

Unnamed: 0_level_0,Count,Year
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,299822,1900
F,1198287,1920
F,1713450,1950
F,1660042,1980
F,1814738,2000
M,150490,1900
M,1064445,1920
M,1791474,1950
M,1784088,1980
M,1962556,2000
