In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from scipy.stats import linregress

In [3]:
# Returns top names of top numNames people from year 
def getTopNames(numNames, year):
    fileName = "Names/yob" + str(year) + ".txt"
    data = pd.read_csv(fileName, names=['Name', 'Sex', 'Frequency'])
    topNames = data.head(numNames)
    return(topNames)

getTopNames(10, 1990)

Unnamed: 0,Name,Sex,Frequency
0,Jessica,F,46470
1,Ashley,F,45553
2,Brittany,F,36534
3,Amanda,F,34405
4,Samantha,F,25865
5,Sarah,F,25810
6,Stephanie,F,24859
7,Jennifer,F,22219
8,Elizabeth,F,20742
9,Lauren,F,20499


getTopNames() returns the topNames people with the highest frequency of a given year. 

In [4]:
# Create a temporary dataframe that holds frequencies for name we are looking for
# Add dataframe to a list and return the list (holds information for both female and male frequency)
def freqName(name):
    tempList = []
    for i in range(1880, 2016):
        fileName = "Names/yob" + str(i) + ".txt"
        data = pd.read_csv(fileName, names=['Name', 'Sex', 'Frequency'])
        data['Year'] = i                               # Adding year column to make results easier to read
        tempDataframe = data.loc[data['Name'] == name] # Create a dataframe holding rows with a name we are looking for
        tempList.append(tempDataframe)
           
    return tempList

freqName("Bob")

[     Name Sex  Frequency  Year
 1185  Bob   M         46  1880,      Name Sex  Frequency  Year
 1155  Bob   M         50  1881,      Name Sex  Frequency  Year
 1239  Bob   M         59  1882,      Name Sex  Frequency  Year
 1243  Bob   M         62  1883,      Name Sex  Frequency  Year
 1411  Bob   M         50  1884,      Name Sex  Frequency  Year
 1423  Bob   M         52  1885,      Name Sex  Frequency  Year
 1481  Bob   M         66  1886,      Name Sex  Frequency  Year
 1477  Bob   M         74  1887,      Name Sex  Frequency  Year
 1701  Bob   M         59  1888,      Name Sex  Frequency  Year
 1701  Bob   M         52  1889,      Name Sex  Frequency  Year
 1760  Bob   M         53  1890,      Name Sex  Frequency  Year
 1746  Bob   M         55  1891,      Name Sex  Frequency  Year
 1883  Bob   M         61  1892,      Name Sex  Frequency  Year
 1873  Bob   M         59  1893,      Name Sex  Frequency  Year
 1950  Bob   M         48  1894,      Name Sex  Frequency  Year
 2050  B

freqNames() returns a list of frequencies for each year of the indicated name. The list contains frequencies for males and females if a name was used for both sexes. 

In [5]:
# Create a temporary dataframe that holds relative frequencies for name we are looking for
# Add dataframe to a list and return the list (holds information for both female and male relative frequency)
def relativeFreqName(name):
    tempList = []
    for i in range(1880, 2016):
        fileName = "Names/yob" + str(i) + ".txt"
        data = pd.read_csv(fileName, names=['Name', 'Sex', 'Frequency'])
        sumPerYear = data['Frequency'].sum()           
        data['Year'] = i                               # Adding year column to make results easier to read
        data['Frequency'] = data['Frequency'].divide(sumPerYear, fill_value=1)
        tempDataframe = data.loc[data['Name'] == name] # Create a dataframe holding rows with a name we are looking for
        tempList.append(tempDataframe)
           
    return tempList

relativeFreqName("Bob")

[     Name Sex  Frequency  Year
 1185  Bob   M   0.000228  1880,      Name Sex  Frequency  Year
 1155  Bob   M   0.000259  1881,      Name Sex  Frequency  Year
 1239  Bob   M   0.000266  1882,      Name Sex  Frequency  Year
 1243  Bob   M   0.000286  1883,      Name Sex  Frequency  Year
 1411  Bob   M   0.000205  1884,      Name Sex  Frequency  Year
 1423  Bob   M   0.000216  1885,      Name Sex  Frequency  Year
 1481  Bob   M   0.000259  1886,      Name Sex  Frequency  Year
 1477  Bob   M   0.000299  1887,      Name Sex  Frequency  Year
 1701  Bob   M   0.000197  1888,      Name Sex  Frequency  Year
 1701  Bob   M    0.00018  1889,      Name Sex  Frequency  Year
 1760  Bob   M   0.000176  1890,      Name Sex  Frequency  Year
 1746  Bob   M   0.000192  1891,      Name Sex  Frequency  Year
 1883  Bob   M   0.000182  1892,      Name Sex  Frequency  Year
 1873  Bob   M   0.000181  1893,      Name Sex  Frequency  Year
 1950  Bob   M   0.000142  1894,      Name Sex  Frequency  Year
 2050  B

relativeFreqNames() returns a list of relative frequencies for each year of the indicated name. The lists contains relative frequencies for males and females if a name was used for both sexes. 

In [8]:
males = {}
females = {}
return_names = []

# Iterate through each file and create a temporary dataframe to hold names that are male and female
# Then go through the rows of each dataframe and store the name and relative frequency into males/females dictionaries
for i in range(1880, 2016):    
    fileName = "Names/yob" + str(i) + ".txt"
    data = pd.read_csv(fileName, names=['Name', 'Sex', 'Frequency'])
    temp = data[data.duplicated('Name', keep=False) == True] # Each temporary dataframe holds names that are 
                                                             # both female and male for each year
    
    # For each year calculate the sum and use that sum to calculate relative
    # frequency of each name each year
    sum = temp['Frequency'].sum()
    
    # Iterate through each name and store relative frequency in corresponding 
    # dictionary for each year
    for row in temp.itertuples():
        if(row[2] == 'M'):
            tempDict = {i:row[3] / sum}
            if(row[1] in males):
                males[row[1]].update(tempDict)
            else:
                males[row[1]] = tempDict
        else:
            tempDict = {i:row[3] / sum}
            if(row[1] in females):
                females[row[1]].update(tempDict)
            else:
                females[row[1]] = tempDict
                
# Iterate through males and females, looking for instances when one gender's relative frequency is greater than the other
# but then the other gender overtakes the original popular one 
for key in males:
    first_flag = True
    m_flag = False
    f_flag = False
    for year in males[key]:
        if(first_flag):
            if(males[key][year] >= females[key][year]):
                m_flag = True
                first_flag = False
            else:
                f_flag = True
                first_flag = False
        else:
            if(m_flag):
                if(males[key][year] <= females[key][year]):
                    return_names.append(key)
                    break
            elif(f_flag):
                if(males[key][year] >= females[key][year]):
                    return_names.append(key)
                    break
                    
print(return_names)                  

['Marion', 'Jessie', 'Sidney', 'Leslie', 'Alva', 'Ollie', 'Allie', 'Lynn', 'Jimmie', 'Dee', 'Merle', 'Dell', 'Fay', 'Tommie', 'Lou', 'Carey', 'Augustine', 'Clair', 'Jean', 'Shirley', 'Freddie', 'Lacy', 'Ossie', 'Theo', 'Bird', 'Alpha', 'Erie', 'Oda', 'Sammie', 'Vernie', 'Artie', 'Augusta', 'Cleo', 'Jewel', 'Johnie', 'Byrd', 'Ocie', 'Guadalupe', 'Lue', 'Odie', 'Lonie', 'Mannie', 'Dana', 'Gene', 'Cary', 'Gail', 'Olie', 'Lennie', 'Claudie', 'Donnie', 'Ottie', 'Verne', 'Elza', 'Arlie', 'Bee', 'Orrie', 'Orie', 'Jodie', 'Tracy', 'Osie', 'Golden', 'Valentine', 'Love', 'Laverne', 'Courtney', 'Rene', 'Rennie', 'Billie', 'Pleasant', 'Sydney', 'Oral', 'Hollie', 'Jule', 'Gay', 'Laurie', 'Montie', 'Patsy', 'Otha', 'Ivory', 'Laurel', 'Alvie', 'Woodie', 'Earlie', 'Lorenza', 'Dennie', 'Maxie', 'Kay', 'Santos', 'Vannie', 'Aubrey', 'Carlie', 'Mell', 'Ernie', 'Garnet', 'Odell', 'Genie', 'Unknown', 'Garnett', 'Pearley', 'Robbie', 'Avery', 'Bernie', 'Shellie', 'Arley', 'Beryl', 'Elvie', 'Linnie', 'Roe', 'W

We iterate through each file and extract the names that are used by both sexes. The names are stored in the male and female dictionaries, as well as their relative frequencies and the year for each relative frequency (example of dictionary - male = {"Name" : {Year : Frequency}}). Then we iterate through the dictionaries and compare the relative frequencies between male and female. If there is a point where one sex's frequency surpasses the other, than we indicate that as a name that has switched popularity between sexes. We add all the names that meet this condition into a list. 