In [1]:
import numpy as np
import pandas as pd
import zipfile

In [2]:
import plotly.express as px
import plotly.figure_factory as ff

---

## National Names

Get the popular baby names data from the [Social Security webpage](https://www.ssa.gov/oact/babynames/limits.html).  If you click on [National Data](https://www.ssa.gov/oact/babynames/names.zip), the file "`names.zip"` should be downloaded automatically. 

Put the downloaded file somewhere that you can access it and run the following code to put it into a dataset.

In [3]:
names_file = 'names.zip'
with zipfile.ZipFile(names_file, 'r') as z:
    # Create a list to hold all the DataFrames
    dfs = []
    # Get a list of all the files in the zip file
    files = [file for file in z.namelist() if file.endswith('.txt')]
    # Loop over each file
    for file in files:
        # Read the CSV file into a DataFrame and append it to the list
        with z.open(file) as f:
            df = pd.read_csv(f)
            df.columns = ['name','sex','count']
            df['year'] = file[3:7]
            dfs.append(df)
    # Concatenate all the DataFrames into a single DataFrame
    names = pd.concat(dfs, ignore_index=True)

In [4]:
del dfs
del df

In [7]:
names.tail()

Unnamed: 0,name,sex,count,year
2085010,Zuberi,M,5,2022
2085011,Zydn,M,5,2022
2085012,Zylon,M,5,2022
2085013,Zymeer,M,5,2022
2085014,Zymeire,M,5,2022


In [12]:
brandon = names[names["name"] == "Brandon"]
rachael = names[names["name"] == "Rachael"]
lucy = names[names["name"] == "Lucy"]

# px.line(brandon, x="year", y="count", color="sex", title="Brandon through the years")
# px.line(rachael, x="year", y="count", color="sex", title="Rachael through the years")
px.line(lucy, x="year", y="count", color="sex", title="Lucy through the years")

In [14]:
# # This is a smaller file with only the most popular names
# url = 'https://github.com/esnt/Data/raw/main/Names/popular_names.csv'
# names = pd.read_csv(url)

---

## Names by State

If you click on this link, the [State specific names data](https://www.ssa.gov/oact/babynames/state/namesbystate.zip) will be downloaded as a file called `namesbystate.zip`

In [6]:
names_file = 'namesbystate.zip'
with zipfile.ZipFile(names_file, 'r') as z:
    # Create a list to hold all the DataFrames
    dfs = []
    # Get a list of all the files in the zip file
    files = [file for file in z.namelist() if file.endswith('.TXT')]
    # Loop over each file
    for file in files:
        # Read the CSV file into a DataFrame and append it to the list
        with z.open(file) as f:
            df = pd.read_csv(f)
            df.columns = ['state','sex','year','name','count']
            dfs.append(df)
    # Concatenate all the DataFrames into a single DataFrame
    state_names = pd.concat(dfs, ignore_index=True)

In [22]:
state_names.tail()

Unnamed: 0,state,sex,year,name,count
6407985,WY,M,2022,Lane,5
6407986,WY,M,2022,Michael,5
6407987,WY,M,2022,Nicholas,5
6407988,WY,M,2022,River,5
6407989,WY,M,2022,Silas,5


In [13]:
brandon = state_names[(state_names["name"] == "Brandon") & (state_names["state"] == "UT")]
rachael = state_names[(state_names["name"] == "Rachael") & (state_names["state"] == "UT")]
lucy = state_names[(state_names["name"] == "Lucy") & (state_names["state"] == "UT")]

# px.line(brandon, x="year", y="count", color="sex", title="Brandon through the years in UT", hover_data="state")
# px.line(rachael, x="year", y="count", color="sex", title="Rachael through the years in UT", hover_data="state")
px.line(lucy, x="year", y="count", color="sex", title="Lucy through the years in UT", hover_data="state")