In [5]:
# 07_02: Loading Baby Names

# Ensure Python version compatibility (3.6 or higher recommended)
# Import required libraries
import math
import collections
import numpy as np
import pandas as pd
import matplotlib.pyplot as pp

# Set up inline plotting for Jupyter Notebook
%matplotlib inline

# Adjust pandas display options
pd.options.display.max_rows = 8

# Import zipfile module to handle compressed data
import zipfile

# Extract the compressed file containing names data
zipfile.ZipFile('names.zip').extractall('.')

# List the files in the current directory
!ls

# List the files in the extracted 'names' directory
!ls names

# Explore a single year (2011) before loading the whole dataset.
#
# The sample is read from the extracted 'names' directory, the same location
# the all-years load uses, instead of an absolute, Colab-specific '/content'
# path that only works when a separate copy of the file has been uploaded.
#
# NOTE: by default a notebook renders only the last bare expression of a
# cell, so run each step below in its own cell to see its result.
from itertools import islice

sample_path = 'names/yob2011.txt'

# Preview the first 10 lines of the file for the year 2011.
# islice stops after 10 lines instead of reading the entire file, and the
# with-block guarantees the file handle is closed.
with open(sample_path, 'r') as sample_file:
    first_lines = list(islice(sample_file, 10))
first_lines

# Load the CSV file into a pandas DataFrame with default settings
# (with no column names supplied, pandas treats the first record as the
# header row, which is why the next step passes names explicitly)
pd.read_csv(sample_path)

# Load the CSV file with specified column names
pd.read_csv(sample_path, names=['name', 'sex', 'number'])

# Load the CSV file and add a new column "year" with a fixed value (2011)
pd.read_csv(sample_path, names=['name', 'sex', 'number']).assign(year=2011)

def _load_year(year):
    """Read names/yob<year>.txt into a DataFrame and tag every row with that year."""
    frame = pd.read_csv(f'names/yob{year}.txt', names=['name', 'sex', 'number'])
    frame['year'] = year
    return frame


# Stack the per-year tables for 1880 through 2018 into one DataFrame
# (range() excludes its upper bound, so 2019 itself is not loaded)
allyears = pd.concat([_load_year(year) for year in range(1880, 2019)])

# Print a summary of the combined DataFrame
allyears.info()

# Earliest and latest year present in the combined data
allyears['year'].min(), allyears['year'].max()

# Write the combined table to a CSV file without the index column;
# pandas infers gzip compression from the '.gz' extension
allyears.to_csv('allyears.csv.gz', index=False)


names  names.zip  sample_data  yob2011.txt
NationalReadMe.pdf  yob1899.txt  yob1919.txt  yob1939.txt  yob1959.txt	yob1979.txt  yob1999.txt
yob1880.txt	    yob1900.txt  yob1920.txt  yob1940.txt  yob1960.txt	yob1980.txt  yob2000.txt
yob1881.txt	    yob1901.txt  yob1921.txt  yob1941.txt  yob1961.txt	yob1981.txt  yob2001.txt
yob1882.txt	    yob1902.txt  yob1922.txt  yob1942.txt  yob1962.txt	yob1982.txt  yob2002.txt
yob1883.txt	    yob1903.txt  yob1923.txt  yob1943.txt  yob1963.txt	yob1983.txt  yob2003.txt
yob1884.txt	    yob1904.txt  yob1924.txt  yob1944.txt  yob1964.txt	yob1984.txt  yob2004.txt
yob1885.txt	    yob1905.txt  yob1925.txt  yob1945.txt  yob1965.txt	yob1985.txt  yob2005.txt
yob1886.txt	    yob1906.txt  yob1926.txt  yob1946.txt  yob1966.txt	yob1986.txt  yob2006.txt
yob1887.txt	    yob1907.txt  yob1927.txt  yob1947.txt  yob1967.txt	yob1987.txt  yob2007.txt
yob1888.txt	    yob1908.txt  yob1928.txt  yob1948.txt  yob1968.txt	yob1988.txt  yob2008.txt
yob1889.txt	    yob1909.txt  yob1