# Place of Birth, Citizenship Status, and Year of Entry Script
Run every code cell below, from top to bottom, to create the PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY data table.

### Import Statements

In [63]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### Function to create row for each year

In [64]:
def read_file_details(file, year):
    """Build a single-row table of population counts for one year.

    Reads the CSV, drops the rows at a fixed set of positions and the
    'Margin of Error' column, transposes the remaining labels into columns,
    renames them to short snake_case names and converts the values to
    integers.

    Parameters
    ----------
    file : str or file-like
        Path (or open buffer) handed to ``pandas.read_csv``. The data must
        contain a 'Label' column, a 'Margin of Error' column and exactly one
        further value column, otherwise assigning the one-element year index
        fails.
    year : int
        Value used as the index label of the returned row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``year`` with integer columns 'native',
        'foreign_born_naturalized', 'foreign_born_noncitizen' and
        'total_foreign_born'.

    Raises
    ------
    KeyError
        If any of the row positions to drop does not exist, or if one of the
        expected labels is missing after the rows have been dropped.
    """
    # Row positions removed so that only the full-population rows remain.
    # NOTE(review): this assumes every yearly file uses the same row order --
    # confirm whenever a new year is added.
    breakdown_rows = [1, 2, 3, 4, 5, 7, 8, 10, 11, 13, 14, 15]

    # Source label -> abbreviated, underscored column name.
    column_names = {
        'Native': 'native',
        'Foreign born; naturalized U.S. citizen': 'foreign_born_naturalized',
        'Foreign born; not a U.S. citizen': 'foreign_born_noncitizen',
        'Population born outside the United States': 'total_foreign_born',
    }

    def to_int(value):
        # Counts arrive as text such as '31,183' when the file uses thousands
        # separators, but as numbers when read_csv could parse the column.
        if isinstance(value, str):
            return int(value.replace(',', ''))
        return int(value)

    raw = pd.read_csv(file)

    table = (
        raw.assign(Label=raw['Label'].str.lstrip())  # labels are indented with leading whitespace
        .drop(index=breakdown_rows)
        .drop(columns=['Margin of Error'])
        .set_index('Label')
        .T
        .rename(columns=column_names)
    )

    # The single remaining row represents the requested year.
    table.index = [year]

    missing = [name for name in column_names.values() if name not in table.columns]
    if missing:
        raise KeyError(f"{file}: expected label(s) not found after dropping rows: {missing}")

    for name in column_names.values():
        table[name] = table[name].map(to_int)

    return table

### Test that function works

In [65]:
# Spot-check the function on a single year; the resulting one-row table is
# shown as the cell output.
read_file_details(
    file='data/2014/PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY.CSV',
    year=2014,
)

Label,native,foreign_born_naturalized,foreign_born_noncitizen,total_foreign_born
2014,31183,35542,12883,48425


### Test that function works for each year
The only expected output is "No file for year '2020'".

In [66]:
# Try the function on every year from 2010 through 2022. Only a missing input
# file is reported as "No file"; any other failure is printed with its actual
# cause so that parsing problems are not mistaken for absent data.
for year in range(2010,2023):
    file = 'data/'+str(year)+'/PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY.CSV'

    try:
        read_file_details(file,year)
    except FileNotFoundError:
        print(f"No file for year '{year}'")
    except Exception as err:
        # Keep the loop going, but surface what actually went wrong.
        print(f"Could not process year '{year}': {type(err).__name__}: {err}")

No file for year '2020'


### Run function for each year

In [67]:
# Load one single-row table per year, newest first (the list has no 2020 entry).
# The generator reads the files in the listed order and the results are
# unpacked into one variable per year.
(
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2022,
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2021,
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2019,
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2018,
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2017,
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2016,
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2015,
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2014,
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2013,
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2012,
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2011,
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2010,
) = (
    read_file_details(f'data/{survey_year}/PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY.CSV', survey_year)
    for survey_year in (2022, 2021, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010)
)

### Combine each year into single dataframe containing all years

In [68]:
# Stack the per-year single-row tables, newest year first, into one dataframe
# and display it.
PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY = pd.concat(
    objs=[
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2022,
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2021,
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2019,
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2018,
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2017,
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2016,
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2015,
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2014,
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2013,
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2012,
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2011,
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY_2010,
    ]
)
PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY

Label,native,foreign_born_naturalized,foreign_born_noncitizen,total_foreign_born
2022,40028,39005,11533,50538
2021,38668,38575,11167,49742
2019,38860,37633,14111,51744
2018,34992,38154,15716,53870
2017,33632,35849,14229,50078
2016,30603,28941,15686,44627
2015,33727,33897,14008,47905
2014,31183,35542,12883,48425
2013,30397,34591,18484,53075
2012,30785,31304,16024,47328


### Reset index on combined dataframe

In [69]:
# Move the year labels out of the index into a regular 'year' column.
# The guard makes re-running this cell a no-op: an unconditional in-place
# reset_index() + rename() would insert another column named 'year' on every
# additional run.
if 'year' not in PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY.columns:
    PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY = (
        PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY
        .rename_axis(index='year')  # name the index so reset_index() yields a 'year' column
        .reset_index()
    )
PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY

Label,year,native,foreign_born_naturalized,foreign_born_noncitizen,total_foreign_born
0,2022,40028,39005,11533,50538
1,2021,38668,38575,11167,49742
2,2019,38860,37633,14111,51744
3,2018,34992,38154,15716,53870
4,2017,33632,35849,14229,50078
5,2016,30603,28941,15686,44627
6,2015,33727,33897,14008,47905
7,2014,31183,35542,12883,48425
8,2013,30397,34591,18484,53075
9,2012,30785,31304,16024,47328


### Save final dataframe as CSV to 'final' folder in 'data' folder

In [70]:
# Write the combined table to the 'final' folder; index=False leaves the row
# index out of the file.
PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY.to_csv(
    path_or_buf="data/final/PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY.CSV",
    index=False,
)

### Check that dataframe was saved properly as CSV

In [71]:
# Read the saved file back from disk and display it for a visual check.
df = pd.read_csv(
    filepath_or_buffer='data/final/PLACE_OF_BIRTH_CITIZENSHIP_STATUS_AND_YEAR_OF_ENTRY.CSV'
)
df

Unnamed: 0,year,native,foreign_born_naturalized,foreign_born_noncitizen,total_foreign_born
0,2022,40028,39005,11533,50538
1,2021,38668,38575,11167,49742
2,2019,38860,37633,14111,51744
3,2018,34992,38154,15716,53870
4,2017,33632,35849,14229,50078
5,2016,30603,28941,15686,44627
6,2015,33727,33897,14008,47905
7,2014,31183,35542,12883,48425
8,2013,30397,34591,18484,53075
9,2012,30785,31304,16024,47328
