# Commuting to Work Script
Run all of these code blocks in order to create the COMMUTING_TO_WORK data table.

### Import Statements

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### Function to create row for each year

In [14]:
def commuting(file,year):
    # Read in file
    df = pd.read_csv(file)

    # Get rid of spaces in Labels
    df['Label'] = df['Label'].apply(lambda x: x.lstrip())

    # Convert percentage estimates to float
    df['Percentage'] = df['Estimate'].apply(lambda x: float(x.rstrip('%'))/100 if '%' in x else None)

    # Keep only full population stats (excluded mean travel time to work)
    df = df.iloc[:8]

    # Move Total Workers 16+ from row to column
    df['Total Workers 16+'] = int(df.iloc[0]['Estimate'].replace(',',''))
    df.dropna(inplace=True)

    # Drop unnecessary columns
    df.drop(columns=['Estimate','Margin of Error'], inplace=True)

    # Calculate population estimates
    df['Population Estimate'] = df['Percentage']*df['Total Workers 16+']

    # Drop unnecessary columns
    df.drop(columns=['Percentage','Total Workers 16+'], inplace=True)

    # Change index and transpose table
    df.set_index('Label', inplace=True)
    df = df.T

    # Change index to year
    df.index = [year]

    # Calculate extra columns
    df['Total Workers 16+'] = df.sum(axis=1)

    # Rename all columns with abbreviated underscored naming conventions
    df.rename(columns={'Car, truck, or van - drove alone':'drove_alone', 'Car, truck, or van - carpooled':'carpooled', 'Public transportation (excluding taxicab)':'pub_trans', 'Walked':'walked', 'Other means':'other', 'Total Workers 16+':'total_workers_16_plus', 'Worked at home':'wah', 'Worked from home':'wfh1'}, inplace=True)

    return df

### Test that function works

In [15]:
commuting('data/2014/COMMUTING_TO_WORK.CSV',2014)

Label,drove_alone,carpooled,pub_trans,walked,other,wah,total_workers_16_plus
2014,28027.818,5614.336,6184.542,2017.652,921.102,1096.55,43862.0


### Test that function works for each year
Should return only "No file for year '2020'"

In [16]:
for year in range(2010,2023):
    file = 'data/'+str(year)+'/COMMUTING_TO_WORK.CSV'

    try:
        commuting(file,year)
    except:
        print(f"No file for year '{year}'")

No file for year '2020'


### Run function for each year

In [17]:
COMMUTING_TO_WORK_2022 = commuting('data/2022/COMMUTING_TO_WORK.CSV',2022)
COMMUTING_TO_WORK_2021 = commuting('data/2021/COMMUTING_TO_WORK.CSV',2021)
COMMUTING_TO_WORK_2019 = commuting('data/2019/COMMUTING_TO_WORK.CSV',2019)
COMMUTING_TO_WORK_2018 = commuting('data/2018/COMMUTING_TO_WORK.CSV',2018)
COMMUTING_TO_WORK_2017 = commuting('data/2017/COMMUTING_TO_WORK.CSV',2017)
COMMUTING_TO_WORK_2016 = commuting('data/2016/COMMUTING_TO_WORK.CSV',2016)
COMMUTING_TO_WORK_2015 = commuting('data/2015/COMMUTING_TO_WORK.CSV',2015)
COMMUTING_TO_WORK_2014 = commuting('data/2014/COMMUTING_TO_WORK.CSV',2014)
COMMUTING_TO_WORK_2013 = commuting('data/2013/COMMUTING_TO_WORK.CSV',2013)
COMMUTING_TO_WORK_2012 = commuting('data/2012/COMMUTING_TO_WORK.CSV',2012)
COMMUTING_TO_WORK_2011 = commuting('data/2011/COMMUTING_TO_WORK.CSV',2011)
COMMUTING_TO_WORK_2010 = commuting('data/2010/COMMUTING_TO_WORK.CSV',2010)

### Combine each year into single dataframe containing all years

In [18]:
COMMUTING_TO_WORK = pd.concat([COMMUTING_TO_WORK_2022,COMMUTING_TO_WORK_2021,COMMUTING_TO_WORK_2019,COMMUTING_TO_WORK_2018,COMMUTING_TO_WORK_2017,COMMUTING_TO_WORK_2016,COMMUTING_TO_WORK_2015,COMMUTING_TO_WORK_2014,COMMUTING_TO_WORK_2013,COMMUTING_TO_WORK_2012,COMMUTING_TO_WORK_2011,COMMUTING_TO_WORK_2010])
COMMUTING_TO_WORK

Label,drove_alone,carpooled,pub_trans,walked,other,wfh1,total_workers_16_plus,wah
2022,27745.377,6429.41,4599.501,1582.624,1681.538,7418.55,49457.0,
2021,26519.519,5503.156,3273.429,2182.286,1138.584,8824.026,47441.0,
2019,29234.948,7271.384,8815.308,1743.14,647.452,2141.572,49853.804,
2018,31948.32,4363.2,7417.44,2278.56,1163.52,,48480.0,1308.96
2017,28833.696,5258.976,7480.44,1677.432,1133.4,,45336.0,952.056
2016,26647.74,4610.482,6344.7,1015.152,1861.112,,42298.0,1818.814
2015,29000.702,5466.586,6717.415,2640.639,880.213,,46327.0,1621.445
2014,28027.818,5614.336,6184.542,2017.652,921.102,,43862.0,1096.55
2013,26292.952,4717.008,8473.144,2664.236,960.872,,43676.0,567.788
2012,27037.912,7362.234,5553.966,2109.646,129.162,,43010.946,818.026


### Combine work from/at home into single columns

In [19]:
COMMUTING_TO_WORK['wfh'] = COMMUTING_TO_WORK['wfh1'].combine_first(COMMUTING_TO_WORK['wah'])
COMMUTING_TO_WORK = COMMUTING_TO_WORK.drop(columns=['wfh1', 'wah'])
COMMUTING_TO_WORK

Label,drove_alone,carpooled,pub_trans,walked,other,total_workers_16_plus,wfh
2022,27745.377,6429.41,4599.501,1582.624,1681.538,49457.0,7418.55
2021,26519.519,5503.156,3273.429,2182.286,1138.584,47441.0,8824.026
2019,29234.948,7271.384,8815.308,1743.14,647.452,49853.804,2141.572
2018,31948.32,4363.2,7417.44,2278.56,1163.52,48480.0,1308.96
2017,28833.696,5258.976,7480.44,1677.432,1133.4,45336.0,952.056
2016,26647.74,4610.482,6344.7,1015.152,1861.112,42298.0,1818.814
2015,29000.702,5466.586,6717.415,2640.639,880.213,46327.0,1621.445
2014,28027.818,5614.336,6184.542,2017.652,921.102,43862.0,1096.55
2013,26292.952,4717.008,8473.144,2664.236,960.872,43676.0,567.788
2012,27037.912,7362.234,5553.966,2109.646,129.162,43010.946,818.026


### Rearrange columns

In [20]:
COMMUTING_TO_WORK = COMMUTING_TO_WORK[['drove_alone','carpooled','pub_trans','walked','other','wfh','total_workers_16_plus']]
COMMUTING_TO_WORK

Label,drove_alone,carpooled,pub_trans,walked,other,wfh,total_workers_16_plus
2022,27745.377,6429.41,4599.501,1582.624,1681.538,7418.55,49457.0
2021,26519.519,5503.156,3273.429,2182.286,1138.584,8824.026,47441.0
2019,29234.948,7271.384,8815.308,1743.14,647.452,2141.572,49853.804
2018,31948.32,4363.2,7417.44,2278.56,1163.52,1308.96,48480.0
2017,28833.696,5258.976,7480.44,1677.432,1133.4,952.056,45336.0
2016,26647.74,4610.482,6344.7,1015.152,1861.112,1818.814,42298.0
2015,29000.702,5466.586,6717.415,2640.639,880.213,1621.445,46327.0
2014,28027.818,5614.336,6184.542,2017.652,921.102,1096.55,43862.0
2013,26292.952,4717.008,8473.144,2664.236,960.872,567.788,43676.0
2012,27037.912,7362.234,5553.966,2109.646,129.162,818.026,43010.946


### Reset index on combined dataframe

In [21]:
COMMUTING_TO_WORK.reset_index(inplace=True)
COMMUTING_TO_WORK.rename(columns={'index':'year'}, inplace=True)
COMMUTING_TO_WORK

Label,year,drove_alone,carpooled,pub_trans,walked,other,wfh,total_workers_16_plus
0,2022,27745.377,6429.41,4599.501,1582.624,1681.538,7418.55,49457.0
1,2021,26519.519,5503.156,3273.429,2182.286,1138.584,8824.026,47441.0
2,2019,29234.948,7271.384,8815.308,1743.14,647.452,2141.572,49853.804
3,2018,31948.32,4363.2,7417.44,2278.56,1163.52,1308.96,48480.0
4,2017,28833.696,5258.976,7480.44,1677.432,1133.4,952.056,45336.0
5,2016,26647.74,4610.482,6344.7,1015.152,1861.112,1818.814,42298.0
6,2015,29000.702,5466.586,6717.415,2640.639,880.213,1621.445,46327.0
7,2014,28027.818,5614.336,6184.542,2017.652,921.102,1096.55,43862.0
8,2013,26292.952,4717.008,8473.144,2664.236,960.872,567.788,43676.0
9,2012,27037.912,7362.234,5553.966,2109.646,129.162,818.026,43010.946


### Save final dataframe as CSV to 'final' folder in 'data' folder

In [22]:
COMMUTING_TO_WORK.to_csv('data/final/COMMUTING_TO_WORK.CSV', index=False)

### Check that dataframe was saved properly as CSV

In [23]:
df = pd.read_csv('data/final/COMMUTING_TO_WORK.CSV')
df

Unnamed: 0,year,drove_alone,carpooled,pub_trans,walked,other,wfh,total_workers_16_plus
0,2022,27745.377,6429.41,4599.501,1582.624,1681.538,7418.55,49457.0
1,2021,26519.519,5503.156,3273.429,2182.286,1138.584,8824.026,47441.0
2,2019,29234.948,7271.384,8815.308,1743.14,647.452,2141.572,49853.804
3,2018,31948.32,4363.2,7417.44,2278.56,1163.52,1308.96,48480.0
4,2017,28833.696,5258.976,7480.44,1677.432,1133.4,952.056,45336.0
5,2016,26647.74,4610.482,6344.7,1015.152,1861.112,1818.814,42298.0
6,2015,29000.702,5466.586,6717.415,2640.639,880.213,1621.445,46327.0
7,2014,28027.818,5614.336,6184.542,2017.652,921.102,1096.55,43862.0
8,2013,26292.952,4717.008,8473.144,2664.236,960.872,567.788,43676.0
9,2012,27037.912,7362.234,5553.966,2109.646,129.162,818.026,43010.946
