# Final project

# Generating data

The presentation phase has generated the `areas.json` and `vehicles.json` files,
containing the "L" train stations and the total number of vehicles in each area,
respectively.

In [1]:
from os import makedirs

from colorama import Fore, Style
from pandas import DataFrame
from us.states import IL

from _lib import FRAME_COLUMNS, ACS_DIR, get_census, load_areas

# ACS 5-year vintages to download; the loop below writes one CSV per (area, year).
SELECTED_YEARS = [2009, 2013, 2018, 2023]
# Census variable code -> column name used in the saved CSVs.
# NOTE(review): B08301_002E appears to be the "Car, truck, or van" row of the
# ACS "Means of Transportation to Work" table, i.e. a count of commuters rather
# than of vehicle owners — confirm the label matches the intended measure.
SELECTED_TABLE = {'B08301_002E': 'Population with personal vehicle'}


def fetch_tracts(tracts, call_client, year, columns):
    """Download Census variables for every tract of the county and keep some.

    The request always covers all tracts (``tract:*``) of county 031 in the
    state identified by ``IL.fips``; the selection of ``tracts`` happens
    locally on the downloaded table.

    Args:
        tracts: Tract identifiers to keep; rows whose ``tract`` value is not
            among them are dropped.
        call_client: Callable that receives the object returned by
            ``get_census()`` and returns the dataset whose ``get`` method is
            queried (for example ``lambda census: census.acs5``).
        year: Forwarded to the dataset's ``get`` call as ``year``.
        columns: Mapping of Census variable code to the column name it is
            renamed to in the result.

    Returns:
        A new DataFrame with ``FRAME_COLUMNS`` followed by the renamed
        variable columns, restricted to the requested tracts.
    """
    dataset = call_client(get_census())
    records = dataset.get(
        list(columns.keys()),
        geo={'for': 'tract:*', 'in': f'state:{IL.fips} county:031'},
        year=year,
    )
    table = DataFrame(records).rename(columns=columns)

    in_requested_tracts = table['tract'].isin(tracts)
    kept_columns = FRAME_COLUMNS + list(columns.values())
    return table[in_requested_tracts][kept_columns].copy()


print(f'{Fore.YELLOW}Fetching ACS 5-Year Estimates...{Style.RESET_ALL}')
makedirs(ACS_DIR, exist_ok=True)

# Progress is reported relative to the number of selected years, so the last
# year always prints "100%" and ends the line, whatever the list length.
year_count = len(SELECTED_YEARS)

# NOTE(review): fetch_tracts requests every tract of the county and filters
# locally, so the same download is repeated for each area; caching one
# response per year would avoid that.
for area in load_areas():
    # Pad the area name with dots up to a fixed width of 25 characters.
    padding = '.' * (25 - len(area.area))
    print(f'{Style.BRIGHT}{area.area}{Style.RESET_ALL}{padding} ', end='')

    # File-name stem: lower-cased area name with spaces turned into underscores.
    slug = area.area.lower().replace(' ', '_')

    for done, year in enumerate(SELECTED_YEARS, start=1):
        fetch_tracts(
            area.tracts,
            lambda census: census.acs5,
            year,
            SELECTED_TABLE,
        ).to_csv(f'{ACS_DIR}/{slug}_{year}.csv', index=False)
        if done < year_count:
            print(f'{done * 100 // year_count}%... ', end='')
        else:
            print('100%')
print(f'{Fore.GREEN}Done.{Style.RESET_ALL}')

[33mFetching ACS 5-Year Estimates...[0m
[1mRogers Park[0m.............. 25%... 50%... 75%... 100%
[1mWest Ridge[0m............... 25%... 50%... 75%... 100%
[1mUptown[0m................... 25%... 50%... 75%... 100%
[1mLincoln Square[0m........... 25%... 50%... 75%... 100%
[1mNorth Center[0m............. 25%... 50%... 75%... 100%
[1mLake View[0m................ 25%... 50%... 75%... 100%
[1mLincoln Park[0m............. 25%... 50%... 75%... 100%
[1mNear North Side[0m.......... 25%... 50%... 75%... 100%
[1mEdison Park[0m.............. 25%... 50%... 75%... 100%
[1mNorwood Park[0m............. 25%... 50%... 75%... 100%
[1mJefferson Park[0m........... 25%... 50%... 75%... 100%
[1mForest Glen[0m.............. 25%... 50%... 75%... 100%
[1mNorth Park[0m............... 25%... 50%... 75%... 100%
[1mAlbany Park[0m.............. 25%... 50%... 75%... 100%
[1mPortage Park[0m............. 25%... 50%... 75%... 100%
[1mIrving Park[0m.............. 25%... 50%... 75%... 100

In [3]:
from os import makedirs

from colorama import Fore, Style
from pandas import DataFrame
from us.states import IL

from _lib import FRAME_COLUMNS, DECENNIAL_DIR, get_census, load_areas

# Census variable code -> column name, one mapping per decennial census.
# The loop below pairs SELECTED_TABLE1 with the 2010 `sf1` dataset and
# SELECTED_TABLE2 with the 2020 `pl` dataset. The year embedded in each column
# name matters: the statistics cell locates the population column by looking
# for the file's year inside the column name.
SELECTED_TABLE1 = {'P001001': 'Total population 2010'}
SELECTED_TABLE2 = {'P1_001N': 'Total population 2020'}


def fetch_tracts(tracts, call_client, year, columns):
    """Download Census variables for every tract of the county and keep some.

    Same helper as the one defined in the ACS cell; it is repeated here so
    that this cell can run on its own.

    Args:
        tracts: Tract identifiers to keep; rows whose ``tract`` value is not
            among them are dropped.
        call_client: Callable that receives the object returned by
            ``get_census()`` and returns the dataset whose ``get`` method is
            queried (for example ``lambda census: census.sf1``).
        year: Forwarded to the dataset's ``get`` call as ``year``.
        columns: Mapping of Census variable code to the column name it is
            renamed to in the result.

    Returns:
        A new DataFrame with ``FRAME_COLUMNS`` followed by the renamed
        variable columns, restricted to the requested tracts.
    """
    variable_codes = list(columns.keys())
    # The query always spans all tracts of county 031; filtering is local.
    where = {'for': 'tract:*', 'in': f'state:{IL.fips} county:031'}
    raw_rows = call_client(get_census()).get(variable_codes, geo=where, year=year)

    county_table = DataFrame(raw_rows).rename(columns=columns)
    selected_rows = county_table[county_table['tract'].isin(tracts)]
    return selected_rows[FRAME_COLUMNS + list(columns.values())].copy()


print(f'{Fore.YELLOW}Fetching Decennial Data...{Style.RESET_ALL}')
makedirs(DECENNIAL_DIR, exist_ok=True)

# One entry per census: year, dataset selector, variable table, and the
# progress text (with its line ending) printed once that file is written.
decennial_steps = (
    (2010, lambda census: census.sf1, SELECTED_TABLE1, '50%... ', ''),
    (2020, lambda census: census.pl, SELECTED_TABLE2, '100%', '\n'),
)

for area in load_areas():
    # Pad the area name with dots up to a fixed width of 25 characters.
    padding = '.' * (25 - len(area.area))
    print(f'{Style.BRIGHT}{area.area}{Style.RESET_ALL}{padding} ', end='')

    # File-name stem: lower-cased area name with spaces turned into underscores.
    slug = area.area.lower().replace(' ', '_')

    for year, pick_dataset, table, progress, line_end in decennial_steps:
        tract_rows = fetch_tracts(area.tracts, pick_dataset, year, table)
        tract_rows.to_csv(f'{DECENNIAL_DIR}/{slug}_{year}.csv', index=False)
        print(progress, end=line_end)
print(f'{Fore.GREEN}Done.{Style.RESET_ALL}')

[33mFetching Decennial Data...[0m
[1mRogers Park[0m.............. 50%... 100%
[1mWest Ridge[0m............... 50%... 100%
[1mUptown[0m................... 50%... 100%
[1mLincoln Square[0m........... 50%... 100%
[1mNorth Center[0m............. 50%... 100%
[1mLake View[0m................ 50%... 100%
[1mLincoln Park[0m............. 50%... 100%
[1mNear North Side[0m.......... 50%... 100%
[1mEdison Park[0m.............. 50%... 100%
[1mNorwood Park[0m............. 50%... 100%
[1mJefferson Park[0m........... 50%... 100%
[1mForest Glen[0m.............. 50%... 100%
[1mNorth Park[0m............... 50%... 100%
[1mAlbany Park[0m.............. 50%... 100%
[1mPortage Park[0m............. 50%... 100%
[1mIrving Park[0m.............. 50%... 100%
[1mDunning[0m.................. 50%... 100%
[1mMontclare[0m................ 50%... 100%
[1mBelmont Cragin[0m........... 50%... 100%
[1mHermosa[0m.................. 50%... 100%
[1mAvondale[0m................. 50%... 100

# Printing statistics

Compare vehicle ownership with population; the results are tabulated in the report.

In [1]:
import re
from glob import glob

from pandas import read_csv, DataFrame, concat

from _lib import DECENNIAL_DIR, YEAR_KEY, create_tract_lookup_table, REGION_KEY, ACS_DIR

# Column names of the aggregated table printed at the end of this cell.
TOTAL_POPULATION_KEY: str = 'Total population'
VEHICLE_OWNERSHIP_KEY: str = 'Vehicle ownership'

# Column name as stored in the ACS CSVs; it has to match the label given to the
# ACS variable in SELECTED_TABLE when the files were generated.
VEHICLE_COL = 'Population with personal vehicle'


def generic_data_loader(data_dir: str) -> list[DataFrame]:
    """Load the year-stamped CSV files of a directory, one frame per file.

    Only files whose name ends in four digits followed by ``.csv`` are
    loaded; those digits become the value of the ``YEAR_KEY`` column. The
    ``tract`` column is normalised to a zero-padded six-character string,
    the tract lookup table is attached with a left merge on ``tract``, and
    rows left without a ``REGION_KEY`` value are dropped.

    Args:
        data_dir: Directory searched (non-recursively) for ``*.csv`` files.

    Returns:
        One DataFrame per loaded file, in sorted path order. Empty list when
        the directory holds no year-stamped CSV.
    """
    year_pattern = re.compile(r'.*?(\d{4})\.csv')

    # Decide which files qualify before opening any of them, so that a stray
    # CSV without a year in its name is never parsed. glob() returns paths in
    # arbitrary order; sorting keeps the result reproducible.
    dated_files = []
    for file_path in sorted(glob(f'{data_dir}/*.csv')):
        match = year_pattern.search(file_path)
        if match:
            dated_files.append((file_path, int(match.group(1))))
    if not dated_files:
        return []

    # Built once and shared by every merge instead of once per file.
    # NOTE(review): assumes create_tract_lookup_table() returns the same table
    # on every call — confirm against its definition in _lib.
    tract_lookup = create_tract_lookup_table()

    frames = []
    for file_path, year in dated_files:
        table = read_csv(file_path)
        table[YEAR_KEY] = year
        # read_csv may parse tract codes as integers, which drops leading
        # zeros; restore the six-character form before joining on it.
        table['tract'] = table['tract'].astype(str).str.zfill(6)
        frames.append(
            table.merge(tract_lookup, on='tract', how='left')
            .dropna(subset=[REGION_KEY]),
        )
    return frames


def load_decennial() -> DataFrame:
    """Aggregate the decennial population files by region and year.

    Each non-empty frame from ``generic_data_loader(DECENNIAL_DIR)`` has its
    population column renamed to ``TOTAL_POPULATION_KEY``. That column is the
    first one whose name contains "population" (case-insensitive) together
    with the frame's year, taken from the first row of ``YEAR_KEY``.

    Returns:
        A DataFrame with the columns ``REGION_KEY``, ``YEAR_KEY`` and
        ``TOTAL_POPULATION_KEY`` holding the summed population per
        (region, year). Empty, with those columns, when nothing was loaded.

    Raises:
        KeyError: A frame has neither a year-stamped population column nor a
            column already named ``TOTAL_POPULATION_KEY``.
    """
    result_columns = [REGION_KEY, YEAR_KEY, TOTAL_POPULATION_KEY]

    frames = []
    for f in generic_data_loader(DECENNIAL_DIR):
        if f.empty:
            continue

        year = str(f[YEAR_KEY].iloc[0])
        population_col = next(
            (col for col in f.columns if 'population' in col.lower() and year in col),
            None,
        )
        if population_col is not None:
            f = f.rename(columns={population_col: TOTAL_POPULATION_KEY})
        elif TOTAL_POPULATION_KEY not in f.columns:
            # Fail with the context needed to find the offending file rather
            # than with a bare KeyError on the target column name.
            raise KeyError(
                f'no population column for year {year} among {list(f.columns)}',
            )

        f[TOTAL_POPULATION_KEY] = f[TOTAL_POPULATION_KEY].astype(float)
        frames.append(f[result_columns])

    if not frames:
        return DataFrame(columns=result_columns)
    return (
        concat(frames, ignore_index=True)
        .groupby([REGION_KEY, YEAR_KEY])[TOTAL_POPULATION_KEY]
        .sum()
        .reset_index()
    )


def load_acs() -> DataFrame:
    """Aggregate the ACS vehicle files by region and year.

    Each non-empty frame from ``generic_data_loader(ACS_DIR)`` has its
    ``VEHICLE_COL`` column renamed to ``VEHICLE_OWNERSHIP_KEY`` and cast to
    float before the frames are stacked and summed.

    Returns:
        A DataFrame with the columns ``REGION_KEY``, ``YEAR_KEY`` and
        ``VEHICLE_OWNERSHIP_KEY`` holding the summed value per
        (region, year). Empty, with those columns, when nothing was loaded.
    """
    result_columns = [REGION_KEY, YEAR_KEY, VEHICLE_OWNERSHIP_KEY]

    per_file = []
    for table in generic_data_loader(ACS_DIR):
        if table.empty:
            continue
        renamed = table.rename(columns={VEHICLE_COL: VEHICLE_OWNERSHIP_KEY})
        renamed[VEHICLE_OWNERSHIP_KEY] = renamed[VEHICLE_OWNERSHIP_KEY].astype(float)
        per_file.append(renamed[result_columns])

    # Nothing to aggregate: hand back an empty table with the expected columns.
    if not per_file:
        return DataFrame(columns=result_columns)

    stacked = concat(per_file, ignore_index=True)
    totals = stacked.groupby([REGION_KEY, YEAR_KEY])[VEHICLE_OWNERSHIP_KEY].sum()
    return totals.reset_index()


# Join population and vehicle figures on (region, year). The outer join keeps
# years present in only one of the two sources.
population_by_region = load_decennial()
vehicles_by_region = load_acs()
frame = (
    population_by_region
    .merge(vehicles_by_region, on=[REGION_KEY, YEAR_KEY], how='outer')
    .sort_values(by=[REGION_KEY, YEAR_KEY])
)

# A value missing on one side of the join is shown as 0, so a 0 in the printed
# table means "no figure for that year", not a measured zero.
for count_column in (TOTAL_POPULATION_KEY, VEHICLE_OWNERSHIP_KEY):
    frame[count_column] = frame[count_column].fillna(0).astype(int)

print(frame.to_string(index=False))

            Region  Year  Total population  Vehicle ownership
           Central  2009                 0              12355
           Central  2010             86830                  0
           Central  2013                 0              16749
           Central  2018                 0              17516
           Central  2020            149977                  0
           Central  2023                 0              26517
    Far North Side  2009                 0              97646
    Far North Side  2010            487023                  0
    Far North Side  2013                 0             153773
    Far North Side  2018                 0             157462
    Far North Side  2020            496376                  0
    Far North Side  2023                 0             147586
Far Southeast Side  2009                 0              43904
Far Southeast Side  2010            197740                  0
Far Southeast Side  2013                 0              44811
Far Sout

In [12]:
from collections import Counter

from colorama import Fore, Style

from _lib import load_areas

# Total number of stations per region, summed over the areas of that region.
station_counts = Counter()
for area in load_areas():
    stations_here = len(area.stations or ())
    if not area.region:
        continue
    # Adding 0 still registers the region, so regions without any station
    # show up in the listing.
    station_counts[area.region] += stations_here

print(f'{Fore.YELLOW}Station counts...{Style.RESET_ALL}')
# Highest count first.
for region, count in station_counts.most_common():
    print(f'{region:18s}: {Style.BRIGHT}{count}{Style.RESET_ALL} ')
print(f'{Fore.GREEN}Done.{Style.RESET_ALL}')

[33mStation counts...[0m
West Side         : [1m34[0m 
Central           : [1m26[0m 
Far North Side    : [1m22[0m 
North Side        : [1m15[0m 
South Side        : [1m14[0m 
Southwest Side    : [1m8[0m 
Northwest Side    : [1m3[0m 
Far Southeast Side: [1m3[0m 
Far Southwest Side: [1m0[0m 
[32mDone.[0m
