In [None]:
from turfpy.measurement import boolean_point_in_polygon
from geojson import Point, Polygon, Feature
import pandas as pd
from shapely.geometry import shape
import json
import geojson
def get_filepath(state):
    """Return the relative data directory for ``state``, e.g. ``'../data/dc/'``."""
    return f'../data/{state}/'
def get_filepath_city(city):
    """Return the relative bike-data directory for ``city``, e.g. ``'../data/boston-bike/'``."""
    return ''.join(['../data/', city, '-bike/'])

STATES = ['dc', 'ma', 'il', 'ny', 'pa']
CITIES = ['dc', 'boston', 'chicago', 'nyc', 'philadelphia']
# First year analysed for each entry above (same order as STATES / CITIES).
START_YEARS = [2010, 2011, 2013, 2013, 2016]
# Last year analysed (inclusive), shared by every city.
END_YEAR = 2022


def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole``.

    Yields NaN instead of raising when ``whole`` is zero, so a year with no
    rows (or an empty demographic group) leaves a gap in the results rather
    than aborting the whole run.
    """
    if whole == 0:
        return float('nan')
    return (part / whole) * 100


# Maps each city name to a DataFrame with one row per year and one column per
# group, holding the percentage of that group's total that lies in rows where
# `inServiceArea` is truthy.
df_storage = {}
for STATE, CITY, START_YEAR in zip(STATES, CITIES, START_YEARS):
    print(STATE)
    data = pd.read_csv(get_filepath(STATE) + f'{STATE}_preprocessed_data.csv')

    years = range(START_YEAR, END_YEAR + 1)
    records = []
    for year in years:
        temp_data = data.loc[data['year'] == year]

        # The `households` column is the weight used for the total and for
        # both income groups.
        households = temp_data['households']
        income = temp_data['median_income']
        # Unweighted mean of the rows' median incomes. skipna=False keeps the
        # NaN propagation of the built-in sum()/len() this replaces.
        avg_income = income.mean(skipna=False)

        # Per-row counts for every reported group. Rows whose income equals
        # the average exactly are counted in neither income group.
        counts_by_column = {
            'total_percent': households,
            'white_percent': temp_data['num_white'],
            'black_percent': temp_data['num_black'],
            'asian_percent': temp_data['num_asian'],
            'other_percent': temp_data['num_other'] + temp_data['num_2om'],
            'high_income_percent': households.where(income > avg_income, 0),
            'low_income_percent': households.where(income < avg_income, 0),
        }

        # Truthiness of the flag, matching a plain `if value:` test.
        in_area = temp_data['inServiceArea'].astype(bool)

        record = {'year': year}
        for column, counts in counts_by_column.items():
            record[column] = _percent(
                counts[in_area].sum(skipna=False),
                counts.sum(skipna=False),
            )
        records.append(record)

    # Build the frame once from the collected rows; column order follows the
    # insertion order of the record keys.
    output_df = pd.DataFrame(records)
    df_storage[CITY] = output_df



In [None]:
import matplotlib.pyplot as plt


# Columns drawn in the bottom row (race groups) and top row (income groups),
# with the legend label for each. The first entry of each list is the overall
# total and is drawn as a dashed black line.
issues_1 = ['total_percent', 'white_percent', 'black_percent', 'asian_percent', 'other_percent']
labels_1 = ['Total', 'White', 'Black', 'Asian', 'Other']
issues_2 = ['total_percent', 'high_income_percent', 'low_income_percent']
labels_2 = ['Total', 'Above avg income', 'Below avg income']
# NOTE(review): these palettes are defined but were never passed to plot();
# every non-total line uses matplotlib's default colour cycle. Kept as-is so
# the rendered figure does not change -- confirm whether they should be applied.
colors_1 = ['black', 'red', 'blue', 'green', 'orange']
colors_2 = ['black', 'green', 'red']

YEAR_LIMITS = (2010, 2022)
YEAR_TICKS = [2012, 2014, 2016, 2018, 2020, 2022]
FIRST_COLUMN_YEAR_TICKS = [2010] + YEAR_TICKS


def _plot_series(ax, df, issues, labels):
    """Draw one line per column in ``issues`` on ``ax`` against ``df['year']``.

    The first column is drawn dashed in black; the rest are solid and take
    their colour from the axes' default colour cycle.
    """
    x = df['year'].tolist()
    for j, (issue, label) in enumerate(zip(issues, labels)):
        y = df[issue].tolist()
        if j == 0:
            ax.plot(x, y, color='black', label=label, linestyle='dashed')
        else:
            ax.plot(x, y, label=label, linestyle='solid')


# Select the style before creating the figure: rcParams are read when the
# figure and axes are constructed, so a later call would not affect them.
plt.style.use('default')
fig, axs = plt.subplots(
    nrows=2,
    ncols=len(CITIES),
    figsize=(10, 5),
    sharey='row',
    sharex='col',
    dpi=600,
    squeeze=False,  # always a 2-D array, even for a single city
)

for i, city in enumerate(CITIES):
    city_df = df_storage[city]
    _plot_series(axs[1, i], city_df, issues_1, labels_1)
    _plot_series(axs[0, i], city_df, issues_2, labels_2)

    axs[0, i].set_title(city.upper(), fontsize=13)
    for ax in (axs[0, i], axs[1, i]):
        ax.set_ylim(0, 100)
        ax.set_xlim(*YEAR_LIMITS)
    axs[1, i].set_xticks(YEAR_TICKS)
    axs[1, i].set_xticklabels(YEAR_TICKS, rotation=90)

# Legends go on the right-most column only (stated explicitly rather than
# relying on the loop variable leaking out of the loop above).
axs[0, -1].legend(framealpha=1, handlelength=1)
axs[1, -1].legend(framealpha=1, handlelength=1)
# The first column additionally shows a tick at 2010.
axs[1, 0].set_xticks(FIRST_COLUMN_YEAR_TICKS)
axs[1, 0].set_xticklabels(FIRST_COLUMN_YEAR_TICKS, rotation=90)

fig.supylabel('% Population in service area', fontsize='x-large')
fig.supxlabel("Year", fontsize='x-large')
fig.tight_layout()
# Applied after tight_layout so these margins and gaps take precedence.
fig.subplots_adjust(left=.08, right=.93, wspace=.08, hspace=.08)
