In [55]:
import geopandas as gpd
import pandas as pd
import numpy as np

import folium

## facility shape files

In [57]:
ccc = gpd.read_file('dropoutC_boundary_shape/dropoutC_boundary.shp')
ddd = gpd.read_file('dropoutD_boundary_shape/dropoutD_boundary.shp')


## census data

In [59]:
age2022 = gpd.read_file('agesub2022_census.gpkg')
age2023 = gpd.read_file('agesub2023_census.gpkg')

## Aggregation

In [61]:
gdf_merged = gpd.GeoDataFrame(pd.concat([ccc, ddd], axis=0, ignore_index=True))

state_codes = ["04", "08", "12", "13", "20", "22", "36", "41", "42", "48", "51", "55"]
filtered_gdf = gdf_merged[~gdf_merged['STATEFP'].isin(state_codes)]

In [15]:
filtered_gdf = filtered_gdf.to_crs(epsg=2163)
age2022 = age2022.to_crs(epsg=2163)
age2023 = age2023.to_crs(epsg=2163)
filtered_gdf['sd_area'] = filtered_gdf.geometry.area
age2022['div_area'] = age2022.geometry.area
age2023['div_area'] = age2023.geometry.area

In [17]:
intersection2023 = gpd.overlay(filtered_gdf, age2022, how='intersection', keep_geom_type=False)
intersection2023['overlap_area'] = intersection2023.geometry.area
intersection2023['over_pct'] = round((intersection2023['overlap_area']/intersection2023['div_area']),4)
intersection2022 = gpd.overlay(filtered_gdf, age2023, how='intersection', keep_geom_type=False)
intersection2022['overlap_area'] = intersection2022.geometry.area
intersection2022['over_pct'] = round((intersection2022['overlap_area']/intersection2022['div_area']),4)

In [19]:
import shapely
from shapely.geometry import Polygon, MultiPolygon, GeometryCollection

def to_polygon_or_multi(geom):
    if geom.geom_type == 'Polygon' or geom.geom_type == 'MultiPolygon':
        return geom
    
    if geom.geom_type == 'GeometryCollection':
        polys = []
        for g in geom.geoms:
            if g.geom_type == 'Polygon':
                polys.append(g)
            elif g.geom_type == 'MultiPolygon':
                polys.extend(list(g.geoms))
        
        if not polys:
            return None

      
        merged = shapely.ops.unary_union(polys)
        return merged 

    
    return None

In [21]:
intersection2023['geometry'] = intersection2023['geometry'].apply(to_polygon_or_multi)
intersection2022['geometry'] = intersection2022['geometry'].apply(to_polygon_or_multi)

## median age calculation variables

In [25]:
variables = [ '0-5', '5-9', '10-14', '15-17', '18-19', '20', '21',
       '22-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59',
       '60-61', '62-64', '65-66', '67-69', '70-74', '75-79', '80-84', '85+',
       'female_0-5', 'female_5-9', 'female_10-14', 'female_15-17',
       'female_18-19', 'female_20', 'female_21', 'female_22-24',
       'female_25-29', 'female_30-34', 'female_35-39', 'female_40-44',
       'female_45-49', 'female_50-54', 'female_55-59', 'female_60-61',
       'female_62-64', 'female_65-66', 'female_67-69', 'female_70-74',
       'female_75-79', 'female_80-84', 'female_85+']
def adj_values(intersection, variables, over='over_pct'):
    for var in variables:
        adj_name = var
        intersection[adj_name] = intersection[over] * intersection[var]
    return intersection

intersection2022 = adj_values(intersection2022, variables)
intersection2023 = adj_values(intersection2023, variables)

In [31]:
constant = ['GEOID_1']
sum_vars = ['overlap_area']
aggregation_dict = {}
aggregation_dict = {var: 'first' for var in constant}
aggregation_dict.update({var: 'sum' for var in variables})
aggregation_dict.update({var: 'sum' for var in sum_vars})


In [33]:
aggregated2023 = intersection2023.dissolve(
    by="GEOID_1", 
    aggfunc=aggregation_dict
).reset_index(drop=True)

aggregated2022 = intersection2022.dissolve(
    by="GEOID_1", 
    aggfunc=aggregation_dict
).reset_index(drop=True)

In [37]:
def calculate_median(row, income_columns, income_ranges):
    cumulative_freq = row[income_columns[0]]
    cumulative_freqs = [cumulative_freq]
    
    for col in income_columns[1:]:
        cumulative_freq += row[col]
        cumulative_freqs.append(cumulative_freq)
    
    T = cumulative_freqs[-1]
    T2 = T / 2

    for i, cumulative_freq in enumerate(cumulative_freqs):
        if cumulative_freq >= T2:
            median_class = income_columns[i]
            lower_bound = income_ranges[i][1]
            upper_bound = income_ranges[i][2]
            CF_m_minus_1 = cumulative_freqs[i-1] if i > 0 else 0
            fm = row[median_class]
            W = upper_bound - lower_bound
            
            if fm == 0:
                return None
            
            median = lower_bound + (T2 - CF_m_minus_1) / fm * W
            return median
    return None

income_columns = ['0-5', '5-9', '10-14',
       '15-17', '18-19', '20', '21', '22-24', '25-29', '30-34', '35-39',
       '40-44', '45-49', '50-54', '55-59', '60-61', '62-64', '65-66', '67-69',
       '70-74', '75-79', '80-84', '85+']

income_ranges = [
    ('0-5', 0, 5),
    ('5-9', 5, 9),
    ('10-14', 10, 14),
    ('15-17', 15, 17),
    ('18-19', 18, 19),
    ('20', 20, 20),
    ('21', 21, 21),
    ('22-24', 22, 24),
    ('25-29', 25, 29),
    ('30-34', 30, 34),
    ('35-39', 35, 39),
    ('40-44', 40, 44),
    ('45-49', 45, 49),
    ('50-54', 50, 54),
    ('55-59', 55, 59),
    ('60-61', 60, 61),
    ('62-64', 62, 64),
    ('65-66', 65, 66),
    ('67-69', 67, 69),
    ('70-74', 70, 74),
    ('75-79', 75, 79),
    ('80-84', 80, 84),
    ('85+', 85, float('inf'))
]

In [39]:
aggregated2022['Median_age'] = aggregated2022.apply(calculate_median, axis=1, income_columns=income_columns, income_ranges=income_ranges)
aggregated2022['Median_age'] = aggregated2022['Median_age'].replace(float('inf'), 200000)

aggregated2023['Median_age'] = aggregated2023.apply(calculate_median, axis=1, income_columns=income_columns, income_ranges=income_ranges)
aggregated2023['Median_age'] = aggregated2023['Median_age'].replace(float('inf'), 200000)

In [43]:
age2022 = aggregated2022[['GEOID_1','Median_age']]
age2023 = aggregated2022[['GEOID_1','Median_age']]

In [45]:
age2022['Year'] = '2022'
age2023['Year'] = '2023'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age2022['Year'] = '2022'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age2023['Year'] = '2023'


In [51]:
age = pd.concat([age2022, age2023], axis =0)

## Median Calculation

In [53]:
age.to_csv("age.csv", index=False)
