In [1]:
import geopandas as gpd
import plotly.graph_objects as go
import json

In [9]:
gdf = gpd.read_file("dataset/building/Building_Footprints.shp")
gdf = gdf.to_crs(epsg=4326)

gdf.head()

Unnamed: 0,FEATURECOD,DESCRIPTIO,CAPTUREYEA,CAPTUREACT,GIS_ID,OBJECTID,geometry
0,2000,Building,2015-04-24,E,BldgPly_1,1,"POLYGON ((-77.08521 38.9398, -77.0851 38.93975..."
1,2000,Building,2015-04-24,E,BldgPly_2,2,"POLYGON ((-77.07723 38.93686, -77.07715 38.936..."
2,2000,Building,2015-04-24,E,BldgPly_3,3,"POLYGON ((-77.07596 38.93971, -77.07592 38.939..."
3,2000,Building,2015-04-24,E,BldgPly_4,4,"POLYGON ((-77.07363 38.92978, -77.07363 38.929..."
4,2000,Building,2015-04-24,E,BldgPly_5,5,"POLYGON ((-77.07657 38.93682, -77.07656 38.936..."


In [11]:
simplified_gdf = gdf.simplify(tolerance=0.00001)
geojson_data = json.loads(simplified_gdf.to_json())


# with open('output.json', 'w') as f:
#     json.dump(geojson_data, f, indent=4)

In [12]:
fig = go.Figure()

print(f"starting trace {gdf.index}")

fig.add_trace(go.Choroplethmapbox(
    geojson=geojson_data,
    locations=gdf.index,
    z=gdf.index,
    colorscale=[[0, 'rgb(65, 105, 225)'], [1, 'rgb(65, 105, 225)']],
    showscale=False,
    marker={
        'opacity': 0.7,
        'line': {'width': 0.5, 'color': 'rgb(40, 40, 40)'}
    },
    hoverinfo='none'
))

print("end trace")


starting trace RangeIndex(start=0, stop=164957, step=1)
end trace


In [None]:

fig.update_layout(
    mapbox=dict(
        style='open-street-map', # 'carto-positron',
        center=dict(
            lon=-77.0369,  # DC centers coordinates
            lat=38.9072
        ),
        zoom=13
    ),
    margin=dict(l=0, r=0, t=0, b=0),
    height=800,
    paper_bgcolor='white',
    plot_bgcolor='white'
)

In [14]:
fig.show()

KeyboardInterrupt: 

In [19]:
import plotly.express as px


feature_codes = {
    2000: 'BUILDING',
    2010: 'MEMORIAL',
    2020: 'BLEACHER',
    2030: 'PARKING GARAGE',
    2040: 'FOUNTAIN',
    2050: 'CANOPY STRUCTURE'
}

building_types = gdf['FEATURECOD'].value_counts()
fig1 = px.pie(
    values=building_types.values,
    names=[feature_codes[code] for code in building_types.index],
    title='Distribution of Building Types'
)
fig1.show()

In [17]:
capture_years = gdf['CAPTUREYEA'].value_counts().sort_index()
fig2 = px.bar(
    x=capture_years.index,
    y=capture_years.values,
    title='Number of Buildings Captured by Year',
    labels={'x': 'Capture Year', 'y': 'Number of Buildings'}
)
fig2.show()


In [7]:
capture_actions = gdf['CAPTUREACT'].value_counts()
fig3 = px.bar(
    x=capture_actions.index,
    y=capture_actions.values,
    title='Distribution of Capture Actions',
    labels={'x': 'Capture Action', 'y': 'Count'},
    color=capture_actions.index
)
fig3.show()

In [27]:
import plotly.express as px
import pandas as pd

gdf_projected = gdf.to_crs(epsg=2248)
gdf_projected['area_sqm'] = gdf_projected.geometry.area

Q1 = gdf_projected['area_sqm'].quantile(0.25)
Q3 = gdf_projected['area_sqm'].quantile(0.75)
IQR = Q3 - Q1
filtered_gdf = gdf_projected[
    (gdf_projected['area_sqm'] >= Q1 - 1.5 * IQR) & 
    (gdf_projected['area_sqm'] <= Q3 + 1.5 * IQR)
]

fig4 = px.histogram(
    filtered_gdf,
    x='area_sqm',
    title='Distribution of Building Areas in Washington DC',
    labels={'area_sqm': 'Building Area (square meters)'},
    nbins=100,
    opacity=0.75,
    color_discrete_sequence=['skyblue']
)

fig4.update_layout(
    title_x=0.5,
    xaxis_title="Building Area (square meters)",
    yaxis_title="Number of Buildings",
    bargap=0.1,
    plot_bgcolor='white',
    yaxis=dict(
        gridcolor='lightgray',
        gridwidth=0.5,
    ),
    xaxis=dict(
        gridcolor='lightgray',
        gridwidth=0.5,
    ),
    showlegend=False
)

mean_area = filtered_gdf['area_sqm'].mean()
median_area = filtered_gdf['area_sqm'].median()

fig4.add_vline(x=mean_area, 
               line_dash="dash", 
               line_color="red",
               annotation_text=f"Mean: {mean_area:.0f} m²",
               annotation_position="top")

fig4.add_vline(x=median_area, 
               line_dash="dash", 
               line_color="green",
               annotation_text=f"Median: {median_area:.0f} m²",
               annotation_position="bottom")

fig4.show()

print("\nOriginal Data Statistics (square meters):")
print(f"Mean area: {gdf_projected['area_sqm'].mean():.2f}")
print(f"Median area: {gdf_projected['area_sqm'].median():.2f}")
print(f"Min area: {gdf_projected['area_sqm'].min():.2f}")
print(f"Max area: {gdf_projected['area_sqm'].max():.2f}")
print(f"Number of buildings: {len(gdf_projected)}")

print("\nFiltered Data Statistics (square meters):")
print(f"Mean area: {filtered_gdf['area_sqm'].mean():.2f}")
print(f"Median area: {filtered_gdf['area_sqm'].median():.2f}")
print(f"Min area: {filtered_gdf['area_sqm'].min():.2f}")
print(f"Max area: {filtered_gdf['area_sqm'].max():.2f}")
print(f"Number of buildings: {len(filtered_gdf)}")

bins = [0, 100, 500, 1000, 5000, 10000, float('inf')]
labels = ['<100', '100-500', '500-1000', '1000-5000', '5000-10000', '>10000']

gdf_projected['size_category'] = pd.cut(gdf_projected['area_sqm'], 
                                      bins=bins, 
                                      labels=labels, 
                                      right=False)

print("\nBuilding Size Distribution:")
size_dist = gdf_projected['size_category'].value_counts().sort_index()
for category, count in size_dist.items():
    percentage = (count / len(gdf_projected)) * 100
    print(f"{category} m²: {count} buildings ({percentage:.1f}%)")


Original Data Statistics (square meters):
Mean area: 1772.92
Median area: 900.42
Min area: 0.04
Max area: 525440.54
Number of buildings: 164957

Filtered Data Statistics (square meters):
Mean area: 906.05
Median area: 853.58
Min area: 0.04
Max area: 2455.16
Number of buildings: 150711

Building Size Distribution:
<100 m²: 4144 buildings (2.5%)
100-500 m²: 27267 buildings (16.5%)
500-1000 m²: 64498 buildings (39.1%)
1000-5000 m²: 62729 buildings (38.0%)
5000-10000 m²: 2751 buildings (1.7%)
>10000 m²: 3568 buildings (2.2%)


In [25]:
import pandas as pd
time_series = pd.crosstab(gdf['CAPTUREYEA'], gdf['CAPTUREACT'])
fig5 = px.line(
    time_series,
    title='Building Changes Over Time by Action Type',
    labels={'value': 'Number of Buildings', 'CAPTUREYEA': 'Year'}
)
fig5.show()