In [5]:
# Install Altair with the %pip magic so the package lands in the environment
# of the running kernel (a `!pip` shell escape may target a different Python).
%pip install altair



In [14]:
import pandas as pd
import altair as alt
import numpy as np

# Location of the building inventory CSV analysed in this notebook.
url = (
    "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/"
    "is445_data/main/building_inventory.csv"
)

# Altair setup: lift the row cap on embedded chart data and select the
# default renderer.
alt.data_transformers.disable_max_rows()
alt.renderers.enable('default')

# Read the whole inventory into a DataFrame.
df = pd.read_csv(url)

# Data preprocessing: keep only rows that have every field the summaries
# below rely on. The explicit copy makes `df` an independent frame, so adding
# the 'Decade' column further down cannot trigger pandas' chained-assignment
# (SettingWithCopy) warning.
df = df.dropna(
    subset=['Year Constructed', 'Square Footage', 'Agency Name']
).copy()

# Calculate agency statistics. Named aggregation ties each output column name
# to the aggregation that produces it, instead of renaming columns by
# position afterwards.
agency_stats = df.groupby('Agency Name').agg(**{
    'Total Square Footage': ('Square Footage', 'sum'),
    # Rows with a missing year were dropped above, so counting the non-null
    # years counts the rows (buildings) in each group.
    'Number of Buildings': ('Year Constructed', 'count'),
}).reset_index()

# Calculate building distribution by decade: floor each construction year to
# the start of its decade (e.g. 1957 -> 1950).
# NOTE(review): if the data uses placeholder years (such as 0) for unknown
# construction dates they will form their own "decade" -- confirm against
# the CSV.
df['Decade'] = (df['Year Constructed'] // 10) * 10
decade_stats = df.groupby(['Decade', 'Agency Name']).agg(**{
    'Square Footage': ('Square Footage', 'sum'),
    'Building Count': ('Year Constructed', 'count'),
}).reset_index()


# First visualization: building distribution by decade.
# The shared encodings are assembled one channel at a time and then attached
# to a mark-less base chart that later charts derive from.
_decade_x = alt.X(
    'Decade:O',
    title='Construction Decade',
    axis=alt.Axis(format='d'),
)
_agency_color = alt.Color(
    'Agency Name:N',
    legend=alt.Legend(title='Agency Name'),
)
_decade_tooltip = [
    alt.Tooltip('Decade:O', title='Decade'),
    alt.Tooltip('Agency Name:N', title='Agency'),
    alt.Tooltip('Building Count:Q', title='Number of Buildings'),
    alt.Tooltip(
        'Square Footage:Q',
        title='Total Square Footage',
        format=',.0f',
    ),
]

base = (
    alt.Chart(decade_stats)
    .properties(
        width=800,
        height=400,
        title='Distribution of Illinois State Buildings by Construction Decade',
    )
    .encode(
        x=_decade_x,
        color=_agency_color,
        tooltip=_decade_tooltip,
    )
)

# Create bar chart with legend interaction: clicking entries in the colour
# legend selects agencies, and the bars are filtered to that selection.
#
# Altair 5 renamed `selection_multi` to `selection_point` and
# `add_selection` to `add_params`, deprecating the old names. Use the new API
# when it is available and fall back to the old one otherwise, so the cell
# runs on either major version.
if hasattr(alt, 'selection_point'):
    agency_selector = alt.selection_point(
        fields=['Agency Name'],
        bind='legend'
    )
else:
    agency_selector = alt.selection_multi(
        fields=['Agency Name'],
        bind='legend'
    )

bars = base.mark_bar().encode(
    y=alt.Y('sum(Building Count):Q',
            title='Number of Buildings'),
)
if hasattr(bars, 'add_params'):
    bars = bars.add_params(agency_selector)
else:
    bars = bars.add_selection(agency_selector)

# NOTE(review): filtering removes unselected agencies from the plotted data,
# which may also shrink the legend to the selected entries -- confirm this is
# the intended interaction.
bars = bars.transform_filter(agency_selector)

# Second visualization: building size vs count relationship.
# One circle per row of `agency_stats`, positioned on log-log axes, sized by
# total square footage and coloured by agency (colour legend suppressed).
# NOTE(review): a log scale cannot place a zero value; if any agency's total
# square footage is 0 its mark will not be positioned sensibly -- confirm
# against the data.
_scatter_channels = {
    'x': alt.X(
        'Total Square Footage:Q',
        scale=alt.Scale(type='log'),
        title='Total Square Footage (log scale)',
    ),
    'y': alt.Y(
        'Number of Buildings:Q',
        scale=alt.Scale(type='log'),
        title='Number of Buildings (log scale)',
    ),
    'size': alt.Size(
        'Total Square Footage:Q',
        legend=alt.Legend(title='Total Square Footage'),
        scale=alt.Scale(range=[100, 1000]),
    ),
    'color': alt.Color('Agency Name:N', legend=None),
    'tooltip': [
        alt.Tooltip('Agency Name:N', title='Agency'),
        alt.Tooltip('Number of Buildings:Q', title='Number of Buildings'),
        alt.Tooltip(
            'Total Square Footage:Q',
            title='Total Square Footage',
            format=',.0f',
        ),
    ],
}

agency_chart = (
    alt.Chart(agency_stats)
    .mark_circle(opacity=0.7)
    .encode(**_scatter_channels)
    .properties(
        width=800,
        height=500,
        title='Illinois State Building Size vs Count Relationship by Agency',
    )
)

# Write each chart out as a JSON file, then render both charts inline.
_chart_exports = (
    (bars, 'decade_chart.json'),
    (agency_chart, 'size_count_scatter.json'),
)
for _chart, _filename in _chart_exports:
    _chart.save(_filename)

for _chart in (bars, agency_chart):
    display(_chart)

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
