# Notebook for HW6

In [13]:
import html
import os
import re

import altair as alt
import numpy as np
import pandas as pd

In [14]:
# Raw-file URL of building_inventory.csv in the UIUC-iSchool-DataViz/is445_data GitHub repository.
building = 'https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/building_inventory.csv'

In [15]:
# Read the CSV at the URL held in `building` into a DataFrame.
building_data = pd.read_csv(building)
# Display the column labels of the loaded frame.
building_data.columns


Index(['Agency Name', 'Location Name', 'Address', 'City', 'Zip code', 'County',
       'Congress Dist', 'Congressional Full Name', 'Rep Dist', 'Rep Full Name',
       'Senate Dist', 'Senator Full Name', 'Bldg Status', 'Year Acquired',
       'Year Constructed', 'Square Footage', 'Total Floors',
       'Floors Above Grade', 'Floors Below Grade', 'Usage Description',
       'Usage Description 2', 'Usage Description 3'],
      dtype='object')

In [16]:
# Display the distinct values found in the 'Agency Name' column.
building_data['Agency Name'].unique()

array(['Department of Natural Resources', 'Department of Corrections',
       'Department of Human Services', 'Department of Transportation',
       'Department of State Police', 'Department of Military Affairs',
       'Department of Agriculture', 'Governors State University',
       'Department of Central Management Services',
       'Illinois State University', 'Historic Preservation Agency',
       'Department of Juvenile Justice', 'Southern Illinois University',
       'Illinois Medical District Commission', 'University of Illinois',
       "Department of Veterans' Affairs", 'Chicago State University',
       'Northern Illinois University', 'Office of the Secretary of State',
       'Illinois Emergency Management Agency',
       'Western Illinois University', 'Eastern Illinois University',
       'Northeastern Illinois University',
       'Illinois Community College Board',
       'Illinois Board of Higher Education',
       'IL State Board of Education', 'Department of Revenue',


# Plot 1

In [17]:
# Output directory for the exported chart JSON files.
# Defaults to the original local Jekyll assets folder; set the JEKYLL_JSON_DIR
# environment variable to export somewhere else without editing the notebook
# (an unset or empty variable falls back to the default).
# os.path.join(..., '') guarantees a trailing separator, because the export
# cells build file paths by plain string concatenation (myJekyllDir + file name).
myJekyllDir = os.path.join(
    os.environ.get('JEKYLL_JSON_DIR')
    or 'C:/Users/ZhenLi/OneDrive/UIUC/2024 Fall/IS445/Lab4/Zhen3408.github.io/assets/json/',
    '',
)


In [18]:
def sanitize_agencyname(name):
    """Return `name` in a form that is safe to embed in a file name.

    Every character that is not a word character (letter, digit, underscore),
    a hyphen, or a period is replaced by one underscore. The substitution is
    one-for-one, so the result has the same length as the input.
    """
    unsafe_char = re.compile(r'[^\w\-\.]')
    return unsafe_char.sub('_', name)

In [19]:
unique_agencies = building_data['Agency Name'].dropna().unique()
options = []  # <option> tags for the dropdown selection box, one per agency

# For every agency: build a "building count by city" bar chart, save it as a
# JSON file under myJekyllDir, and record an <option> tag pointing at that file.
for agency in unique_agencies:
    # Rows belonging to the current agency only
    filtered_data = building_data[building_data["Agency Name"] == agency]

    # One bar per city; bar height is the number of rows for that city,
    # with cities sorted by descending count (sort="-y").
    bar_chart = (
        alt.Chart(filtered_data)
        .mark_bar()
        .encode(
            x=alt.X("City:N", sort="-y", title="City"),
            y=alt.Y("count():Q", title="Building Count"),
            color=alt.Color("City:N", legend=None)
        )
        .properties(
            title=f"Building Count by City for {agency}",
            width=600,
            height=400
        )
    )

    # Build the file name once so the saved file and the <option> value
    # can never drift apart.
    sanitized_name = sanitize_agencyname(agency)
    json_name = f'building_count_by_city_for_{sanitized_name}.json'

    # The saved copy replaces the fixed 600 px width with width='container'.
    bar_chart.properties(width='container').save(myJekyllDir + json_name)

    # The agency name comes straight from the CSV, so escape &, < and >
    # before placing it in HTML element text. quote=False leaves apostrophes
    # (e.g. "Veterans' Affairs") untouched; quotes need no escaping outside
    # attribute values. The value attribute only holds the sanitized name.
    option_tag = f'<option value="{json_name}">{html.escape(agency, quote=False)}</option>'
    options.append(option_tag)

In [20]:
# Join the collected <option> tags, one per line, and print the raw HTML so it
# can be copied and pasted into the markdown file.
dropdown_html = "\n".join(options)
print(dropdown_html)

<option value="building_count_by_city_for_Department_of_Natural_Resources.json">Department of Natural Resources</option>
<option value="building_count_by_city_for_Department_of_Corrections.json">Department of Corrections</option>
<option value="building_count_by_city_for_Department_of_Human_Services.json">Department of Human Services</option>
<option value="building_count_by_city_for_Department_of_Transportation.json">Department of Transportation</option>
<option value="building_count_by_city_for_Department_of_State_Police.json">Department of State Police</option>
<option value="building_count_by_city_for_Department_of_Military_Affairs.json">Department of Military Affairs</option>
<option value="building_count_by_city_for_Department_of_Agriculture.json">Department of Agriculture</option>
<option value="building_count_by_city_for_Governors_State_University.json">Governors State University</option>
<option value="building_count_by_city_for_Department_of_Central_Management_Services.json">

# Plot 2

In [21]:
# Keep only rows that have an agency, a construction year and a square
# footage, and whose construction year is 1950 or later.
required_columns = ['Agency Name', 'Year Constructed', 'Square Footage']
complete_rows = building_data.dropna(subset=required_columns)
filtered_data = complete_rows[complete_rows['Year Constructed'] >= 1950]

# One row per (agency, construction year). Note: despite the variable name,
# the value is the SUM of square footage within each group, not the mean.
footage_by_group = filtered_data.groupby(['Agency Name', 'Year Constructed'])['Square Footage']
average_square_footage = footage_by_group.sum().reset_index()

# Encoding channels for the scatter plot
year_axis = alt.X('Year Constructed:O', title='Year Constructed')
footage_axis = alt.Y('Square Footage:Q', title='Total Square Footage')
agency_color = alt.Color('Agency Name:N', title='Agency')
footage_size = alt.Size('Square Footage:Q', scale=alt.Scale(range=[10, 100]), title='Square Footage')
hover_fields = ['Agency Name', 'Year Constructed', 'Square Footage']

points = alt.Chart(average_square_footage).mark_circle(size=60).encode(
    x=year_axis,
    y=footage_axis,
    color=agency_color,
    size=footage_size,
    tooltip=hover_fields,
)
# .interactive() allows zooming and panning
scatter = points.properties(width=1200, height=600).interactive()

In [22]:
# Bare last expression: show the scatter chart as this cell's output.
scatter

In [23]:
# Save the scatter chart as building_scatter.json in the myJekyllDir folder
# (/assets/json); the saved copy uses width='container' instead of the fixed width.
scatter.properties(width='container').save(myJekyllDir + 'building_scatter.json')