In [1]:
import pandas as pd

In [2]:
def build_education_by_age(df):
    '''
    The build_education_by_age function is used to build a compartive bar plot of education levels by participant age
    params:
    -------
    df: The dataframe to build the figure with
    -------
    return:
    -------
    fig: The built figure
    -------
    '''
    df_ages = df.groupby('age').count().reset_index().rename(columns={'haveKids':'Total'})
    df_ages = df_ages[['age','Total']]
    low = df[df['educationLevel'] == 'Low']
    low = low.groupby('age').count().reset_index()
    low = pd.merge(low,df_ages,how='left')
    low['educationLevel'] = low['educationLevel'] / low['Total']

    hs = df[df['educationLevel'] == 'HighSchoolOrCollege']
    hs = hs.groupby('age').count().reset_index()
    hs = pd.merge(hs,df_ages,how='left')
    hs['educationLevel'] = hs['educationLevel'] / hs['Total']

    b = df[df['educationLevel'] == 'Bachelors']
    b = b.groupby('age').count().reset_index()
    b = pd.merge(b,df_ages,how='left')
    b['educationLevel'] = b['educationLevel'] / b['Total']

    m = df[df['educationLevel'] == 'Graduate']
    m = m.groupby('age').count().reset_index()
    m = pd.merge(m,df_ages,how='left')
    m['educationLevel'] = m['educationLevel'] / m['Total']

    education_fig = go.Figure(data=[
        go.Bar(x=low["educationLevel"],y=low['age'],orientation='h',name='Low'),
        go.Bar(x=hs["educationLevel"],y=hs['age'],orientation='h',name='High School / College'),
        go.Bar(x=b["educationLevel"],y=b['age'],orientation='h', name='Bachelors'),
        go.Bar(x=m["educationLevel"],y=m['age'],orientation='h', name='Graduate')
    ])
    education_fig.update_layout(barmode='stack',height=800,title="What Level of Education Exists By Age?",xaxis_range=[0,1],   
                                bargap = 0.0, bargroupgap = 0.0,                       
                        xaxis = dict(tickvals = [0,0.2,0.4,0.6,0.8,1],ticktext = ['0%','20%','40%','60%','80%','100%'],title = 'Percentage of Population'),
                        yaxis = dict(title='Age'))
    return education_fig

In [3]:
def build_kids_pyramid(df):
    '''
    The build_kids_pyramid function is used to build a compartive bar plot of participant age by wether or not they have children
    params:
    -------
    df: The dataframe to build the figure with
    -------
    return:
    -------
    fig: The built figure
    -------
    '''
    kids = df[df['haveKids']]
    kids['count'] = 1
    kids = kids.groupby('age').count().reset_index()
    kids['count'] = kids['count'] * -1
    # kids['age'] = kids['age'] 

    no_kids = df[~df['haveKids']]
    no_kids['count'] = 1
    no_kids = no_kids.groupby('age').count().reset_index()

    kids_fig = go.Figure(data=[
        go.Bar(name="Has Kids", x = kids['count'],y=kids['age'],orientation='h'),
        go.Bar(name="Does Not Have Kids", x=no_kids['count'], y = no_kids['age'],orientation='h')
    ])
    kids_fig.update_layout(barmode='relative',height=800,title="At What Age are They Having Kids?",xaxis_range=[-30,30],
                           bargap = 0.0, bargroupgap = 0.0, 
                           xaxis = dict(tickvals = [-30,-20,-10,0,10,20,30],ticktext = ['30','20','10','0','10','20','30'],title = 'Population'),
                           yaxis = dict(title='Age'))

    return kids_fig

In [4]:
def build_household_by_age(df):
    '''
    The build_household_by_age function is used to build a compartive bar plot of houseold size by participant age
    params:
    -------
    df: The dataframe to build the figure with
    -------
    return:
    -------
    fig: The built figure
    -------
    '''
    df_ages = df.groupby('age').count().reset_index().rename(columns={'haveKids':'Total'})
    df_ages = df_ages[['age','Total']]

    hs = df[df['householdSize'] == 1]
    hs = hs.groupby('age').count().reset_index()
    hs = pd.merge(hs,df_ages,how='left')
    hs['householdSize'] = hs['householdSize'] / hs['Total']

    b = df[df['householdSize'] == 2]
    b = b.groupby('age').count().reset_index()
    b = pd.merge(b,df_ages,how='left')
    b['householdSize'] = b['householdSize'] / b['Total']

    m = df[df['householdSize'] == 3]
    m = m.groupby('age').count().reset_index()
    m = pd.merge(m,df_ages,how='left')
    m['householdSize'] = m['householdSize'] / m['Total']


    education_fig = go.Figure(data=[
        go.Bar(y=hs["householdSize"],x=hs['age'],orientation='v',name='1'),
        go.Bar(y=b["householdSize"],x=b['age'],orientation='v', name='2'),
        go.Bar(y=m["householdSize"],x=m['age'],orientation='v', name='3')
    ])
    education_fig.update_layout(barmode='stack',height=800,title="What Size household do people live in, By Age?", yaxis_range=[0,1],
                                bargap = 0.0, bargroupgap = 0.0,                       
                        yaxis = dict(tickvals = [0,0.2,0.4,0.6,0.8,1],ticktext = ['0%','20%','40%','60%','80%','100%'],title = 'Percentage of Population'),
                        xaxis = dict(title='Age'))
    return education_fig

In [5]:
def build_homes_scatter(df):
    '''
    The build_homes_scatter function is used to build a scatter plot to analyze apartment rent and size
    params:
    -------
    df: The dataframe to build the figure with
    -------
    return:
    -------
    fig: The built figure
    -------
    '''
    fig = px.scatter(df,x='rentalCost',y='numberOfRooms', color='maxOccupancy ',
                     marginal_x='box',marginal_y='histogram',
                     title="Apartment Price vs Size")
    return fig

In [6]:
def build_jobs_level(df):
    '''
    the build_jobs_level function builds a histogram of the requried education level for jobs
    params:
    -------
    df: The dataframe to build the figure with
    -------
    return:
    -------
    fig: The built figure
    -------
    '''
    fig = px.histogram(df,x='educationRequirement',
                       title='City Job Education Requirments')
    return fig
def build_jobs_salary(df):
    '''
    The build_jobs_salary function builds a histogram of the job wages
    params:
    -------
    df: The dataframe to build the figure with
    -------
    return:
    -------
    fig: The built figure
    -------
    '''
    fig = px.histogram(df,x='hourlyRate',
                       title='Hourly Wage')
    return fig
def build_level_salary(df):
    '''
    The build_level_salary function is used to build a scatter plot of educationRequiremnet and wage for employment
    params:
    -------
    df: The dataframe to build the figure with
    -------
    return:
    -------
    fig: The built figure
    -------
    '''
    fig = px.scatter(df,x='educationRequirement',y='hourlyRate',title='Education Level vs Hourly Rate')
    return fig

In [7]:
def build_food_cost_dist(df):
    '''
    The build food cost dist function is used to build a boxplot of restaurant and pub costs
    params:
    -------
    df: The dataframe to build the figure with
    -------
    return:
    -------
    fig: The built figure
    -------
    '''
    if 'foodCost' in df.columns:
        fig = px.box(df,x='foodCost',title='Restaurant Food Costs')
    else:
        fig = px.box(df,x='hourlyCost',title='Pub Cost')
    return fig

In [8]:
def make_pop_comparison(df,x_var,y_var):
    '''
    The make pop comparison function is used to build a heatmap comparing 2 population demographic variables
    params:
    -------
    df: The dataframe to build the figure with
    x_var: The x variable for the visualization
    y_var: The y variable for the visualization
    -------
    return:
    -------
    fig: The built figure
    -------
    '''
    df_grouped = df.copy()
    df_grouped['count'] = 1
    df_grouped = df_grouped.groupby(by=[x_var,y_var]).count().reset_index()
    df_grouped = df_grouped.pivot(index=y_var, columns=x_var,values='count')
    fig = px.imshow(df_grouped, x=df_grouped.columns, y=df_grouped.index)
    return fig

In [9]:
def read_dates():
    """
    A simple function to read the date mappings into a dictionary.
    This allows for us to only load the data files as needed and prevent 
    reloading an existing set
    Returns:
        dates - a dictionary mapping dates to a list of files the data appears in
    """
    dates_file = open("date_mappings.csv", "r")
    dates = {}
    for line in dates_file:
        line = line.strip().split(",")
    
        if line[-1] == "":
            dates[line[0]] = [line[1]]
        else:
            dates[line[0]] = [line[1], line[2]]
    dates_file.close()
    return dates

In [10]:
def build_fake_dataset():
    """
    A function to build fake datapoints for a single day. Used to fix a quirk
    in plotly's animations where if a color option doesnt appear in all frames
    datapoints may appear and disappear as these values change
    Returns:
        fake_points - a pandas dataframe of fake activity data
    """
    fake_points = pd.DataFrame({"timestamp": [datetime(2022, 3, 1, 0, 0, 0), datetime(2022, 3, 1, 0, 0, 0), datetime(2022, 3, 1, 0, 0, 0), datetime(2022, 3, 1, 0, 0, 0), datetime(2022, 3, 1, 0, 0, 0)],
                                "participantId": [-1, -2, -3, -4, -5], 
                                "currentMode": ["AtHome", "Transport", "AtRecreation", "AtResturant", "AtWork"], 
                                "hungerStatus": ["JustAte", "BecameFull", "BecomingHungry", "Hungry", "Starving"], 
                                "sleepStatus": ["Sleeping", "Awake", "PrepareToSleep", "Sleeping", "Sleeping"], 
                                "apartmentId": [-1, -1, -1, -1, -1], 
                                "availableBalance": [0, 0, 0, 0, 0], 
                                "jobId": [-1, -1, -1, -1, -1], 
                                "financialStatus": ["Stable", "Unstable", "Unknown", "Stable", "Stable"], 
                                "dailyFoodBudget": [0, 0, 0, 0, 0], 
                                "weeklyExtraBudget": [0, 0, 0, 0, 0], 
                                "x": [-6000, -6000, -6000, -6000, -6000], 
                                "y": [-6000, -6000, -6000, -6000, -6000], 
                                "time": [time(0,0,0).strftime("%H:%M:%S"), time(0,0,0).strftime("%H:%M:%S"), time(0,0,0).strftime("%H:%M:%S"), time(0,0,0).strftime("%H:%M:%S"), time(0,0,0).strftime("%H:%M:%S")]})
    
    for i in range(24):
        for j in range(0, 56, 5):
            if i == 0 and j == 0:
                continue
            else:
                fake_points = pd.concat([fake_points, pd.DataFrame({"timestamp": [datetime(2022, 3, 1, i, j, 0), datetime(2022, 3, 1, i, j, 0), datetime(2022, 3, 1, i, j, 0), datetime(2022, 3, 1, i, j, 0), datetime(2022, 3, 1, i, j, 0)], 
                                                                    "participantId": [-1, -2, -3, -4, -5], 
                                                                    "currentMode": ["AtHome", "Transport", "AtRecreation", "AtResturant", "AtWork"], 
                                                                    "hungerStatus": ["JustAte", "BecameFull", "BecomingHungry", "Hungry", "Starving"], 
                                                                    "sleepStatus": ["Sleeping", "Awake", "PrepareToSleep", "Sleeping", "Sleeping"],  
                                                                    "apartmentId": [-1, -1, -1, -1, -1],  
                                                                    "availableBalance": [0, 0, 0, 0, 0], 
                                                                    "jobId": [-1, -1, -1, -1, -1], 
                                                                    "financialStatus": ["Stable", "Unstable", "Unknown", "Stable", "Stable"],
                                                                    "dailyFoodBudget": [0, 0, 0, 0, 0],
                                                                    "weeklyExtraBudget": [0, 0, 0, 0, 0], 
                                                                    "x": [-6000, -6000, -6000, -6000, -6000], 
                                                                    "y": [-6000, -6000, -6000, -6000, -6000], 
                                                                    "time": [time(0,0,0).strftime("%H:%M:%S"), time(0,0,0).strftime("%H:%M:%S"), time(0,0,0).strftime("%H:%M:%S"), time(0,0,0).strftime("%H:%M:%S"), time(0,0,0).strftime("%H:%M:%S")]})], 
                                                                    ignore_index=True)
    return fake_points


In [11]:
def build_color_map():
    """
    A function to make a static map of colors for the population movement animations
    A way to keep things contained
    Returns:
        colors - A dictionary of statuses mapped to color for each of the four
                  main status types
    """
    colors = {
        "sleepStatus": {
            "Sleeping": "blue",
            "Awake": "red",
            "PrepareToSleep": "purple"
        },
        "hungerStatus": {
            'JustAte': "blue",
            "BecameFull": "green",
            'BecomingHungry': "yellow",
            'Hungry': "orange",
            'Starving': "red"
        },
        "financialStatus": {
            'Stable': "green", 
            'Unstable': "red",
            'Unknown': "yellow"
        },
        "currentMode": {
            'AtHome': "blue", 
            'Transport': "green",
            'AtRecreation': "purple",
            'AtRestaurant': "yellow", 
            'AtWork': "red"
        }
    }
    return colors

In [12]:
def read_status_data(files):
    """
    A function to read in the participant status logs.
    Globals:
        DATA_DIR - The directory the project is stored in
    Args:
        files - a list of file numbers to be loaded 
    Returns:
        result - a pandas dataframe of the loaded data
    """
    result = pd.read_csv(f"{PROJ_DIR}VAST-Challenge-2022/Datasets/Activity Logs/ParticipantStatusLogs{files[0]}.csv")

    if len(files) > 1:
        result = pd.concat([result, pd.read_csv(f"{PROJ_DIR}VAST-Challenge-2022/Datasets/Activity Logs/ParticipantStatusLogs{files[1]}.csv")], ignore_index=True)

    result["timestamp"] = pd.to_datetime(result["timestamp"], format="%Y-%m-%dT%H:%M:%SZ")
    return result

In [13]:
def filter_data_date(date_time):
    """
    A function to filter the loaded data by date. The function also cleans the data 
    to be more useful by making a new time column for easier animating and splitting 
    the string location data into an x and y coordinate
    Globals:
        loaded_data - The currently loaded participant status dataframe
    Args:
        date_time - A string representing the date to filter by
    Returns:
        result - A pandas dataframe of the filtered and cleaned data
    """
    result = loaded_data[loaded_data["timestamp"].dt.strftime("%Y-%m-%d") == date_time]
    result[["junk", "x", "y"]] = result.currentLocation.str.split(expand=True)
    result = result.drop(columns=["currentLocation", "junk"])
    result["time"] = result.timestamp.dt.strftime("%H:%M:%S")
    result["x"] = result.x.str.strip("(").astype(float)
    result["y"] = result.y.str.strip(")").astype(float)
    return result

def filter_data_time(date_time):
    """
    A function to further filter the filtered data by time
    Globals:
        filtered_data_day - The pandas dataframe of filterd data for the selected date
    Args:
        date_time - the time to filter by
    Returns:
        result - The pandas data frame filtered by both day and time
    """
    result = filtered_data_day[filtered_data_day["timestamp"].dt.strftime("%H:%M:%S") == date_time]
    return result

In [20]:

def make_population_animation(date_value, color_value):
    """
    A function to build the main population animation. As part of this the function
    loads the necessary data, filters it, and updates all global variables for use in
    the other movement graphs
    Globals:
        COLOR_MAPS - The static color dictionary to determine which colors to use for 
                        each status type and value
        DATES - The dictionary of date to file mappings
        FAKE_POINTS - The dictionary of fake data to fix animation bugs
        current_data - The files currently loaded
        loaded_data - The raw data read from the files noted by current_data
        current_date - The current date being visualized
        filtered_data_day - The filtered dataframe for the currently selected day
    Args:
        date_value - The selected date to filter the data by
        color_value - The column to use for the colors in the animations
    Returns: 
        temp_fig - The animated scatter plot
    """
    global current_data
    global loaded_data
    global current_date
    global filtered_data_day

    # Check what files the selected date appears in
    files = DATES[date_value]
    if not (files == current_data): # If the date is not already in the loaded data
        loaded_data = read_status_data(files) # load the new files
        current_data = files
    
    if not (date_value == current_date): # if the date has changed
        filtered_data_day = filter_data_date(date_value) # refilter the data
        current_date = date_value
    
    # add the fake points for the animation
    test_filtered = pd.concat([filtered_data_day, FAKE_POINTS], ignore_index=True)

    # build the inital scatter plot
    temp_fig = px.scatter(test_filtered, x="x", y="y", animation_frame="time", animation_group="participantId", title=f"Participant Activity Throughout {date_value}", 
                          color=color_value, color_discrete_map=COLOR_MAPS[color_value], width=.7*IMAGE_XDIM, height=.7*IMAGE_YDIM,
                          hover_name="participantId", hover_data=['currentMode', "hungerStatus", "sleepStatus", "financialStatus", "apartmentId", "jobId"])
    
    
    # Add the map of engagement to the plot
    temp_fig.add_layout_image(
        dict(
        source=app.get_asset_url("BaseMap.png"),
        xref="paper",
        yref="paper",
        x=0.04,
        y=.95,
        sizex=.925,
        sizey=.91,
        opacity=0.5,
        sizing="stretch",
        layer="below"
        )
    )
    
    # fix the axis and some other refinement
    temp_fig.update_layout({
        # "plot_bgcolor": "rgba(0,0,0,0)",
        "paper_bgcolor": "rgba(0,0,0,0)",
        "xaxis": {
            "range" : [-5100,2950],
            "visible": False,
            "showticklabels": False
         },
        "yaxis": {
            "range": [-425,  8300],
            "visible": False,
            "showticklabels": False
        }
    })

    return temp_fig

In [15]:
def make_heatmap_animation(date_value, numx, numy):
    """
    A function to build an animated heatmap representation of people moving
    throught the city for the given day
    Globals:
        current_date - The date the data is currently filtered by
        filtered_data_day - The data filtered for the selected day
    Args:
        date_value - The currently selected date
        numx - The number of xbins to use for the heatmap
        numy - The number of ybins to use for the heatmap
    Returns:
        temp_fig - The generated heat map
    """
    global current_date
    global filtered_data_day

    # If the filtered data has not been loaded wait until the processing is complete
    while(current_date != date_value):
        tm.sleep(1)

    # build the initial heatmap
    temp_fig = px.density_heatmap(filtered_data_day, x="x", y="y", animation_frame="time", title=f"Heat Map of Participants Throughout {date_value}",
                                  nbinsx=numx, nbinsy=numy, width=.875*IMAGE_XDIM, height=.875*IMAGE_YDIM)
    
    # Add the map to the background
    temp_fig.add_layout_image(
        dict(
        source=app.get_asset_url("BaseMap.png"),
        xref="paper",
        yref="paper",
        x=0.04,
        y=.95,
        sizex=.925,
        sizey=.91,
        opacity=0.3,
        sizing="stretch",
        layer="above"
        )
    )

    # Fix the axes
    temp_fig.update_layout({
        "paper_bgcolor": "rgba(0,0,0,0)",
        "xaxis": {
            "range" :[-5100,2950],
            "visible": False,
            "showticklabels": False
         },
        "yaxis": {
            "range": [-425,  8300],
            "visible": False,
            "showticklabels": False
        }
    })

    return temp_fig

In [21]:
def make_static_scatter(date_value, hour, minutes, color_value):
    global current_date
    global current_time
    global filtered_data_time
    date_changed = current_date != date_value
    temp_time = time(hour, minutes, 0).strftime("%H:%M:%S")

    while(current_date != date_value):
        tm.sleep(1)

    if(date_changed or (temp_time != current_time)):
        filtered_data_time = filter_data_time(temp_time)
        current_time = temp_time

    temp_fig = px.scatter(filtered_data_time, x="x", y="y", title=f"Participant Activity at {temp_time}",
                          color=color_value, color_discrete_map=COLOR_MAPS[color_value], width=.7*IMAGE_XDIM, height=.7*IMAGE_YDIM,
                          hover_name="participantId", hover_data=['currentMode', "hungerStatus", "sleepStatus", "financialStatus", "apartmentId", "jobId"])
    
    temp_fig.add_layout_image(
        dict(
        source=app.get_asset_url("BaseMap.png"),
        xref="paper",
        yref="paper",
        x=0.04,
        y=.95,
        sizex=.925,
        sizey=.91,
        opacity=0.5,
        sizing="stretch",
        layer="below"
        )
    )
    temp_fig.update_layout({
        "paper_bgcolor": "rgba(0,0,0,0)",
        "xaxis": {
            "range" :[-5100,2950],
            "visible": False,
            "showticklabels": False
         },
        "yaxis": {
            "range": [-425,  8300],            
            "visible": False,
            "showticklabels": False
        }
    })

    return temp_fig

In [24]:
# Don't run this here
# Copy this code into a ney .py. Then run with `python dash_2.py` in terminal
# Then visit http://127.0.0.1:8050/ in your web browser.

import plotly.express as px

from plotly.subplots import make_subplots

import plotly.graph_objects as go

from jupyter_dash import JupyterDash

from dash import Dash, html, dcc, Input, Output

import dash_bootstrap_components as dbc

import pandas as pd

import time as tm

from datetime import date, time, datetime

app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# To make our lives easier between devices
PROJ_DIR = "../iste-782_project/"

# Variables needed for the population animations to prevent constant data loading
DATES = read_dates()
FAKE_POINTS = build_fake_dataset()
COLOR_MAPS = build_color_map()
IMAGE_XDIM = 1076
IMAGE_YDIM = 1144
current_data = []
loaded_data = None
current_date = None
current_time = None
filtered_data_day = None
filtered_data_time = None

df = pd.read_csv(f'{PROJ_DIR}VAST-Challenge-2022/Datasets/Attributes/Participants.csv')
df_homes = pd.read_csv(f'{PROJ_DIR}VAST-Challenge-2022/Datasets/Attributes/Apartments.csv')
df_jobs = pd.read_csv(f'{PROJ_DIR}VAST-Challenge-2022/Datasets/Attributes/Jobs.csv')
df_pub = pd.read_csv(f'{PROJ_DIR}VAST-Challenge-2022/Datasets/Attributes/Pubs.csv')
df_res = pd.read_csv(f'{PROJ_DIR}VAST-Challenge-2022/Datasets/Attributes/Restaurants.csv')

fig = px.histogram(df, x="age", title="Age Breakdown")
kids_fig = build_kids_pyramid(df)
education_fig = build_education_by_age(df)
household_fig = build_household_by_age(df)
figo = make_subplots(rows=1,cols=3,specs=[[{'type':'domain'},{'type':'domain'},{'type':'domain'}]])
education_level_overview = df.groupby('educationLevel').count().reset_index()
fig_e = px.pie(education_level_overview,values='age', names='educationLevel', title='Education Level')
education_level_overview = df.groupby('householdSize').count().reset_index()
fig_h = px.pie(education_level_overview,values='age', names='householdSize', title='Household Size')
education_level_overview = df.groupby('haveKids').count().reset_index().sort_values(by='haveKids')
fig_k = px.pie(education_level_overview,values='age', names='haveKids', title='Has Kids')


app.layout = html.Div(children=[
    dcc.Tabs(id="tabs-example-graph", value='tab-1-example-graph', children=[
        dcc.Tab(label='Introduction', value='tab-1-example-graph'),
        dcc.Tab(label='Population Demographics', value='tab-2-example-graph'),
        dcc.Tab(label='City Demographics', value='tab-3-example-graph'),
        dcc.Tab(label='Population Movement', value='tab-4-example-graph')
    ]),
    html.Div(id='tabs-content-example-graph')
])

@app.callback(Output('tabs-content-example-graph', 'children'),
              Input('tabs-example-graph', 'value'))
def render_content(tab):
    if tab == 'tab-1-example-graph':
        return html.Div([
            dbc.Card(
                dbc.CardBody([
                    html.H1('Introduction'),
                    html.Div(children='''
                        Utkarsh, Taylor Liam
                    '''),
                    html.Div(''' 
                        Dr. Bilgic
                    '''),
                    html.Div('''
                        ISTE 782
                    '''),
                    html.Div('''
                        May 1, 2023
                    '''),
                    html.H3('Introduction'),
                    html.Div('''
                        The annual IEEE Visual Analytics Science and Technology (VAST) Challenge seeks to push the field of visual analytics by providing both industry and academia the ability to test their tools on realistic tasks and datasets. The VAST challenge in 2022~\cite{vast_2022} focused on the urban development of Engagement Ohio. As per the challenge, the city of Engagement Ohio had received a large city renewal grant. To best utilize this funding, the city collected data on the daily activities of its citizens to locate the best areas for investment. Specifically, the city sought to understand 3 aspects of the city: The demographics and relationships of its residents, The daily routines of its residents, The financial health of the city. In this work, we focus solely on the first two topics. Using Plotly Dash we visualize the provided data to explore the demographics of both Engament's citizens as well as its buildings and businesses. We also use the animation functionality of Dash to display participants' movements throughout the city for a selected day.
                    '''),
                    html.H3('The Dataset'),
                    html.Div('''
                        As mentioned above, the dataset used in this project comes from the VAST 2022 challenge. The dataset contains a year's worth of information from 1000 residents of Engagement, OH gathered from March 1, 2022 to May 31, 2023 in five-minute increments. Some of the information in the dataset is synthetic to provide competitors with more data to work with. This data was collected by an app that tracked each participant's location, spending, and items purchased. The dataset consists of activity logs, journals, and attributes. The activity logs track participant locations and spending, the journals track various participant features and check their status in the project and the attributes track various demographics of the city and individuals. Within our first challenge of exploring the city and resident demographics, we used the various attribute datasets included, including jobs, restaurants \& pubs, available apartments, and participant details. For our second challenge, we used the participant activity logs to analyze the participant movement across the city. 
                    '''),
                    html.H3('Our Visualizatons'),
                    html.Div('''
                        For our first research question, we focused on the attribute datasets, analyzing and visualizing these to understand the attributes of the city and population. Firstly looking at the demographics of the population, within the datasets we can analyze participant age, education level, household size, and whether the participant has kids or not. To start analyzing this dataset we built simple histograms and pie charts, shown in Figure \ref{fig:pie} to analyze the population-level attributes. We then dug deeper into these datasets looking into the interaction between various participant attributes. Primarily we looked at the participant attributes within the ages of the participants. Building various comparative bar graphs to investigate how education level, household size, and kids are spread across the various age categories in Engagement Ohio. After investigating the population demographics of the city of Engagement, we looked into the city attributes specifically, the apartments, restaurants, and jobs. When looking at the apartments we looked at the price of apartments compared to the available rooms in the apartment also incorporating the max capacity of apartments. After looking at the housing within the city we looked towards the restaurants and pubs in the city to determine how expensive it is and how much a night out will cost. Finally looking toward employment opportunities we looked at the level of education required and earning opportunities available at various jobs throughout the city.
                    '''),
                    html.Div('''
                        To explore the daily routines of participants, we generated three visualizations. Each visual maps participants' locations onto an image of the city at various points throughout the day and across different days. For the initial mapping, we use the location information provided in the dataset. Participant locations are represented as a string containing the x and y coordinate of the participant relative to the provided city map. To graph these positions, we first needed to convert the provided strings into an actual (x, y) coordinate pair. Once converted we are able to plot the participants on a scatter plot and overlay the plot on top of the provided map. For the temporal aspect, we restrict each visualization to a single day. Due to the size of the dataset, it is prohibitively slow to load and render all the data at once. However, the data is spread across multiple status logs, each containing about 5.5 days of data. As such we only load a single file at a time and as needed to minimize the overhead of loading the data and share the cleaned data across all three visuals to avoid duplicate loads and filtering. Similarly, we filter the data to a single day to improve the rendering time of each visual. The first visual, shown in Figure \ref{fig:move}, provides an animation of participants' movements throughout the city during the selected day. This not only allows the viewer to see each participant's routine individually but also gain insights into commonalities across participants such as the busiest times of the day to travel, where participants are employed, where they go for food and/or recreation, as well as information on the city structure itself such as where the residential districts are compared to commercial zones. Participants can further be color-coded based on their Hunger, Financial, Sleep, and Location statuses to see how they change throughout the day. The second visualization is very similar but only provides a mapping of the participants' locations and statuses at a specific time during the day. This static mapping allows the viewer to more easily view the data at specific times than scrubbing through the animation. Similarly, this second mapping can also be color coded to the same statuses and is displayed next to the animation. This allows the viewer to easily explore points of interest within the animation across the other statuses without needing to change and rerun the animation. The final visual provides a density heat map of participants within the city. This heat map provides insight into the most used/traversed portions of the city throughout the day and can be altered to provide either broader or more fine-grained sectioning of the city.
                    '''),
                    html.H3('Our Findings'),
                    html.Div('''
                        Engagement OH is a unique area with a young population and a diverse education level. The majority of individuals have completed some college or a bachelor's degree, with fewer individuals having graduate or professional degrees. Most households consist of 1-2 people, and the majority of residents do not have children. Our analysis of education levels across age categories found that younger residents (20-29 years old) have a higher percentage of individuals with some college education, while older residents (30-39 years old) have a higher percentage of individuals with a bachelor's degree. This highlights the importance of education in Engagement, OH, as individuals with higher education levels tend to have smaller households and higher-paying jobs.
                    '''),
                    html.Div('''
                        Our findings suggest that most individuals tend to stay within the same general area of Engagement, OH, and visit grocery stores, restaurants, and their workplaces most frequently. It is interesting to note that grocery store visits are more common during the weekdays, while restaurant visits are more common on weekends. Additionally, we found that residents who frequent more expensive restaurants tend to have higher grocery bills and incomes. Finally, our analysis of job levels and income in Engagement, OH shows that individuals in higher education-level jobs tend to earn more on average. This highlights the importance of education and job skills in achieving financial success in this area. Overall, these findings provide insight into the unique demographics and daily routines of residents in Engagement, OH.
                    '''),
                    html.H3('Conclusion'),
                    html.Div('''
                        In conclusion, through our analysis of the attribute datasets and participant activity logs, we were able to gain insights into the demographics of the city and its residents, as well as their daily routines and behaviors. Our findings suggest that the city should focus its resources on improving infrastructure and amenities in the areas where residents spend the most time, such as grocery stores, restaurants, and workplaces, while also providing incentives for businesses to open in these areas. Additionally, the city should prioritize the development of affordable housing options for smaller households, particularly for younger residents with some college education facing job opportunities that require a higher level of education and offer higher earning potential. By leveraging these insights, the city of Engagement can make informed decisions and effectively allocate its resources to improve the lives of its residents and promote economic growth.
                    ''')
                ])
            )
        ])
    elif tab == 'tab-2-example-graph':
        return html.Div([
            dbc.Card(
                dbc.CardBody([
                    html.H1('Engagment Ohio, Population Demographics'),
                    html.H3('Overview'),
                    dbc.Row([
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='age_breakdown',
                                    figure=fig
                                )
                            ])
                        ])
                    ]),
                    dbc.Row([
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='overview',
                                    figure=fig_e
                                ),
                            ])
                        ],width=4),
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='overview',
                                    figure=fig_h
                                ),
                            ])
                        ],width=4),
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='overview',
                                    figure=fig_k
                                ),
                            ])
                        ],width=4),
                    ]),
                    html.H3('Demographic by Age'),
                    dbc.Row([
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='education_level_breakdown',
                                    figure=education_fig
                                )
                            ])
                        ], width=6),
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='have_kids_age_breakdown',
                                    figure=kids_fig
                                ),
                            ])
                        ], width=6),
                    ], align='center'),
                   dbc.Row([
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='household_size_breakdown',
                                    figure=household_fig
                                ),
                            ])
                        ]),
                    ], align='center'),
                    html.H3('Demographic comparison'),
                    dbc.Row([
                        dbc.Col([
                            html.Label([
                                    "X Variable: ", 
                                    dcc.Dropdown(
                                        options=[
                                            {"label": "Age", "value": "age"},
                                            {"label": "Has Kids", "value": "haveKids"},
                                            {"label": "Education Level", "value": "educationLevel"},
                                            {"label": "Household Size", "value": "householdSize"}
                                        ],
                                        value="age",
                                        id="pop_x_var_selection",
                                    ),
                                ],
                                style={
                                    "width": "100%"
                                }
                            )
                        ],width=6),
                        dbc.Col([
                            html.Label([
                                    "Y Variable: ", 
                                    dcc.Dropdown(
                                        options=[
                                            {"label": "Age", "value": "age"},
                                            {"label": "Has Kids", "value": "haveKids"},
                                            {"label": "Education Level", "value": "educationLevel"},
                                            {"label": "Household Size", "value": "householdSize"}
                                        ],
                                        value="haveKids",
                                        id="pop_y_var_selection",
                                    ),
                                ],
                                style={
                                    "width": "100%"
                                }
                            )
                        ],width=6)
                    ]),

                    dbc.Row([
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='population_demo_comp'
                                )
                            ])
                        ])
                    ])
                ])
            )
        ])
    elif tab == 'tab-3-example-graph':
        return html.Div([
            dbc.Card(
                dbc.CardBody([
                    html.H1('Engagment Ohio, City Demographics'),
                    html.H3('Home'),
                    dbc.Row([
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='apt_breakdown',
                                    figure=build_homes_scatter(df_homes)
                                )
                            ])
                        ])
                    ], align='center'),
                    html.H3('Restaurant & Bar'),
                    dbc.Row([
                        dbc.Col([],width=3),
                        dbc.Col([
                            html.Div([
                                dcc.Dropdown(
                                    id='food_type_dropdown',
                                    options=[
                                            {'label': 'Pubs', 'value': 'pub'},
                                            {'label': 'Restaurants', 'value': 'res'}
                                    ],
                                    value= 'res'
                                ),
                                dcc.Graph(
                                    id='food_cost',
                                    figure=build_food_cost_dist(df_pub)
                                )
                            ])
                        ], width=6),    
                    ]),
                    html.H3('Employment'),
                    dbc.Row([
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='apt_breakdown',
                                    figure=build_jobs_salary(df_jobs)
                                )
                            ])
                        ],width=6),
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='job_level',
                                    figure=build_jobs_level(df_jobs)
                                )

                            ])
                        ],width=6),
                    ], align='center'),
                    dbc.Row([
                        dbc.Col([
                            html.Div([
                                dcc.Graph(
                                    id='job_level',
                                    figure=build_level_salary(df_jobs)
                                )
                            ])
                        ])
                    ])
                ])
            )
        ])
    elif tab == 'tab-4-example-graph':
        return html.Div([
            dbc.Card(
                dbc.CardBody([
                    html.H1('Engagment Ohio, Population Movement'),
                    html.H3(children=f"Status: Loading",
                            id="status_message"),
                    dbc.Row([
                        dbc.Col([
                            html.Label([
                                "Day to observe: ",
                                dcc.DatePickerSingle(
                                    id="day-picker",
                                    month_format="M/D/Y",
                                    placeholder="M/D/Y",
                                    date=date(2022, 3, 1),
                                )
                            ])
                        ])
                    ]),
                    dbc.Row([
                        dbc.Col([
                            html.Label([
                                "Participant Status: ", 
                                dcc.Dropdown(
                                    options=[
                                        {"label": "Finances", "value": "financialStatus"},
                                        {"label": "Hunger", "value": "hungerStatus"},
                                        {"label": "Location", "value": "currentMode"},
                                        {"label": "Sleep", "value": "sleepStatus"}
                                    ],
                                    value="sleepStatus",
                                    id="color_selector_animation",
                                )
                            ],
                            style={
                                "width": "100%"
                            }
                            ),
                        ],width=6),
                        dbc.Col([
                            html.Label([
                                "Hour: ",
                                dcc.Slider(
                                    0, 23, 1,
                                    id="hour_slider",
                                    value=0
                                ),
                                "Minutes: ",
                                dcc.Slider(
                                    0, 55, 5,
                                    id="minute_slider",
                                    value=0
                                ),
                                "Participant Status",      
                                dcc.Dropdown(
                                    options=[
                                        {"label": "Finances", "value": "financialStatus"},
                                        {"label": "Hunger", "value": "hungerStatus"},
                                        {"label": "Location", "value": "currentMode"},
                                        {"label": "Sleep", "value": "sleepStatus"}
                                    ],
                                    value="sleepStatus",
                                    id="color_selector_static"
                                )    
                            ],
                            style={
                                "width": "100%"
                            }),
                        ],width=6),
                    ]),
                    dbc.Row([
                        dbc.Col([
                            # html.Label([
                            #     "Day to observe: ",
                            #     dcc.DatePickerSingle(
                            #         id="day-picker",
                            #         month_format="M/D/Y",
                            #         placeholder="M/D/Y",
                            #         date=date(2022, 3, 1),
                            #     )
                            # ]),
                            # html.Label([
                            #     "Participant Status: ", 
                            #     dcc.Dropdown(
                            #         options=[
                            #             {"label": "Finances", "value": "financialStatus"},
                            #             {"label": "Hunger", "value": "hungerStatus"},
                            #             {"label": "Location", "value": "currentMode"},
                            #             {"label": "Sleep", "value": "sleepStatus"}
                            #         ],
                            #         value="sleepStatus",
                            #         id="color_selector_animation",
                            #     ),
                            #     html.Br(),
                            #     html.Br()
                            # ],
                            # style={
                            #     "width": "100%"
                            # }
                            # ),
                            dcc.Graph(
                                id='animation',
                            ),
                        ], width=6),
                        dbc.Col([
                            # html.Label([
                            #     "Hour: ",
                            #     dcc.Slider(
                            #         0, 23, 1,
                            #         id="hour_slider",
                            #         value=0
                            #     ),
                            #     "Minutes: ",
                            #     dcc.Slider(
                            #         0, 55, 5,
                            #         id="minute_slider",
                            #         value=0
                            #     ),
                            #     "Participant Status",      
                            #     dcc.Dropdown(
                            #         options=[
                            #             {"label": "Finances", "value": "financialStatus"},
                            #             {"label": "Hunger", "value": "hungerStatus"},
                            #             {"label": "Location", "value": "currentMode"},
                            #             {"label": "Sleep", "value": "sleepStatus"}
                            #         ],
                            #         value="sleepStatus",
                            #         id="color_selector_static"
                            #     )    
                            # ],
                            # style={
                            #     "width": "100%"
                            # }),
                            dcc.Graph(
                                id="static_frames"
                            ),
                        ], width=6)
                    ], align="center"),
                    dbc.Row([
                        dbc.Col([], width=3),
                        dbc.Col([
                            dcc.Graph(
                                id='heatmap'
                            ),
                            html.Label([
                                "The number of X partitions: ",
                                dcc.Input(id="xbins", type="number", value=100, debounce=True),                        
                                " The number of Y partitions: ",
                                dcc.Input(id="ybins", type="number", value=100, debounce=True)
                            ])
                        ], width=9, align='center')
                    ], align="center")
                ])
            )
        ])
    

@app.callback(
    Output(component_id='food_cost', component_property='figure'),
    [Input(component_id='food_type_dropdown', component_property='value')]
)    
def update_food_cost(value):
    if value == 'res':
        fig = build_food_cost_dist(df_res)
        return fig
    elif value == 'pub':
        fig = build_food_cost_dist(df_pub)
        return fig
    
@app.callback(
        Output("status_message", "children", allow_duplicate=True),
        Input("day-picker", "date"),
        prevent_initial_call=True
)
def update_status_message_loading(date_value):
    return "Status: Loading"

@app.callback(
    Output("animation", "figure"),
    Output("status_message", "children"),
    Input("day-picker", "date"),
    Input("color_selector_animation", "value")
)
def update_population_animation(date_value, color_value):
    return make_population_animation(date_value=date_value, color_value=color_value), "Status: Ready"

@app.callback(
    Output("heatmap", "figure"),
    Input("day-picker", "date"),
    Input("xbins", "value"),
    Input("ybins", "value")
)
def update_heatmap(date_value, numx, numy):
    return make_heatmap_animation(date_value=date_value, numx=numx, numy=numy)

@app.callback(
    Output("static_frames", "figure"),
    Input("day-picker", "date"),
    Input("hour_slider", "value"),
    Input("minute_slider", "value"),
    Input("color_selector_static", "value")
)
def update_static_graph(date_value, hour, minutes, color_value):
    return make_static_scatter(date_value=date_value, hour=hour, minutes=minutes, color_value=color_value)

@app.callback(
    Output("population_demo_comp","figure"),
    Input("pop_x_var_selection","value"),
    Input("pop_y_var_selection","value")
)
def update_pop_comparison(x_var,y_var):
    return make_pop_comparison(df,x_var,y_var)

app.config["suppress_callback_exceptions"]=True
app.run_server(mode="external")




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Dash is running on http://127.0.0.1:8050/

Dash app running on http://127.0.0.1:8050/




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

