In [1]:
# Load Libraries
import dash
from dash import dcc, html, Input, Output
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

In [5]:
# Source CSV with one row per species record
file_path = 'most_visited_nps_species_data_tidy.csv'

df = pd.read_csv(file_path)

# Food chain pyramid: category name -> level (1 = base of the pyramid).
# Written grouped by level so it is easy to see which categories share a tier.
food_chain_order = {
    category: level
    for level, categories in (
        (1, ("Vascular Plant", "Non-vascular Plant", "Chromista")),
        (2, ("Fungi",)),
        (3, ("Insect", "Spider/Scorpion", "Other Non-vertebrates")),
        (4, ("Fish",)),
        (5, ("Bird",)),
        (6, ("Mammal",)),
    )
    for category in categories
}

# Attach the pyramid level to every row; categories that are not listed in
# food_chain_order end up with a missing (NaN) level.
df["FoodChainLevel"] = df["CategoryName"].map(food_chain_order)

# Total references per category (and its food chain level) for selected parks
def filter_data(parks):
    """Sum ``References`` per category for the selected parks.

    Args:
        parks: Park names to keep. Normally a list of names; a single name
            given as a string is treated as a one-element list, and ``None``
            is treated as an empty selection.

    Returns:
        A DataFrame with the columns ``CategoryName``, ``FoodChainLevel`` and
        ``References`` (summed), one row per category/level pair. An empty
        selection yields an empty frame with the same columns.
    """
    # Series.isin only accepts list-likes: passing None or a bare string
    # raises TypeError, so normalise the selection first.
    if parks is None:
        parks = []
    elif isinstance(parks, str):
        parks = [parks]

    filtered_df = df[df['ParkName'].isin(parks)]
    # NOTE: groupby drops rows whose FoodChainLevel is missing (pandas'
    # default dropna=True), so unmapped categories do not appear in the result.
    aggregated_df = (
        filtered_df.groupby(['CategoryName', 'FoodChainLevel'])['References']
        .sum()
        .reset_index()
    )
    return aggregated_df

# Dash application object and page layout
app = dash.Dash(__name__)

app.layout = html.Div(children=[
    html.H1("National Park Species References"),

    # Multi-select park filter, pre-populated with the first park in the data
    dcc.Dropdown(
        id='park-dropdown',
        options=[
            dict(label=name, value=name)
            for name in df['ParkName'].unique()
        ],
        value=[df['ParkName'].unique()[0]],
        multi=True,
        clearable=False,
    ),

    # Figures are filled in by the callbacks registered on these ids
    dcc.Graph(id='scatter-plot'),
    dcc.Graph(id='bar-chart'),
])

# Scatter plot
@app.callback(
    Output('scatter-plot', 'figure'),
    Input('park-dropdown', 'value')
)
def update_scatter(selected_parks):
    """Build the category scatter plot for the parks chosen in the dropdown.

    Each point is one category: x is its food chain level, y (and the marker
    size) is the total number of references, and the colour is the category
    name. A least-squares regression line is overlaid when it is well defined.

    Args:
        selected_parks: Current value of the ``park-dropdown`` component.
            An empty or missing selection is treated as "no parks".

    Returns:
        The Plotly figure for the ``scatter-plot`` graph.
    """
    # Guard against a cleared dropdown delivering None instead of a list.
    data = filter_data(selected_parks or [])
    data = data.sort_values("FoodChainLevel")

    fig = px.scatter(
        data,
        x='FoodChainLevel',
        y='References',
        title="Species References in Selected Parks",
        labels={'FoodChainLevel': 'Food Chain Level', 'References': 'Total References'},
        size='References',
        size_max=60,  # Upper bound on marker size; raise it for larger dots
        color='CategoryName',
        # Puts CategoryName into each point's customdata, which the bar chart
        # callback reads from clickData.
        hover_data=['CategoryName']
    )

    # Compute regression line
    x = data['FoodChainLevel']
    y = data['References']
    # A straight-line fit needs at least two *distinct* x values. Checking
    # only len(x) > 1 lets through selections where every category sits on the
    # same level, for which polyfit is rank-deficient and the line is
    # meaningless.
    if x.nunique() > 1:
        slope, intercept = np.polyfit(x, y, 1)
        regression_y = slope * x + intercept

        fig.add_trace(
            go.Scatter(
                x=x, y=regression_y,
                mode='lines',
                name='Regression Line',
                line=dict(color='red')
            )
        )

    return fig

# Bar chart
@app.callback(
    Output('bar-chart', 'figure'),
    Input('scatter-plot', 'clickData')
)
def update_bar_chart(clickData):
    """Show the ten most-referenced common names for the clicked category.

    Args:
        clickData: ``clickData`` of the ``scatter-plot`` graph; ``None`` until
            the user clicks a point.

    Returns:
        A placeholder bar figure before any click, the top-10 bar figure for
        the clicked category, or ``dash.no_update`` when the click did not hit
        a category point (e.g. the regression line).
    """
    if clickData is None:
        return px.bar(title="Select a Category to see Top 10 Common Names")

    # Access click data. Only the category markers carry customdata (set via
    # hover_data on the scatter plot); the regression line trace does not, so
    # indexing it directly would raise KeyError. Leave the chart untouched
    # for such clicks.
    points = clickData.get('points') or [{}]
    customdata = points[0].get('customdata')
    if not customdata:
        return dash.no_update

    category_name = customdata[0]
    print("selected_category: ", category_name)

    # NOTE(review): this filters by category across the whole dataset, not
    # only the parks selected in the dropdown - confirm that is intended.
    filtered_df = df[df['CategoryName'] == category_name].copy()

    # Check if CommonNames exist
    if 'CommonNames' in filtered_df.columns:
        # Extract first common name from comma-separated values
        filtered_df['PrimaryCommonName'] = filtered_df['CommonNames'].str.split(',').str[0]
    else:
        # No CommonNames column at all: label the bars by scientific name
        filtered_df['PrimaryCommonName'] = filtered_df['ScientificName']

    # Get top 10 Common names by total references
    top_common_names = (
        filtered_df.groupby('PrimaryCommonName')['References']
        .sum()
        .reset_index()
        .nlargest(10, 'References')
    )

    fig = px.bar(
        top_common_names,
        x='PrimaryCommonName',
        y='References',
        title=f"Top 10 Common Names in {category_name}",
        labels={'PrimaryCommonName': 'Common Name', 'References': 'Total References'}
    )
    return fig


# Start the Dash app on port 8080 when this file is executed directly
if __name__ == '__main__':
    app.run(
        port=8080,
        jupyter_height=1000,
    )

selected_category:  Bird
