## Setup

### Plotly Base Template

In [3]:
import plotly.graph_objects as go
import plotly.io as pio

# Define the updated base template
base = go.layout.Template(
    layout=go.Layout(
        paper_bgcolor='#FFF5CC',
        plot_bgcolor='#FFF5CC',
        height=800,
        width=800 * 1.618,
        xaxis=dict(
            anchor='y',
            showgrid=True,
            gridcolor='#888888',  # Darker grid lines
            tickfont=dict(
                size=24,
                family='Open Sans, sans-serif'  # Use Open Sans font
            ),
            titlefont=dict(
                size=26,
                family='Open Sans, sans-serif'  # Use Open Sans font
            ),
            linecolor='#333333',
            linewidth=2  # Adjust the thickness of the x-axis line
        ),
        yaxis=dict(
            anchor='x',
            showgrid=True,
            gridcolor='#888888',  # Darker grid lines
            tickfont=dict(
                size=24,
                family='Open Sans, sans-serif'
            ),
            titlefont=dict(
                size=26,
                family='Open Sans, sans-serif'
            ),
            linecolor='#333333',
            linewidth=2  # Adjust the thickness of the y-axis line
        ),
        font=dict(
            color='#333333',
            size=28,
            family='Open Sans, sans-serif'
        ),
        # Updated colorway to ensure more distinguishable colors
        colorway=["#470945", # D: Violet
                  "#E67E5A", # H: Orange (Sienna)
                  "#297FB9", # H: Blue (Steel)
                  "#163748", # D: Charcoal
                  "#4F1787", # H: Purple
                  "#EFE04E", # H: Yellow (Maize)
                  "#214F70", # D: Indigo
                  "#DF14AA", # H: Pink (Cerise)
                  "#100B1A", # D: Black
                  "#12C4CF", # H: Teal
                  "#14193D", # D: Space
                  "#CC5500"],# H: Cream
        title=go.layout.Title(
            text='',
            font=dict(
                size=34,
                color='#333333',
                family='Open Sans, sans-serif'
            ),
            x=0.05,
        )
    ),
    data=dict(
        scatter=[
            go.Scatter(
                line=dict(width=6)  # Set the line width for scatter plots
            )
        ]
    )
)

# Register the updated base template
pio.templates['base'] = base
pio.templates.default = 'base'

### rbt_squirrel

In [4]:
import pandas as pd
from IPython.display import display, HTML

# Load the datasets
squirrel_data = pd.read_csv("squirrel_data.csv")
hectare_data = pd.read_csv("hectare_data.csv")
squirrel_stories = pd.read_csv("squirrel_stories.csv")
hectare_data_processed = pd.read_csv("hectare_data_processed.csv")

# Rename 'Date' to 'date_raw' in all datasets
squirrel_data.rename(columns={'Date': 'date_raw'}, inplace=True)
hectare_data.rename(columns={'Date': 'date_raw'}, inplace=True)
squirrel_stories.rename(columns={'Date': 'date_raw'}, inplace=True)
hectare_data_processed.rename(columns={'Date': 'date_raw'}, inplace=True)

# Create a new 'Date' column in proper datetime format (rounded to day) in all datasets
squirrel_data['Date'] = pd.to_datetime(squirrel_data['date_raw'], format='%m%d%Y')
hectare_data['Date'] = pd.to_datetime(hectare_data['date_raw'], format='%m%d%Y')
squirrel_stories['Date'] = pd.to_datetime(squirrel_stories['date_raw'], format='%m%d%Y')
hectare_data_processed['Date'] = pd.to_datetime(hectare_data_processed['date_raw'], format='%m%d%Y')

# Function to convert temperature string (e.g., "58º F") to float
def convert_to_float(temp_str):
    if isinstance(temp_str, str):
        # Remove the 'º F' and convert the remaining string to float
        return float(temp_str.replace('º F', '').strip())
    return None  # For NaN or non-string values

# Add 'temp_f' column by converting 'Temperature' to a float in hectare_data_processed
hectare_data_processed['temp_f'] = hectare_data_processed['Temperature'].apply(convert_to_float)

# Add 'temp_c' column by converting Fahrenheit to Celsius and rounding to 0 decimal places
# Handle NaN values before applying rounding
hectare_data_processed['temp_c'] = hectare_data_processed['temp_f'].apply(
    lambda x: round((x - 32) * 5.0/9.0) if pd.notna(x) else None
)

# Ensure 'Above Ground Sighter Measurement' is an integer
# Handle NaN by filling with a placeholder (like -1) or dropping NaN values before conversion
squirrel_data['Above Ground Sighter Measurement'] = pd.to_numeric(squirrel_data['Above Ground Sighter Measurement'], errors='coerce').fillna(-1).astype(int)

# Merge squirrel_data and hectare_data on 'Hectare', 'Date', and 'Shift'
rbt_squirrel_tmp_1 = pd.merge(squirrel_data, hectare_data, how='left', on=['Hectare', 'Date', 'Shift'])

# Merge the result with squirrel_stories on 'Hectare', 'Date', and 'Shift'
rbt_squirrel_tmp_2 = pd.merge(rbt_squirrel_tmp_1, squirrel_stories, how='left', on=['Hectare', 'Date', 'Shift'])

# Drop the 'date_raw' column from hectare_data_processed to avoid conflicts in merging
hectare_data_processed = hectare_data_processed.drop(columns=['date_raw'])

# Merge with hectare_data_processed on 'Hectare', 'Date', and 'Shift'
rbt_squirrel = pd.merge(rbt_squirrel_tmp_2, hectare_data_processed, how='left', on=['Hectare', 'Date', 'Shift'])

# Reorder columns logically (you can adjust this order as needed)
ordered_columns = [
    'Hectare', 'Shift', 'date_raw', 'Date',  # Hectare, Shift, and Date
    'Sighter Observed Weather Data', 'Weather', 'Temperature', 'temp_f', 'temp_c',  # Weather data
    'Clear/Sunny', 'Cloudy', 'Precipitation', 'Windy', 'Humid',  # Weather conditions
    'Unique Squirrel ID', 'Hectare Squirrel Number', 'Age', 'Primary Fur Color', 'Highlight Fur Color',  # Squirrel details
    'Combination of Primary and Highlight Color', 'Color notes', 'Location', 'Above Ground Sighter Measurement', 'Specific Location',  # Additional squirrel details
    'Running', 'Chasing', 'Climbing', 'Eating', 'Foraging', 'Other Activities',  # Squirrel activities
    'Kuks', 'Quaas', 'Moans', 'Tail flags', 'Tail twitches',  # Squirrel vocalizations and behavior
    'Approaches', 'Indifferent', 'Runs from', 'Other Interactions',  # Squirrel interactions
    'Lat/Long', 'X', 'Y',  # Location data
    # Add squirrel_stories or other columns as necessary...
]
# Ensure that only columns present in the final dataframe are selected
ordered_columns = [col for col in ordered_columns if col in rbt_squirrel.columns]

# Reorder the dataframe
rbt_squirrel = rbt_squirrel[ordered_columns]

In [5]:
import pandas as pd
from IPython.display import display, HTML

def display_sorted_filtered_table(squirrel_data, hectare_data, squirrel_stories, hectare_data_processed, 
                                  order_by, ascending=True, filter_column=None, filter_value=None):
    """
    Displays a sorted and filtered scrollable table from the merged squirrel dataset.
    
    Parameters:
    - squirrel_data (DataFrame): The squirrel data.
    - hectare_data (DataFrame): The hectare data.
    - squirrel_stories (DataFrame): The squirrel stories data.
    - hectare_data_processed (DataFrame): The processed hectare data.
    - order_by (str): The column name to sort by.
    - ascending (bool): Whether to sort in ascending order (default is True).
    - filter_column (str, optional): The column name to filter on.
    - filter_value (optional): The value to filter by in the filter_column.
    
    Returns:
    - Displays the HTML scrollable table.
    """
    # Rename 'Date' to 'date_raw' in all datasets
    squirrel_data.rename(columns={'Date': 'date_raw'}, inplace=True)
    hectare_data.rename(columns={'Date': 'date_raw'}, inplace=True)
    squirrel_stories.rename(columns={'Date': 'date_raw'}, inplace=True)
    hectare_data_processed.rename(columns={'Date': 'date_raw'}, inplace=True)

    # Create a new 'Date' column in proper datetime format (rounded to day) in all datasets
    squirrel_data['Date'] = pd.to_datetime(squirrel_data['date_raw'], format='%m%d%Y')
    hectare_data['Date'] = pd.to_datetime(hectare_data['date_raw'], format='%m%d%Y')
    squirrel_stories['Date'] = pd.to_datetime(squirrel_stories['date_raw'], format='%m%d%Y')
    hectare_data_processed['Date'] = pd.to_datetime(hectare_data_processed['date_raw'], format='%m%d%Y')

    # Function to convert temperature string (e.g., "58º F") to float
    def convert_to_float(temp_str):
        if isinstance(temp_str, str):
            # Remove the 'º F' and convert the remaining string to float
            return float(temp_str.replace('º F', '').strip())
        return None  # For NaN or non-string values

    # Add 'temp_f' column by converting 'Temperature' to a float in hectare_data_processed
    hectare_data_processed['temp_f'] = hectare_data_processed['Temperature'].apply(convert_to_float)

    # Add 'temp_c' column by converting Fahrenheit to Celsius and rounding to 0 decimal places
    hectare_data_processed['temp_c'] = hectare_data_processed['temp_f'].apply(
        lambda x: round((x - 32) * 5.0/9.0) if pd.notna(x) else None
    )

    # Ensure 'Above Ground Sighter Measurement' is an integer
    squirrel_data['Above Ground Sighter Measurement'] = pd.to_numeric(squirrel_data['Above Ground Sighter Measurement'], 
                                                                      errors='coerce').fillna(-1).astype(int)

    # Merge squirrel_data and hectare_data on 'Hectare', 'Date', and 'Shift'
    rbt_squirrel_tmp_1 = pd.merge(squirrel_data, hectare_data, how='left', on=['Hectare', 'Date', 'Shift'])

    # Merge the result with squirrel_stories on 'Hectare', 'Date', and 'Shift'
    rbt_squirrel_tmp_2 = pd.merge(rbt_squirrel_tmp_1, squirrel_stories, how='left', on=['Hectare', 'Date', 'Shift'])

    # Drop the 'date_raw' column from hectare_data_processed to avoid conflicts in merging
    hectare_data_processed = hectare_data_processed.drop(columns=['date_raw'])

    # Merge with hectare_data_processed on 'Hectare', 'Date', and 'Shift'
    rbt_squirrel = pd.merge(rbt_squirrel_tmp_2, hectare_data_processed, how='left', on=['Hectare', 'Date', 'Shift'])

    # Apply filtering if filter_column and filter_value are provided
    if filter_column and filter_value is not None:
        rbt_squirrel = rbt_squirrel[rbt_squirrel[filter_column] == filter_value]

    # Reorder columns logically
    ordered_columns = [
        'Hectare', 'Shift', 'date_raw', 'Date',  # Hectare, Shift, and Date
        'Sighter Observed Weather Data', 'Weather', 'Temperature', 'temp_f', 'temp_c',  # Weather data
        'Clear/Sunny', 'Cloudy', 'Precipitation', 'Windy', 'Humid',  # Weather conditions
        'Unique Squirrel ID', 'Hectare Squirrel Number', 'Age', 'Primary Fur Color', 'Highlight Fur Color',  # Squirrel details
        'Combination of Primary and Highlight Color', 'Color notes', 'Location', 'Above Ground Sighter Measurement', 'Specific Location',  # Additional squirrel details
        'Running', 'Chasing', 'Climbing', 'Eating', 'Foraging', 'Other Activities',  # Squirrel activities
        'Kuks', 'Quaas', 'Moans', 'Tail flags', 'Tail twitches',  # Squirrel vocalizations and behavior
        'Approaches', 'Indifferent', 'Runs from', 'Other Interactions',  # Squirrel interactions
        'Lat/Long', 'X', 'Y',  # Location data
    ]
    ordered_columns = [col for col in ordered_columns if col in rbt_squirrel.columns]

    # Reorder the dataframe
    rbt_squirrel = rbt_squirrel[ordered_columns]

    # Sort the dataframe by the specified column
    rbt_squirrel = rbt_squirrel.sort_values(by=order_by, ascending=ascending)

    # Convert the final_data DataFrame to an HTML table and make it scrollable
    html_table = rbt_squirrel.to_html(classes='table table-striped', index=False)

    # Add custom CSS for scrollable table with fixed header
    scrollable_table = f"""
        <style>
            .scrollable-table-container {{
                height: 600px;
                overflow-y: auto;
                overflow-x: auto;
                border: 1px solid black;
            }}
            .scrollable-table-container table {{
                border-collapse: collapse;
                width: 100%;
            }}
            .scrollable-table-container th, .scrollable-table-container td {{
                padding: 8px 12px;
                border: 1px solid #ddd;
            }}
            .scrollable-table-container th {{
                background-color: #f2f2f2;
                position: sticky;
                top: 0;
                z-index: 1;
            }}
        </style>
        <div class="scrollable-table-container">
            {html_table}
        </div>
    """

    # # Display the scrollable HTML table
    # display(HTML(scrollable_table))

In [6]:
import pandas as pd
from IPython.display import display, HTML

def display_sorted_filtered_table(rbt_squirrel, order_by, ascending=True, filter_column=None, filter_value=None):
    """
    Display the sorted and filtered table based on the input parameters.
    
    Parameters:
    rbt_squirrel (DataFrame): The merged dataset.
    order_by (str or list of str): The column(s) to sort by.
    ascending (bool or list of bool): Whether to sort in ascending order. Can be a single bool or a list of bool values.
    filter_column (str): The column to filter by (default is None).
    filter_value (str): The value to filter by (default is None).
    """
    
    # Apply filter if filter_column and filter_value are provided
    if filter_column and filter_value:
        filtered_data = rbt_squirrel[rbt_squirrel[filter_column] == filter_value]
    else:
        filtered_data = rbt_squirrel
    
    # Sort the data based on the order_by column(s) and the ascending parameter
    sorted_data = filtered_data.sort_values(by=order_by, ascending=ascending)
    
    # Convert the DataFrame to an HTML table and make it scrollable
    html_table = sorted_data.to_html(classes='table table-striped', index=False)
    
    # Add custom CSS for scrollable table with fixed header
    scrollable_table = f"""
        <style>
            .scrollable-table-container {{
                height: 600px;
                overflow-y: auto;
                overflow-x: auto;
                border: 1px solid black;
            }}
            .scrollable-table-container table {{
                border-collapse: collapse;
                width: 100%;
            }}
            .scrollable-table-container th, .scrollable-table-container td {{
                padding: 8px 12px;
                border: 1px solid #ddd;
            }}
            .scrollable-table-container th {{
                background-color: #f2f2f2;
                position: sticky;
                top: 0;
                z-index: 1;
            }}
        </style>
        <div class="scrollable-table-container">
            {html_table}
        </div>
    """
    
    # Display the table
    display(HTML(scrollable_table))

In [7]:
import plotly.graph_objects as go
from IPython.display import display, HTML

# Filter the DataFrame for rows where the Primary Fur Color is black
black_squirrels_df = rbt_squirrel[rbt_squirrel['Primary Fur Color'] == 'Black']

# Group by Hectare, Shift, and Date, then count distinct 'Unique Squirrel ID'
black_squirrel_counts = black_squirrels_df.groupby(['Date', 'Hectare', 'Shift'])['Unique Squirrel ID'].nunique().reset_index(name='Black Squirrel Count')

# Convert the DataFrame to an HTML table
html_table = black_squirrel_counts.to_html(index=False)

# Add custom CSS for scrollable table with fixed header
scrollable_table = f"""
    <style>
        .scrollable-table-container {{
            height: 600px;
            overflow-y: auto;
            overflow-x: auto;
            border: 1px solid black;
        }}
        .scrollable-table-container table {{
            border-collapse: collapse;
            width: 100%;
        }}
        .scrollable-table-container th, .scrollable-table-container td {{
            padding: 8px 12px;
            border: 1px solid #ddd;
        }}
        .scrollable-table-container th {{
            background-color: #f2f2f2;
            position: sticky;
            top: 0;
            z-index: 1;
        }}
    </style>
    <div class="scrollable-table-container">
        {html_table}
    </div>
"""

# Display the scrollable table inline in Jupyter
display(HTML(scrollable_table))

Date,Hectare,Shift,Black Squirrel Count
2018-10-06,01G,AM,2
2018-10-06,02H,AM,1
2018-10-06,02H,PM,1
2018-10-06,04G,AM,1
2018-10-06,04H,AM,1
2018-10-06,04I,AM,1
2018-10-06,07H,PM,1
2018-10-06,09C,PM,1
2018-10-06,10G,AM,1
2018-10-06,11D,PM,1


In [8]:
display_sorted_filtered_table(rbt_squirrel,
                              order_by=['Date', 'Hectare', 'Hectare Squirrel Number'], 
                              ascending=True, 
                              filter_column='Primary Fur Color', 
                              filter_value='Black')

Hectare,Shift,date_raw,Date,Weather,Temperature,temp_f,temp_c,Clear/Sunny,Cloudy,Precipitation,Windy,Humid,Unique Squirrel ID,Hectare Squirrel Number,Age,Primary Fur Color,Highlight Fur Color,Combination of Primary and Highlight Color,Color notes,Location,Above Ground Sighter Measurement,Specific Location,Running,Chasing,Climbing,Eating,Foraging,Other Activities,Kuks,Quaas,Moans,Tail flags,Tail twitches,Approaches,Indifferent,Runs from,Other Interactions,Lat/Long,X,Y
01G,AM,10062018.0,2018-10-06,Cloudy,64º F,64.0,18.0,No,Yes,Yes,No,No,1G-AM-1006-01,1,Adult,Black,,Black+,,Ground Plane,-1,,False,True,False,False,True,,False,False,False,False,True,False,True,False,,POINT (-73.9745818450109 40.7654498873627),-73.974582,40.76545
01G,AM,10062018.0,2018-10-06,Cloudy,64º F,64.0,18.0,No,Yes,Yes,No,No,1G-AM-1006-03,3,Adult,Black,Cinnamon,Black+Cinnamon,,Ground Plane,-1,,False,True,False,False,True,,False,False,False,False,False,True,False,False,,POINT (-73.9745908987975 40.7655191512308),-73.974591,40.765519
02H,AM,10062018.0,2018-10-06,Cloudy,65º F,65.0,18.0,No,Yes,No,No,No,2H-AM-1006-01,1,Adult,Black,,Black+,,Ground Plane,-1,,True,False,True,False,False,,False,False,False,False,False,False,True,False,,POINT (-73.9731880656287 40.7660026186155),-73.973188,40.766003
02H,PM,10062018.0,2018-10-06,Cloudy,67º F,67.0,19.0,No,Yes,Yes,No,No,2H-PM-1006-03,3,Adult,Black,,Black+,,Ground Plane,-1,,False,False,False,True,False,,False,False,False,False,False,False,True,False,,POINT (-73.9733905460282 40.7656337147768),-73.973391,40.765634
04G,AM,10062018.0,2018-10-06,Sprinkling,65º F,65.0,18.0,No,Yes,Yes,No,No,4G-AM-1006-04,4,Adult,Black,,Black+,,Ground Plane,-1,,False,False,False,True,False,,False,False,False,False,False,False,True,False,,POINT (-73.9733545802515 40.7675056025357),-73.973355,40.767506
04H,AM,10062018.0,2018-10-06,Overcast,64º F,64.0,18.0,No,Yes,No,No,No,4H-AM-1006-03,3,Adult,Black,,Black+,,Ground Plane,-1,,False,False,False,False,True,met #4 while foraging and briefly tussled,False,False,False,False,False,False,True,False,,POINT (-73.9729297485331 40.7673278541438),-73.97293,40.767328
04I,AM,10062018.0,2018-10-06,Overcast,65º F,65.0,18.0,No,Yes,Yes,No,No,4I-AM-1006-01,1,Adult,Black,,Black+,,Above Ground,3,On top of park lamp,True,False,False,False,False,,False,False,False,False,False,False,True,False,,POINT (-73.9712022111028 40.7672504368263),-73.971202,40.76725
07H,PM,10062018.0,2018-10-06,Drizzle,68º F,68.0,20.0,No,Yes,Yes,No,No,7H-PM-1006-09,9,Adult,Black,,Black+,,Ground Plane,-1,by tree,False,False,False,True,True,,False,False,False,True,False,True,False,True,,POINT (-73.9704575848926 40.7693499326095),-73.970458,40.76935
09C,PM,10062018.0,2018-10-06,Mostly Cloudy,65º F,65.0,18.0,No,Yes,No,No,No,9C-PM-1006-07,7,Adult,Black,"Cinnamon, White","Black+Cinnamon, White",,Ground Plane,-1,on top of tarped sand pile,False,False,False,True,False,,False,False,False,False,False,False,False,True,(me),POINT (-73.9744080700098 40.7731515721826),-73.974408,40.773152
10G,AM,10062018.0,2018-10-06,Overcast,64º F,64.0,18.0,No,Yes,No,No,No,10G-AM-1006-08,8,Adult,Black,Cinnamon,Black+Cinnamon,,Above Ground,3,,False,False,False,True,False,,False,False,False,True,True,False,True,False,,POINT (-73.9706949930717 40.772362045852),-73.970695,40.772362


## Basics

In [9]:
import pandas as pd

# Load the hectare_data DataFrame
hectare_data = pd.read_csv("hectare_data.csv")

# Count the distinct anonymized sighters
distinct_sighters_count = hectare_data['Anonymized Sighter'].nunique()

# Count the distinct anonymized sighters
distinct_squirrels_count = rbt_squirrel['Unique Squirrel ID'].nunique()

print(f"Distinct anonymized sighters: {distinct_sighters_count}")
print(f"Distinct anonymized sighters: {distinct_squirrels_count}")

Distinct anonymized sighters: 235
Distinct anonymized sighters: 3018


In [10]:
import plotly.express as px

def create_bar_chart(dataframe, x_axis, y_axis, y_aggregation=None, plot_type='absolute', title='Bar Chart'):
    """
    Creates a bar chart using Plotly with the specified x and y axis, and the option to plot absolute or percentage values.
    
    Parameters:
    dataframe (pd.DataFrame): The input DataFrame.
    x_axis (str): The column name for the x-axis.
    y_axis (str): The column name for the y-axis.
    y_aggregation (str): If provided, aggregates the y_axis (e.g., 'count' for counting unique values).
    plot_type (str): Choose between 'absolute' or 'percentage' to plot either absolute values or percentages of total.
    title (str): Title of the chart.
    
    Returns:
    Plotly figure object.
    """
    if y_aggregation == 'count':
        agg_data = dataframe.groupby(x_axis)[y_axis].nunique().reset_index(name='count')
    else:
        agg_data = dataframe.groupby(x_axis)[y_axis].sum().reset_index(name='total')

    # If 'percentage' is selected, convert the values to percentages
    if plot_type == 'percentage':
        total_sum = agg_data['count' if y_aggregation == 'count' else 'total'].sum()
        agg_data['percentage'] = agg_data['count' if y_aggregation == 'count' else 'total'] / total_sum * 100
        fig = px.bar(agg_data, x=x_axis, y='percentage', title=title)
    else:
        fig = px.bar(agg_data, x=x_axis, y='count' if y_aggregation == 'count' else 'total', title=title)

    # Update the layout to use the base template
    fig.update_layout(template='base')
    
    return fig

In [11]:
# Call the function to create a bar chart with x='Primary Fur Color' and y=count distinct 'Unique Squirrel ID'
create_bar_chart(rbt_squirrel, x_axis='Primary Fur Color', y_axis='Unique Squirrel ID', y_aggregation='count', plot_type='percentage', title='Squirrel Count by Primary Fur Color')

In [12]:
# create_bar_chart(rbt_squirrel, x_axis='Date', y_axis='Unique Squirrel ID', y_aggregation='count', title='Squirrel Count by Date')

In [13]:
create_bar_chart(rbt_squirrel, x_axis='Age', y_axis='Unique Squirrel ID', y_aggregation='count', plot_type='percentage', title='Squirrel Count by Age')

In [14]:
# create_bar_chart(rbt_squirrel, x_axis='Shift', y_axis='Unique Squirrel ID', y_aggregation='count', title='Squirrel Count by Shift')

In [15]:
import pandas as pd
import plotly.express as px

# Function to create a single pie chart based on True values for activities with customizable sorting
def create_single_activity_pie_chart(data, activities, sort_by='descending'):
    """
    Creates a single pie chart based on the True values for activities and allows sorting of the chart.
    
    Parameters:
    data (DataFrame): The dataset containing activity data.
    activities (list): The list of activity columns to be plotted.
    sort_by (str): Order of the data ('descending' or 'ascending'). Default is 'descending'.
    """
    # Create a dictionary to store the count of True values for each activity
    true_counts = {activity: data[activity].sum() for activity in activities}

    # Convert the dictionary into a DataFrame
    activity_counts = pd.DataFrame(list(true_counts.items()), columns=['Activity', 'Count'])

    # Sort the DataFrame based on the 'Count' column
    if sort_by == 'ascending':
        activity_counts = activity_counts.sort_values(by='Count', ascending=True)
    else:
        activity_counts = activity_counts.sort_values(by='Count', ascending=False)

    # Create a pie chart using Plotly
    fig = px.pie(activity_counts, 
                 names='Activity', 
                 values='Count', 
                 hole=0.4)  # Donut-style pie chart

    # Remove all labels from the pie chart
    # fig.update_traces(textinfo='none')

    # Adjust layout
    fig.update_layout(template='base', 
                      height=600,  # Set figure height
                      width=800,  # Set figure width
                      margin=dict(t=50, b=50, l=50, r=50),  # Add margins
                      showlegend=True,  # Keep legend if needed
                      font=dict(size=16))  # Increase general font size for legend

    # Show the plot
    fig.show()

In [16]:
activities = ['Running', 'Chasing', 'Climbing', 'Eating', 'Foraging']
calls = ['Kuks', 'Quaas', 'Moans']
tail = ['Tail flags', 'Tail twitches']
human  = ['Approaches', 'Indifferent', 'Runs from']

create_single_activity_pie_chart(rbt_squirrel[rbt_squirrel['Age'] != '?'], activities, sort_by='ascending')
create_single_activity_pie_chart(rbt_squirrel[rbt_squirrel['Age'] == 'Adult'], activities, sort_by='ascending')
create_single_activity_pie_chart(rbt_squirrel[rbt_squirrel['Age'] == 'Juvenile'], activities, sort_by='ascending')

create_single_activity_pie_chart(rbt_squirrel[rbt_squirrel['Age'] != '?'], human, sort_by='ascending')
# create_activity_pie_chart(rbt_squirrel, calls)
# create_activity_pie_chart(rbt_squirrel, tail)
# create_activity_pie_chart(rbt_squirrel, human)

In [17]:
import pandas as pd
import scipy.stats as stats
import plotly.express as px

def analyze_group_difference_with_multiple_donuts(data, group_var, compare_var, plot_type='absolute'):
    """
    Analyzes if there is a statistically significant difference in the distribution of compare_var 
    across different group_var categories using a Chi-Square test. Also generates multiple donut charts 
    showing the observed counts or percentages for each group and compare_var category.

    Parameters:
    data (DataFrame): The dataset to analyze.
    group_var (str): The variable that defines the groups (e.g., 'Primary Fur Color').
    compare_var (str): The variable to compare across groups (e.g., 'Location').
    plot_type (str): Either 'absolute' for counts or 'percentage' for percentage values in the charts.

    Returns:
    chi2_stat (float): The chi-square statistic.
    p_val (float): The p-value of the test.
    contingency_table (DataFrame): The contingency table used for the Chi-Square test.
    """
    
    # Drop rows where compare_var is NaN (since we can't analyze missing data)
    filtered_data = data.dropna(subset=[compare_var])
    
    # Create a contingency table (cross-tabulation) of group_var and compare_var
    contingency_table = pd.crosstab(filtered_data[group_var], filtered_data[compare_var])
    
    # Perform the Chi-Square test
    chi2_stat, p_val, dof, expected = stats.chi2_contingency(contingency_table)
    
    # Output the results
    print(f"Chi-Square Statistic: {chi2_stat}")
    print(f"p-value: {p_val}")
    print(f"Degrees of Freedom: {dof}")
    print("Expected Frequencies:")
    print(pd.DataFrame(expected, index=contingency_table.index, columns=contingency_table.columns))
    
    # Loop through each unique value of group_var (e.g., Adult, Juvenile) and create a donut for each
    for group_value in filtered_data[group_var].dropna().unique():
        # Filter the data for the current group
        group_data = filtered_data[filtered_data[group_var] == group_value]
        
        # Create a contingency table for the current group
        contingency_table_group = pd.crosstab(group_data[group_var], group_data[compare_var])
        
        # Prepare data for plotting
        contingency_table_melted = contingency_table_group.reset_index().melt(id_vars=group_var, var_name=compare_var, value_name='Count')

        # If plot_type is 'percentage', calculate percentages
        if plot_type == 'percentage':
            contingency_table_melted['Total'] = contingency_table_melted.groupby(group_var)['Count'].transform('sum')
            contingency_table_melted['Percentage'] = (contingency_table_melted['Count'] / contingency_table_melted['Total']) * 100
            values = 'Percentage'
        else:
            values = 'Count'
        
        # Create the donut chart using Plotly (pie chart with hole)
        fig = px.pie(contingency_table_melted, names=compare_var, values=values, hole=0.5)

        # Remove labels from the plot
        fig.update_traces(textinfo='none')

        # Update layout with the base template and a title for each group
        fig.update_layout(template='base', showlegend=False, title=f"Distribution of {compare_var} for {group_value}")

        # Show the plot for each group
        fig.show()

    # Return relevant results
    return chi2_stat, p_val, contingency_table

In [18]:
analyze_group_difference_with_multiple_donuts(rbt_squirrel[rbt_squirrel['Age'] != '?'], 'Age', 'Location', plot_type='percentage')
analyze_group_difference_with_multiple_donuts(rbt_squirrel[rbt_squirrel['Age'] != '?'], 'Age', 'Foraging', plot_type='percentage')
# analyze_group_difference_with_multiple_donuts(rbt_squirrel[rbt_squirrel['Age'] != '?'], 'Age', 'Kuks', plot_type='percentage')
# analyze_group_difference_with_multiple_donuts(rbt_squirrel, 'Age', 'Runs from')

Chi-Square Statistic: 17.75737655265865
p-value: 2.509426047659284e-05
Degrees of Freedom: 1
Expected Frequencies:
Location  Above Ground  Ground Plane
Age                                 
Adult       708.792512   1820.207488
Juvenile     92.207488    236.792512


Chi-Square Statistic: 21.819089829645005
p-value: 2.9960436673057834e-06
Degrees of Freedom: 1
Expected Frequencies:
Foraging        False        True 
Age                               
Adult     1340.569659  1233.430341
Juvenile   173.430341   159.569659


(np.float64(21.819089829645005),
 np.float64(2.9960436673057834e-06),
 Foraging  False  True 
 Age                   
 Adult      1300   1274
 Juvenile    214    119)

In [19]:
# analyze_group_difference_with_multiple_donuts(rbt_squirrel, 'Shift', 'Location', plot_type='percentage')
# analyze_group_difference_with_multiple_donuts(rbt_squirrel, 'Shift', 'Foraging', plot_type='percentage')

In [20]:
# analyze_group_difference_with_plot(rbt_squirrel, 'Primary Fur Color', 'Location')
# analyze_group_difference_with_plot(rbt_squirrel, 'Primary Fur Color', 'Foraging') 
# analyze_group_difference_with_plot(rbt_squirrel, 'Primary Fur Color', 'Kuks') 
# analyze_group_difference_with_plot(rbt_squirrel, 'Primary Fur Color', 'Runs from')

## Location

In [21]:
import plotly.express as px

# Define color mapping to match the Primary Fur Color
color_map = {
    'Gray': 'gray',
    'Cinnamon': '#D2691E',  # Cinnamon-like color
    'Black': 'black'
}

# Scatter plot of squirrel locations with color based on 'Primary Fur Color'
fig2 = px.scatter(rbt_squirrel, x='X', y='Y',
                  color='Primary Fur Color',
                  title='Geographical Distribution of Squirrels by Primary Fur Color',
                  labels={'X': 'Longitude', 'Y': 'Latitude'},
                  color_discrete_map=color_map,  # Use the custom color map
                  template='base')  # Use 'base' template

# Update layout for better visualization
fig2.update_layout(
    xaxis_title='Longitude',
    yaxis_title='Latitude',
    height=600,  # Adjust height as needed
    legend_title='Primary Fur Color'
)

# Show the figure
fig2.show()

In [22]:
import plotly.express as px

def plot_squirrel_map(data, color_column, filters=None, height=600, width=800, zoom=12, mapbox_style='open-street-map'):
    """
    Plots a scatter map of squirrel locations with points colored based on a specified column.
    
    Parameters:
    - data: pandas DataFrame containing the squirrel data.
    - color_column: The column to use for coloring the points (e.g., 'Primary Fur Color').
    - filters: A dictionary of column-value pairs to filter the data (e.g., {'Location': 'Above Ground'}).
    - height: The height of the plot.
    - width: The width of the plot.
    - zoom: The zoom level of the map.
    - mapbox_style: The Mapbox style to use (default is 'open-street-map').
    
    Returns:
    - A Plotly scatter map with squirrel locations.
    """
    
    # Apply filters if provided
    if filters:
        for col, value in filters.items():
            data = data[data[col] == value]
    
    # Set a custom color map for 'Primary Fur Color'
    color_discrete_map = None
    if color_column == 'Primary Fur Color':
        color_discrete_map = {
            'Gray': 'gray',
            'Cinnamon': '#D2691E',  # Cinnamon color
            'Black': 'black'
        }
    
    # Scatter plot of squirrel locations on an open-street-map map
    fig = px.scatter_mapbox(data, 
                            lat='Y', lon='X',  # Latitude and Longitude columns
                            color=color_column,  # Color by the specified column
                            title=f'Geographical Distribution of Squirrels by {color_column}',
                            hover_data=['Hectare Squirrel Number', 'Hectare'],  # Include Hectare Squirrel Number in hover
                            mapbox_style=mapbox_style,  # Default back to 'open-street-map'
                            zoom=zoom,  # Set zoom level
                            height=height,  # Set height
                            width=width,  # Set width
                            color_discrete_map=color_discrete_map)  # Use custom color map for 'Primary Fur Color'

    # Center the map on the average coordinates of the dataset
    fig.update_layout(
        mapbox=dict(
            center=dict(lat=data['Y'].mean(), lon=data['X'].mean()),  # Center map
            zoom=zoom  # Zoom level
        ),
        margin={"r":0,"t":50,"l":0,"b":0},  # Remove excess margins
        legend_title=color_column
    )
    
    return fig

In [23]:
# Example usage of the function
satellite_map = plot_squirrel_map(
    data=rbt_squirrel, 
    color_column='Primary Fur Color',  # Color by Primary Fur Color with custom mapping
    # filters={'Primary Fur Color': 'Gray'},  # Filter by 'Above Ground' location
    height=700,  # Set the height of the plot
    width=1000,  # Set the width of the plot
    mapbox_style='carto-positron')

# Display the map
satellite_map.show()

## Appendix

In [24]:
import pandas as pd
import plotly.express as px

# Ensure 'Date' is in datetime format
rbt_squirrel['Date'] = pd.to_datetime(rbt_squirrel['Date'])

# Filter data for AM and PM shifts
am_shift_data = rbt_squirrel[rbt_squirrel['Shift'] == 'AM']
pm_shift_data = rbt_squirrel[rbt_squirrel['Shift'] == 'PM']

# Create a box plot for AM Shift
fig_am = px.box(am_shift_data, 
                x='Date', 
                y='temp_c', 
                title='Temperature Distribution (in Celsius) Grouped by Date (AM Shift)', 
                labels={'temp_c': 'Temperature (°C)', 'Date': 'Date'},
                template='base')

# Adjust layout for AM plot
fig_am.update_layout(
    xaxis_title="Date",
    yaxis_title="Temperature (°C)",
    title_x=0.5,  # Center the title
    height=600,  # Set figure height
    width=900,  # Set figure width
    margin=dict(t=50, b=50, l=50, r=50),  # Add margins
    font=dict(size=14))  # Increase font size for readability

# Show the AM plot
fig_am.show()

# Create a box plot for PM Shift
fig_pm = px.box(pm_shift_data, 
                x='Date', 
                y='temp_c', 
                title='Temperature Distribution (in Celsius) Grouped by Date (PM Shift)', 
                labels={'temp_c': 'Temperature (°C)', 'Date': 'Date'},
                template='base')

# Adjust layout for PM plot
fig_pm.update_layout(
    xaxis_title="Date",
    yaxis_title="Temperature (°C)",
    title_x=0.5,  # Center the title
    height=600,  # Set figure height
    width=900,  # Set figure width
    margin=dict(t=50, b=50, l=50, r=50),  # Add margins
    font=dict(size=14))  # Increase font size for readability

# Show the PM plot
fig_pm.show()

In [25]:
import pandas as pd
import plotly.express as px

# Function to create a single pie chart based on True values for activities
def create_single_activity_pie_chart(data, activities):
    # Create a dictionary to store the count of True values for each activity
    true_counts = {activity: data[activity].sum() for activity in activities}

    # Convert the dictionary into a DataFrame
    activity_counts = pd.DataFrame(list(true_counts.items()), columns=['Activity', 'Count'])

    # Create a pie chart using Plotly
    fig = px.pie(activity_counts, 
                 names='Activity', 
                 values='Count', 
                 title='Proportion of Squirrel Activities (True Values Only)', 
                 hole=0.4)  # Donut-style pie chart

    # Adjust layout and display percentages inside the pie chart
    fig.update_traces(textinfo='percent', textposition='inside')
    fig.update_layout(template='base', 
                      height=600,  # Set figure height
                      width=800,  # Set figure width
                      margin=dict(t=50, b=50, l=50, r=50),  # Add margins
                      title_x=0.5,  # Center the title
                      font=dict(size=16))  # Increase font size for readability

    # Show the plot
    fig.show()
    
# Define the activities to analyze
activities = ['Running', 'Chasing', 'Climbing', 'Eating', 'Foraging']
calls = ['Kuks', 'Quaas', 'Moans']
tail = ['Tail flags', 'Tail twitches']
human  = ['Approaches', 'Indifferent', 'Runs from']

# Call the function to generate the pie chart
create_activity_pie_chart(rbt_squirrel, activities)
# create_activity_pie_chart(rbt_squirrel, calls)
# create_activity_pie_chart(rbt_squirrel, tail)
# create_activity_pie_chart(rbt_squirrel, human)

NameError: name 'create_activity_pie_chart' is not defined

In [74]:
import pandas as pd
import scipy.stats as stats
import plotly.express as px
import statsmodels.api as sm
from statsmodels.formula.api import ols

def analyze_group_difference_continuous(data, group_var, continuous_var):
    """
    Analyzes if there is a statistically significant difference in the mean of continuous_var 
    across different group_var categories using ANOVA. Also generates a box plot showing the 
    distribution of continuous_var for each group.

    Parameters:
    data (DataFrame): The dataset to analyze.
    group_var (str): The variable that defines the groups (e.g., 'Primary Fur Color').
    continuous_var (str): The continuous variable to compare across groups (e.g., 'temp_c').

    Returns:
    anova_table (DataFrame): The ANOVA table with F-statistic and p-value.
    """
    
    # Drop rows where continuous_var is NaN (since we can't analyze missing data)
    filtered_data = data.dropna(subset=[continuous_var])
    
    # Clean the column names (replace spaces or special characters)
    filtered_data.columns = filtered_data.columns.str.replace(' ', '_').str.replace(r'[^\w]', '')

    # Ensure the group variable and continuous variable names are cleaned as well
    group_var_clean = group_var.replace(' ', '_').replace(r'[^\w]', '')
    continuous_var_clean = continuous_var.replace(' ', '_').replace(r'[^\w]', '')

    # Create the formula for ANOVA
    formula = f'{continuous_var_clean} ~ C({group_var_clean})'
    
    # Perform ANOVA to compare the means of the continuous variable across groups
    model = ols(formula, data=filtered_data).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    
    # Output the ANOVA table
    print("ANOVA Table:")
    print(anova_table)
    
    # Create a box plot using Plotly
    fig = px.box(filtered_data, x=group_var_clean, y=continuous_var_clean, 
                 points='all',  # Shows individual data points
                 title=f"Distribution of {continuous_var} by {group_var}")
    
    # Update layout to show hover information with more detail
    fig.update_traces(marker=dict(size=6), selector=dict(type='box'))
    fig.update_layout(template='base', showlegend=False)
    
    # Show the plot
    fig.show()
    
    # Return the ANOVA table
    return anova_table

# Example usage:
# Analyze if there's a difference in 'temp_c' across different squirrel 'Primary Fur Color'
# anova_table = analyze_group_difference_continuous(rbt_squirrel, 'Age', 'Above Ground Sighter Measurement')