In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

# Load the data
df = pd.read_excel("data/record_24to25.xlsx")
# Drop rows with missing data
df = df.dropna(subset=["patients_common_name", "patients_disposition"])

## Confirm number of unique common names

In [2]:
# Get unique species and count
unique_species = df['patients_common_name'].unique()
num_species = len(unique_species)

print(f"Number of unique animal types: {num_species}")
print("Sample species list:")
print(unique_species[:20])  # Show first 20 for a quick look

Number of unique animal types: 40
Sample species list:
['Virginia Opossum' 'Eastern Cottontail' 'Northern Raccoon'
 'Big Brown Bat' 'Eastern Gray Squirrel' 'American Beaver'
 'Silver-haired Bat' 'Brown Rat' 'Douglas Squirrel' 'Little Brown Bat'
 'Short-tailed Weasel' 'Rat' 'Eastern Grey Squirrel' "Townsend's Vole"
 'North American Deer Mouse' 'Columbian Ground Squirrel'
 'North American Deer mouse' 'Striped Skunk' 'Coyote' 'Black-tailed Deer']


## Plot all Release Rate

In [3]:
# Group by species and calculate total and released counts
grouped = df.groupby("patients_common_name")["patients_disposition"].value_counts().unstack().fillna(0)
grouped["total"] = grouped.sum(axis=1)
grouped["released"] = grouped.get("Released", 0)
grouped["release_rate"] = grouped["released"] / grouped["total"]
grouped = grouped.reset_index()

# Sort by release rate (highest first)
grouped = grouped.sort_values(by="release_rate", ascending=False)

# Create interactive bar chart
fig = px.bar(
    grouped,
    x="patients_common_name",
    y="release_rate",
    hover_data={"released": True, "total": True},
    labels={"patients_common_name": "Animal Common Name", "release_rate": "Release Rate"},
    title="Release Rate by Animal Common Name 2024-2025"
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()
fig.write_html("release_rate.html")

## Average number of days of care by Animal Common Name

In [4]:
released_df = df[df["patients_disposition"] == "Released"]

# Drop missing or invalid values
released_df = released_df.dropna(subset=["patients_days_in_care", "patients_common_name"])
released_df["patients_days_in_care"] = pd.to_numeric(released_df["patients_days_in_care"], errors='coerce')

# Compute average days in care per animal type
avg_days = (
    released_df
    .groupby("patients_common_name")["patients_days_in_care"]
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)

# Plot using Plotly
fig = px.bar(
    avg_days,
    x="patients_common_name",
    y="patients_days_in_care",
    title="Average Days in Care per Animal Type (Released Only)",
    labels={"patients_common_name": "Animal Type", "patients_days_in_care": "Average Days in Care"},
    hover_data={"patients_days_in_care": ':.2f'}
)

fig.update_layout(xaxis_tickangle=-45)
fig.show()
fig.write_html("days_before_release.html")