In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import plotly.express as px
from adjustText import adjust_text

In [None]:
# Read the two Excel workbooks this notebook works from. Both paths are
# relative, so they are resolved against the notebook's working directory.
#   data          -> rows later filtered by 'Division Desc' and aggregated by attraction
#   df_attendance -> ridership table later joined on its 'Location' column
data = pd.read_excel(io='Legal Meeting Attraction FY24 Level Set.xlsx')
df_attendance = pd.read_excel(io='FY24 Ridership.xlsx')

In [None]:
# Restrict the analysis to a single division: build a boolean mask first,
# then select the matching rows.
is_animal_kingdom = data['Division Desc'].eq("Disney's Animal Kingdom Theme Park")
df_data = data.loc[is_animal_kingdom]

In [None]:
# Summarise incidents per attraction: number of records and mean severity.
# NOTE(review): 'count' tallies only non-null 'Severity Score' values, so rows
# without a score are not included in total_incidents — confirm that is intended.
grouped_data = df_data.groupby('Location Code Desc').agg(
    total_incidents=('Severity Score', 'count'),
    avg_severity_score=('Severity Score', 'mean')
).reset_index()

# Merging the grouped incident data with attendance data.
# pd.merge defaults to an inner join, so only attractions whose
# 'Location Code Desc' matches a 'Location' in the ridership table are kept.
merged_df = pd.merge(grouped_data, df_attendance, left_on='Location Code Desc', right_on='Location')

# Calculating the incident rate per million riders
merged_df['incident_rate'] = (merged_df['total_incidents'] / merged_df['FY 2024']) * 1_000_000

In [None]:
# Quadrant chart: incident volume (x) against incident rate per million riders (y),
# with each attraction coloured by its normalized average severity.
# 'incident_rate' is already a column of merged_df (computed where merged_df is
# built), so it is not recomputed in this cell.

# Normalize the severity score (min-max scaling) and invert it.
# NOTE(review): the `1 -` inversion maps the LOWEST raw average score to 1. That
# matches the "1 = High" colorbar label only if a lower raw 'Severity Score'
# means a more severe incident — confirm the direction of the raw scale.
min_severity = merged_df['avg_severity_score'].min()
max_severity = merged_df['avg_severity_score'].max()
severity_range = max_severity - min_severity
if severity_range > 0:
    merged_df['normalized_severity'] = (
        1 - (merged_df['avg_severity_score'] - min_severity) / severity_range
    )
else:
    # No spread (a single attraction, all scores equal, or no scores at all):
    # dividing by the range would turn every value into NaN and the points would
    # not be drawn. Use a neutral 0.5 instead, leaving missing scores as NaN.
    merged_df['normalized_severity'] = merged_df['avg_severity_score'].where(
        merged_df['avg_severity_score'].isna(), 0.5
    )

# Determine the median values for the quadrants
median_volume = merged_df['total_incidents'].median()
median_rate = merged_df['incident_rate'].median()

# Plotting the quadrant chart
fig, ax = plt.subplots(figsize=(10, 7))

# Scatter plot with color representing normalized severity
scatter = ax.scatter(
    merged_df['total_incidents'],        # X: Incident Volume
    merged_df['incident_rate'],          # Y: Incident Rate per million riders
    c=merged_df['normalized_severity'],  # Color by normalized severity
    cmap='viridis',
    s=100,                               # Marker size
    alpha=0.7
)

# Axis labels and title
ax.set_xlabel('Total Incidents (Volume)')
ax.set_ylabel('Incident Rate (per Million Riders)')
ax.set_title('Quadrant Chart: Incident Volume vs. Incident Rate (Color by Normalized Severity)')

# Colorbar for the normalized severity score, attached explicitly to this axes
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label('Normalized Severity (0 = Low, 1 = High)')

# Dashed lines at the medians split the plot into four quadrants
ax.axvline(median_volume, color='gray', linestyle='--', linewidth=1)  # Vertical line at median volume
ax.axhline(median_rate, color='gray', linestyle='--', linewidth=1)    # Horizontal line at median rate

# Label only the attractions whose incident rate is above the median; each label
# starts centered on its point and is then nudged by adjust_text.
above_median = merged_df[merged_df['incident_rate'] > median_rate]
texts = [
    ax.text(volume, rate, attraction, fontsize=8, ha='center', va='center')
    for attraction, volume, rate in zip(
        above_median['Location Code Desc'],
        above_median['total_incidents'],
        above_median['incident_rate'],
    )
]

# Adjust text positions with a small force so labels stay near their points; no arrows.
# NOTE(review): 'expand_points' and the 'points' key look like adjustText 0.x
# names — confirm they are still honoured by the installed version.
if texts:  # nothing to adjust when no attraction is above the median rate
    adjust_text(texts,
                ax=ax,                     # adjust within this axes rather than the implicit current one
                force_text=(0.3, 0.3),     # Force factor for minimal movement (closer to the center)
                only_move={'points': 'xy', 'text': 'xy'},  # 'text' is the documented key; 'texts' was not recognised
                expand_points=(1.2, 1.2),  # Avoid too much expansion away from points
                arrowprops=None)           # No arrows

plt.show()

In [None]:
# Preview only the first rows of the raw frame instead of rendering the whole
# table as cell output.
data.head()