In [42]:
pip 

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.9/250.9 kB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting et-xmlfile
  Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [123]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import os

In [126]:
# 设置 Plotly 模板，您可以根据需要调整
pio.templates.default = "plotly_white"

# --- 1. 加载数据 ---
print("--- Loading Data ---")
file_path = 'New_Working_Data.xlsx'
try:
    df = pd.read_excel(file_path, parse_dates=['Date/Time'])
    print("Excel file successfully loaded into DataFrame.")
    # display(df.head()) # Optional: display head
    # display(df.info()) # Optional: display info
except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
    df = None
except Exception as e:
    print(f"Error loading file: {e}")
    df = None

--- Loading Data ---
Excel file successfully loaded into DataFrame.


In [129]:


# Ensure DataFrame is loaded before proceeding
if df is not None:
    # --- 2. 清洗和预处理数据 ---
    print("\n--- Cleaning and Preprocessing Data ---")
    # Handle missing values
    for col in df.columns:
        if df[col].isnull().any():
            if pd.api.types.is_numeric_dtype(df[col]):
                if col == 'No of Beds':
                    df[col].fillna(df[col].median(), inplace=True)
                else:
                    df[col].fillna(df[col].mean(), inplace=True)
            else:
                df[col].fillna(df[col].mode()[0], inplace=True)

    # Handle inconsistencies in 'Care Home ID' and convert to string
    df['Care Home ID'] = df['Care Home ID'].astype(str).str.strip()
    # Create a cleaned version with only digits for robust filtering
    df['Care Home ID_cleaned'] = df['Care Home ID'].str.replace(r'\D+', '', regex=True)

    # Handle outliers in 'No of Beds'
    df = df[df['No of Beds'] >= 0].copy() # Use .copy() to avoid SettingWithCopyWarning

    # Remove duplicate rows
    initial_rows = len(df)
    df.drop_duplicates(inplace=True)
    duplicates_removed = initial_rows - len(df)
    print(f"Removed {duplicates_removed} duplicate rows.")

    # Feature Engineering from Date/Time
    df['year'] = df['Date/Time'].dt.year.astype(int)
    df['month'] = df['Date/Time'].dt.month.astype(int)
    df['quarter'] = df['Date/Time'].dt.quarter.astype(int)
    df['week'] = df['Date/Time'].dt.isocalendar().week.astype(int)


    # Clinical Concern Conversion
    df['Clinical_Concern_Num'] = df['Clinical concern?'].map({'No': 0, 'Yes': 1})
    mode_value_concern = df['Clinical_Concern_Num'].mode()[0]
    df['Clinical_Concern_Num'] = df['Clinical_Concern_Num'].fillna(mode_value_concern).astype(int)

    # NEWS2 Score Categorization
    def categorize_news2(score):
        if pd.isna(score):
            return -1
        if score == 0:
            return 0
        elif 1 <= score <= 4:
            return 1
        elif 5 <= score <= 6:
            return 2
        elif score >= 7:
            return 3
        else:
            return -1

    df['NEWS2_Category'] = df['NEWS2 score'].apply(categorize_news2)

    print("Data cleaning and preprocessing complete.")
    # display(df.head()) # Optional: display head
    # display(df.info()) # Optional: display info


--- Cleaning and Preprocessing Data ---
Removed 23 duplicate rows.
Data cleaning and preprocessing complete.


# Function for calculate, high=5

In [132]:
# --- Define Analysis Functions (returning data) ---
def combined_filter(df, care_home_id, interval, start_date, end_date):
    df_filtered = df[df['Care Home ID_cleaned'] == str(care_home_id)].copy()
    if start_date and end_date:
        df_filtered = df_filtered[(df_filtered['Date/Time'] >= start_date) & (df_filtered['Date/Time'] <= end_date)].copy()
    return df_filtered

def usage_volume_over_time(df, care_home_id, interval='M', start_date=None, end_date=None):
    df_filtered = combined_filter(df, care_home_id, interval, start_date, end_date)
    if df_filtered.empty:
        return pd.Series(dtype=int)
    if interval == 'D':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('D')).size()
    elif interval == 'W':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('W')).size()
    elif interval == 'M':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M')).size()
    elif interval == 'Q':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Q')).size()
    elif interval == 'Y':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Y')).size()
    else:
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M')).size()
    return grouped

def usage_per_bed(df, care_home_id, interval='M', start_date=None, end_date=None):
    usage_volume = usage_volume_over_time(df, care_home_id, interval, start_date, end_date)
    if usage_volume.empty:
        return pd.Series(dtype=float)
    df_filtered = combined_filter(df, care_home_id, interval, start_date, end_date)
    if df_filtered.empty:
        return pd.Series(dtype=float)
    if interval == 'D':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('D'))['No of Beds'].mean()
    elif interval == 'W':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('W'))['No of Beds'].mean()
    elif interval == 'M':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))['No of Beds'].mean()
    elif interval == 'Q':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Q'))['No of Beds'].mean()
    elif interval == 'Y':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Y'))['No of Beds'].mean()
    else:
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))['No of Beds'].mean()
    usage_per_bed = usage_volume / beds
    return usage_per_bed.replace([float('inf'), -float('inf')], 0).fillna(0)
def observation_interval_quarter_avg(df, care_home_id, start_date=None, end_date=None):
    """
    Calculates the average time interval (in days) between successive *observation days*
    within each quarter for a given care home, considering intervals only within the quarter
    and the first observation in a quarter has a 0 interval.
    Includes quarters with no data, showing 0 interval.

    Args:
        df (pd.DataFrame): The input DataFrame.
        care_home_id (str or int): The ID of the care home.
        start_date (str or datetime, optional): The start date for filtering.
        end_date (str or datetime, optional): The end date for filtering.

    Returns:
        pd.Series: A Series with all relevant quarters as the index and average intervals as values (0 for quarters with no data).
                   Returns an empty Series if no overall time range can be determined.
    """
    # Combined filtering using the cleaned ID
    df_filtered = df[df['Care Home ID_cleaned'] == str(care_home_id)].copy()

    if start_date and end_date:
        df_filtered = df_filtered[(df_filtered['Date/Time'] >= pd.to_datetime(start_date)) & (df_filtered['Date/Time'] <= pd.to_datetime(end_date))].copy()
    elif start_date:
         df_filtered = df_filtered[df_filtered['Date/Time'] >= pd.to_datetime(start_date)].copy()
    elif end_date:
         df_filtered = df_filtered[df_filtered['Date/Time'] <= pd.to_datetime(end_date)].copy()


    # Determine the overall time range to consider for quarters
    min_date = None
    max_date = None

    if start_date:
        min_date = pd.to_datetime(start_date)
    elif not df.empty:
        min_date = df['Date/Time'].min()

    if end_date:
        max_date = pd.to_datetime(end_date)
    elif not df.empty:
        max_date = df['Date/Time'].max()

    if min_date is None or max_date is None:
         print("Could not determine a valid date range for quarters.")
         return pd.Series(dtype='float64')

    all_quarters = pd.period_range(start=min_date.to_period('Q'), end=max_date.to_period('Q'), freq='Q')


    quarterly_avg_intervals = {}

    # Iterate through each relevant quarter
    for quarter in all_quarters:
        # Get unique observation dates specifically within this quarter
        quarter_start_date = quarter.start_time
        quarter_end_date = quarter.end_time

        unique_dates_in_quarter = df_filtered[(df_filtered['Date/Time'] >= quarter_start_date) &
                                              (df_filtered['Date/Time'] <= quarter_end_date)]['Date/Time'].dt.date.unique()

        unique_dates_in_quarter_sorted = pd.Series(pd.to_datetime(unique_dates_in_quarter)).sort_values()

        observation_day_count = len(unique_dates_in_quarter_sorted)
        total_intervals_sum = 0

        if observation_day_count > 0:
            # Add the initial 0 interval for the first observation day in the quarter
            total_intervals_sum += 0

            # Calculate intervals between successive observation days within the quarter
            if observation_day_count >= 2:
                date_differences_in_quarter = unique_dates_in_quarter_sorted.diff().dt.days.dropna()
                total_intervals_sum += date_differences_in_quarter.sum()

            # Calculate average for this quarter
            # The divisor is the number of observation days
            quarterly_avg_intervals[quarter] = total_intervals_sum / observation_day_count
        else:
            # If no observation days in the quarter, average interval is 0
             quarterly_avg_intervals[quarter] = 0.0


    # Convert the dictionary to a pandas Series, with all_quarters as index
    result = pd.Series(quarterly_avg_intervals).reindex(all_quarters, fill_value=0.0)

    return result

# Fixing the AttributeError: 'SeriesGroupBy' object has no attribute 'dt'
# Th issue arises because the `.dt` accessor is being used on a grouped Series object.
# To fix this, we need to ensure that the operation is applied before grouping or adjust the logic accordingly.

# Updated function: monthly_coverage_percentage
def monthly_coverage_percentage(df, care_home_id, start_date=None, end_date=None):
    """
    Calculates the monthly coverage percentage for a given care home,
    defined as (Number of days with at least one observation / 30) * 100 %.
    Includes months with no data, showing 0 coverage.

    Args:
        df (pd.DataFrame): The input DataFrame.
        care_home_id (str or int): The ID of the care home.
        start_date (str or datetime, optional): The start date for filtering.
        end_date (str or datetime, optional): The end date for filtering.

    Returns:
        pd.Series: A Series with all relevant months as the index and coverage percentages as values (0 for months with no data).
                   Returns an empty Series if no overall time range can be determined.
    """
    # Use the combined_filter function as in your original code
    df_filtered = combined_filter(df, care_home_id, interval=None, start_date=start_date, end_date=end_date)

    # If filtered data is empty, we still need to determine the relevant months
    if df_filtered.empty:
        # Determine the overall time range based on original df or specified dates
        min_date = pd.to_datetime(start_date) if start_date else (df['Date/Time'].min() if not df.empty else None)
        max_date = pd.to_datetime(end_date) if end_date else (df['Date/Time'].max() if not df.empty else None)

        if min_date is None or max_date is None:
             print("Could not determine a valid date range for months.")
             return pd.Series(dtype='float64')

        all_months = pd.period_range(start=min_date.to_period('M'), end=max_date.to_period('M'), freq='M')
        # For months with no data, the coverage is 0
        result = pd.Series(0.0, index=all_months)
        return result

    # Extract the date part before grouping
    df_filtered['Date'] = df_filtered['Date/Time'].dt.date

    # Group filtered data by month and count unique days with observations
    monthly_observations = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))['Date'].nunique()

    # Get the full range of months relevant to the filtered data or specified dates
    min_date_range = pd.to_datetime(start_date) if start_date else df_filtered['Date/Time'].min()
    max_date_range = pd.to_datetime(end_date) if end_date else df_filtered['Date/Time'].max()
    all_months = pd.period_range(start=min_date_range.to_period('M'), end=max_date_range.to_period('M'), freq='M')

    # Create a Series with all relevant months as index
    # Reindex monthly_observations to include all months in the range, filling missing with 0
    monthly_observations_all_months = monthly_observations.reindex(all_months, fill_value=0)

    # Calculate monthly coverage percentage with a fixed denominator of 30
    coverage_percentage = (monthly_observations_all_months / 30) * 100

    return coverage_percentage.fillna(0) # Fill potential NaN if any unexpected cases arise

def news2_score_category_counts(df, care_home_id, interval='M', start_date=None, end_date=None):
        df_filtered = combined_filter(df, care_home_id, interval, start_date, end_date)
        if df_filtered.empty:
            return pd.DataFrame()
        if interval == 'M':
             grouped = df_filtered.groupby([df_filtered['Date/Time'].dt.to_period('M'), 'NEWS2_Category']).size().unstack(fill_value=0)
        else:
            grouped = df_filtered.groupby([df_filtered['Date/Time'].dt.to_period('M'), 'NEWS2_Category']).size().unstack(fill_value=0)
        grouped.columns = [f'NEWS2 Category {col}' for col in grouped.columns]
        return grouped


def high_news2_score_proportion(df, care_home_id, interval='M', start_date=None, end_date=None):
        df_filtered = combined_filter(df, care_home_id, interval, start_date, end_date)
        if df_filtered.empty:
            return pd.DataFrame()
        df_filtered['High_NEWS2_5'] = (df_filtered['NEWS2 score'] >= 5).astype(int)
        df_filtered['High_NEWS2_7'] = (df_filtered['NEWS2 score'] >= 7).astype(int)
        if interval == 'M':
            grouped_sum = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))[['High_NEWS2_5', 'High_NEWS2_7']].sum()
            total_observations = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M')).size()
        else:
             grouped_sum = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))[['High_NEWS2_5', 'High_NEWS2_7']].sum()
             total_observations = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M')).size()

        proportion_5 = (grouped_sum['High_NEWS2_5'] / total_observations) * 100
        proportion_7 = (grouped_sum['High_NEWS2_7'] / total_observations) * 100
        result = pd.DataFrame({'NEWS2 >= 5 (%)': proportion_5, 'NEWS2 >= 7 (%)': proportion_7})
        return result.fillna(0)


def clinical_concern_proportion(df, care_home_id, start_date=None, end_date=None):
        df_filtered = combined_filter(df, care_home_id, interval=None, start_date=start_date, end_date=end_date)
        if df_filtered.empty:
            return 0.0
        concern_count = df_filtered['Clinical_Concern_Num'].sum()
        total_count = len(df_filtered)
        if total_count == 0:
            return 0.0
        proportion = (concern_count / total_count) * 100
        return proportion

def staff_judgement_accuracy(df, care_home_id, interval='M', start_date=None, end_date=None):
        df_filtered = combined_filter(df, care_home_id, interval, start_date, end_date)
        if df_filtered.empty:
            return pd.Series(dtype='float64')
        df_concern_yes = df_filtered[df_filtered['Clinical_Concern_Num'] == 1].copy()
        if df_concern_yes.empty:
            return pd.Series(dtype='float64')
        df_concern_yes['Accurate_Judgement'] = (df_concern_yes['NEWS2 score'] >= 5).astype(int)
        if interval == 'M':
            grouped_accurate = df_concern_yes.groupby(df_concern_yes['Date/Time'].dt.to_period('M'))['Accurate_Judgement'].sum()
            grouped_total_concern_yes = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))['Clinical_Concern_Num'].sum()
        else:
             grouped_accurate = df_concern_yes.groupby(df_concern_yes['Date/Time'].dt.to_period('M'))['Accurate_Judgement'].sum()
             grouped_total_concern_yes = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))['Clinical_Concern_Num'].sum()

        accuracy = (grouped_accurate / grouped_total_concern_yes.reindex(grouped_accurate.index, fill_value=0)) * 100
        return accuracy.replace([float('inf'), -float('inf')], 0).fillna(0)



In [168]:
df

Unnamed: 0,Date/Time,Care Home ID,Care Home Name,Type,Area,Phase,Postal address,Post Code,GP,GP Practice,...,Consciousness,Consciouness New,Scale 2 in use?,Care Home ID_cleaned,year,month,quarter,week,Clinical_Concern_Num,NEWS2_Category
0,2022-11-13 10:57:00,1031.0,Harvey House,Residential,LC,1,"Church lane, Barwell, leicester",LE9 8DG,0,0,...,Alert,0,0,10310,2022,11,4,45,1,2
1,2022-11-13 11:15:00,1031.0,Harvey House,Residential,LC,1,"Church lane, Barwell, leicester",LE9 8DG,0,0,...,Alert,0,0,10310,2022,11,4,45,0,1
2,2022-11-13 11:33:00,1031.0,Harvey House,Residential,LC,1,"Church lane, Barwell, leicester",LE9 8DG,0,0,...,Alert,0,0,10310,2022,11,4,45,1,2
3,2022-11-13 12:01:00,1031.0,Harvey House,Residential,LC,1,"Church lane, Barwell, leicester",LE9 8DG,0,0,...,Alert,0,0,10310,2022,11,4,45,1,3
4,2022-11-13 12:16:00,1031.0,Harvey House,Residential,LC,1,"Church lane, Barwell, leicester",LE9 8DG,0,0,...,Alert,0,0,10310,2022,11,4,45,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1968,2025-03-26 09:56:00,1023.0,Goodwood orchard,Residential,LC,1,"304, Uppingham Road, Leicester",LE5 2BE,0,0,...,Alert,0,0,10230,2025,3,1,13,0,1
1969,2025-03-26 13:20:00,1069.0,0,MH,LCC,2,"Holmwood Meadows, 15a Holmwood Drive, Leicester",LE3 9LG,Oakmeadow Surgery,Oakmeadow Surgery,...,Alert,0,0,10690,2025,3,1,13,1,1
1970,2025-03-26 16:11:00,1069.0,0,MH,LCC,2,"Holmwood Meadows, 15a Holmwood Drive, Leicester",LE3 9LG,Oakmeadow Surgery,Oakmeadow Surgery,...,Alert,0,0,10690,2025,3,1,13,1,3
1971,2025-03-26 19:07:00,1069.0,0,MH,LCC,2,"Holmwood Meadows, 15a Holmwood Drive, Leicester",LE3 9LG,Oakmeadow Surgery,Oakmeadow Surgery,...,Alert,0,0,10690,2025,3,1,13,0,1


# Care home ID

In [135]:
# Fixing the issue where 'df' might be None

if df is not None:
    care_home_to_analyze = '10230'
    print(f"\n--- Analyzing Data for Care Home ID: {care_home_to_analyze} ---")

    # Use the cleaned column for filtering
    df_single_care_home_1023 = df[df['Care Home ID_cleaned'] == care_home_to_analyze].copy()

    if df_single_care_home_1023.empty:
        print(f"No data found for Care Home ID: {care_home_to_analyze} after cleaning.")
    else:
        print(f"Successfully filtered data for Care Home ID: {care_home_to_analyze}. Generating plots...")

        # Create a directory to save the plots
        output_dir = f'care_home_{care_home_to_analyze}_plots'
        os.makedirs(output_dir, exist_ok=True)
        print(f"Saving plots to directory: {output_dir}")
else:
    print("The DataFrame 'df' is None. Please ensure the data is loaded correctly before proceeding.")


--- Analyzing Data for Care Home ID: 10230 ---
Successfully filtered data for Care Home ID: 10230. Generating plots...
Saving plots to directory: care_home_10230_plots


# Plot

In [165]:

print("Generating Usage Volume Over Time chart...")
usage_vol_single_1023 = usage_volume_over_time(df_single_care_home_1023, care_home_to_analyze, interval='M')
if not usage_vol_single_1023.empty:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=usage_vol_single_1023.index.to_timestamp(), y=usage_vol_single_1023.values,
                             mode='lines+markers', name='Observation Count'))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} Usage Volume Over Time (Monthly)',
                      xaxis_title='Time', yaxis_title='Usage Volume Over Time',
                      hovermode='x unified') # Enable unified hovermode for cursor
    pio.write_html(fig, f'{output_dir}/usage_volume_over_time.html')
    print(f"Saved: {output_dir}/usage_volume_over_time.html")
else:
    print(f"No data for Usage Volume Over Time for {care_home_to_analyze}. Skipping plot.")

# 2. usage_per_bed (Monthly)
print("\nGenerating Usage Per Bed chart...")
usage_bed_single_1023 = usage_per_bed(df_single_care_home_1023, care_home_to_analyze, interval='M')
if not usage_bed_single_1023.empty:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=usage_bed_single_1023.index.to_timestamp(), y=usage_bed_single_1023.values,
                             mode='lines+markers', name='Usage Per Bed'))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} Usage Per Bed (Monthly)',
                      xaxis_title='Time', yaxis_title='Usage Per Bed',
                      hovermode='x unified')
    pio.write_html(fig, f'{output_dir}/usage_per_bed.html')
    print(f"Saved: {output_dir}/usage_per_bed.html")
else:
    print(f"No data for Usage Per Bed for {care_home_to_analyze}. Skipping plot.")

# 3. observation_interval_quarter_avg
print("\nGenerating Observation Interval Quarter Avg chart...")
obs_interval_single_1023 = observation_interval_quarter_avg(df_single_care_home_1023, care_home_to_analyze)
if not obs_interval_single_1023.empty:
    fig = go.Figure()
    fig.add_trace(go.Bar(x=[str(q) for q in obs_interval_single_1023.index], y=obs_interval_single_1023.values,
                         name='Average Interval'))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} Average Observation Interval per Quarter',
                      xaxis_title='Quater', yaxis_title='Average Observation Interval',
                      hovermode='x unified')
    pio.write_html(fig, f'{output_dir}/observation_interval_quarter_avg.html')
    print(f"Saved: {output_dir}/observation_interval_quarter_avg.html")
else:
    print(f"No data for Observation Interval Quarter Avg for {care_home_to_analyze}. Skipping plot.")

# 4. monthly_coverage_percentage
print("\nGenerating Monthly Coverage Percentage chart...")
coverage_single_1023 = monthly_coverage_percentage(df_single_care_home_1023, care_home_to_analyze)
if not coverage_single_1023.empty:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=coverage_single_1023.index.to_timestamp(), y=coverage_single_1023.values,
                             mode='lines+markers', name='Coverage (%)'))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} Monthly Coverage Percentage',
                      xaxis_title='Month', yaxis_title='Monthly Coverage Percentage(%)',
                      hovermode='x unified')
    pio.write_html(fig, f'{output_dir}/monthly_coverage_percentage.html')
    print(f"Saved: {output_dir}/monthly_coverage_percentage.html")
else:
    print(f"No data for Monthly Coverage Percentage for {care_home_to_analyze}. Skipping plot.")

# Clinical Insight Value

# 1. news2_score_category_counts (Monthly)
print("\nGenerating NEWS2 Score Category Counts chart...")
news2_counts_single_1023 = news2_score_category_counts(df_single_care_home_1023, care_home_to_analyze, interval='M')
if not news2_counts_single_1023.empty:
    fig = go.Figure()
    for col in news2_counts_single_1023.columns:
        fig.add_trace(go.Scatter(x=news2_counts_single_1023.index.to_timestamp(), y=news2_counts_single_1023[col],
                                 mode='lines+markers', name=col))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} NEWS2 Score Category Counts (Monthly)',
                      xaxis_title='Time', yaxis_title='NEWS2 Score Category Counts',
                      hovermode='x unified')
    pio.write_html(fig, f'{output_dir}/news2_score_category_counts.html')
    print(f"Saved: {output_dir}/news2_score_category_counts.html")
else:
    print(f"No data for NEWS2 Score Category Counts for {care_home_to_analyze}. Skipping plot.")

# 2. high_news2_score_proportion (Monthly)
print("\nGenerating High NEWS2 Score Proportion chart...")
high_news2_prop_single_1023 = high_news2_score_proportion(df_single_care_home_1023, care_home_to_analyze, interval='M')
if not high_news2_prop_single_1023.empty:
    fig = go.Figure()
    for col in high_news2_prop_single_1023.columns:
        fig.add_trace(go.Scatter(x=high_news2_prop_single_1023.index.to_timestamp(), y=high_news2_prop_single_1023[col],
                                 mode='lines+markers', name=col))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} High NEWS2 Score Proportion (Monthly)',
                      xaxis_title='Time', yaxis_title='High NEWS2 Score Proportion (%)',
                      hovermode='x unified')
    pio.write_html(fig, f'{output_dir}/high_news2_score_proportion.html')
    print(f"Saved: {output_dir}/high_news2_score_proportion.html")
else:
    print(f"No data for High NEWS2 Score Proportion for {care_home_to_analyze}. Skipping plot.")

# 3. clinical_concern_proportion (Single Number)
print("\nCalculating Clinical Concern Proportion...")
clinical_concern_prop_single_1023 = clinical_concern_proportion(df_single_care_home_1023, care_home_to_analyze)
print(f"Care Home {care_home_to_analyze} Clinical Concern Proportion: {clinical_concern_prop_single_1023:.2f}%")

# Although this is a single number, we could represent it visually in a Plotly figure if needed,
# but the requirement is a single number output, which is already done.

# 4. staff_judgement_accuracy (Monthly)
print("\nGenerating Staff Judgement Accuracy chart...")
staff_accuracy_single_1023 = staff_judgement_accuracy(df_single_care_home_1023, care_home_to_analyze, interval='M')
if not staff_accuracy_single_1023.empty:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=staff_accuracy_single_1023.index.to_timestamp(), y=staff_accuracy_single_1023.values,
                             mode='lines+markers', name='Accuracy (%)'))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} Staff Judgement Accuracy (Monthly)',
                      xaxis_title='Time', yaxis_title='Staff Judgement Accuracy (%)',
                      hovermode='x unified')
    pio.write_html(fig, f'{output_dir}/staff_judgement_accuracy.html')
    print(f"Saved: {output_dir}/staff_judgement_accuracy.html")
else:
    print(f"No data for Staff Judgement Accuracy for {care_home_to_analyze}. Skipping plot.")

print(f"\nAnalysis and plot generation for Care Home ID {care_home_to_analyze} complete.")

Generating Usage Volume Over Time chart...
Saved: care_home_10230_plots/usage_volume_over_time.html

Generating Usage Per Bed chart...
Saved: care_home_10230_plots/usage_per_bed.html

Generating Observation Interval Quarter Avg chart...
Saved: care_home_10230_plots/observation_interval_quarter_avg.html

Generating Monthly Coverage Percentage chart...
Saved: care_home_10230_plots/monthly_coverage_percentage.html

Generating NEWS2 Score Category Counts chart...
Saved: care_home_10230_plots/news2_score_category_counts.html

Generating High NEWS2 Score Proportion chart...
Saved: care_home_10230_plots/high_news2_score_proportion.html

Calculating Clinical Concern Proportion...
Care Home 10230 Clinical Concern Proportion: 72.88%

Generating Staff Judgement Accuracy chart...
Saved: care_home_10230_plots/staff_judgement_accuracy.html

Analysis and plot generation for Care Home ID 10230 complete.


# weekly

In [171]:
def usage_volume_over_time_w(df, care_home_id, interval='W', start_date=None, end_date=None):
    df_filtered = combined_filter(df, care_home_id, interval, start_date, end_date)
    if df_filtered.empty:
        return pd.Series(dtype=int)
    if interval == 'D':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('D')).size()
    elif interval == 'W':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('W')).size()
    elif interval == 'M':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M')).size()
    elif interval == 'Q':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Q')).size()
    elif interval == 'Y':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Y')).size()
    else:
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M')).size()
    return grouped

def usage_per_bed_w(df, care_home_id, interval='W', start_date=None, end_date=None):
    usage_volume = usage_volume_over_time(df, care_home_id, interval, start_date, end_date)
    if usage_volume.empty:
        return pd.Series(dtype=float)
    df_filtered = combined_filter(df, care_home_id, interval, start_date, end_date)
    if df_filtered.empty:
        return pd.Series(dtype=float)
    if interval == 'D':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('D'))['No of Beds'].mean()
    elif interval == 'W':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('W'))['No of Beds'].mean()
    elif interval == 'M':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))['No of Beds'].mean()
    elif interval == 'Q':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Q'))['No of Beds'].mean()
    elif interval == 'Y':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Y'))['No of Beds'].mean()
    else:
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))['No of Beds'].mean()
    usage_per_bed = usage_volume / beds
    return usage_per_bed.replace([float('inf'), -float('inf')], 0).fillna(0)

In [177]:
print("Generating Usage Volume Over Time chart...")
usage_vol_single_1023 = usage_volume_over_time_w(df_single_care_home_1023, care_home_to_analyze, interval='W')
if not usage_vol_single_1023.empty:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=usage_vol_single_1023.index.to_timestamp(), y=usage_vol_single_1023.values,
                             mode='lines+markers', name='Observation Count'))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} Usage Volume Over Time (Weekly)',
                      xaxis_title='Time', yaxis_title='Usage Volume Over Time',
                      hovermode='x unified') # Enable unified hovermode for cursor
    pio.write_html(fig, f'{output_dir}/usage_volume_over_time_w.html')
    print(f"Saved: {output_dir}/usage_volume_over_time_w.html")
else:
    print(f"No data for Usage Volume Over Time for {care_home_to_analyze}. Skipping plot.")

# 2. usage_per_bed (Monthly)
print("\nGenerating Usage Per Bed chart...")
usage_bed_single_1023 = usage_per_bed_w(df_single_care_home_1023, care_home_to_analyze, interval='W')
if not usage_bed_single_1023.empty:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=usage_bed_single_1023.index.to_timestamp(), y=usage_bed_single_1023.values,
                             mode='lines+markers', name='Usage Per Bed'))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} Usage Per Bed (Weekly)',
                      xaxis_title='Time', yaxis_title='Usage Per Bed',
                      hovermode='x unified')
    pio.write_html(fig, f'{output_dir}/usage_per_bed_w.html')
    print(f"Saved: {output_dir}/usage_per_bed_w.html")
else:
    print(f"No data for Usage Per Bed for {care_home_to_analyze}. Skipping plot.")

Generating Usage Volume Over Time chart...
Saved: care_home_10230_plots/usage_volume_over_time_w.html

Generating Usage Per Bed chart...
Saved: care_home_10230_plots/usage_per_bed_w.html


# Dayly

In [180]:
def usage_volume_over_time_d(df, care_home_id, interval='D', start_date=None, end_date=None):
    df_filtered = combined_filter(df, care_home_id, interval, start_date, end_date)
    if df_filtered.empty:
        return pd.Series(dtype=int)
    if interval == 'D':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('D')).size()
    elif interval == 'W':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('W')).size()
    elif interval == 'M':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M')).size()
    elif interval == 'Q':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Q')).size()
    elif interval == 'Y':
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Y')).size()
    else:
        grouped = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M')).size()
    return grouped

def usage_per_bed_d(df, care_home_id, interval='D', start_date=None, end_date=None):
    usage_volume = usage_volume_over_time(df, care_home_id, interval, start_date, end_date)
    if usage_volume.empty:
        return pd.Series(dtype=float)
    df_filtered = combined_filter(df, care_home_id, interval, start_date, end_date)
    if df_filtered.empty:
        return pd.Series(dtype=float)
    if interval == 'D':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('D'))['No of Beds'].mean()
    elif interval == 'W':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('W'))['No of Beds'].mean()
    elif interval == 'M':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))['No of Beds'].mean()
    elif interval == 'Q':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Q'))['No of Beds'].mean()
    elif interval == 'Y':
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('Y'))['No of Beds'].mean()
    else:
        beds = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))['No of Beds'].mean()
    usage_per_bed = usage_volume / beds
    return usage_per_bed.replace([float('inf'), -float('inf')], 0).fillna(0)

In [183]:
print("Generating Usage Volume Over Time chart...")
usage_vol_single_1023 = usage_volume_over_time_d(df_single_care_home_1023, care_home_to_analyze, interval='D')
if not usage_vol_single_1023.empty:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=usage_vol_single_1023.index.to_timestamp(), y=usage_vol_single_1023.values,
                             mode='lines+markers', name='Observation Count'))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} Usage Volume Over Time (Daily)',
                      xaxis_title='Time', yaxis_title='Usage Volume Over Time',
                      hovermode='x unified') # Enable unified hovermode for cursor
    pio.write_html(fig, f'{output_dir}/usage_volume_over_time_d.html')
    print(f"Saved: {output_dir}/usage_volume_over_time_d.html")
else:
    print(f"No data for Usage Volume Over Time for {care_home_to_analyze}. Skipping plot.")

# 2. usage_per_bed (Monthly)
print("\nGenerating Usage Per Bed chart...")
usage_bed_single_1023 = usage_per_bed_d(df_single_care_home_1023, care_home_to_analyze, interval='D')
if not usage_bed_single_1023.empty:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=usage_bed_single_1023.index.to_timestamp(), y=usage_bed_single_1023.values,
                             mode='lines+markers', name='Usage Per Bed'))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} Usage Per Bed (Daily)',
                      xaxis_title='Time', yaxis_title='Usage Per Bed',
                      hovermode='x unified')
    pio.write_html(fig, f'{output_dir}/usage_per_bed_d.html')
    print(f"Saved: {output_dir}/usage_per_bed_d.html")
else:
    print(f"No data for Usage Per Bed for {care_home_to_analyze}. Skipping plot.")

Generating Usage Volume Over Time chart...
Saved: care_home_10230_plots/usage_volume_over_time_d.html

Generating Usage Per Bed chart...
Saved: care_home_10230_plots/usage_per_bed_d.html


# High=7

In [186]:
def staff_judgement_accuracy_7(df, care_home_id, interval='M', start_date=None, end_date=None):
        df_filtered = combined_filter(df, care_home_id, interval, start_date, end_date)
        if df_filtered.empty:
            return pd.Series(dtype='float64')
        df_concern_yes = df_filtered[df_filtered['Clinical_Concern_Num'] == 1].copy()
        if df_concern_yes.empty:
            return pd.Series(dtype='float64')
        df_concern_yes['Accurate_Judgement'] = (df_concern_yes['NEWS2 score'] >= 7).astype(int)
        if interval == 'M':
            grouped_accurate = df_concern_yes.groupby(df_concern_yes['Date/Time'].dt.to_period('M'))['Accurate_Judgement'].sum()
            grouped_total_concern_yes = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))['Clinical_Concern_Num'].sum()
        else:
             grouped_accurate = df_concern_yes.groupby(df_concern_yes['Date/Time'].dt.to_period('M'))['Accurate_Judgement'].sum()
             grouped_total_concern_yes = df_filtered.groupby(df_filtered['Date/Time'].dt.to_period('M'))['Clinical_Concern_Num'].sum()

        accuracy = (grouped_accurate / grouped_total_concern_yes.reindex(grouped_accurate.index, fill_value=0)) * 100
        return accuracy.replace([float('inf'), -float('inf')], 0).fillna(0)

In [189]:
print("\nGenerating Staff Judgement Accuracy chart...")
staff_accuracy_single_1023 = staff_judgement_accuracy_7(df_single_care_home_1023, care_home_to_analyze, interval='M')
if not staff_accuracy_single_1023.empty:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=staff_accuracy_single_1023.index.to_timestamp(), y=staff_accuracy_single_1023.values,
                             mode='lines+markers', name='Accuracy (%)'))
    fig.update_layout(title=f'Care Home {care_home_to_analyze} Staff Judgement Accuracy (Monthly)',
                      xaxis_title='Time', yaxis_title='Staff Judgement Accuracy (%)',
                      hovermode='x unified')
    pio.write_html(fig, f'{output_dir}/staff_judgement_accuracy_7.html')
    print(f"Saved: {output_dir}/staff_judgement_accuracy_7.html")
else:
    print(f"No data for Staff Judgement Accuracy for {care_home_to_analyze}. Skipping plot.")

print(f"\nAnalysis and plot generation for Care Home ID {care_home_to_analyze} complete.")


Generating Staff Judgement Accuracy chart...
Saved: care_home_10230_plots/staff_judgement_accuracy_7.html

Analysis and plot generation for Care Home ID 10230 complete.


# Physiological parameters, high=5

In [192]:
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import os

In [195]:
df

Unnamed: 0,Date/Time,Care Home ID,Care Home Name,Type,Area,Phase,Postal address,Post Code,GP,GP Practice,...,Consciousness,Consciouness New,Scale 2 in use?,Care Home ID_cleaned,year,month,quarter,week,Clinical_Concern_Num,NEWS2_Category
0,2022-11-13 10:57:00,1031.0,Harvey House,Residential,LC,1,"Church lane, Barwell, leicester",LE9 8DG,0,0,...,Alert,0,0,10310,2022,11,4,45,1,2
1,2022-11-13 11:15:00,1031.0,Harvey House,Residential,LC,1,"Church lane, Barwell, leicester",LE9 8DG,0,0,...,Alert,0,0,10310,2022,11,4,45,0,1
2,2022-11-13 11:33:00,1031.0,Harvey House,Residential,LC,1,"Church lane, Barwell, leicester",LE9 8DG,0,0,...,Alert,0,0,10310,2022,11,4,45,1,2
3,2022-11-13 12:01:00,1031.0,Harvey House,Residential,LC,1,"Church lane, Barwell, leicester",LE9 8DG,0,0,...,Alert,0,0,10310,2022,11,4,45,1,3
4,2022-11-13 12:16:00,1031.0,Harvey House,Residential,LC,1,"Church lane, Barwell, leicester",LE9 8DG,0,0,...,Alert,0,0,10310,2022,11,4,45,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1968,2025-03-26 09:56:00,1023.0,Goodwood orchard,Residential,LC,1,"304, Uppingham Road, Leicester",LE5 2BE,0,0,...,Alert,0,0,10230,2025,3,1,13,0,1
1969,2025-03-26 13:20:00,1069.0,0,MH,LCC,2,"Holmwood Meadows, 15a Holmwood Drive, Leicester",LE3 9LG,Oakmeadow Surgery,Oakmeadow Surgery,...,Alert,0,0,10690,2025,3,1,13,1,1
1970,2025-03-26 16:11:00,1069.0,0,MH,LCC,2,"Holmwood Meadows, 15a Holmwood Drive, Leicester",LE3 9LG,Oakmeadow Surgery,Oakmeadow Surgery,...,Alert,0,0,10690,2025,3,1,13,1,3
1971,2025-03-26 19:07:00,1069.0,0,MH,LCC,2,"Holmwood Meadows, 15a Holmwood Drive, Leicester",LE3 9LG,Oakmeadow Surgery,Oakmeadow Surgery,...,Alert,0,0,10690,2025,3,1,13,0,1


In [198]:
df_single_care_home_1023

Unnamed: 0,Date/Time,Care Home ID,Care Home Name,Type,Area,Phase,Postal address,Post Code,GP,GP Practice,...,Consciousness,Consciouness New,Scale 2 in use?,Care Home ID_cleaned,year,month,quarter,week,Clinical_Concern_Num,NEWS2_Category
9,2022-11-22 11:52:00,1023.0,Goodwood orchard,Residential,LC,1,"304, Uppingham Road, Leicester",LE5 2BE,0,0,...,Alert,0,0,10230,2022,11,4,47,1,0
10,2022-11-22 12:02:00,1023.0,Goodwood orchard,Residential,LC,1,"304, Uppingham Road, Leicester",LE5 2BE,0,0,...,Alert,0,0,10230,2022,11,4,47,1,1
11,2022-11-22 12:07:00,1023.0,Goodwood orchard,Residential,LC,1,"304, Uppingham Road, Leicester",LE5 2BE,0,0,...,Alert,0,0,10230,2022,11,4,47,1,0
12,2022-11-22 12:13:00,1023.0,Goodwood orchard,Residential,LC,1,"304, Uppingham Road, Leicester",LE5 2BE,0,0,...,Alert,0,0,10230,2022,11,4,47,1,0
13,2022-11-22 12:18:00,1023.0,Goodwood orchard,Residential,LC,1,"304, Uppingham Road, Leicester",LE5 2BE,0,0,...,Alert,0,0,10230,2022,11,4,47,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1964,2025-03-26 09:40:00,1023.0,Goodwood orchard,Residential,LC,1,"304, Uppingham Road, Leicester",LE5 2BE,0,0,...,Alert,0,0,10230,2025,3,1,13,0,1
1965,2025-03-26 09:43:00,1023.0,Goodwood orchard,Residential,LC,1,"304, Uppingham Road, Leicester",LE5 2BE,0,0,...,Alert,0,0,10230,2025,3,1,13,0,1
1966,2025-03-26 09:47:00,1023.0,Goodwood orchard,Residential,LC,1,"304, Uppingham Road, Leicester",LE5 2BE,0,0,...,Alert,0,0,10230,2025,3,1,13,0,1
1967,2025-03-26 09:51:00,1023.0,Goodwood orchard,Residential,LC,1,"304, Uppingham Road, Leicester",LE5 2BE,0,0,...,Alert,0,0,10230,2025,3,1,13,0,0


In [201]:
def parameter_contribution_proportion(df, care_home_id, parameter_new_col, news2_threshold=5, interval='M', start_date=None, end_date=None):

    # Use the combined_filter function
    df_filtered = combined_filter(df, care_home_id, interval=None, start_date=start_date, end_date=end_date)

    # Determine the overall time range to consider for months
    min_date = None
    max_date = None

    if start_date:
        min_date = pd.to_datetime(start_date)
    elif not df.empty:
        min_date = df['Date/Time'].min()

    if end_date:
        max_date = pd.to_datetime(end_date)
    elif not df.empty:
        max_date = df['Date/Time'].max()

    if min_date is None or max_date is None:
         print("Could not determine a valid date range for months.")
         return pd.Series(dtype='float64')

    all_months = pd.period_range(start=min_date.to_period('M'), end=max_date.to_period('M'), freq='M')


    # Check if the parameter contribution column exists
    if parameter_new_col not in df_filtered.columns:
        print(f"Error: Contribution column '{parameter_new_col}' not found in DataFrame.")
        # Return a Series with 0% for all relevant months
        return pd.Series(0.0, index=all_months)


    # Filter for observations where total NEWS2 score is >= threshold
    df_high_news2 = df_filtered[df_filtered['NEWS2 score'] >= news2_threshold].copy()

    # If no high NEWS2 observations, the proportion is 0 for all months
    if df_high_news2.empty:
        print(f"No observations with NEWS2 score >= {news2_threshold} for Care Home ID: {care_home_id}.")
        # Return a Series with 0% for all relevant months
        return pd.Series(0.0, index=all_months)


    # Count observations where the parameter's contribution is non-zero, grouped by month
    # We need to group by month of the original high_news2 data to get total counts per month
    if interval == 'M':
        # Count occurrences where the contribution column is not 0 (assuming 0 means no contribution)
        contributing_counts = df_high_news2[df_high_news2[parameter_new_col] != 0].groupby(df_high_news2['Date/Time'].dt.to_period('M')).size()
        # Count total high NEWS2 observations per month
        total_high_news2_counts = df_high_news2.groupby(df_high_news2['Date/Time'].dt.to_period('M')).size()
    # Add other intervals if needed
    else:
         print(f"Interval '{interval}' not fully supported for parameter contribution proportion, using 'M'.")
         contributing_counts = df_high_news2[df_high_news2[parameter_new_col] != 0].groupby(df_high_news2['Date/Time'].dt.to_period('M')).size()
         total_high_news2_counts = df_high_news2.groupby(df_high_news2['Date/Time'].dt.to_period('M')).size()


    # Calculate the proportion (%)
    # Use reindex to align counts and handle months with 0 contributing observations
    # Use total_high_news2_counts.reindex(all_months, fill_value=0) as the base for proportions
    total_high_news2_counts_all_months = total_high_news2_counts.reindex(all_months, fill_value=0)
    contributing_counts_all_months = contributing_counts.reindex(all_months, fill_value=0)


    # Avoid division by zero by checking if total_high_news2_counts_all_months is not 0
    proportion = pd.Series(0.0, index=all_months, dtype='float64')
    non_zero_months = total_high_news2_counts_all_months > 0
    proportion[non_zero_months] = (contributing_counts_all_months[non_zero_months] / total_high_news2_counts_all_months[non_zero_months]) * 100


    return proportion.fillna(0) # Final fillna for safety

In [213]:
def plot_physiological_contribution_timeseries(analysis_results_dict, title, yaxis_title, output_dir, filename):
    """
    Plots time series of physiological parameter contribution proportions for multiple care homes using Plotly.

    Args:
        analysis_results_dict (dict): A dictionary where keys are Care Home IDs
                                      and values are pandas Series (time series of proportions).
        title (str): The title of the plot.
        yaxis_title (str): The title for the Y-axis.
        output_dir (str): The directory to save the HTML file.
        filename (str): The name of the HTML file (without extension).
    """
    if not analysis_results_dict:
        print(f"No data provided for plotting: {title}. Skipping plot.")
        return

    fig = go.Figure()

    for care_home_id, data_series in analysis_results_dict.items():
        if not data_series.empty:
            fig.add_trace(go.Scatter(x=data_series.index.to_timestamp(), y=data_series.values,
                                     mode='lines+markers', name=f'Care Home {care_home_id}'))

    fig.update_layout(title=title,
                      xaxis_title='TIME',
                      yaxis_title=yaxis_title,
                      hovermode='x unified')

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save the plot as an HTML file
    pio.write_html(fig, f'{output_dir}/{filename}.html')
    print(f"Saved plot: {output_dir}/{filename}.html")


care_home_to_analyze = '10230' # Replace with the correct cleaned ID if needed

# Determine output directory
output_dir = f'care_home_{care_home_to_analyze}_physiological_contribution_plots'
os.makedirs(output_dir, exist_ok=True)
print(f"\nSaving physiological contribution plots to directory: {output_dir}")

# List of contribution columns to analyze
contribution_cols = [
    'O2_New', 'Systolic_New', 'Pulse_New', 'Temperate_New',
    'Respiraties_New', 'O2 Delivery_New', 'Consciouness New'
]

print("\n--- Analyzing Physiological Parameter Contributions to High NEWS2 ---")

for col in contribution_cols:
    # Determine a user-friendly name for the parameter
    parameter_name = col.replace('_New', '').replace(' New', '').replace('_', ' ')

    print(f"\nAnalyzing {parameter_name} contribution...")
    # Calculate the proportion
    contribution_prop = parameter_contribution_proportion(df, care_home_to_analyze, col, news2_threshold=5, interval='M')

    if not contribution_prop.empty:
        # Plot the results
        plot_physiological_contribution_timeseries(
            {care_home_to_analyze: contribution_prop}, # Pass as a dictionary
            f'Care Home {care_home_to_analyze}: {parameter_name} Contributing to High NEWS2 (>=5)',
            'Proportion of High NEWS2 Obs (%)',
            output_dir,
            f'{parameter_name.lower().replace(" ", "_")}_high_news2_proportion' # Generate filename
        )
    else:
        print(f"No data or results for {parameter_name} contribution for {care_home_to_analyze}. Skipping plot.")


Saving physiological contribution plots to directory: care_home_10230_physiological_contribution_plots

--- Analyzing Physiological Parameter Contributions to High NEWS2 ---

Analyzing O2 contribution...
Saved plot: care_home_10230_physiological_contribution_plots/o2_high_news2_proportion.html

Analyzing Systolic contribution...
Saved plot: care_home_10230_physiological_contribution_plots/systolic_high_news2_proportion.html

Analyzing Pulse contribution...
Saved plot: care_home_10230_physiological_contribution_plots/pulse_high_news2_proportion.html

Analyzing Temperate contribution...
Saved plot: care_home_10230_physiological_contribution_plots/temperate_high_news2_proportion.html

Analyzing Respiraties contribution...
Saved plot: care_home_10230_physiological_contribution_plots/respiraties_high_news2_proportion.html

Analyzing O2 Delivery contribution...
Saved plot: care_home_10230_physiological_contribution_plots/o2_delivery_high_news2_proportion.html

Analyzing Consciouness contrib

# Physiological parameters, high=7

In [216]:
def parameter_contribution_proportion_7(df, care_home_id, parameter_new_col, news2_threshold=7, interval='M', start_date=None, end_date=None):

    # Use the combined_filter function
    df_filtered = combined_filter(df, care_home_id, interval=None, start_date=start_date, end_date=end_date)

    # Determine the overall time range to consider for months
    min_date = None
    max_date = None

    if start_date:
        min_date = pd.to_datetime(start_date)
    elif not df.empty:
        min_date = df['Date/Time'].min()

    if end_date:
        max_date = pd.to_datetime(end_date)
    elif not df.empty:
        max_date = df['Date/Time'].max()

    if min_date is None or max_date is None:
         print("Could not determine a valid date range for months.")
         return pd.Series(dtype='float64')

    all_months = pd.period_range(start=min_date.to_period('M'), end=max_date.to_period('M'), freq='M')


    # Check if the parameter contribution column exists
    if parameter_new_col not in df_filtered.columns:
        print(f"Error: Contribution column '{parameter_new_col}' not found in DataFrame.")
        # Return a Series with 0% for all relevant months
        return pd.Series(0.0, index=all_months)


    # Filter for observations where total NEWS2 score is >= threshold
    df_high_news2 = df_filtered[df_filtered['NEWS2 score'] >= news2_threshold].copy()

    # If no high NEWS2 observations, the proportion is 0 for all months
    if df_high_news2.empty:
        print(f"No observations with NEWS2 score >= {news2_threshold} for Care Home ID: {care_home_id}.")
        # Return a Series with 0% for all relevant months
        return pd.Series(0.0, index=all_months)


    # Count observations where the parameter's contribution is non-zero, grouped by month
    # We need to group by month of the original high_news2 data to get total counts per month
    if interval == 'M':
        # Count occurrences where the contribution column is not 0 (assuming 0 means no contribution)
        contributing_counts = df_high_news2[df_high_news2[parameter_new_col] != 0].groupby(df_high_news2['Date/Time'].dt.to_period('M')).size()
        # Count total high NEWS2 observations per month
        total_high_news2_counts = df_high_news2.groupby(df_high_news2['Date/Time'].dt.to_period('M')).size()
    # Add other intervals if needed
    else:
         print(f"Interval '{interval}' not fully supported for parameter contribution proportion, using 'M'.")
         contributing_counts = df_high_news2[df_high_news2[parameter_new_col] != 0].groupby(df_high_news2['Date/Time'].dt.to_period('M')).size()
         total_high_news2_counts = df_high_news2.groupby(df_high_news2['Date/Time'].dt.to_period('M')).size()


    # Calculate the proportion (%)
    # Use reindex to align counts and handle months with 0 contributing observations
    # Use total_high_news2_counts.reindex(all_months, fill_value=0) as the base for proportions
    total_high_news2_counts_all_months = total_high_news2_counts.reindex(all_months, fill_value=0)
    contributing_counts_all_months = contributing_counts.reindex(all_months, fill_value=0)


    # Avoid division by zero by checking if total_high_news2_counts_all_months is not 0
    proportion = pd.Series(0.0, index=all_months, dtype='float64')
    non_zero_months = total_high_news2_counts_all_months > 0
    proportion[non_zero_months] = (contributing_counts_all_months[non_zero_months] / total_high_news2_counts_all_months[non_zero_months]) * 100


    return proportion.fillna(0) # Final fillna for safety

In [219]:
def plot_physiological_contribution_timeseries(analysis_results_dict, title, yaxis_title, output_dir, filename):
    """
    Plots time series of physiological parameter contribution proportions for multiple care homes using Plotly.

    Args:
        analysis_results_dict (dict): A dictionary where keys are Care Home IDs
                                      and values are pandas Series (time series of proportions).
        title (str): The title of the plot.
        yaxis_title (str): The title for the Y-axis.
        output_dir (str): The directory to save the HTML file.
        filename (str): The name of the HTML file (without extension).
    """
    if not analysis_results_dict:
        print(f"No data provided for plotting: {title}. Skipping plot.")
        return

    fig = go.Figure()

    for care_home_id, data_series in analysis_results_dict.items():
        if not data_series.empty:
            fig.add_trace(go.Scatter(x=data_series.index.to_timestamp(), y=data_series.values,
                                     mode='lines+markers', name=f'Care Home {care_home_id}'))

    fig.update_layout(title=title,
                      xaxis_title='TIME',
                      yaxis_title=yaxis_title,
                      hovermode='x unified')

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save the plot as an HTML file
    pio.write_html(fig, f'{output_dir}/{filename}.html')
    print(f"Saved plot: {output_dir}/{filename}.html")


care_home_to_analyze = '10230' # Replace with the correct cleaned ID if needed

# Determine output directory
output_dir = f'care_home_{care_home_to_analyze}_physiological_contribution_plots'
os.makedirs(output_dir, exist_ok=True)
print(f"\nSaving physiological contribution plots to directory: {output_dir}")

# List of contribution columns to analyze
contribution_cols = [
    'O2_New', 'Systolic_New', 'Pulse_New', 'Temperate_New',
    'Respiraties_New', 'O2 Delivery_New', 'Consciouness New'
]

print("\n--- Analyzing Physiological Parameter Contributions to High NEWS2 ---")

for col in contribution_cols:
    # Determine a user-friendly name for the parameter
    parameter_name = col.replace('_New', '').replace(' New', '').replace('_', ' ')

    print(f"\nAnalyzing {parameter_name} contribution...")
    # Calculate the proportion
    contribution_prop = parameter_contribution_proportion_7(df, care_home_to_analyze, col, news2_threshold=7, interval='M')

    if not contribution_prop.empty:
        # Plot the results
        plot_physiological_contribution_timeseries(
            {care_home_to_analyze: contribution_prop}, # Pass as a dictionary
            f'Care Home {care_home_to_analyze}: {parameter_name} Contributing to High NEWS2 (>=7)',
            'Proportion of High NEWS2 Obs (%)',
            output_dir,
            f'{parameter_name.lower().replace(" ", "_")}_high_news2_proportion' # Generate filename
        )
    else:
        print(f"No data or results for {parameter_name} contribution for {care_home_to_analyze}. Skipping plot.")


Saving physiological contribution plots to directory: care_home_10230_physiological_contribution_plots

--- Analyzing Physiological Parameter Contributions to High NEWS2 ---

Analyzing O2 contribution...
Saved plot: care_home_10230_physiological_contribution_plots/o2_high_news2_proportion.html

Analyzing Systolic contribution...
Saved plot: care_home_10230_physiological_contribution_plots/systolic_high_news2_proportion.html

Analyzing Pulse contribution...
Saved plot: care_home_10230_physiological_contribution_plots/pulse_high_news2_proportion.html

Analyzing Temperate contribution...
Saved plot: care_home_10230_physiological_contribution_plots/temperate_high_news2_proportion.html

Analyzing Respiraties contribution...
Saved plot: care_home_10230_physiological_contribution_plots/respiraties_high_news2_proportion.html

Analyzing O2 Delivery contribution...
Saved plot: care_home_10230_physiological_contribution_plots/o2_delivery_high_news2_proportion.html

Analyzing Consciouness contrib

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=ce340b38-6a9d-4e03-9468-11b42664220b' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>