In [50]:
import pyodbc
import pandas as pd
import os 
import sqlite3 
import shutil
from sqlalchemy import create_engine
import urllib
from openpyxl import load_workbook
import openpyxl

# SQLAlchemy
# Database connection details
server = 'PRD-APP-PBI-GW\\UCRHEALTH_PROD'  
database = 'ucr_health'  
username = 'ucr_svc_analytics'  


# Construct the connection string
connection_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server};DATABASE={database};UID={username};PWD={password}'
encoded_conn_str = urllib.parse.quote_plus(connection_string)

# Create the SQLAlchemy engine 
engine = create_engine(f'mssql+pyodbc:///?odbc_connect={encoded_conn_str}')

# Read query results directly into a DataFrame
df = pd.read_sql('SELECT * FROM [rpt].[vw_cemrp_encounters]', engine)


# Close the connection (optional with SQLAlchemy, but good practice)
engine.dispose() 

In [52]:
# get all the quarters in a variable for processing the rolling median moving forward a quarter at a time
all_quarters = df.year_quarter.unique()

In [54]:
# rolling median for quarters in CEMRP reporting 

def create_rolling_lists(quarters):
    rolling_medians = []  # Initialize the list to store rolling lists
    for i in range(len(quarters) - 3):
        rolling_list = quarters[i:i+4]
        rolling_medians.append(rolling_list)
    return rolling_medians  # Return the rolling lists

# Call the function and store the result in a variable
rolling_medians = create_rolling_lists(all_quarters)

# Now you can print the rolling_medians
print(rolling_medians)

[array(['2022_Q1', '2022_Q2', '2022_Q3', '2022_Q4'], dtype=object), array(['2022_Q2', '2022_Q3', '2022_Q4', '2023_Q1'], dtype=object), array(['2022_Q3', '2022_Q4', '2023_Q1', '2023_Q2'], dtype=object), array(['2022_Q4', '2023_Q1', '2023_Q2', '2023_Q3'], dtype=object), array(['2023_Q1', '2023_Q2', '2023_Q3', '2023_Q4'], dtype=object), array(['2023_Q2', '2023_Q3', '2023_Q4', '2024_Q1'], dtype=object), array(['2023_Q3', '2023_Q4', '2024_Q1', '2024_Q2'], dtype=object), array(['2023_Q4', '2024_Q1', '2024_Q2', '2024_Q3'], dtype=object), array(['2024_Q1', '2024_Q2', '2024_Q3', '2024_Q4'], dtype=object), array(['2024_Q2', '2024_Q3', '2024_Q4', '2025_Q1'], dtype=object)]


In [56]:
new_pt = df[df.patient_status == 'new_patient']
# Initialize an empty DataFrame
results_df = pd.DataFrame(columns=['Year_Quarter_Array', 'Days_to_Appt_Median'])

# Convert the 'days_to_appt' column to numeric, coercing errors to NaN
new_pt['days_to_appt'] = pd.to_numeric(new_pt['days_to_appt'], errors='coerce')

# Loop through the rolling_medians and calculate the medians
for median in rolling_medians:
    year_quarter_array = median  # The array of quarters
    days_to_appt_median = new_pt[new_pt.year_quarter.isin(median)].days_to_appt.median()  # The median for days to appointment
    
    # Create a new DataFrame with the current row data
    new_row = pd.DataFrame({
        'Year_Quarter_Array': [year_quarter_array],
        'Days_to_Appt_Median': [days_to_appt_median]
    })
    
    # Concatenate the new row to the results DataFrame
    results_df = pd.concat([results_df, new_row], ignore_index=True)

# Display the results as a DataFrame
print(results_df)

                     Year_Quarter_Array  Days_to_Appt_Median
0  [2022_Q1, 2022_Q2, 2022_Q3, 2022_Q4]                 13.0
1  [2022_Q2, 2022_Q3, 2022_Q4, 2023_Q1]                 13.0
2  [2022_Q3, 2022_Q4, 2023_Q1, 2023_Q2]                 14.0
3  [2022_Q4, 2023_Q1, 2023_Q2, 2023_Q3]                 13.0
4  [2023_Q1, 2023_Q2, 2023_Q3, 2023_Q4]                 14.0
5  [2023_Q2, 2023_Q3, 2023_Q4, 2024_Q1]                 13.0
6  [2023_Q3, 2023_Q4, 2024_Q1, 2024_Q2]                 13.0
7  [2023_Q4, 2024_Q1, 2024_Q2, 2024_Q3]                 14.0
8  [2024_Q1, 2024_Q2, 2024_Q3, 2024_Q4]                 14.0
9  [2024_Q2, 2024_Q3, 2024_Q4, 2025_Q1]                 15.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_pt['days_to_appt'] = pd.to_numeric(new_pt['days_to_appt'], errors='coerce')
  results_df = pd.concat([results_df, new_row], ignore_index=True)


In [58]:
new_pt[new_pt.calendar_year == 2024].days_to_appt.median()

14.0