In [2]:
# setting my si data frame
import pandas as pd
data = '/workspaces/service-data/outputs/si.csv'
si = pd.read_csv(data, sep=';')
programid = '/workspaces/service-data/inputs/serv_prog.csv'
programs = pd.read_csv(programid, sep=',')
from datetime import datetime
import pytz
import numpy as np

# Specify the timezone
timezone = pytz.timezone("America/Montreal")

# Get the current date and time in the specified timezone
current_date = datetime.now(timezone)

# Format the current date and time into the desired string format
current_datestr = current_date.strftime("%Y-%m-%d_%H:%M:%S")

# Print the current date and time
print(f"current date: {current_datestr}")


current date: 2025-04-15_15:55:54


In [3]:
# to add a new column for phone apps inquiries
# phone apps inquiries are num_phone_enquiries plus num_applications_by_phone
# based on error received 'can only concatenate str (not int) to string

# Columns to convert to numeric and fill NaNs with 0
cols_to_numeric = [
    'num_phone_enquiries',
    'num_applications_by_phone',
    'num_applications_by_email',
    'num_applications_by_fax',
    'num_applications_by_mail',
    'num_applications_by_other',
    'num_applications_in_person',
    'num_applications_online'
]

# Convert to numeric and fill NaNs with 0
si[cols_to_numeric] = si[cols_to_numeric].apply(pd.to_numeric, errors='coerce').fillna(0)

# Create 'phone_apps_inquiries' column (sum of phone enquiries + phone applications)
si['phone_apps_inquiries'] = si['num_phone_enquiries'] + si['num_applications_by_phone']

# Create 'total_transactions' by summing all relevant application methods
si['total_transactions'] = (
    si['num_applications_by_email'] +
    si['num_applications_by_fax'] +
    si['num_applications_by_mail'] +
    si['num_applications_by_other'] +
    si['num_applications_in_person'] +
    si['num_applications_online'] +
    si['phone_apps_inquiries']
)

# adding a new column for applications done by phone, online and in person only
si['apps_online_and_per'] = (
    si['num_applications_in_person'].fillna(0) + 
    si['num_applications_online'].fillna(0) + 
    si['phone_apps_inquiries'].fillna(0)
)

# adding a new column for omnichannels
si['omnichannel'] = si.apply(
    lambda row: 1 if pd.notna(row['phone_apps_inquiries']) and pd.notna(row['num_applications_online']) and pd.notna(row['num_applications_in_person']) else 0, 
    axis=1
)

# adding a new column for external
si['external'] = si['service_scope'].str.contains('EXTERN', na=False).astype(int)
#convert the column to numeric 
si['external'] = pd.to_numeric(si['external'], errors='coerce')

# adding a new column for high volume services
si['highvolume'] = (si['total_transactions'] >= 45000).astype(int)

# adding a new column for online enabled Y
# creating columns to check which lists out the columns from os_account_registration to os_issue_resolution_feedback
columns_to_check = [ 'os_account_registration', 'os_authentication', 'os_application', 'os_decision', 'os_issuance', 'os_issue_resolution_feedback']
si['online_enabledY'] = si[columns_to_check].apply(lambda row: (row == 'Y').sum(), axis=1)

# adding column for online enabled N
si['online_enabledN'] = si[columns_to_check].apply(lambda row: (row == 'N').sum(), axis=1)

# adding column for online enabled NA
si['online_enabledNA'] = si[columns_to_check].isna().sum(axis=1)

# adding a new column for online end to end
si['onlineE2E'] = (
    si.apply(lambda row: "0" if row['online_enabledNA'] == 6 
    else "1" if row['online_enabledY'] + row['online_enabledNA'] == 6 
    else "0", axis=1)
    )

# adding a new column for online one or more points
si['onl_morepoints'] = si['online_enabledY'].apply(lambda x: '1' if x >= 1 else '0')

# Importing the service standards data
ss_data = '/workspaces/service-data/outputs/ss.csv'
ss = pd.read_csv(ss_data, sep=';')

# Adding column for services with standards and standards met
# Grouping and summarizing the 'ss' DataFrame
ss_count = ss.groupby(['service_id', 'fiscal_yr']).agg(
    standards_count=('service_id', 'size'),  # Count the occurrences
    standards_met=('target_met', lambda x: (x == 'Y').sum())  # Count where target_met is 'Y'
).reset_index()

# Merging the 'ss_count' DataFrame with the 'si' DataFrame
si = si.merge(ss_count, on=['service_id', 'fiscal_yr'], how='left')

# Replacing NaN values in 'standards_count' and 'standards_met' with 0
si['standards_count'] = si['standards_count'].fillna(0)
si['standards_met'] = si['standards_met'].fillna(0)

# Adding a new column for services that met at least one standard
si['STDS_metsome'] = (si['standards_met'] >= 1).astype(int)

# Creating the FYSID column by merging fiscal_yr and service_id columns
si['FYSID'] = si['fiscal_yr'].astype(str) + si['service_id'].astype(str)

# --- MODIFIED SECTION BELOW ---

# Merge the 'ss' DataFrame with 'external' and 'highvolume' from 'si' using 'fy_org_id_service_id'
merged_df = pd.merge(
    ss, 
    si[['fy_org_id_service_id', 'external', 'highvolume']], 
    on='fy_org_id_service_id', 
    how='left'
)

# Create the 'external_service' column based on the merged data
merged_df['external_service'] = (merged_df['external'] == 1).astype(int)

# Add both 'external_service' and 'highvolume' columns back into the original 'ss' DataFrame
ss['external_service'] = merged_df['external_service']
ss['highvolume'] = merged_df['highvolume']


In [4]:
#BEGIN DATAPACK METRICS
# The following are the metrics to be calculated:
# Metric 2 total number of transactions (in millions)
# metric 3a online as a share of total transactions
# metric 3b telephone as a share of total transactions
# metric 3c in-person as a share of total transactions
# metric 4 share of GC services with omni channel offerings
# metric 5a online as a share of omnichannel usage
# metric 5b phone as a share of onminchannel usage
# metric 5c in-person as a share of omnichannel usage
# metric 6 number of departments
# metric 7 number of programs
# metric 8 number of external services
# metric 9 number of high volume services
# metric 10 total online transactions ( in millions)
# metric 11 total phone transactions (in millions)
# metric 12 total in-person transactions (in millions)
# metric 13 total mail applications (in millions)
# metric 14 share of external services online end-to-end
# metric 15 share of external services which have at least one point online
# metric 16 services meeting service standards
# metric 17 share of external, high volume services online end-to-end
# metric 18 share of external, high volume services which have at least one point online
# metric 19, share of external, highvolume services meeting service standards

In [5]:
# METRICs 2-16 - the transactions table will contain all the metrics that apply to external services alone
# filter the data for external services

# Filter the si DataFrame where 'external' == 1
filtered_si = si[si['external'] == 1]

# Group by fiscal_yr and sum the numeric columns
grouped = filtered_si.groupby('fiscal_yr')[[ 
    'total_transactions', 
    'num_applications_online', 
    'phone_apps_inquiries', 
    'num_applications_by_mail',
    'num_applications_in_person'
]].sum().reset_index()

# Rename columns
Transactions_table = grouped.rename(columns={
    'fiscal_yr': 'fiscal_year',
    'num_applications_online': 'online applications',
    'phone_apps_inquiries': 'phone applications',
    'num_applications_by_mail': 'mail applications',
    'num_applications_in_person': 'in_person_apps'
})

# Add share columns
Transactions_table['online_share'] = (
    Transactions_table['online applications'] / Transactions_table['total_transactions']
).fillna(0)

Transactions_table['phone_share'] = (
    Transactions_table['phone applications'] / Transactions_table['total_transactions']
).fillna(0)

Transactions_table['in_person_share'] = (
    Transactions_table['in_person_apps'] / Transactions_table['total_transactions']
).fillna(0)

# METRIC 4-5C OMNICHANNEL OFFERINGS
# Count distinct omnichannel services per fiscal year
omni_count_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['service_id'].nunique()

# Count total distinct services per fiscal year
total_count_by_year = si.groupby('fiscal_yr')['service_id'].nunique()

# Calculate omnichannel service share
share_omni_by_year = (omni_count_by_year / total_count_by_year) * 100

# === Metric 5a: Online as a share of omnichannel usage ===
total_transactions_by_year_omni = filtered_si[filtered_si['omnichannel'] == 1].groupby('fiscal_yr')['total_transactions'].sum()
sum_online_apps_by_year = filtered_si[filtered_si['omnichannel'] == 1].groupby('fiscal_yr')['num_applications_online'].sum()
share_online_by_year = (sum_online_apps_by_year / total_transactions_by_year_omni) * 100

# === Metric 5b: Phone as a share of omnichannel usage ===
sum_phone_by_year = filtered_si[filtered_si['omnichannel'] == 1].groupby('fiscal_yr')['phone_apps_inquiries'].sum()
share_phone_by_year = (sum_phone_by_year / total_transactions_by_year_omni) * 100

# === Metric 5c: In-person as a share of omnichannel usage ===
sum_in_person_by_year = filtered_si[filtered_si['omnichannel'] == 1].groupby('fiscal_yr')['num_applications_in_person'].sum()
share_in_person_by_year = (sum_in_person_by_year / total_transactions_by_year_omni) * 100

# === Reindex for consistent fiscal years ===
all_fiscal_years = Transactions_table['fiscal_year']

Transactions_table['Omnichannel_Service_Count'] = omni_count_by_year.reindex(all_fiscal_years).values
Transactions_table['Total_Service_Count'] = total_count_by_year.reindex(all_fiscal_years).values
Transactions_table['GC_services_with_omnichannel (%)'] = share_omni_by_year.reindex(all_fiscal_years).values

Transactions_table['Total_Transactions_Omnichannel'] = total_transactions_by_year_omni.reindex(all_fiscal_years).values
Transactions_table['omnichannel_online_apps'] = sum_online_apps_by_year.reindex(all_fiscal_years).values
Transactions_table['Online_Share_of_Omnichannel_Usage (%)'] = share_online_by_year.reindex(all_fiscal_years).values

Transactions_table['Omnichannel_phone_apps'] = sum_phone_by_year.reindex(all_fiscal_years).values
Transactions_table['Phone_Share_of_Omnichannel_Usage (%)'] = share_phone_by_year.reindex(all_fiscal_years).values

Transactions_table['omnichannel_inperson_apps'] = sum_in_person_by_year.reindex(all_fiscal_years).values
Transactions_table['In-Person_Share_of_Omnichannel_Usage (%)'] = share_in_person_by_year.reindex(all_fiscal_years).values

# METRIC 6-9 NUMBER OF External departments, programs, services, and high volume services
# METRIC 6: Number of External Departments 
no_departments_by_year = filtered_si.groupby('fiscal_yr')['department_en'].nunique()
no_departments_by_year = no_departments_by_year.reindex(Transactions_table['fiscal_year']).values
Transactions_table['external_departments'] = no_departments_by_year

# METRIC 7: Number of Programs 
no_programs_by_year = programs.groupby('fiscal_yr')['program_id'].nunique()
no_programs_by_year = no_programs_by_year.reindex(Transactions_table['fiscal_year']).values
Transactions_table['number_of_programs'] = no_programs_by_year

# METRIC 8: Number of External Services 
no_external_service_by_year = filtered_si.groupby('fiscal_yr')['service_id'].nunique()
no_external_service_by_year = no_external_service_by_year.reindex(Transactions_table['fiscal_year']).values
Transactions_table['external_services'] = no_external_service_by_year

# METRIC 9: Number of High Volume Services 
no_highvolume_services_by_year = filtered_si[filtered_si['highvolume'] == 1].groupby('fiscal_yr')['service_id'].nunique()
no_highvolume_services_by_year = no_highvolume_services_by_year.reindex(Transactions_table['fiscal_year']).values
Transactions_table['high_volume_services'] = no_highvolume_services_by_year

# METRIC 10-13 - total online, phone, in-person and mail transactions in millions
# the columns with this information already exist from metric 2-3c.

# METRIC 14-15 SHARE of external services online end-to-end and share of external services with at least one point online

filtered_si['onlineE2E'] = pd.to_numeric(filtered_si['onlineE2E'], errors='coerce')
filtered_si['onl_morepoints'] = pd.to_numeric(filtered_si['onl_morepoints'], errors='coerce')

# Define flags (external services which are online end-to-end and external services which have at least one point online)
filtered_si['onlinee2e'] = np.where((filtered_si['onlineE2E'] == 1), 1, 0)
filtered_si['one_point_onl'] = np.where((filtered_si['onl_morepoints'] == 1), 1, 0)
filtered_si['all_external'] = np.where(filtered_si['external'] == 1, 1, 0)

# Group by fiscal year and calculate the counts
online_services = filtered_si.groupby('fiscal_yr')[['onlinee2e', 'one_point_onl', 'all_external']].sum()

# Calculate percentages
online_services['pct_onlinee2e'] = (online_services['onlinee2e'] / online_services['all_external']) * 100
online_services['pct_one_point_onl'] = (online_services['one_point_onl'] / online_services['all_external']) * 100

# Align with Transactions_table fiscal years
online_services = online_services.reindex(Transactions_table['fiscal_year'])

# Add to Transactions_table
Transactions_table['service_onlineE2E_%'] = online_services['pct_onlinee2e'].values
Transactions_table['at_least_1point_onl_%'] = online_services['pct_one_point_onl'].values

# METRIC 16 external services standards meeting targets
# Create new column for standards meeting target
ss['standards_meeting_target'] = np.where((ss['target_met'] == 'Y') & (ss['external_service'] == 1), 1, 0)

# Create new column for total standards
ss['total_standards'] = np.where((ss['target_met'].notna()) & (ss['external_service'] == 1), 1, 0)

# Group by fiscal year and calculate the sum for each metric
standards_meeting_targets = ss.groupby('fiscal_yr')[['standards_meeting_target', 'total_standards']].sum()

# Calculate the percentage of standards meeting the target
standards_meeting_targets['pct_standards_meeting_target'] = (
    standards_meeting_targets['standards_meeting_target'] / standards_meeting_targets['total_standards']
) * 100

# Align with Transactions_table fiscal years
standards_meeting_targets = standards_meeting_targets.reindex(Transactions_table['fiscal_year'])

# Add to Transactions_table
Transactions_table['Standards_Meeting_Targets%'] = standards_meeting_targets['pct_standards_meeting_target'].values

# Final output
print(Transactions_table)

# Export the Transactions_table to a CSV file
Transactions_table.to_csv('Transactions_table.csv', index=True)

# Transpose the Transactions_table such that fiscal_year becomes the columns
Transactions_table_transposed = Transactions_table.set_index('fiscal_year').transpose()

# Final output: print the transposed table
print(Transactions_table_transposed)

# Export the transposed Transactions_table to a CSV file
Transactions_table_transposed.to_csv('Transactions_table_transposed.csv', index=True)



  fiscal_year  total_transactions  online applications  phone applications  \
0   2018-2019         421133885.0          174630378.0          53099291.0   
1   2019-2020         412398882.0          156831471.0          90704008.0   
2   2020-2021         386183263.0          202706431.0         137242208.0   
3   2021-2022         358629620.0          196235510.0         104413441.0   
4   2022-2023         464461678.0          268647250.0          72927098.0   
5   2023-2024         462845966.0          254970341.0          71119731.0   
6   2024-2025                 0.0                  0.0                 0.0   

   mail applications  in_person_apps  online_share  phone_share  \
0         42316481.0     118928362.0      0.414667     0.126086   
1         33221419.0     111052720.0      0.380291     0.219942   
2         19468112.0      15198693.0      0.524897     0.355381   
3         22377948.0      27955327.0      0.547182     0.291146   
4         31707932.0      78959836.0    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_si['onlineE2E'] = pd.to_numeric(filtered_si['onlineE2E'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_si['onl_morepoints'] = pd.to_numeric(filtered_si['onl_morepoints'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_si['onlinee2e'] = 

In [6]:
# METRIC 17 - 19
# contains the metric for services that are external and high volume services
# external, highvolume services online end-to-end, external, highvolume services with at least one point online and external, highvolume service standards meeting targets

# METRIC 17-18 EXTERNAL HIGH VOLUME PERCENTAGES 
# Create 'online_end2end': 1 if external == 1, onlineE2E == 1, and highvolume == 1
filtered_si['online_end2end'] = np.where(
    (filtered_si['external'] == 1) & (filtered_si['onlineE2E'] == 1) & (filtered_si['highvolume'] == 1), 
    1, 
    0
)

# Create 'at_least1point_onl': 1 if external == 1, onl_morepoints == 1, and highvolume == 1
filtered_si['at_least1point_onl'] = np.where(
    (filtered_si['external'] == 1) & (filtered_si['onl_morepoints'] == 1) & (filtered_si['highvolume'] == 1), 
    1, 
    0
)

# Create 'highvol_ser': 1 if external == 1 and highvolume == 1
filtered_si['highvol_ser'] = np.where(
    (filtered_si['external'] == 1) & (filtered_si['highvolume'] == 1), 
    1, 
    0
)

# Group by fiscal year and calculate sums for high volume external services
highvol_online_services = filtered_si.groupby('fiscal_yr')[['online_end2end', 'at_least1point_onl', 'highvol_ser']].sum().reset_index()

# Calculate percentages
highvol_online_services['online_end2end%'] = (highvol_online_services['online_end2end'] / highvol_online_services['highvol_ser']) * 100
highvol_online_services['at_least1point_onl%'] = (highvol_online_services['at_least1point_onl'] / highvol_online_services['highvol_ser']) * 100


# METRIC 19 EXTERNAL HIGH VOLUME SERVICES MEETING SERVICE STANDARDS

# Create new column for external standards meeting target with highvolume == 1
ss['standards_meeting_target'] = np.where(
    (ss['target_met'] == 'Y') & (ss['external_service'] == 1) & (ss['highvolume'] == 1), 
    1, 
    0
)

# Create new column for total standards with highvolume == 1
ss['total_standards'] = np.where(
    (ss['target_met'].notna()) & (ss['external_service'] == 1) & (ss['highvolume'] == 1), 
    1, 
    0
)

# Group by fiscal year and calculate the sum for each metric
highvol_standards_meeting_targets_table = ss.groupby('fiscal_yr')[['standards_meeting_target', 'total_standards']].sum().reset_index()

# Calculate the percentage of standards meeting the target
highvol_standards_meeting_targets_table['standards_meeting_target%'] = (
    highvol_standards_meeting_targets_table['standards_meeting_target'] / highvol_standards_meeting_targets_table['total_standards']
) * 100


# Merge the two tables into one final table

# Merge the high volume services percentages and standards meeting metrics on fiscal year
Highvolume_services_table = pd.merge(
    highvol_online_services,
    highvol_standards_meeting_targets_table[['fiscal_yr', 'standards_meeting_target%']],
    on='fiscal_yr',
    how='left'
)

# Display the final Highvolume_services_table
print(Highvolume_services_table)

# If you want to save the output to a CSV file
Highvolume_services_table.to_csv('Highvolume_services_table.csv', index=False)

# Export the final Highvolume_services_table to a CSV file
Highvolume_services_table.to_csv('Highvolume_services_table.csv', index=False)


   fiscal_yr  online_end2end  at_least1point_onl  highvol_ser  \
0  2018-2019              32                  84          117   
1  2019-2020              44                  96          126   
2  2020-2021              59                  95          123   
3  2021-2022              63                 106          134   
4  2022-2023              50                  89          116   
5  2023-2024              55                  88          117   
6  2024-2025               0                   0            0   

   online_end2end%  at_least1point_onl%  standards_meeting_target%  
0        27.350427            71.794872                  63.522013  
1        34.920635            76.190476                  73.195876  
2        47.967480            77.235772                  51.811594  
3        47.014925            79.104478                  48.165138  
4        43.103448            76.724138                  66.784452  
5        47.008547            75.213675                  67.36111

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_si['online_end2end'] = np.where(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_si['at_least1point_onl'] = np.where(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_si['highvol_ser'] = np.where(
