In [1]:
# setting my si data frame
import pandas as pd
data = '/workspaces/service-data/outputs/si.csv'
si = pd.read_csv(data, sep=';')
programid = '/workspaces/service-data/inputs/serv_prog.csv'
programs = pd.read_csv(programid, sep=',')
from datetime import datetime
import pytz
import numpy as np

# Specify the timezone
timezone = pytz.timezone("America/Montreal")

# Get the current date and time in the specified timezone
current_date = datetime.now(timezone)

# Format the current date and time into the desired string format
current_datestr = current_date.strftime("%Y-%m-%d_%H:%M:%S")

# Print the current date and time
print(f"current date: {current_datestr}")


current date: 2025-04-15_14:06:00


In [2]:
# to add a new column for phone apps inquiries
# phone apps inquiries are num_phone_enquiries plus num_applications_by_phone
# based on error received 'can only concatenate str (not int) to string

# Columns to convert to numeric and fill NaNs with 0
cols_to_numeric = [
    'num_phone_enquiries',
    'num_applications_by_phone',
    'num_applications_by_email',
    'num_applications_by_fax',
    'num_applications_by_mail',
    'num_applications_by_other',
    'num_applications_in_person',
    'num_applications_online'
]

# Convert to numeric and fill NaNs with 0
si[cols_to_numeric] = si[cols_to_numeric].apply(pd.to_numeric, errors='coerce').fillna(0)

# Create 'phone_apps_inquiries' column (sum of phone enquiries + phone applications)
si['phone_apps_inquiries'] = si['num_phone_enquiries'] + si['num_applications_by_phone']

# Create 'total_transactions' by summing all relevant application methods
si['total_transactions'] = (
    si['num_applications_by_email'] +
    si['num_applications_by_fax'] +
    si['num_applications_by_mail'] +
    si['num_applications_by_other'] +
    si['num_applications_in_person'] +
    si['num_applications_online'] +
    si['phone_apps_inquiries']
)

In [3]:
# adding a new column for applications done by phone, online and in person only
si['apps_online_and_per'] = (
    si['num_applications_in_person'].fillna(0) + 
    si['num_applications_online'].fillna(0) + 
    si['phone_apps_inquiries'].fillna(0)
)

# adding a new column for omnichannels
si['omnichannel'] = si.apply(
    lambda row: 1 if pd.notna(row['phone_apps_inquiries']) and pd.notna(row['num_applications_online']) and pd.notna(row['num_applications_in_person']) else 0, 
    axis=1
)

# adding a new column for external
si['external'] = si['service_scope'].str.contains('EXTERN', na=False).astype(int)
#convert the column to numeric 
si['external'] = pd.to_numeric(si['external'], errors='coerce')

# adding a new column for high volume services
si['highvolume'] = (si['total_transactions'] >= 45000).astype(int)

# adding a new column for online enabled Y
# creating columns to check which lists out the columns from os_account_registration to os_issue_resolution_feedback
columns_to_check = [ 'os_account_registration', 'os_authentication', 'os_application', 'os_decision', 'os_issuance', 'os_issue_resolution_feedback']
si['online_enabledY'] = si[columns_to_check].apply(lambda row: (row == 'Y').sum(), axis=1)

# adding column for online enabled N
si['online_enabledN'] = si[columns_to_check].apply(lambda row: (row == 'N').sum(), axis=1)

# adding column for online enabled NA
si['online_enabledNA'] = si[columns_to_check].isna().sum(axis=1)

# adding a new column for online end to end
si['onlineE2E'] = (
    si.apply(lambda row: "0" if row['online_enabledNA'] == 6 
    else "1" if row['online_enabledY'] + row['online_enabledNA'] == 6 
    else "0", axis=1)
    )

# adding a new column for online one or more points
si['onl_morepoints'] = si['online_enabledY'].apply(lambda x: '1' if x >= 1 else '0')

In [4]:
# Importing the service standards data
ss_data = '/workspaces/service-data/outputs/ss.csv'
ss = pd.read_csv(ss_data, sep=';')

# Adding column for services with standards and standards met
# Grouping and summarizing the 'ss' DataFrame
ss_count = ss.groupby(['service_id', 'fiscal_yr']).agg(
    standards_count=('service_id', 'size'),  # Count the occurrences
    standards_met=('target_met', lambda x: (x == 'Y').sum())  # Count where target_met is 'Y'
).reset_index()

# Merging the 'ss_count' DataFrame with the 'si' DataFrame
si = si.merge(ss_count, on=['service_id', 'fiscal_yr'], how='left')

# Replacing NaN values in 'standards_count' and 'standards_met' with 0
si['standards_count'] = si['standards_count'].fillna(0)
si['standards_met'] = si['standards_met'].fillna(0)

# Adding a new column for services that met at least one standard
si['STDS_metsome'] = (si['standards_met'] >= 1).astype(int)

# Creating the FYSID column by merging fiscal_yr and service_id columns
si['FYSID'] = si['fiscal_yr'].astype(str) + si['service_id'].astype(str)

# --- MODIFIED SECTION BELOW ---

# Merge the 'ss' DataFrame with 'external' and 'highvolume' from 'si' using 'fy_org_id_service_id'
merged_df = pd.merge(
    ss, 
    si[['fy_org_id_service_id', 'external', 'highvolume']], 
    on='fy_org_id_service_id', 
    how='left'
)

# Create the 'external_service' column based on the merged data
merged_df['external_service'] = (merged_df['external'] == 1).astype(int)

# Add both 'external_service' and 'highvolume' columns back into the original 'ss' DataFrame
ss['external_service'] = merged_df['external_service']
ss['highvolume'] = merged_df['highvolume']


In [5]:
#BEGIN DATAPACK METRICS

In [5]:
# METRIC 2-3c TOTAL TRANSACTIONS (Filtered for 'external' == 1)

# METRIC 2-3c TOTAL TRANSACTIONS (Filtered for 'external' == 1)

# Filter the si DataFrame where 'external' == 1
filtered_si = si[si['external'] == 1]

# Group by fiscal_yr and sum the numeric columns
grouped = filtered_si.groupby('fiscal_yr')[[
    'total_transactions', 
    'num_applications_online', 
    'phone_apps_inquiries', 
    'num_applications_in_person'
]].sum().reset_index()

# Rename the columns
Transactions_table = grouped.rename(columns={
    'fiscal_yr': 'fiscal_year',
    'num_applications_online': 'online applications',
    'phone_apps_inquiries': 'phone applications',
    'num_applications_in_person': 'in_person_apps'
})

# Add share columns
Transactions_table['online_share'] = (
    Transactions_table['online applications'] / Transactions_table['total_transactions']
).fillna(0)

Transactions_table['phone_share'] = (
    Transactions_table['phone applications'] / Transactions_table['total_transactions']
).fillna(0)

Transactions_table['in_person_share'] = (
    Transactions_table['in_person_apps'] / Transactions_table['total_transactions']
).fillna(0)

print(Transactions_table)


# code to verify the Transactions_table is valid (uncomment to run)
# Sum of 'total_transactions' for fiscal_year '2023-2024'
# total_transactions_sum = Transactions_table[Transactions_table['fiscal_year'] == '2023-2024']['total_transactions'].sum()

# Print the result
# print(f"Total Transactions for fiscal year 2023-2024: {total_transactions_sum}")


  fiscal_year  total_transactions  online applications  phone applications  \
0   2018-2019         421133885.0          174630378.0          53099291.0   
1   2019-2020         412398882.0          156831471.0          90704008.0   
2   2020-2021         386183263.0          202706431.0         137242208.0   
3   2021-2022         358629620.0          196235510.0         104413441.0   
4   2022-2023         464461678.0          268647250.0          72927098.0   
5   2023-2024         462845966.0          254970341.0          71119731.0   
6   2024-2025                 0.0                  0.0                 0.0   

   in_person_apps  online_share  phone_share  in_person_share  
0     118928362.0      0.414667     0.126086         0.282400  
1     111052720.0      0.380291     0.219942         0.269285  
2      15198693.0      0.524897     0.355381         0.039356  
3      27955327.0      0.547182     0.291146         0.077950  
4      78959836.0      0.578406     0.157014         0.

In [16]:
print(si['omnichannel'].value_counts())


omnichannel
1    8934
Name: count, dtype: int64


In [15]:
# === Metric 4: Share of GC services with omnichannel offerings ===

# Group by fiscal year and calculate the number of distinct service_ids with omnichannel offerings (where omnichannel == 1)
num_omni_services = si[si['omnichannel'] == 1].groupby('fiscal_yr')['service_id'].nunique()

# Group by fiscal year and calculate the total number of distinct service_ids in the entire dataset
total_services = si.groupby('fiscal_yr')['service_id'].nunique()

# Calculate the share of omnichannel services as a percentage for each fiscal year
GC_omnichannel_share = (num_omni_services / total_services) * 100

# === Metric 5a: Online as a share of omnichannel usage ===
total_transactions_by_year_omni = si[si['omnichannel'] == 1].groupby('fiscal_yr')['total_transactions'].sum()
sum_online_apps_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['num_applications_online'].sum()
share_online_by_year = (sum_online_apps_by_year / total_transactions_by_year_omni) * 100

# === Metric 5b: Phone as a share of omnichannel usage ===
sum_phone_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['phone_apps_inquiries'].sum()
share_phone_by_year = (sum_phone_by_year / total_transactions_by_year_omni) * 100

# === Metric 5c: In-person as a share of omnichannel usage ===
sum_in_person_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['num_applications_in_person'].sum()
share_in_person_by_year = (sum_in_person_by_year / total_transactions_by_year_omni) * 100

# === Combine all metrics into a single DataFrame ===
combined_table = pd.DataFrame({
    'Fiscal_Year': GC_omnichannel_share.index,
    'Omnichannel_Service_Count': num_omni_services.reindex(GC_omnichannel_share.index).values,
    'Total_Service_Count': total_services.reindex(GC_omnichannel_share.index).values,
    'GC_omnichannel_share (%)': GC_omnichannel_share.values,
    
    'Total_Transactions_Omnichannel': total_transactions_by_year_omni.reindex(GC_omnichannel_share.index).values,
    'Online_Applications': sum_online_apps_by_year.reindex(GC_omnichannel_share.index).values,
    'Online_Share_of_Omnichannel (%)': share_online_by_year.reindex(GC_omnichannel_share.index).values,
    
    'Phone_Inquiries': sum_phone_by_year.reindex(GC_omnichannel_share.index).values,
    'Phone_Share_of_Omnichannel (%)': share_phone_by_year.reindex(GC_omnichannel_share.index).values,
    
    'In_Person_Applications': sum_in_person_by_year.reindex(GC_omnichannel_share.index).values,
    'In_Person_Share_of_Omnichannel (%)': share_in_person_by_year.reindex(GC_omnichannel_share.index).values
})

# Display the resulting table
print(combined_table)


                     Fiscal_Year  Omnichannel_Service_Count  \
0                      2018-2019                       1244   
1                      2019-2020                       1282   
2                      2020-2021                       1426   
3                      2021-2022                       1610   
4                      2022-2023                       1690   
5                      2023-2024                       1680   
6                      2024-2025                          1   
7  Timestamp:2025-04-01_14:23:25                          0   

   Total_Service_Count  GC_omnichannel_share (%)  \
0                 1244                     100.0   
1                 1282                     100.0   
2                 1426                     100.0   
3                 1610                     100.0   
4                 1690                     100.0   
5                 1680                     100.0   
6                    1                     100.0   
7               

In [8]:

omnichannel_metric.to_csv('omnichannel_metric.csv', index=True)

In [9]:
# METRIC 6-9 
# 6 number of departments
# 7 number of programs
# 8 number of external serices
# 9 high volume services
# the 'filtered_si' table can be used to derive these metrics hence, no further table needs to be generated (except if told otherwise)
# the code below verifies that the filtered_si table can be used to derive the metrics (#uncomment to run)

# Number of unique departments for external services in fiscal year 2023-2024
# no_departments = filtered_si[(filtered_si['fiscal_yr'] == '2023-2024')]['department_en'].nunique()
# print(no_departments)

# Number of unique service IDs for external services in fiscal year 2023-2024
# no_external_service = filtered_si[(filtered_si['fiscal_yr'] == '2023-2024')]['service_id'].nunique()
# print(no_external_service)

# Number of unique high volume service IDs for external services in fiscal year 2023-2024
# no_highvolume_services = filtered_si[(filtered_si['highvolume'] == 1) & (filtered_si['fiscal_yr'] == '2023-2024')]['service_id'].nunique()
# print(no_highvolume_services)


In [10]:
# METRIC 10-13 EXTERNAL TRANSACTIONS
# Create new columns 'online_transactions, phone in-person, and mail transactions based on the condition in 'external'
si['online_transactions'] = np.where(si['external'] == 1, si['num_applications_online'], 0)
si['phone_transactions'] = np.where(si['external'] == 1, si['phone_apps_inquiries'], 0)
si['in_person_transactions'] = np.where(si['external'] == 1, si['num_applications_in_person'], 0)
si['mail_transactions'] = np.where(si['external'] == 1, si['num_applications_by_mail'], 0)


# Select the required columns from the si DataFrame
External_transactions = si[['fiscal_yr', 'online_transactions', 'phone_transactions', 'in_person_transactions', 'mail_transactions']]
print(External_transactions)

# code to check the external trabsactions table is valid, summing the online transactions for 2023-2024 (uncomment to run)
# Sum 'online_transactions' where 'fiscal_yr' is 2023-2024
# online_transactions_sum = External_transactions[External_transactions['fiscal_yr'] == '2023-2024']['online_transactions'].sum()

# Print the result
# print("Total Online Transactions for fiscal year 2023-2024:", online_transactions_sum)


                          fiscal_yr  online_transactions  phone_transactions  \
0                         2018-2019                  0.0                 0.0   
1                         2022-2023             276390.0           7252346.0   
2                         2018-2019                  0.0           2405999.0   
3                         2019-2020              10287.0           6285124.0   
4                         2020-2021              43052.0           7205180.0   
...                             ...                  ...                 ...   
8929                      2023-2024                  0.0              1320.0   
8930                      2023-2024                770.0                 0.0   
8931                      2023-2024                  0.0                 0.0   
8932                      2023-2024                230.0                 0.0   
8933  Timestamp:2025-04-01_14:23:25                  0.0                 0.0   

      in_person_transactions  mail_tran

In [11]:
# METRIC 14-15 EXTERNAL PERCENTAGES
# services online end to end
import numpy as np  
si['onlineE2E'] = pd.to_numeric(si['onlineE2E'], errors='coerce')
si['onl_morepoints'] = pd.to_numeric(si['onl_morepoints'], errors='coerce')

# Create 'online' column: reports 1 if 'external' == 1 and 'onlineE2E' == 1, else 0
si['onlinee2e'] = np.where((si['external'] == 1) & (si['onlineE2E'] == 1), 1, 0)

#services with at least one point online
# Create 'one_point_onl' column: reports 1 if 'external' == 1 and 'onl_morepoints' == 1, else 0
si['one_point_onl'] = np.where((si['external'] == 1) & (si['onl_morepoints'] == 1), 1, 0)

# Create a new column 'all_external' that reports 1 if 'external' == 1, else 0
si['all_external'] = np.where(si['external'] == 1, 1, 0)

# Group by fiscal year and calculate sums
online_services = si.groupby('fiscal_yr')[['onlinee2e', 'one_point_onl', 'all_external']].sum().reset_index()

# Calculate percentages
online_services['pct_onlinee2e'] = (online_services['onlinee2e'] / online_services['all_external']) * 100
online_services['pct_one_point_onl'] = (online_services['one_point_onl'] / online_services['all_external']) * 100

# code to check that it is accurate sums the number of online points and online one or more points for 2023-24. (uncomment to run)
# Sum the 'online' column for fiscal_yr '2023-2024'
# online_sum = si[si['fiscal_yr'] == '2023-2024']['online'].sum()

# Sum the 'one_point_onl' column for fiscal_yr '2023-2024'
# one_point_onl_sum = si[si['fiscal_yr'] == '2023-2024']['one_point_onl'].sum()

# Sum the 'all_external' column for fiscal_yr '2023-2024'
# all_external_sum_2023_2024 = si[si['fiscal_yr'] == '2023-2024']['all_external'].sum()

# Print the results
# print(f"Sum of 'online' for fiscal year 2023-2024: {online_sum}")
# print(f"Sum of 'one_point_onl' for fiscal year 2023-2024: {one_point_onl_sum}")
# print(f"Sum of 'all_external' for fiscal year 2023-2024: {all_external_sum_2023_2024}")

# Display the new table
print(online_services)

                       fiscal_yr  onlinee2e  one_point_onl  all_external  \
0                      2018-2019        212            540          1132   
1                      2019-2020        217            567          1181   
2                      2020-2021        296            645          1304   
3                      2021-2022        319            682          1482   
4                      2022-2023        344            697          1560   
5                      2023-2024        375            712          1559   
6                      2024-2025          0              0             1   
7  Timestamp:2025-04-01_14:23:25          0              0             0   

   pct_onlinee2e  pct_one_point_onl  
0      18.727915          47.703180  
1      18.374259          48.010161  
2      22.699387          49.463190  
3      21.524966          46.018893  
4      22.051282          44.679487  
5      24.053881          45.670301  
6       0.000000           0.000000  
7          

In [12]:
# METRIC 16 EXTERNAL SERVICES MEETING TARGETS
# Standards Meeting Targets
# create new column for standards meeting target
ss['standards_meeting_target'] = np.where((ss['target_met'] == 'Y') & (ss['external_service'] == 1), 1, 0)

# total standards
# create new column for total standards
ss['total_standards'] = np.where((ss['target_met'].notna()) & (ss['external_service'] == 1), 1, 0)

# Group by fiscal year and calculate the sum for each metric
standards_meeting_targets_table = ss.groupby('fiscal_yr')[['standards_meeting_target', 'total_standards']].sum().reset_index()

# Calculate the percentage of standards meeting the target
standards_meeting_targets_table['pct_standards_meeting_target'] = (standards_meeting_targets_table['standards_meeting_target'] / standards_meeting_targets_table['total_standards']) * 100

# Display the resulting table
print(standards_meeting_targets_table)

# code to check that percentage of standards meeting target can be derived from table (uncomment to run)
# Sum the 'standards_meeting_target' column where 'fiscal_yr' is 2023-2024
# standards_meeting_target_sum = standards_meeting_targets_table[standards_meeting_targets_table['fiscal_yr'] == '2023-2024']['standards_meeting_target'].sum()

# Sum the 'total_standards' column where 'fiscal_yr' is 2023-2024
# total_standards_sum = standards_meeting_targets_table[standards_meeting_targets_table['fiscal_yr'] == '2023-2024']['total_standards'].sum()

# Print the results
# print("Total Standards Meeting Target for fiscal year 2023-2024:", standards_meeting_target_sum)
# print("Total Standards for fiscal year 2023-2024:", total_standards_sum)


                       fiscal_yr  standards_meeting_target  total_standards  \
0                      2018-2019                       832             1210   
1                      2019-2020                       946             1319   
2                      2020-2021                      1236             1713   
3                      2021-2022                      1178             1747   
4                      2022-2023                      1315             1909   
5                      2023-2024                      1228             1674   
6  Timestamp:2025-04-01_14:23:25                         0                0   

   pct_standards_meeting_target  
0                     68.760331  
1                     71.721001  
2                     72.154116  
3                     67.429880  
4                     68.884233  
5                     73.357228  
6                           NaN  


In [13]:
# METRIC 17-18 EXTERNAL HIGH VOLUME PERCENTAGES

# Create 'highvol_onlinee2e': 1 if external == 1, onlineE2E == 1, and highvolume == 1
si['highvol_onlinee2e'] = np.where(
    (si['external'] == 1) & (si['onlineE2E'] == 1) & (si['highvolume'] == 1), 
    1, 
    0
)

# Create 'highvol_one_point_onl': 1 if external == 1, onl_morepoints == 1, and highvolume == 1
si['highvol_one_point_onl'] = np.where(
    (si['external'] == 1) & (si['onl_morepoints'] == 1) & (si['highvolume'] == 1), 
    1, 
    0
)

# Create 'highvol_all_external': 1 if external == 1 and highvolume == 1
si['highvol_all_external'] = np.where(
    (si['external'] == 1) & (si['highvolume'] == 1), 
    1, 
    0
)

# Group by fiscal year and calculate sums
highvol_online_services = si.groupby('fiscal_yr')[['highvol_onlinee2e', 'highvol_one_point_onl', 'highvol_all_external']].sum().reset_index()

# Calculate percentages
highvol_online_services['pct_highvol_onlinee2e'] = (highvol_online_services['highvol_onlinee2e'] / highvol_online_services['highvol_all_external']) * 100
highvol_online_services['pct_highvol_one_point_onl'] = (highvol_online_services['highvol_one_point_onl'] / highvol_online_services['highvol_all_external']) * 100

print(highvol_online_services)



                       fiscal_yr  highvol_onlinee2e  highvol_one_point_onl  \
0                      2018-2019                 32                     84   
1                      2019-2020                 44                     96   
2                      2020-2021                 59                     95   
3                      2021-2022                 63                    106   
4                      2022-2023                 50                     89   
5                      2023-2024                 55                     88   
6                      2024-2025                  0                      0   
7  Timestamp:2025-04-01_14:23:25                  0                      0   

   highvol_all_external  pct_highvol_onlinee2e  pct_highvol_one_point_onl  
0                   117              27.350427                  71.794872  
1                   126              34.920635                  76.190476  
2                   123              47.967480                  77.23

In [14]:
# METRIC 19 EXTERNAL HIGH VOLUME SERVICES MEETING SERVICE STANDARDS


# Create new column for external standards meeting target with highvolume == 1
ss['highvol_standards_meeting_target'] = np.where(
    (ss['target_met'] == 'Y') & (ss['external_service'] == 1) & (ss['highvolume'] == 1), 
    1, 
    0
)

# Create new column for total standards with highvolume == 1
ss['total_highvol_standards'] = np.where(
    (ss['target_met'].notna()) & (ss['external_service'] == 1) & (ss['highvolume'] == 1), 
    1, 
    0
)

# Group by fiscal year and calculate the sum for each metric
highvol_standards_meeting_targets_table = ss.groupby('fiscal_yr')[['highvol_standards_meeting_target', 'total_highvol_standards']].sum().reset_index()

# Calculate the percentage of standards meeting the target
highvol_standards_meeting_targets_table['pct_highvol_standards_meeting_target'] = (
    highvol_standards_meeting_targets_table['highvol_standards_meeting_target'] / highvol_standards_meeting_targets_table['total_highvol_standards']
) * 100

# Display the resulting table
print(highvol_standards_meeting_targets_table)


                       fiscal_yr  highvol_standards_meeting_target  \
0                      2018-2019                               101   
1                      2019-2020                               142   
2                      2020-2021                               143   
3                      2021-2022                               105   
4                      2022-2023                               189   
5                      2023-2024                               194   
6  Timestamp:2025-04-01_14:23:25                                 0   

   total_highvol_standards  pct_highvol_standards_meeting_target  
0                      159                             63.522013  
1                      194                             73.195876  
2                      276                             51.811594  
3                      218                             48.165138  
4                      283                             66.784452  
5                      288           