In [1]:
import pandas as pd
import numpy as np
import re, pytz, os, requests, sys
from pathlib import Path
from datetime import datetime
import sys
sys.path.append("/workspaces/service-data")

from src.clean import clean_percentage, clean_fiscal_yr, normalize_string, standardize_column_names
from src.load import load_csv
from src.export import export_to_csv
from src.merge import merge_si, merge_ss

base_dir = Path.cwd()
parent_dir = base_dir.parent

In [2]:
# Both release files are parsed with the same options:
#   - only empty fields are read as NaN (keep_default_na=False, na_values=''),
#     so literal strings such as 'NA' stay as text
#   - fields are separated by ';'
#   - the last two lines of each file are skipped (skipfooter requires the python engine)
# NOTE(review): the URLs combine "releases/latest" with a dated file name; confirm the
# latest release still publishes files under the 2025-03-01 prefix.
csv_options = dict(
    keep_default_na=False,
    na_values='',
    delimiter=';',
    engine='python',
    skipfooter=2,
)

# Service inventory
si = pd.read_csv(
    "https://github.com/gcperformance/service-data/releases/latest/download/2025-03-01_si.csv",
    **csv_options
)

# Service standards
ss = pd.read_csv(
    "https://github.com/gcperformance/service-data/releases/latest/download/2025-03-01_ss.csv",
    **csv_options
)

In [4]:
# Display the service inventory frame (pandas truncates the rendered rows and columns)
si

Unnamed: 0,fiscal_yr,org_id,service_id,service_name_en,service_name_fr,service_description_en,service_description_fr,service_type,service_recipient_type,service_scope,...,service_uri_fr,num_applications_total,org_name_variant,department_en,department_fr,program_id,automated_decision_system_description_fr,automated_decision_system,automated_decision_system_description_en,fy_org_id_service_id
0,2018-2019,129,1000,Reconciliation,Réconciliation,Advancing the whole of Government reconciliati...,Promouvoir une approche pangouvernementale rel...,INFO,CLIENT,"ENTERPRISE, EXTERN",...,https://www.rcaanc-cirnac.gc.ca/fra/1499711968...,0.0,Crown-Indigenous Relations and Northern Affair...,Crown-Indigenous Relations and Northern Affair...,Relations Couronne-Autochtones et Affaires du ...,BWM06,,,,2018-2019_129_1000
1,2022-2023,128,1001,Old Age Security (OAS) Benefits,Prestations de la Sécurité de la vieillesse,The Old Age Security (OAS) pension is a monthl...,La pension de la Sécurité de la vieillesse (SV...,RES,CLIENT,EXTERN,...,https://www.canada.ca/fr/services/prestations/...,3286418.0,Employment and Social Development Canada,Employment and Social Development Canada,Emploi et Développement social Canada,BGN01,,,,2022-2023_128_1001
2,2018-2019,128,1001,Old Age Security (OAS) Benefits,Prestations de la Sécurité de la vieillesse,The Old Age Security (OAS) pension is a monthl...,La pension de la Sécurité de la vieillesse (SV...,RES,CLIENT,EXTERN,...,https://www.canada.ca/fr/services/prestations/...,2948447.0,Employment and Social Development Canada,Employment and Social Development Canada,Emploi et Développement social Canada,BGN01,,,,2018-2019_128_1001
3,2019-2020,128,1001,Old Age Security (OAS) Benefits,Prestations de la Sécurité de la vieillesse,The Old Age Security (OAS) pension is a monthl...,La pension de la Sécurité de la vieillesse (SV...,RES,CLIENT,EXTERN,...,https://www.canada.ca/fr/services/prestations/...,3154536.0,Employment and Social Development Canada,Employment and Social Development Canada,Emploi et Développement social Canada,BGN01,,,,2019-2020_128_1001
4,2020-2021,128,1001,Old Age Security (OAS) Benefits,Prestations de la Sécurité de la vieillesse,The Old Age Security (OAS) pension is a monthl...,La pension de la Sécurité de la vieillesse (SV...,RES,CLIENT,EXTERN,...,https://www.canada.ca/fr/services/prestations/...,3217790.0,Employment and Social Development Canada,Employment and Social Development Canada,Emploi et Développement social Canada,BGN01,,,,2020-2021_128_1001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8929,2023-2024,139,1273,War Veterans Allowance,Allocation aux anciens combattants,The War Veterans Allowance (WVA) is in recogni...,L’allocation aux anciens combattants (AAC) est...,RES,CLIENT,EXTERN,...,https://www.veterans.gc.ca/fra/financial-suppo...,174.0,vac-acc,Veterans Affairs Canada,Anciens Combattants Canada,BWI11,S.O.,N,,2023-2024_139_1273
8930,2023-2024,333,53,Review and Appeal hearings,Audiences de révision et d'appel,The independent avenue of appeal for disabilit...,Voie d’appel indépendante à l’égard des décisi...,RES,CLIENT,EXTERN,...,https://www.vrab-tacra.gc.ca/fr,0.0,vrab-tacra,Veterans Review and Appeal Board,Tribunal des anciens combattants (révision et ...,BWL01,,N,,2023-2024_333_53
8931,2023-2024,246,2104,Women's Program,Programme de promotion de la femme,The purpose of the Women’s Program is to advan...,Le Programme de promotion de la femme vise à f...,GNC,SOCIETY,EXTERN,...,https://www.canada.ca/fr/femmes-egalite-genres...,772.0,wage,Women and Gender Equality Canada,Femmes et Égalité des genres Canada,BXR02,,N,,2023-2024_246_2104
8932,2023-2024,246,2105,Gender-Based Violence Program,Programme de financement de la lutte contre la...,The Gender-Based Violence (GBV) Program takes ...,Les mesures du Programme de financement de la ...,GNC,SOCIETY,EXTERN,...,https://www.canada.ca/fr/femmes-egalite-genres...,1.0,wage,Women and Gender Equality Canada,Femmes et Égalité des genres Canada,BXR02,,N,,2023-2024_246_2105


In [3]:
# === DATA PACK ===
# Shared constants and working copies used by the data pack metrics

# Work on copies so the frames loaded from the release (si, ss) stay untouched
si_dp, ss_dp = si.copy(), ss.copy()

# Minimum num_transactions_total for a service row to be flagged as high-volume
HIGH_VOLUME_THRESHOLD = 45_000

# Online interaction point (OIP) status columns
OIP_COLS = [
    'os_account_registration', 'os_authentication',
    'os_application', 'os_decision',
    'os_issuance', 'os_issue_resolution_feedback',
]

# Volume columns that get converted to numeric: applications per channel,
# the applications total, and phone enquiries
APP_COLS = [
    'num_applications_by_phone', 'num_applications_online',
    'num_applications_in_person', 'num_applications_by_mail',
    'num_applications_by_email', 'num_applications_by_fax',
    'num_applications_by_other', 'num_applications_total',
    'num_phone_enquiries',
]

# Create a new column to identify 'omnichannel' services
# Omnichannel = phone, online, and in-person applications are applicable
# Note that in previous versions calculated in excel sheets the omnichannel boolean
# was mistakenly ignoring phone channels. This adjustment ('or' on phones, 'and' with online & in person)
# was defined in May 2025.
def is_filled(col):
    """Return a boolean Series marking the entries of `col` that hold a reported value.

    An entry counts as *not* filled when it is missing (NaN/None), blank after
    stripping whitespace, or one of the placeholders 'NA' / 'ND'. The
    placeholder comparison is done on the stripped text, so padded values such
    as ' NA ' are treated the same as 'NA'.

    Parameters
    ----------
    col : pandas.Series
        Column to inspect; values are compared by their string representation.

    Returns
    -------
    pandas.Series
        Boolean mask aligned with `col` (True = a value was reported).
    """
    # Normalize once so the blank check and the placeholder check agree
    text = col.astype(str).str.strip()
    return col.notna() & ~text.isin(['', 'NA', 'ND'])

# Omnichannel = a phone channel (enquiries OR phone applications) AND online
# applications AND in-person applications all carry a reported value.
# This has to run before the numeric conversion below, which replaces every
# unreported volume with 0.
si_dp['omnichannel'] = (
    (is_filled(si_dp['num_phone_enquiries']) | is_filled(si_dp['num_applications_by_phone'])) &
    is_filled(si_dp['num_applications_online']) &
    is_filled(si_dp['num_applications_in_person'])
)

# Convert all application volume columns to numeric; values that cannot be
# parsed (including placeholders and missing values) become 0
si_dp[APP_COLS] = si_dp[APP_COLS].apply(pd.to_numeric, errors='coerce').fillna(0)

# Phone volume = phone enquiries + phone applications
si_dp['num_phone_apps_enquiries'] = si_dp['num_phone_enquiries'] + si_dp['num_applications_by_phone']

# Total transactions = total applications + phone enquiries
si_dp['num_transactions_total'] = si_dp['num_applications_total'] + si_dp['num_phone_enquiries']

# External services: service_scope mentions 'EXTERN' (missing scope counts as not external)
si_dp['external'] = si_dp['service_scope'].str.contains('EXTERN', na=False)

# High-volume services: total transactions at or above the threshold
si_dp['highvolume'] = si_dp['num_transactions_total'] >= HIGH_VOLUME_THRESHOLD

# Count online interaction point statuses per service.
# Vectorized comparison + row sum instead of a Python-level lambda per row.
oip_status = si_dp[OIP_COLS]
si_dp['online_enabled_Y'] = oip_status.eq('Y').sum(axis=1)
si_dp['online_enabled_N'] = oip_status.eq('N').sum(axis=1)
si_dp['online_enabled_NA'] = oip_status.eq('NA').sum(axis=1)

# Total number of interaction points tracked per service
TOTAL_POINTS = len(OIP_COLS)

# Online end-to-end: every interaction point is either 'Y' or 'NA',
# and at least one of them is 'Y'
si_dp['online_e2e'] = (
    (si_dp['online_enabled_Y'] + si_dp['online_enabled_NA'] == TOTAL_POINTS) &
    (si_dp['online_enabled_Y'] > 0)
)

# At least one interaction point is online-enabled
si_dp['min_one_point_online'] = si_dp['online_enabled_Y'] > 0

# Carry the external / high-volume flags over to the service standards
ss_dp = ss_dp.merge(
    si_dp[['fy_org_id_service_id', 'external', 'highvolume']],
    how='left',
    on='fy_org_id_service_id'
)

# Standards with no matching service get NaN flags from the left join, which
# leaves the columns as object dtype. Convert through the nullable boolean
# dtype so the result is always a plain bool column, without relying on the
# deprecated implicit downcasting of fillna on object columns.
ss_dp['external'] = ss_dp['external'].astype('boolean').fillna(False).astype(bool)
ss_dp['highvolume'] = ss_dp['highvolume'].astype('boolean').fillna(False).astype(bool)

# Standards without a reported result are labelled 'NA'
ss_dp['target_met'] = ss_dp['target_met'].fillna('NA')

# === DATA PACK METRICS 2, 3a, 3b, 3c ===
# Yearly totals over every row of si_dp (no external/omnichannel filter is applied here)
# 2: Total number of transactions
dp_metrics = (
    si_dp
    .groupby('fiscal_yr')
    .agg(
        total_services=('fy_org_id_service_id', 'nunique'),
        num_transactions_total=('num_transactions_total', 'sum'),
        num_applications_online=('num_applications_online', 'sum'),
        num_phone_apps_enquiries=('num_phone_apps_enquiries', 'sum'),
        num_applications_in_person=('num_applications_in_person', 'sum'),
    )
    .reset_index()
)

# 3: Each channel's share of the yearly transaction total
total_volume = dp_metrics['num_transactions_total']
dp_metrics['online_percentage'] = dp_metrics['num_applications_online'].div(total_volume)
dp_metrics['phone_percentage'] = dp_metrics['num_phone_apps_enquiries'].div(total_volume)
dp_metrics['in-person_percentage'] = dp_metrics['num_applications_in_person'].div(total_volume)


# === DATA PACK METRICS 4, 5a, 5b, 5c: omnichannel services ===
# 4: Share of services that are omnichannel
# 5: Shares by channel for omnichannel services

# Restrict to services flagged as omnichannel, then total them per fiscal year
dp_filtered = si_dp.loc[si_dp['omnichannel']]
omni_totals = (
    dp_filtered
    .groupby('fiscal_yr')
    .agg(
        total_services=('fy_org_id_service_id', 'nunique'),
        num_transactions_total=('num_transactions_total', 'sum'),
        num_applications_online=('num_applications_online', 'sum'),
        num_phone_apps_enquiries=('num_phone_apps_enquiries', 'sum'),
        num_applications_in_person=('num_applications_in_person', 'sum'),
    )
    .reset_index()
)

# Every omnichannel column collides with an existing one, so each gets the '_omni' suffix
dp_metrics = dp_metrics.merge(omni_totals, on='fiscal_yr', how='left', suffixes=('', '_omni'))

# 4: omnichannel services as a share of all services
dp_metrics['omni_service_percentage'] = dp_metrics['total_services_omni'].div(dp_metrics['total_services'])

# 5: channel shares within omnichannel services
omni_volume = dp_metrics['num_transactions_total_omni']
dp_metrics['omni_online_percentage'] = dp_metrics['num_applications_online_omni'].div(omni_volume)
dp_metrics['omni_phone_percentage'] = dp_metrics['num_phone_apps_enquiries_omni'].div(omni_volume)
dp_metrics['omni_in-person_percentage'] = dp_metrics['num_applications_in_person_omni'].div(omni_volume)


# === DATA PACK METRIC 6, 8, 10, 11, 12, 13: external services ===
# 6: Number of departments delivering external services
# 8: Number of external services
# 10: Total online transactions for external services
# 11: Total phone transactions for external services
# 12: Total in person transactions for external services
# 13: Total mail transactions for external services

# Services excluded by convention or decision:
# 669: Traveller / Highway traveller processing - decision prior to our arrival
# 1677: The Canadian Astronomy Data Centre (CADC) - too many online apps
EXCLUDED_SERVICES_ID = ['669', '1677']

# Yearly totals over all external services
is_external = si_dp['external']
dp_filtered = si_dp.loc[is_external]
ext_totals = (
    dp_filtered
    .groupby('fiscal_yr')
    .agg(
        total_orgs_ext=('org_id', 'nunique'),
        total_services=('fy_org_id_service_id', 'nunique'),
        num_applications_online=('num_applications_online', 'sum'),
        num_applications_in_person=('num_applications_in_person', 'sum'),
        num_phone_apps_enquiries=('num_phone_apps_enquiries', 'sum'),
        num_applications_by_mail=('num_applications_by_mail', 'sum'),
    )
    .reset_index()
)

# NOTE(review): only colliding columns receive the '_ext' suffix. dp_metrics has no
# 'num_applications_by_mail' column at this point, so the external mail total is
# stored under the unsuffixed name 'num_applications_by_mail'.
dp_metrics = dp_metrics.merge(ext_totals, on='fiscal_yr', how='left', suffixes=('', '_ext'))

# Online and in-person totals again, this time leaving out the excluded services
# (both excluded service ids are dropped from both sums)
is_excluded = si_dp['service_id'].isin(EXCLUDED_SERVICES_ID)
dp_filtered = si_dp.loc[is_external & ~is_excluded]
ext_totals_excl = (
    dp_filtered
    .groupby('fiscal_yr')
    .agg(
        num_applications_online=('num_applications_online', 'sum'),
        num_applications_in_person=('num_applications_in_person', 'sum'),
    )
    .reset_index()
)
dp_metrics = dp_metrics.merge(
    ext_totals_excl,
    on='fiscal_yr',
    how='left',
    suffixes=('', '_ext_excl_services')
)

# === DATA PACK METRIC 7 ===
# 7: External programs
# Number of distinct program ids attached to external services, per fiscal year.
dp_filtered = si_dp.loc[si_dp['external'], ['service_id', 'fiscal_yr', 'program_id', 'org_id']].copy()

# Drop services without a program id BEFORE casting to str: astype(str) turns a
# missing value into the literal text 'nan', which would then survive any later
# notna() filter and be counted as a program of its own.
dp_programs = dp_filtered.dropna(subset=['program_id']).copy()

# program_id may hold several comma-separated ids: one row per id
dp_programs['program_id'] = dp_programs['program_id'].astype(str).str.split(',')
dp_programs = dp_programs.explode('program_id')
dp_programs['program_id'] = dp_programs['program_id'].str.strip()

# Discard empty fragments left by stray or trailing commas
dp_programs = dp_programs[dp_programs['program_id'] != '']

dp_filtered = dp_programs
dp_metrics = dp_metrics.merge(
    dp_filtered.groupby('fiscal_yr').agg({
        'program_id': 'nunique'
    }).reset_index().rename(columns={'program_id': 'total_programs_ext'}),
    on='fiscal_yr',
    how='left'
)


# === DATA PACK METRIC 14 ===
# 14: Share of external services that are online end-to-end
dp_filtered = si_dp.loc[si_dp['external'] & si_dp['online_e2e']]
ext_online_counts = (
    dp_filtered
    .groupby('fiscal_yr')['fy_org_id_service_id']
    .nunique()
    .rename('total_services')
    .reset_index()
)
# 'total_services' already exists in dp_metrics, so this count lands in 'total_services_ext_online'
dp_metrics = dp_metrics.merge(
    ext_online_counts,
    on='fiscal_yr',
    how='left',
    suffixes=('', '_ext_online')
)

dp_metrics['ext_online_service_percentage'] = (
    dp_metrics['total_services_ext_online'].div(dp_metrics['total_services_ext'])
)


# === DATA PACK METRIC 15 ===
# 15: Share of external services that have at least one online interaction point activated
dp_filtered = si_dp.loc[si_dp['external'] & si_dp['min_one_point_online']]
ext_1oip_counts = (
    dp_filtered
    .groupby('fiscal_yr')['fy_org_id_service_id']
    .nunique()
    .rename('total_services')
    .reset_index()
)
# Same collision rule: the count lands in 'total_services_ext_1oip'
dp_metrics = dp_metrics.merge(
    ext_1oip_counts,
    on='fiscal_yr',
    how='left',
    suffixes=('', '_ext_1oip')
)

dp_metrics['ext_1oip_service_percentage'] = (
    dp_metrics['total_services_ext_1oip'].div(dp_metrics['total_services_ext'])
)


# === DATA PACK METRIC 16 ===
# 16: Share of external service standards meeting target

# Denominator: external standards whose result is either 'Y' or 'N'
dp_filtered = ss_dp.loc[ss_dp['external'] & ss_dp['target_met'].isin(['Y', 'N'])]
ext_standards = (
    dp_filtered
    .groupby('fiscal_yr')['fy_org_id_service_id_std_id']
    .nunique()
    .rename('total_standards_ext')
    .reset_index()
)
dp_metrics = dp_metrics.merge(ext_standards, on='fiscal_yr', how='left')

# Numerator: external standards whose target was met ('Y')
dp_filtered = ss_dp.loc[ss_dp['external'] & ss_dp['target_met'].eq('Y')]
ext_standards_met = (
    dp_filtered
    .groupby('fiscal_yr')['fy_org_id_service_id_std_id']
    .nunique()
    .rename('total_standards_met_ext')
    .reset_index()
)
dp_metrics = dp_metrics.merge(ext_standards_met, on='fiscal_yr', how='left')

dp_metrics['ext_standard_met_percentage'] = (
    dp_metrics['total_standards_met_ext'].div(dp_metrics['total_standards_ext'])
)


# === DATA PACK METRIC 17 ===
# 17: Share of external, high volume services that are online end-to-end
is_ext_hv = si_dp['external'] & si_dp['highvolume']

# Denominator: all external high-volume services -> 'total_services_ext_hv'
dp_filtered = si_dp.loc[is_ext_hv]
ext_hv_counts = (
    dp_filtered
    .groupby('fiscal_yr')['fy_org_id_service_id']
    .nunique()
    .rename('total_services')
    .reset_index()
)
dp_metrics = dp_metrics.merge(
    ext_hv_counts,
    on='fiscal_yr',
    how='left',
    suffixes=('', '_ext_hv')
)

# Numerator: those that are also online end-to-end -> 'total_services_ext_hv_online'
dp_filtered = si_dp.loc[is_ext_hv & si_dp['online_e2e']]
ext_hv_online_counts = (
    dp_filtered
    .groupby('fiscal_yr')['fy_org_id_service_id']
    .nunique()
    .rename('total_services')
    .reset_index()
)
dp_metrics = dp_metrics.merge(
    ext_hv_online_counts,
    on='fiscal_yr',
    how='left',
    suffixes=('', '_ext_hv_online')
)

dp_metrics['ext_hv_online_service_percentage'] = (
    dp_metrics['total_services_ext_hv_online'].div(dp_metrics['total_services_ext_hv'])
)


# === DATA PACK METRIC 18 ===
# 18: Share of external, high volume services that have at least one online interaction point activated
dp_filtered = si_dp.loc[
    si_dp['external'] & si_dp['highvolume'] & si_dp['min_one_point_online']
]
ext_hv_1oip_counts = (
    dp_filtered
    .groupby('fiscal_yr')['fy_org_id_service_id']
    .nunique()
    .rename('total_services')
    .reset_index()
)
# 'total_services' collides with an existing column, so the count lands in 'total_services_ext_hv_1oip'
dp_metrics = dp_metrics.merge(
    ext_hv_1oip_counts,
    on='fiscal_yr',
    how='left',
    suffixes=('', '_ext_hv_1oip')
)

dp_metrics['ext_hv_1oip_service_percentage'] = (
    dp_metrics['total_services_ext_hv_1oip'].div(dp_metrics['total_services_ext_hv'])
)


# === DATA PACK METRIC 19 ===
# 19: Share of external high-volume service standards meeting target
is_ext_hv_standard = ss_dp['external'] & ss_dp['highvolume']

# Denominator: external high-volume standards whose result is either 'Y' or 'N'
dp_filtered = ss_dp.loc[is_ext_hv_standard & ss_dp['target_met'].isin(['Y', 'N'])]
ext_hv_standards = (
    dp_filtered
    .groupby('fiscal_yr')['fy_org_id_service_id_std_id']
    .nunique()
    .rename('total_standards_ext_hv')
    .reset_index()
)
dp_metrics = dp_metrics.merge(ext_hv_standards, on='fiscal_yr', how='left')

# Numerator: external high-volume standards whose target was met ('Y')
dp_filtered = ss_dp.loc[is_ext_hv_standard & ss_dp['target_met'].eq('Y')]
ext_hv_standards_met = (
    dp_filtered
    .groupby('fiscal_yr')['fy_org_id_service_id_std_id']
    .nunique()
    .rename('total_standards_met_ext_hv')
    .reset_index()
)
dp_metrics = dp_metrics.merge(ext_hv_standards_met, on='fiscal_yr', how='left')

dp_metrics['ext_hv_standard_met_percentage'] = (
    dp_metrics['total_standards_met_ext_hv'].div(dp_metrics['total_standards_ext_hv'])
)


In [4]:
# Columns kept in the ranking table
RANK_COLS = [
    'fy_org_id_service_id',
    'fiscal_yr',
    'service_name_en', 'service_name_fr',
    'online_enabled_Y', 'online_enabled_N', 'online_enabled_NA',
    'num_applications_total',
    'num_applications_by_phone', 'num_applications_online',
    'num_applications_in_person', 'num_applications_by_mail',
    'num_applications_by_email', 'num_applications_by_fax',
    'num_applications_by_other',
]

# Service ids left out of the ranking.
# NOTE(review): this rebinds EXCLUDED_SERVICES_ID, which the data pack metrics cell
# defines with a shorter list; a distinct name would avoid the shadowing.
EXCLUDED_SERVICES_ID = ['1111', '1108', '3728', '669', '1677', '1112']

# Rankable rows: external services with a positive application total that are not excluded
is_rankable = (
    si_dp['external']
    & si_dp['num_applications_total'].gt(0)
    & ~si_dp['service_id'].isin(EXCLUDED_SERVICES_ID)
)
dp_services_rank = si_dp.loc[is_rankable, RANK_COLS].copy()

# Rank services within each fiscal year by total applications (1 = largest).
# Dense ranking gives tied services the same rank, so a year can yield more than 20 rows.
yearly_volume = dp_services_rank.groupby('fiscal_yr')['num_applications_total']
dp_services_rank['num_applications_rank'] = yearly_volume.rank(method='dense', ascending=False)

# Keep ranks 1 through 20 for every fiscal year
dp_services_rank = dp_services_rank.loc[dp_services_rank['num_applications_rank'].le(20)]

dp_services_rank

Unnamed: 0,fy_org_id_service_id,fiscal_yr,service_name_en,service_name_fr,online_enabled_Y,online_enabled_N,online_enabled_NA,num_applications_total,num_applications_by_phone,num_applications_online,num_applications_in_person,num_applications_by_mail,num_applications_by_email,num_applications_by_fax,num_applications_by_other,num_applications_rank
1,2022-2023_128_1001,2022-2023,Old Age Security (OAS) Benefits,Prestations de la Sécurité de la vieillesse,6,0,0,3286418.0,0.0,276390.0,0.0,792026.0,0.0,0.0,2218002.0,9.0
2,2018-2019_128_1001,2018-2019,Old Age Security (OAS) Benefits,Prestations de la Sécurité de la vieillesse,4,2,0,2948447.0,0.0,0.0,0.0,716175.0,0.0,0.0,2232272.0,13.0
3,2019-2020_128_1001,2019-2020,Old Age Security (OAS) Benefits,Prestations de la Sécurité de la vieillesse,5,1,0,3154536.0,0.0,10287.0,0.0,857286.0,0.0,0.0,2286963.0,12.0
4,2020-2021_128_1001,2020-2021,Old Age Security (OAS) Benefits,Prestations de la Sécurité de la vieillesse,6,0,0,3217790.0,0.0,43052.0,0.0,957503.0,0.0,0.0,2217235.0,12.0
5,2021-2022_128_1001,2021-2022,Old Age Security (OAS) Benefits,Prestations de la Sécurité de la vieillesse,6,0,0,3280494.0,0.0,266468.0,0.0,803939.0,0.0,0.0,2210087.0,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8094,2023-2024_123_1428,2023-2024,Study Permit,Permis d'études,5,0,1,1056098.0,0.0,1042829.0,6286.0,4172.0,0.0,0.0,2811.0,20.0
8102,2023-2024_123_2238,2023-2024,Client Support Centre,Centre de soutien à la clientèle,1,0,5,4125157.0,2659147.0,0.0,0.0,0.0,1466010.0,0.0,0.0,7.0
8108,2023-2024_123_SRV03049,2023-2024,Passports & Travel Documents,Délivrance de passeports et de titres de voyage,1,4,1,5001781.0,0.0,0.0,3966065.0,1035716.0,0.0,0.0,0.0,5.0
8110,2023-2024_123_SRV03051,2023-2024,Work Permit,Permis de travail,6,0,0,1507933.0,0.0,1222800.0,250569.0,26252.0,0.0,0.0,8312.0,18.0


In [5]:
# Column names, dtypes and non-null counts of the raw service standards frame
ss.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12710 entries, 0 to 12709
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   fiscal_yr                    12710 non-null  object 
 1   org_id                       12710 non-null  int64  
 2   service_id                   12710 non-null  object 
 3   service_name_en              12710 non-null  object 
 4   service_name_fr              12710 non-null  object 
 5   service_standard_id          12710 non-null  object 
 6   service_standard_en          12702 non-null  object 
 7   service_standard_fr          12642 non-null  object 
 8   type                         12710 non-null  object 
 9   gcss_tool_fiscal_yr          5709 non-null   object 
 10  channel                      12710 non-null  object 
 11  channel_comments_en          4567 non-null   object 
 12  channel_comments_fr          4554 non-null   object 
 13  target_type     