In [2]:
# setting my si data frame
import pandas as pd
data = '/workspaces/service-data/outputs/si.csv'
si = pd.read_csv(data, sep=';')


In [3]:
from datetime import datetime
import pytz

# Specify the timezone
timezone = pytz.timezone("America/Montreal")

# Get the current date and time in the specified timezone
current_date = datetime.now(timezone)

# Format the current date and time into the desired string format
current_datestr = current_date.strftime("%Y-%m-%d_%H:%M:%S")

# Print the current date and time
print(f"current date: {current_datestr}")


current date: 2025-03-27_15:18:53


In [4]:
# want to view my column names
colnames = si.columns.tolist()
print(colnames)

['fiscal_yr', 'service_id', 'service_name_en', 'service_name_fr', 'service_description_en', 'service_description_fr', 'service_type', 'service_recipient_type', 'service_scope', 'client_target_groups', 'program_name_en', 'program_name_fr', 'client_feedback_channel', 'service_fee', 'last_GBA', 'ident_platform', 'ident_platform_comments', 'os_account_registration', 'os_authentication', 'os_application', 'os_decision', 'os_issuance', 'os_issue_resolution_feedback', 'os_comments_client_interaction_en', 'os_comments_client_interaction_fr', 'how_has_the_service_been_assessed_for_accessibility', 'last_service_review', 'last_service_improvement', 'sin_usage', 'cra_bn_identifier_usage', 'num_phone_enquiries', 'num_applications_by_phone', 'num_website_visits', 'num_applications_online', 'num_applications_in_person', 'num_applications_by_mail', 'num_applications_by_email', 'num_applications_by_fax', 'num_applications_by_other', 'special_remarks_en', 'special_remarks_fr', 'service_uri_en', 'service

In [5]:
# to add a new column for phone apps inquiries
# phone apps inquiries are num_phone_enquiries plus num_applications_by_phone
# based on error received 'can only concatenate str (not int) to string

# Convert the columns to numeric, forcing any non-numeric values to NaN
si['num_phone_enquiries'] = pd.to_numeric(si['num_phone_enquiries'], errors='coerce')
si['num_applications_by_phone'] = pd.to_numeric(si['num_applications_by_phone'], errors='coerce')

# Add the new column 'phone_apps_inquiries' by filling NaN values with 0
si['phone_apps_inquiries'] = si['num_phone_enquiries'].fillna(0) + si['num_applications_by_phone'].fillna(0)




In [6]:
# adding a new column for total transactions
# to avoid error, convert the columns to numeric

si['num_applications_by_email'] = pd.to_numeric(si['num_applications_by_email'], errors='coerce')
si['num_applications_by_fax'] = pd.to_numeric(si['num_applications_by_fax'], errors='coerce')
si['num_applications_by_mail'] = pd.to_numeric(si['num_applications_by_mail'], errors='coerce')
si['num_applications_by_other'] = pd.to_numeric(si['num_applications_by_other'], errors='coerce')
si['num_applications_in_person'] = pd.to_numeric(si['num_applications_in_person'], errors='coerce')
si['num_applications_online'] = pd.to_numeric(si['num_applications_online'], errors='coerce')

# now add all six columns including the column phone apps inquiries to get the total transactions
si['total_transactions'] = (
si['num_applications_by_email'].fillna(0) + 
si['num_applications_by_fax'].fillna(0) + 
si['num_applications_by_mail'].fillna(0) + 
si['num_applications_by_other'].fillna(0) + 
si['num_applications_in_person'].fillna(0) + 
si['num_applications_online'].fillna(0) + 
si['phone_apps_inquiries'].fillna(0)
)


In [7]:
# adding a new column for applications done by phone, online and in person only
si['apps_online_and_per'] = (
    si['num_applications_in_person'].fillna(0) + 
    si['num_applications_online'].fillna(0) + 
    si['phone_apps_inquiries'].fillna(0)
)

In [8]:
# adding a new column for omnichannels
si['omnichannel'] = si.apply(
    lambda row: 1 if pd.notna(row['phone_apps_inquiries']) and pd.notna(row['num_applications_online']) and pd.notna(row['num_applications_in_person']) else 0, 
    axis=1
)

In [9]:
# adding a new column for high volume services
si['highvolume'] = (si['total_transactions'] >= 45000).astype(int)

In [10]:
# adding a new column for online enabled Y
# creating columns to check which lists out the columns from os_account_registration to os_issue_resolution_feedback
columns_to_check = [ 'os_account_registration', 'os_authentication', 'os_application', 'os_decision', 'os_issuance', 'os_issue_resolution_feedback']

si['online_enabledY'] = si[columns_to_check].apply(lambda row: (row == 'Y').sum(), axis=1)


In [11]:
# adding column for online enabled N
si['online_enabledN'] = si[columns_to_check].apply(lambda row: (row == 'N').sum(), axis=1)

In [12]:
# adding column for online enabled NA
si['online_enabledNA'] = si[columns_to_check].isna().sum(axis=1)

In [13]:
# adding a new column for online end to end
si['onlineE2E'] = (
    si.apply(lambda row: "0" if row['online_enabledNA'] == 6 
    else "1" if row['online_enabledY'] + row['online_enabledNA'] == 6 
    else "0", axis=1)
    )

In [14]:
# adding a new column for online one or more points
si['onl_morepoints'] = si['online_enabledY'].apply(lambda x: '1' if x >= 1 else '0')

In [15]:
# importing the service standards data
ss_data = '/workspaces/service-data/outputs/ss.csv'
ss = pd.read_csv(ss_data, sep=';')

In [16]:
# to view the column names in the ss data frame
colnames_ss = ss.columns.tolist()
print(colnames_ss)

['fiscal_yr', 'service_id', 'service_name_en', 'service_name_fr', 'service_standard_id', 'service_standard_en', 'service_standard_fr', 'type', 'gcss_tool_fiscal_yr', 'channel', 'channel_comments_en', 'channel_comments_fr', 'target_type', 'target', 'volume_meeting_target', 'total_volume', 'performance', 'comments_en', 'comments_fr', 'target_met', 'standards_targets_uri_en', 'standards_targets_uri_fr', 'performance_results_uri_en', 'performance_results_uri_fr', 'org_name_variant', 'org_id', 'department_en', 'department_fr', 'fy_org_id_service_id']


In [17]:
# adding column for services with standards and standards met
# grouping and summarizing the 'ss' DataFrame
ss_count = ss.groupby(['service_id', 'fiscal_yr']).agg(
    standards_count=('service_id', 'size'),  # Count the occurrences
    standards_met=('target_met', lambda x: (x == 'Y').sum())  # Count where target_met is 'Y'
).reset_index()

# merging the 'ss_count' DataFrame with the 'si' DataFrame
si = si.merge(ss_count, on=['service_id', 'fiscal_yr'], how='left')

# replacing NaN values in 'standards_count' and 'standards_met' with 0
si['standards_count'] = si['standards_count'].fillna(0)
si['standards_met'] = si['standards_met'].fillna(0)


In [18]:
# adding a new column for services that met at least one standard
si['STDS_metsome'] = (si['standards_met'] >= 1).astype(int)

In [19]:
# creating the FYSID column by merging fiscal_yr and service_id columns
si['FYSID'] = si['fiscal_yr'].astype(str) + si['service_id'].astype(str)

In [20]:
# BEGIN THE DATA PACK METRICS
# filter data for fiscal year 2023-2024
sidata = si[si['fiscal_yr'] == '2023-2024']

In [21]:
# metric 2: total number of transactions for fiscal year 2023-2024
# the number is in millions
totaltransactions = sidata['total_transactions'].sum() / 1000000
print(totaltransactions)

475.890462


In [22]:
# count the number of transactions(services) in 2023-2024
count = sidata['total_transactions'].notna().sum()
print(count)

1681


In [23]:
# metric 3a: online as a share of total transactions
# total online transactions for 2023-2024 (replacing NaN values with 0)
online_transactions = sidata['num_applications_online'].fillna(0).sum()

# total transactions
transactionstotal = sidata['total_transactions'].sum()

# fraction of online as a share of total transactions
online_fraction = (online_transactions / transactionstotal) * 100
print(online_fraction)

56.10337868044937


In [24]:
# metric 3b: telephone as a share of total transactions
# total telephone transactions for 2023-2024 (replacing NaN values with 0)
telephone_transactions = sidata['phone_apps_inquiries'].fillna(0).sum()

# fraction of telephone as a share of total transactions
telephone_fraction = (telephone_transactions / transactionstotal) * 100
print(telephone_fraction)

15.06537968815185


In [25]:
# metric 3c: in-person as a share of total transactions
# total in-person transactions for 2023-2024 (replacing NaN values with 0)
in_person_transactions = sidata['num_applications_in_person'].fillna(0).sum()

#fraction of in-person as a share of total transactions
in_person_fraction = ( in_person_transactions / transactionstotal) * 100
print(in_person_fraction)

21.556712771436047


In [26]:
# metric 4: share of GC services wih omnichannel offerings
# count of the distinct service_id where omnichannel is 1
omni_count = sidata[sidata['omnichannel'] == 1]['service_id'].nunique()

# count of the distinct service_id in the entire dataset
total_count = sidata['service_id'].nunique()

# share of omnichannel as a percentage
share_omni = (omni_count / total_count) * 100
print(share_omni)

42.415229030339084


In [27]:
# metric 5a: online as a share of omnichannel usage
# total transactions where omnichannel is 1
totaltransactions = sidata[sidata['omnichannel'] == 1]['total_transactions'].sum()

# sum of online applications where omnichannel is 1
sum_onlineapps = sidata[sidata['omnichannel'] == 1]['num_applications_online'].sum()

# online as a share of omnichannel usage
share_online = (sum_onlineapps / totaltransactions) * 100
print(share_online)

74.16997154447247


In [28]:
# metric 5b: phone as a share of omnichannel usage
# sum of phone app inquiries where omnichannel is 1
sum_phone = sidata[sidata['omnichannel'] == 1]['phone_apps_inquiries'].sum()

# phone as a share of omnichannel usage
share_phone = (sum_phone / totaltransactions) * 100
print(share_phone)

14.159869295532912


In [29]:
# metric 5c: in-person as a share of omnichannel usage
sum_in_person = sidata[sidata['omnichannel'] == 1]['num_applications_in_person'].sum()

# in_person as a share of omnichannel usage
share_in_person = (sum_in_person / totaltransactions) * 100
print(share_in_person)

3.0216317825912165


In [30]:
# metric 6: number of departments
no_departments = sidata['department_en'].nunique()
print(no_departments)

78


In [31]:
# metric 7: number of programs
no_programs = sidata['program_name_en'].nunique()
print(no_programs)

494


In [32]:
# metric 8: number of services (data is already filtered for external)
no_external_service = sidata['service_id'].nunique()
print(no_external_service)

1681


In [33]:
# metric 9: number of high volume services
no_highvolume_services = sidata[sidata['highvolume'] == 1]['service_id'].nunique()
print(no_highvolume_services)

128


In [34]:
# metric 10: total online transactions (in millions)
total_online_transactions = sidata['num_applications_online'].sum() / 1000000
print(total_online_transactions)

266.990628


In [36]:
# metric 11: total phone transactions (in millions)
total_phone_transactions = sidata['phone_apps_inquiries'].sum() / 1000000
print(total_phone_transactions)

71.694705


In [37]:
# metric 12: total in person transactions (in millions)
in_person_applications = sidata['num_applications_in_person'].sum()
# look up canadian boarder services ending in 669 '2023-2024669'
lookup_value = "2023-2024669"
lookup_result = sidata[sidata['FYSID'] == lookup_value]['total_transactions'].iloc[0]

# subtract CBSA from in person applications
total_in_person_applications = (in_person_applications - lookup_result) / 1000000
print(total_in_person_applications)

13.432088


In [38]:
# metric 13: total mail applications ( in millions)
total_mail_applications = sidata['num_applications_by_mail'].sum() / 1000000
print(total_mail_applications)

21.405343


In [46]:
# metric 14: share of external services online end to end (data is already filtered for external)
# convert the 'onlineE2E' column to numeric values (in case it's stored as strings)
sidata['onlineE2E'] = pd.to_numeric(sidata['onlineE2E'], errors='coerce')

# distinct services online end to end
onl_E2E = sidata[(sidata['onlineE2E'] == 1)]['service_id'].nunique()

# distinct services where online enabled NA < 6
onl_enabledNA = sidata[(sidata['online_enabledNA'] < 6)]['service_id'].nunique()

# share of external services online end to end
share_ext_onlE2E = (onl_E2E / onl_enabledNA) * 100
print(share_ext_onlE2E)

33.41443633414436


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sidata['onlineE2E'] = pd.to_numeric(sidata['onlineE2E'], errors='coerce')


In [51]:
# metric 15: share of external serices that have at least one point online (data is already filtered for external services)

# convert the 'onl_morepoints' column to numeric values (in case it's stored as strings)
sidata['onl_morepoints'] = pd.to_numeric(sidata['onl_morepoints'], errors='coerce')

# services with atleast one point online
onl_onepoint = sidata[(sidata['onl_morepoints'] == 1)]['service_id'].nunique()

# all services
all_services = sidata['service_id'].nunique()

# share of services with one at least one point online
share_service = (onl_onepoint / all_services) * 100
print(share_service)


46.2819750148721


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sidata['onl_morepoints'] = pd.to_numeric(sidata['onl_morepoints'], errors='coerce')


In [52]:
# metric 16: services meeting service standards
# services that met some standards
ser_metsome = sidata[(sidata['STDS_metsome'] == 1)]['service_id'].nunique()

# share of services meeting service standards
ser_metstds = (ser_metsome / all_services) * 100
print(ser_metstds)

39.321832242712674


In [53]:
# metric 17: share of external high volume services online end to end (data is already filtered for external services)
# high volume services online end to end
highvol_E2E = sidata[(sidata['highvolume'] == 1) & 
(sidata['onlineE2E'] == 1)]['service_id'].nunique()

# high volume online enabled NA services < 6
highvol_enabledNA = sidata[(sidata['highvolume'] == 1) &
                           (sidata['online_enabledNA'] < 6)]['service_id'].nunique()

# share of highvolume services online end to end as a percentage 
high_vol_E2E = (highvol_E2E / highvol_enabledNA) * 100
print(high_vol_E2E)

57.009345794392516


In [None]:
# metric 18: share of external high volume services which have at least one point online (data is already filtered for external services)
# high volume services with at least one point online
highvol_1point_count = sidata[(sidata['highvolume'] == 1) & 
                              (sidata['onl_morepoints'] == 1)]['service_id'].nunique()
# high volume services
highvol_all_count = sidata[(sidata['highvolume'] == 1)]['service_id'].nunique()

# share as a percentage
share_highvol_1point = (highvol_1point_count / highvol_all_count) * 100
print(share_highvol_1point)

75.78125


In [55]:
# metric 19: high volume services meeting service standards
# high-volume services that meet service standards
high_vol_ser_metstds_count = sidata[(sidata['highvolume'] == 1) & 
                                    (sidata['STDS_metsome'] == 1)]['service_id'].nunique()

# share as a percentage
high_vol_ser_metstds = (high_vol_ser_metstds_count / highvol_all_count) * 100
print(high_vol_ser_metstds)

50.78125
