In [17]:
# setting my si data frame
import pandas as pd
data = '/workspaces/service-data/outputs/si.csv'
si = pd.read_csv(data, sep=';')
programid = '/workspaces/service-data/inputs/serv_prog.csv'
programs = pd.read_csv(programid, sep=',')
from datetime import datetime
import pytz
import numpy as np

# Specify the timezone
timezone = pytz.timezone("America/Montreal")

# Get the current date and time in the specified timezone
current_date = datetime.now(timezone)

# Format the current date and time into the desired string format
current_datestr = current_date.strftime("%Y-%m-%d_%H:%M:%S")

# Print the current date and time
print(f"current date: {current_datestr}")


current date: 2025-04-08_10:20:45


In [18]:
# to add a new column for phone apps inquiries
# phone apps inquiries are num_phone_enquiries plus num_applications_by_phone
# based on error received 'can only concatenate str (not int) to string

# Convert the columns to numeric, forcing any non-numeric values to NaN
si['num_phone_enquiries'] = pd.to_numeric(si['num_phone_enquiries'], errors='coerce')
si['num_applications_by_phone'] = pd.to_numeric(si['num_applications_by_phone'], errors='coerce')

# Add the new column 'phone_apps_inquiries' by filling NaN values with 0
si['phone_apps_inquiries'] = si['num_phone_enquiries'].fillna(0) + si['num_applications_by_phone'].fillna(0)

# adding a new column for total transactions
# to avoid error, convert the columns to numeric

si['num_applications_by_email'] = pd.to_numeric(si['num_applications_by_email'], errors='coerce')
si['num_applications_by_fax'] = pd.to_numeric(si['num_applications_by_fax'], errors='coerce')
si['num_applications_by_mail'] = pd.to_numeric(si['num_applications_by_mail'], errors='coerce')
si['num_applications_by_other'] = pd.to_numeric(si['num_applications_by_other'], errors='coerce')
si['num_applications_in_person'] = pd.to_numeric(si['num_applications_in_person'], errors='coerce')
si['num_applications_online'] = pd.to_numeric(si['num_applications_online'], errors='coerce')

# now add all six columns including the column phone apps inquiries to get the total transactions
si['total_transactions'] = (
si['num_applications_by_email'].fillna(0) + 
si['num_applications_by_fax'].fillna(0) + 
si['num_applications_by_mail'].fillna(0) + 
si['num_applications_by_other'].fillna(0) + 
si['num_applications_in_person'].fillna(0) + 
si['num_applications_online'].fillna(0) + 
si['phone_apps_inquiries'].fillna(0)
)

In [19]:
# adding a new column for applications done by phone, online and in person only
si['apps_online_and_per'] = (
    si['num_applications_in_person'].fillna(0) + 
    si['num_applications_online'].fillna(0) + 
    si['phone_apps_inquiries'].fillna(0)
)

# adding a new column for omnichannels
si['omnichannel'] = si.apply(
    lambda row: 1 if pd.notna(row['phone_apps_inquiries']) and pd.notna(row['num_applications_online']) and pd.notna(row['num_applications_in_person']) else 0, 
    axis=1
)

# adding a new column for external
si['external'] = si['service_scope'].str.contains('EXTERN', na=False).astype(int)
#convert the column to numeric 
si['external'] = pd.to_numeric(si['external'], errors='coerce')

# adding a new column for high volume services
si['highvolume'] = (si['total_transactions'] >= 45000).astype(int)

# adding a new column for online enabled Y
# creating columns to check which lists out the columns from os_account_registration to os_issue_resolution_feedback
columns_to_check = [ 'os_account_registration', 'os_authentication', 'os_application', 'os_decision', 'os_issuance', 'os_issue_resolution_feedback']
si['online_enabledY'] = si[columns_to_check].apply(lambda row: (row == 'Y').sum(), axis=1)

# adding column for online enabled N
si['online_enabledN'] = si[columns_to_check].apply(lambda row: (row == 'N').sum(), axis=1)

# adding column for online enabled NA
si['online_enabledNA'] = si[columns_to_check].isna().sum(axis=1)

# adding a new column for online end to end
si['onlineE2E'] = (
    si.apply(lambda row: "0" if row['online_enabledNA'] == 6 
    else "1" if row['online_enabledY'] + row['online_enabledNA'] == 6 
    else "0", axis=1)
    )

# adding a new column for online one or more points
si['onl_morepoints'] = si['online_enabledY'].apply(lambda x: '1' if x >= 1 else '0')

In [20]:
# importing the service standards data
ss_data = '/workspaces/service-data/outputs/ss.csv'
ss = pd.read_csv(ss_data, sep=';')

# adding column for services with standards and standards met
# grouping and summarizing the 'ss' DataFrame
ss_count = ss.groupby(['service_id', 'fiscal_yr']).agg(
    standards_count=('service_id', 'size'),  # Count the occurrences
    standards_met=('target_met', lambda x: (x == 'Y').sum())  # Count where target_met is 'Y'
).reset_index()
# merging the 'ss_count' DataFrame with the 'si' DataFrame
si = si.merge(ss_count, on=['service_id', 'fiscal_yr'], how='left')
# replacing NaN values in 'standards_count' and 'standards_met' with 0
si['standards_count'] = si['standards_count'].fillna(0)
si['standards_met'] = si['standards_met'].fillna(0)

# adding a new column for services that met at least one standard
si['STDS_metsome'] = (si['standards_met'] >= 1).astype(int)

# creating the FYSID column by merging fiscal_yr and service_id columns
si['FYSID'] = si['fiscal_yr'].astype(str) + si['service_id'].astype(str)

# creating column for external_service in ss data frame

# Step 1: Merge the two DataFrames on 'fy_org_id_service_id'
merged_df = pd.merge(ss, si[['fy_org_id_service_id', 'external']], on='fy_org_id_service_id', how='left')

# Step 2: Create the 'external_service' column based on the condition
merged_df['external_service'] = (merged_df['external'] == 1).astype(int)

# Step 3: Now, we only need to keep the original columns of ss and the new 'external_service' column
ss['external_service'] = merged_df['external_service']

In [21]:
#BEGIN DATAPACK METRICS

In [22]:
# METRIC 2-3c TOTAL TRANSACTIONS

# Select the required columns from the si DataFrame
selected_columns = si[['fiscal_yr', 'department_en', 'total_transactions', 'num_applications_online', 
                       'phone_apps_inquiries', 'num_applications_in_person']]

# Rename the columns in the selected_columns DataFrame
Transactions_table = selected_columns.rename(columns={
    'fiscal_yr': 'fiscal_year',
    'num_applications_online': 'online applications',
    'phone_apps_inquiries': 'phone applications',
    'num_applications_in_person': 'in_person_apps'
})

# Add new columns for online_share, phone_share, and in_person_share
Transactions_table['online_share'] = Transactions_table['online applications'] / Transactions_table['total_transactions']
Transactions_table['phone_share'] = Transactions_table['phone applications'] / Transactions_table['total_transactions']
Transactions_table['in_person_share'] = Transactions_table['in_person_apps'] / Transactions_table['total_transactions']


In [None]:
# METRIC 2-3c TOTAL TRANSACTIONS (Filtered for 'external' == 1)

# Filter the si DataFrame where 'external' == 1
filtered_si = si[si['external'] == 1]

# Select the required columns from the filtered DataFrame
selected_columns = filtered_si[['fiscal_yr', 'department_en', 'total_transactions', 'num_applications_online', 
                                'phone_apps_inquiries', 'num_applications_in_person']]

# Rename the columns in the selected_columns DataFrame
Transactions_table = selected_columns.rename(columns={
    'fiscal_yr': 'fiscal_year',
    'num_applications_online': 'online applications',
    'phone_apps_inquiries': 'phone applications',
    'num_applications_in_person': 'in_person_apps'
})

# Add new columns for online_share, phone_share, and in_person_share
Transactions_table['online_share'] = Transactions_table['online applications'] / Transactions_table['total_transactions']
Transactions_table['phone_share'] = Transactions_table['phone applications'] / Transactions_table['total_transactions']
Transactions_table['in_person_share'] = Transactions_table['in_person_apps'] / Transactions_table['total_transactions']

print(Transactions_table)

     fiscal_year                                      department_en  \
0      2018-2019  Crown-Indigenous Relations and Northern Affair...   
1      2022-2023           Employment and Social Development Canada   
2      2018-2019           Employment and Social Development Canada   
3      2019-2020           Employment and Social Development Canada   
4      2020-2021           Employment and Social Development Canada   
...          ...                                                ...   
8928   2023-2024                            Veterans Affairs Canada   
8929   2023-2024                   Veterans Review and Appeal Board   
8930   2023-2024                   Women and Gender Equality Canada   
8931   2023-2024                   Women and Gender Equality Canada   
8932   2023-2024                   Women and Gender Equality Canada   

      total_transactions  online applications  phone applications  \
0                    0.0                  0.0                 0.0   
1        

In [32]:
# METRIC 4-5C OMNICHANNEL OFFERINGS

# Metric 4: Share of GC services with omnichannel offerings
# Group by fiscal year and calculate the number of distinct service_ids with omnichannel offerings (where omnichannel == 1)
omni_count_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['service_id'].nunique()

# Group by fiscal year and calculate the total number of distinct service_ids in the entire dataset
total_count_by_year = si.groupby('fiscal_yr')['service_id'].nunique()

# Calculate the share of omnichannel services as a percentage for each fiscal year
share_omni_by_year = (omni_count_by_year / total_count_by_year) * 100

# Metric 5a: Online as a share of omnichannel usage
# Group by fiscal year and calculate total transactions where omnichannel is 1 for each fiscal year
total_transactions_by_year_omni = si[si['omnichannel'] == 1].groupby('fiscal_yr')['total_transactions'].sum()

# Group by fiscal year and calculate the sum of online applications where omnichannel is 1 for each fiscal year
sum_online_apps_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['num_applications_online'].sum()

# Calculate the share of online applications as a percentage of omnichannel usage for each fiscal year
share_online_by_year = (sum_online_apps_by_year / total_transactions_by_year_omni) * 100

# Metric 5b: Phone as a share of omnichannel usage
# Group by fiscal year and calculate the sum of phone app inquiries where omnichannel is 1 for each fiscal year
sum_phone_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['phone_apps_inquiries'].sum()

# Calculate the share of phone applications as a percentage of omnichannel usage for each fiscal year
share_phone_by_year = (sum_phone_by_year / total_transactions_by_year_omni) * 100

# Metric 5c: In-person as a share of omnichannel usage
# Group by fiscal year and calculate the sum of in-person applications where omnichannel is 1 for each fiscal year
sum_in_person_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['num_applications_in_person'].sum()

# Calculate the share of in-person applications as a percentage of omnichannel usage for each fiscal year
share_in_person_by_year = (sum_in_person_by_year / total_transactions_by_year_omni) * 100

# Now, let's align all the series on the fiscal year index
all_fiscal_years = total_transactions_by_year_omni.index

# Reindex each metric to align the fiscal years
share_omni_by_year = share_omni_by_year.reindex(all_fiscal_years, fill_value=float('nan'))
share_online_by_year = share_online_by_year.reindex(all_fiscal_years, fill_value=float('nan'))
share_phone_by_year = share_phone_by_year.reindex(all_fiscal_years, fill_value=float('nan'))
share_in_person_by_year = share_in_person_by_year.reindex(all_fiscal_years, fill_value=float('nan'))

# Combine all metrics into a single DataFrame
omnichannel_metric = pd.DataFrame({
    'Fiscal_Year': all_fiscal_years,
    'Share_of_Omnichannel_Services (%)': share_omni_by_year.values,
    'Online_as_a_Share_of_Omnichannel_Usage_(%)': share_online_by_year.values,
    'Phone_as_a_Share_of_Omnichannel_Usage_(%)': share_phone_by_year.values,
    'In-Person_as_a_Share_of_Omnichannel_Usage_(%)': share_in_person_by_year.values
})

print(omnichannel_metric)


  Fiscal_Year  Share_of_Omnichannel_Services (%)  \
0   2018-2019                          99.839228   
1   2019-2020                          99.843994   
2   2020-2021                         100.000000   
3   2021-2022                          10.000000   
4   2022-2023                           7.810651   
5   2023-2024                          42.380952   
6   2024-2025                         100.000000   

   Online_as_a_Share_of_Omnichannel_Usage_(%)  \
0                                   42.649981   
1                                   39.721145   
2                                   53.936630   
3                                   82.968571   
4                                   73.981982   
5                                   74.169989   
6                                         NaN   

   Phone_as_a_Share_of_Omnichannel_Usage_(%)  \
0                                  12.474025   
1                                  21.444695   
2                                  34.454594  

In [25]:
# METRIC 6-9


In [33]:
# METRIC 10-13 EXTERNAL TRANSACTIONS
# Create new columns 'online_transactions, phone in-person, and mail transactions based on the condition in 'external'
si['online_transactions'] = np.where(si['external'] == 1, si['num_applications_online'], 0)
si['phone_transactions'] = np.where(si['external'] == 1, si['phone_apps_inquiries'], 0)
si['in_person_transactions'] = np.where(si['external'] == 1, si['num_applications_in_person'], 0)
si['mail_transactions'] = np.where(si['external'] == 1, si['num_applications_by_mail'], 0)


# Select the required columns from the si DataFrame
External_transactions = si[['fiscal_yr', 'department_en', 'online_transactions', 'phone_transactions', 'in_person_transactions', 'mail_transactions']]

print(External_transactions)

                          fiscal_yr  \
0                         2018-2019   
1                         2022-2023   
2                         2018-2019   
3                         2019-2020   
4                         2020-2021   
...                             ...   
8929                      2023-2024   
8930                      2023-2024   
8931                      2023-2024   
8932                      2023-2024   
8933  Timestamp:2025-04-01_14:23:25   

                                          department_en  online_transactions  \
0     Crown-Indigenous Relations and Northern Affair...                  0.0   
1              Employment and Social Development Canada             276390.0   
2              Employment and Social Development Canada                  0.0   
3              Employment and Social Development Canada              10287.0   
4              Employment and Social Development Canada              43052.0   
...                                                

In [None]:
# METRIC 14-15 EXTERNAL PERCENTAGES



In [None]:
# METRIC 16 EXTERNAL SERVICES MEETING TARGETS


In [28]:
# METRIC 17-18 EXTERNAL HIGH VOLUME PERCENTAGES