In [1]:
# setting my si data frame
import pandas as pd
data = '/workspaces/service-data/outputs/si.csv'
si = pd.read_csv(data, sep=';')
programid = '/workspaces/service-data/inputs/serv_prog.csv'
programs = pd.read_csv(programid, sep=',')
from datetime import datetime
import pytz

# Specify the timezone
timezone = pytz.timezone("America/Montreal")

# Get the current date and time in the specified timezone
current_date = datetime.now(timezone)

# Format the current date and time into the desired string format
current_datestr = current_date.strftime("%Y-%m-%d_%H:%M:%S")

# Print the current date and time
print(f"current date: {current_datestr}")


current date: 2025-04-15_15:43:18


In [2]:
# to add a new column for phone apps inquiries
# phone apps inquiries are num_phone_enquiries plus num_applications_by_phone
# based on error received 'can only concatenate str (not int) to string

# Convert the columns to numeric, forcing any non-numeric values to NaN
si['num_phone_enquiries'] = pd.to_numeric(si['num_phone_enquiries'], errors='coerce')
si['num_applications_by_phone'] = pd.to_numeric(si['num_applications_by_phone'], errors='coerce')

# Add the new column 'phone_apps_inquiries' by filling NaN values with 0
si['phone_apps_inquiries'] = si['num_phone_enquiries'].fillna(0) + si['num_applications_by_phone'].fillna(0)

# adding a new column for total transactions
# to avoid error, convert the columns to numeric

si['num_applications_by_email'] = pd.to_numeric(si['num_applications_by_email'], errors='coerce')
si['num_applications_by_fax'] = pd.to_numeric(si['num_applications_by_fax'], errors='coerce')
si['num_applications_by_mail'] = pd.to_numeric(si['num_applications_by_mail'], errors='coerce')
si['num_applications_by_other'] = pd.to_numeric(si['num_applications_by_other'], errors='coerce')
si['num_applications_in_person'] = pd.to_numeric(si['num_applications_in_person'], errors='coerce')
si['num_applications_online'] = pd.to_numeric(si['num_applications_online'], errors='coerce')

# now add all six columns including the column phone apps inquiries to get the total transactions
si['total_transactions'] = (
si['num_applications_by_email'].fillna(0) + 
si['num_applications_by_fax'].fillna(0) + 
si['num_applications_by_mail'].fillna(0) + 
si['num_applications_by_other'].fillna(0) + 
si['num_applications_in_person'].fillna(0) + 
si['num_applications_online'].fillna(0) + 
si['phone_apps_inquiries'].fillna(0)
)

In [3]:
# adding a new column for applications done by phone, online and in person only
si['apps_online_and_per'] = (
    si['num_applications_in_person'].fillna(0) + 
    si['num_applications_online'].fillna(0) + 
    si['phone_apps_inquiries'].fillna(0)
)

In [None]:
# adding a new column for omnichannels
si['omnichannel'] = si.apply(
    lambda row: 1 if pd.notna(row['phone_apps_inquiries']) and pd.notna(row['num_applications_online']) and pd.notna(row['num_applications_in_person']) else 0, 
    axis=1
)

In [None]:




# adding a new column for external
si['external'] = si['service_scope'].str.contains('EXTERN', na=False).astype(int)
#convert the column to numeric 
si['external'] = pd.to_numeric(si['external'], errors='coerce')

# adding a new column for high volume services
si['highvolume'] = (si['total_transactions'] >= 45000).astype(int)

# adding a new column for online enabled Y
# creating columns to check which lists out the columns from os_account_registration to os_issue_resolution_feedback
columns_to_check = [ 'os_account_registration', 'os_authentication', 'os_application', 'os_decision', 'os_issuance', 'os_issue_resolution_feedback']
si['online_enabledY'] = si[columns_to_check].apply(lambda row: (row == 'Y').sum(), axis=1)

# adding column for online enabled N
si['online_enabledN'] = si[columns_to_check].apply(lambda row: (row == 'N').sum(), axis=1)

# adding column for online enabled NA
si['online_enabledNA'] = si[columns_to_check].isna().sum(axis=1)

# adding a new column for online end to end
si['onlineE2E'] = (
    si.apply(lambda row: "0" if row['online_enabledNA'] == 6 
    else "1" if row['online_enabledY'] + row['online_enabledNA'] == 6 
    else "0", axis=1)
    )

# adding a new column for online one or more points
si['onl_morepoints'] = si['online_enabledY'].apply(lambda x: '1' if x >= 1 else '0')

In [34]:
# importing the service standards data
ss_data = '/workspaces/service-data/outputs/ss.csv'
ss = pd.read_csv(ss_data, sep=';')

# adding column for services with standards and standards met
# grouping and summarizing the 'ss' DataFrame
ss_count = ss.groupby(['service_id', 'fiscal_yr']).agg(
    standards_count=('service_id', 'size'),  # Count the occurrences
    standards_met=('target_met', lambda x: (x == 'Y').sum())  # Count where target_met is 'Y'
).reset_index()
# merging the 'ss_count' DataFrame with the 'si' DataFrame
si = si.merge(ss_count, on=['service_id', 'fiscal_yr'], how='left')
# replacing NaN values in 'standards_count' and 'standards_met' with 0
si['standards_count'] = si['standards_count'].fillna(0)
si['standards_met'] = si['standards_met'].fillna(0)

# adding a new column for services that met at least one standard
si['STDS_metsome'] = (si['standards_met'] >= 1).astype(int)

# creating the FYSID column by merging fiscal_yr and service_id columns
si['FYSID'] = si['fiscal_yr'].astype(str) + si['service_id'].astype(str)

# creating column for external_service in ss data frame

# Step 1: Merge the two DataFrames on 'fy_org_id_service_id'
merged_df = pd.merge(ss, si[['fy_org_id_service_id', 'external']], on='fy_org_id_service_id', how='left')

# Step 2: Create the 'external_service' column based on the condition
merged_df['external_service'] = (merged_df['external'] == 1).astype(int)

# Step 3: Now, we only need to keep the original columns of ss and the new 'external_service' column
ss['external_service'] = merged_df['external_service']

In [35]:
# BEGINNING THE DATA PACK METRICS

In [36]:
# metric 2: total number of transactions for fiscal years
# the number is in millions

# Group by fiscal year and sum the transactions for each year
total_transactions_by_year = si.groupby('fiscal_yr')['total_transactions'].sum() / 1000000

# Convert to a DataFrame to create a table-like structure
total_transactions_table = total_transactions_by_year.reset_index()

# Rename the columns for clarity
total_transactions_table.columns = ['Fiscal_Year', 'Total_Transactions (in millions)']

# Display the resulting table
print(total_transactions_table)


                     Fiscal_Year  Total_Transactions (in millions)
0                      2018-2019                        431.192682
1                      2019-2020                        424.790717
2                      2020-2021                        399.085166
3                      2021-2022                        369.012303
4                      2022-2023                        476.872933
5                      2023-2024                        475.890397
6                      2024-2025                          0.000000
7  Timestamp:2025-04-01_14:23:25                          0.000000


In [37]:
# metric 2: total number of transactions for fiscal years filtering for external == 1
# the number is in millions
# Filter the si dataset for external transactions (external == 1)
si_external = si[si['external'] == 1]

# Group by fiscal year and sum the transactions for each year, for external services only
total_transactions_by_year_external = si_external.groupby('fiscal_yr')['total_transactions'].sum() / 1000000

# Convert to a DataFrame to create a table-like structure
total_transactions_table_external = total_transactions_by_year_external.reset_index()

# Rename the columns for clarity
total_transactions_table_external.columns = ['Fiscal_Year', 'Total_Transactions (in millions)']

# Display the resulting table
print(total_transactions_table_external)

  Fiscal_Year  Total_Transactions (in millions)
0   2018-2019                        421.133885
1   2019-2020                        412.398882
2   2020-2021                        386.183263
3   2021-2022                        358.629620
4   2022-2023                        464.461678
5   2023-2024                        462.845966
6   2024-2025                          0.000000


In [38]:
# metric 3a: online as a share of total transactions
# Fill NaN values in 'num_applications_online' with 0
si['num_applications_online'] = si['num_applications_online'].fillna(0)

# Group by fiscal year and calculate the sum of online transactions and total transactions for each fiscal year
online_transactions_by_year = si.groupby('fiscal_yr')['num_applications_online'].sum() / 1000000
total_transactions_by_year = si.groupby('fiscal_yr')['total_transactions'].sum() / 1000000

# Calculate the online fraction for each fiscal year
online_fraction_by_year = (online_transactions_by_year / total_transactions_by_year) * 100

# Convert to DataFrame for display
online_fraction_table = online_fraction_by_year.reset_index()

# Rename the columns for clarity
online_fraction_table.columns = ['Fiscal_Year', 'Online_as_a_Share_of_Total_Transactions (%)']

# Display the resulting table
print(online_fraction_table)


                     Fiscal_Year  Online_as_a_Share_of_Total_Transactions (%)
0                      2018-2019                                    42.649981
1                      2019-2020                                    39.721145
2                      2020-2021                                    53.936630
3                      2021-2022                                    53.480450
4                      2022-2023                                    56.587775
5                      2023-2024                                    56.103385
6                      2024-2025                                          NaN
7  Timestamp:2025-04-01_14:23:25                                          NaN


In [39]:
# metric 3b: telephone as a share of total transactions
# Fill NaN values in 'phone_apps_inquiries' with 0
si['phone_apps_inquiries'] = si['phone_apps_inquiries'].fillna(0)

# Group by fiscal year and calculate the sum of telephone transactions and total transactions for each fiscal year
telephone_transactions_by_year = si.groupby('fiscal_yr')['phone_apps_inquiries'].sum() / 1000000
total_transactions_by_year = si.groupby('fiscal_yr')['total_transactions'].sum() / 1000000

# Calculate the telephone fraction for each fiscal year
telephone_fraction_by_year = (telephone_transactions_by_year / total_transactions_by_year) * 100

# Convert to DataFrame for display
telephone_fraction_table = telephone_fraction_by_year.reset_index()

# Rename the columns for clarity
telephone_fraction_table.columns = ['Fiscal_Year', 'Telephone_as_a_Share_of_Total_Transactions (%)']

# Display the resulting table
print(telephone_fraction_table)


                     Fiscal_Year  \
0                      2018-2019   
1                      2019-2020   
2                      2020-2021   
3                      2021-2022   
4                      2022-2023   
5                      2023-2024   
6                      2024-2025   
7  Timestamp:2025-04-01_14:23:25   

   Telephone_as_a_Share_of_Total_Transactions (%)  
0                                       12.474025  
1                                       21.444695  
2                                       34.454594  
3                                       28.307156  
4                                       15.396291  
5                                       15.065369  
6                                             NaN  
7                                             NaN  


In [40]:
# metric 3c: in-person as a share of total transactions
# Fill NaN values in 'num_applications_in_person' with 0
si['num_applications_in_person'] = si['num_applications_in_person'].fillna(0)

# Group by fiscal year and calculate the sum of in-person transactions and total transactions for each fiscal year
in_person_transactions_by_year = si.groupby('fiscal_yr')['num_applications_in_person'].sum() / 1000000
total_transactions_by_year = si.groupby('fiscal_yr')['total_transactions'].sum() / 1000000

# Calculate the in-person fraction for each fiscal year
in_person_fraction_by_year = (in_person_transactions_by_year / total_transactions_by_year) * 100

# Convert to DataFrame for display
in_person_fraction_table = in_person_fraction_by_year.reset_index()

# Rename the columns for clarity
in_person_fraction_table.columns = ['Fiscal_Year', 'In-Person_as_a_Share_of_Total_Transactions_(%)']

# Display the resulting table
print(in_person_fraction_table)


                     Fiscal_Year  \
0                      2018-2019   
1                      2019-2020   
2                      2020-2021   
3                      2021-2022   
4                      2022-2023   
5                      2023-2024   
6                      2024-2025   
7  Timestamp:2025-04-01_14:23:25   

   In-Person_as_a_Share_of_Total_Transactions_(%)  
0                                       27.581419  
1                                       26.142931  
2                                        3.808385  
3                                        7.575717  
4                                       16.557836  
5                                       21.556716  
6                                             NaN  
7                                             NaN  


In [43]:
# Metric 4: Share of GC services with omnichannel offerings

# Group by fiscal year and calculate the number of distinct service_ids with omnichannel offerings (where omnichannel == 1)
num_omni_services = si[si['omnichannel'] == 1].groupby('fiscal_yr')['service_id'].nunique()

# Group by fiscal year and calculate the total number of distinct service_ids in the entire dataset
total_services = si.groupby('fiscal_yr')['service_id'].nunique()

# Calculate the share of omnichannel services as a percentage for each fiscal year
GC_omnichannel_share = (num_omni_services / total_services) * 100

# Combine all metrics into a single DataFrame
share_omni_table = pd.DataFrame({
    'Fiscal_Year': GC_omnichannel_share.index,
    'GC_omnichannel_share (%)': GC_omnichannel_share.values,
    'Omnichannel_Service_Count': num_omni_services.reindex(GC_omnichannel_share.index).values,
    'Total_Service_Count': total_services.reindex(GC_omnichannel_share.index).values
})

# Display the resulting table
print(share_omni_table)



                     Fiscal_Year  GC_omnichannel_share (%)  \
0                      2018-2019                 99.839228   
1                      2019-2020                 99.843994   
2                      2020-2021                100.000000   
3                      2021-2022                 10.000000   
4                      2022-2023                  7.810651   
5                      2023-2024                 42.380952   
6                      2024-2025                100.000000   
7  Timestamp:2025-04-01_14:23:25                       NaN   

   Omnichannel_Service_Count  Total_Service_Count  
0                     1242.0                 1244  
1                     1280.0                 1282  
2                     1426.0                 1426  
3                      161.0                 1610  
4                      132.0                 1690  
5                      712.0                 1680  
6                        1.0                    1  
7                        

In [44]:
print(si['omnichannel'].value_counts())


omnichannel
1    4954
0    3980
Name: count, dtype: int64


In [None]:
# metric 5a: online as a share of omnichannel usage
# Group by fiscal year and calculate total transactions where omnichannel is 1 for each fiscal year
total_transactions_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['total_transactions'].sum()

# Group by fiscal year and calculate the sum of online applications where omnichannel is 1 for each fiscal year
sum_online_apps_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['num_applications_online'].sum()

# Calculate the share of online applications as a percentage of omnichannel usage for each fiscal year
share_online_by_year = (sum_online_apps_by_year / total_transactions_by_year) * 100

# Convert to DataFrame for display
share_online_table = share_online_by_year.reset_index()

# Rename the columns for clarity
share_online_table.columns = ['Fiscal_Year', 'Online_as_a_Share_of_Omnichannel_Usage_(%)']

# Display the resulting table
print(share_online_table)


  Fiscal_Year  Online_as_a_Share_of_Omnichannel_Usage_(%)
0   2018-2019                                   42.649981
1   2019-2020                                   39.721145
2   2020-2021                                   53.936630
3   2021-2022                                   82.968571
4   2022-2023                                   73.981982
5   2023-2024                                   74.169989
6   2024-2025                                         NaN


In [None]:
# metric 5b: phone as a share of omnichannel usage
# Group by fiscal year and calculate total transactions where omnichannel is 1 for each fiscal year
total_transactions_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['total_transactions'].sum()

# Group by fiscal year and calculate the sum of phone app inquiries where omnichannel is 1 for each fiscal year
sum_phone_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['phone_apps_inquiries'].sum()

# Calculate the share of phone applications as a percentage of omnichannel usage for each fiscal year
share_phone_by_year = (sum_phone_by_year / total_transactions_by_year) * 100

# Convert to DataFrame for display
share_phone_table = share_phone_by_year.reset_index()

# Rename the columns for clarity
share_phone_table.columns = ['Fiscal_Year', 'Phone_as_a_Share_of_Omnichannel_Usage_(%)']

# Display the resulting table
print(share_phone_table)


  Fiscal_Year  Phone_as_a_Share_of_Omnichannel_Usage_(%)
0   2018-2019                                  12.474025
1   2019-2020                                  21.444695
2   2020-2021                                  34.454594
3   2021-2022                                   8.402004
4   2022-2023                                   8.808450
5   2023-2024                                  14.159849
6   2024-2025                                        NaN


In [None]:
# metric 5c: in-person as a share of omnichannel usage
# Group by fiscal year and calculate total transactions where omnichannel is 1 for each fiscal year
total_transactions_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['total_transactions'].sum()

# Group by fiscal year and calculate the sum of in-person applications where omnichannel is 1 for each fiscal year
sum_in_person_by_year = si[si['omnichannel'] == 1].groupby('fiscal_yr')['num_applications_in_person'].sum()

# Calculate the share of in-person applications as a percentage of omnichannel usage for each fiscal year
share_in_person_by_year = (sum_in_person_by_year / total_transactions_by_year) * 100

# Convert to DataFrame for display
share_in_person_table = share_in_person_by_year.reset_index()

# Rename the columns for clarity
share_in_person_table.columns = ['Fiscal_Year', 'In-Person_as_a_Share_of_Omnichannel_Usage_(%)']

# Display the resulting table
print(share_in_person_table)


  Fiscal_Year  In-Person_as_a_Share_of_Omnichannel_Usage_(%)
0   2018-2019                                      27.581419
1   2019-2020                                      26.142931
2   2020-2021                                       3.808385
3   2021-2022                                       6.231632
4   2022-2023                                       9.687857
5   2023-2024                                       3.021633
6   2024-2025                                            NaN


In [None]:
# metric 6: number of external departments
# Group by fiscal year and calculate the number of distinct external departments for each fiscal year
no_departments_by_year = si[si['external'] == 1].groupby('fiscal_yr')['department_en'].nunique()

# Convert to DataFrame for display
no_departments_table = no_departments_by_year.reset_index()

# Rename the columns for clarity
no_departments_table.columns = ['Fiscal_Year', 'Number_of_External_Departments']

# Display the resulting table
print(no_departments_table)


  Fiscal_Year  Number_of_External_Departments
0   2018-2019                              72
1   2019-2020                              76
2   2020-2021                              77
3   2021-2022                              77
4   2022-2023                              74
5   2023-2024                              76
6   2024-2025                               1


In [None]:
# metric 7: number of programs
# Group by fiscal year and calculate the number of distinct programs for each fiscal year
no_programs_by_year = si.groupby('fiscal_yr')['program_id'].nunique()

# Convert to DataFrame for display
no_programs_table = no_programs_by_year.reset_index()

# Rename the columns for clarity
no_programs_table.columns = ['Fiscal_Year', 'Number_of_Programs']

# Display the resulting table
print(no_programs_table)


                     Fiscal_Year  Number_of_Programs
0                      2018-2019                 459
1                      2019-2020                 458
2                      2020-2021                 514
3                      2021-2022                 496
4                      2022-2023                 464
5                      2023-2024                 507
6                      2024-2025                   1
7  Timestamp:2025-04-01_14:23:25                   0


In [None]:
# metric 8: number of external services (data is already filtered for external)
# Group by fiscal year and calculate the number of distinct external services for each fiscal year
no_external_service_by_year = si[si['external'] == 1].groupby('fiscal_yr')['service_id'].nunique()

# Convert to DataFrame for display
no_external_service_table = no_external_service_by_year.reset_index()

# Rename the columns for clarity
no_external_service_table.columns = ['Fiscal_Year', 'Number_of_External_Services']

# Display the resulting table
print(no_external_service_table)


  Fiscal_Year  Number_of_External_Services
0   2018-2019                         1132
1   2019-2020                         1181
2   2020-2021                         1304
3   2021-2022                         1482
4   2022-2023                         1560
5   2023-2024                         1559
6   2024-2025                            1


In [None]:
# metric 9: number of high volume services
# Group by fiscal year and calculate the number of distinct high volume external services for each fiscal year
no_highvolume_services_by_year = si[(si['external'] == 1) & (si['highvolume'] == 1)].groupby('fiscal_yr')['service_id'].nunique()

# Convert to DataFrame for display
no_highvolume_services_table = no_highvolume_services_by_year.reset_index()

# Rename the columns for clarity
no_highvolume_services_table.columns = ['Fiscal_Year', 'Number_of_High_Volume_Services']

# Display the resulting table
print(no_highvolume_services_table)


  Fiscal_Year  Number_of_High_Volume_Services
0   2018-2019                             117
1   2019-2020                             126
2   2020-2021                             123
3   2021-2022                             134
4   2022-2023                             116
5   2023-2024                             117


In [None]:
# metric 10: total online transactions (in millions)
# Group by fiscal year and calculate the total online transactions (in millions) for each fiscal year where external == 1
total_online_transactions_by_year = si[si['external'] == 1].groupby('fiscal_yr')['num_applications_online'].sum() / 1000000

# Convert to DataFrame for display
total_online_transactions_table = total_online_transactions_by_year.reset_index()

# Rename the columns for clarity
total_online_transactions_table.columns = ['Fiscal_Year', 'Total_Online_Transactions_(Millions)']

# Display the resulting table
print(total_online_transactions_table)



  Fiscal_Year  Total_Online_Transactions_(Millions)
0   2018-2019                            174.630378
1   2019-2020                            156.831471
2   2020-2021                            202.706431
3   2021-2022                            196.235510
4   2022-2023                            268.647250
5   2023-2024                            254.970341
6   2024-2025                              0.000000


In [None]:
# metric 11: total phone transactions (in millions)
# Group by fiscal year and calculate the total phone transactions (in millions) for each fiscal year where external == 1
total_phone_transactions_by_year = si[si['external'] == 1].groupby('fiscal_yr')['phone_apps_inquiries'].sum() / 1000000

# Convert to DataFrame for display
total_phone_transactions_table = total_phone_transactions_by_year.reset_index()

# Rename the columns for clarity
total_phone_transactions_table.columns = ['Fiscal_Year', 'Total_Phone_Transactions_(Millions)']

# Display the resulting table
print(total_phone_transactions_table)


  Fiscal_Year  Total_Phone_Transactions_(Millions)
0   2018-2019                            53.099291
1   2019-2020                            90.704008
2   2020-2021                           137.242208
3   2021-2022                           104.413441
4   2022-2023                            72.927098
5   2023-2024                            71.119731
6   2024-2025                             0.000000


In [None]:
# metric 12: total in person transactions (in millions)
# Function to calculate total in-person transactions for each fiscal year
def calculate_in_person_transactions(group):
    # Total in-person applications for the fiscal year
    in_person_applications = group[group['external'] == 1]['num_applications_in_person'].sum()

    # Generate the lookup value based on the fiscal year
    lookup_value = f"{group['fiscal_yr'].iloc[0]}669" 
    # Check if the lookup_value exists in the 'FYSID' column
    lookup_data = si[si['FYSID'] == lookup_value]
    
    if not lookup_data.empty:
        lookup_result = lookup_data['total_transactions'].iloc[0]
    else:
        # Handle the case where the lookup_value doesn't exist (e.g., set to 0 or None)
        print(f"Warning: {lookup_value} not found in 'FYSID'. Setting total transactions to 0.")
        lookup_result = 0

    # Subtract CBSA from in-person applications and convert to millions
    total_in_person_applications = (in_person_applications - lookup_result) / 1000000

    return total_in_person_applications

# Group by fiscal year
grouped_data = si.groupby('fiscal_yr')

# Prepare data to store results
results = []

# Calculate total in-person transactions for each fiscal year
for fiscal_year, group in grouped_data:
    result = calculate_in_person_transactions(group)
    results.append([fiscal_year, result])

# Create a DataFrame for better visualization
results_df = pd.DataFrame(results, columns=['Fiscal Year', 'Total In-Person Transactions (Millions)'])

# Display the table
print(results_df)




                     Fiscal Year  Total In-Person Transactions (Millions)
0                      2018-2019                                61.242407
1                      2019-2020                                55.250738
2                      2020-2021                                 6.520391
3                      2021-2022                                 4.573685
4                      2022-2023                                 8.597962
5                      2023-2024                                13.431918
6                      2024-2025                                 0.000000
7  Timestamp:2025-04-01_14:23:25                                 0.000000


In [None]:
# metric 13: total mail applications ( in millions)
# Group by fiscal year and calculate the total mail applications (in millions) for each fiscal year where external == 1
total_mail_applications_by_year = si[si['external'] == 1].groupby('fiscal_yr')['num_applications_by_mail'].sum() / 1000000

# Convert to DataFrame for display
total_mail_applications_table = total_mail_applications_by_year.reset_index()

# Rename the columns for clarity
total_mail_applications_table.columns = ['Fiscal_Year', 'Total_Mail_Applications_(Millions)']

# Display the resulting table
print(total_mail_applications_table)


  Fiscal_Year  Total_Mail_Applications_(Millions)
0   2018-2019                           42.316481
1   2019-2020                           33.221419
2   2020-2021                           19.468112
3   2021-2022                           22.377948
4   2022-2023                           31.707932
5   2023-2024                           21.405253
6   2024-2025                            0.000000


In [None]:
# Metric 14: Share of External Services Online End to End - Updated Formula for All Fiscal Years

# Convert the 'onlineE2E' and 'external' columns to numeric values (in case they're stored as strings)
si['onlineE2E'] = pd.to_numeric(si['onlineE2E'], errors='coerce')
si['external'] = pd.to_numeric(si['external'], errors='coerce')

# Create a list to store results
results = []

# Loop through each fiscal year
for fiscal_year in si['fiscal_yr'].unique():
    # Filter for the current fiscal year and online end-to-end services (onlineE2E == 1)
    fiscal_year_data = si[(si['fiscal_yr'] == fiscal_year) & (si['onlineE2E'] == 1)]
    
    # Filter for external services in the current fiscal year
    fiscal_year_external = si[(si['fiscal_yr'] == fiscal_year) & (si['external'] == 1)]

    # Distinct external services
    no_external_service = fiscal_year_external['service_id'].nunique()
    
    # Sum the 'external' column for the filtered data (for onlineE2E == 1)
    sum_external_services = fiscal_year_data['external'].sum()

    # Calculate the share of external services online end-to-end
    if no_external_service != 0:
        share_of_ext_onl_ser = (sum_external_services / no_external_service) * 100
    else:
        share_of_ext_onl_ser = 0  # If no external services, set share to 0
    
    # Append the result for the fiscal year
    results.append([fiscal_year, share_of_ext_onl_ser])

# Convert results to DataFrame
share_ext_onlE2E_table = pd.DataFrame(results, columns=['Fiscal_Year', 'Share_of_External_Services_Online_End-to-End_(%)'])

# Display the resulting table
print(share_ext_onlE2E_table)

                     Fiscal_Year  \
0                      2018-2019   
1                      2022-2023   
2                      2019-2020   
3                      2020-2021   
4                      2021-2022   
5                      2023-2024   
6                      2024-2025   
7  Timestamp:2025-04-01_14:23:25   

   Share_of_External_Services_Online_End-to-End_(%)  
0                                         18.727915  
1                                         22.051282  
2                                         18.374259  
3                                         22.699387  
4                                         21.524966  
5                                         24.053881  
6                                          0.000000  
7                                          0.000000  


In [None]:
# metric 15: share of external serices that have at least one point online 
# Convert the 'onl_morepoints' column to numeric values (in case it's stored as strings)
si['onl_morepoints'] = pd.to_numeric(si['onl_morepoints'], errors='coerce')

# Create a list to store results
results = []

# Loop through each fiscal year
for fiscal_year in si['fiscal_yr'].unique():
    # Filter for the current fiscal year and external services
    fiscal_year_data = si[(si['fiscal_yr'] == fiscal_year) & (si['external'] == 1)]
    
    # Services with at least one point online (onl_morepoints == 1)
    onl_onepoint = fiscal_year_data[fiscal_year_data['onl_morepoints'] == 1]['service_id'].nunique()
    
    # All services for the fiscal year
    all_services = fiscal_year_data['service_id'].nunique()
    
    # Calculate the share of services with at least one point online
    if all_services != 0:
        share_service = (onl_onepoint / all_services) * 100
    else:
        share_service = 0  # If no services, set share to 0
    
    # Append the result for the fiscal year
    results.append([fiscal_year, share_service])

# Convert results to DataFrame
share_service_table = pd.DataFrame(results, columns=['Fiscal_Year', 'Share_of_External_Services_with_At_Least_One_Point_Online_(%)'])

# Display the resulting table
print(share_service_table)


                     Fiscal_Year  \
0                      2018-2019   
1                      2022-2023   
2                      2019-2020   
3                      2020-2021   
4                      2021-2022   
5                      2023-2024   
6                      2024-2025   
7  Timestamp:2025-04-01_14:23:25   

   Share_of_External_Services_with_At_Least_One_Point_Online_(%)  
0                                          47.703180              
1                                          44.679487              
2                                          48.010161              
3                                          49.463190              
4                                          46.018893              
5                                          45.670301              
6                                           0.000000              
7                                           0.000000              


In [None]:
# metric 16: services meeting service standards
# Create a list to store results
results = []

# Loop through each fiscal year
for fiscal_year in si['fiscal_yr'].unique():
    # Filter for the current fiscal year and external services
    fiscal_year_data = si[(si['fiscal_yr'] == fiscal_year) & (si['external'] == 1)]
    
    # Services that met some standards (STDS_metsome == 1)
    ser_metsome = fiscal_year_data[fiscal_year_data['STDS_metsome'] == 1]['service_id'].nunique()
    
    # All services for the fiscal year
    all_services = fiscal_year_data['service_id'].nunique()
    
    # Calculate the share of services meeting service standards
    if all_services != 0:
        ser_metstds = (ser_metsome / all_services) * 100
    else:
        ser_metstds = 0  # If no services, set share to 0
    
    # Append the result for the fiscal year
    results.append([fiscal_year, ser_metstds])

# Convert results to DataFrame
ser_metstds_table = pd.DataFrame(results, columns=['Fiscal_Year', 'Share_of_Services_Meeting_Service_Standards_(%)'])

# Display the resulting table
print(ser_metstds_table)


                     Fiscal_Year  \
0                      2018-2019   
1                      2022-2023   
2                      2019-2020   
3                      2020-2021   
4                      2021-2022   
5                      2023-2024   
6                      2024-2025   
7  Timestamp:2025-04-01_14:23:25   

   Share_of_Services_Meeting_Service_Standards_(%)  
0                                        40.194346  
1                                        42.051282  
2                                        42.082981  
3                                        43.711656  
4                                        41.632928  
5                                        39.704939  
6                                         0.000000  
7                                         0.000000  


In [None]:
# metric 16: services meeting service standards #UPDATED by fiscal year
# Standards Meeting Targets
# Filter rows based on the conditions for each fiscal year
filtered_data = ss[(ss['target_met'] == 'Y') & 
                   (ss['external_service'] == 1)]

# Group by fiscal year and count the rows that meet the target for each fiscal year
standards_meeting_targets_by_year = filtered_data.groupby('fiscal_yr').size()

# Total Standards
# Filter rows based on the conditions for each fiscal year
filtered_data_total = ss[(ss['target_met'].notna()) & 
                         (ss['external_service'] == 1)]

# Group by fiscal year and count the rows for each fiscal year
total_standards_by_year = filtered_data_total.groupby('fiscal_yr').size()

# Calculate the percentage of standards meeting target for each fiscal year
percentage_of_standards_meeting_target_by_year = (standards_meeting_targets_by_year / total_standards_by_year) * 100

# Combine the results into a DataFrame for easy visualization
standards_results = pd.DataFrame({
    'Standards_Meeting_Targets': standards_meeting_targets_by_year,
    'Total_Standards': total_standards_by_year,
    'Percentage_Of_Standards_Meeting_Target': percentage_of_standards_meeting_target_by_year
})

# Reset index to make 'fiscal_yr' a column and display the result
standards_results = standards_results.reset_index()

# Rename the columns for clarity
standards_results.columns = ['Fiscal_Year', 'Standards_Meeting_Targets', 'Total_Standards', 'Percentage_Of_Standards_Meeting_Target']

# Display the resulting table
print(standards_results)


  Fiscal_Year  Standards_Meeting_Targets  Total_Standards  \
0   2018-2019                        832             1210   
1   2019-2020                        946             1319   
2   2020-2021                       1236             1713   
3   2021-2022                       1178             1747   
4   2022-2023                       1315             1909   
5   2023-2024                       1228             1674   

   Percentage_Of_Standards_Meeting_Target  
0                               68.760331  
1                               71.721001  
2                               72.154116  
3                               67.429880  
4                               68.884233  
5                               73.357228  


In [None]:
# metric 17: share of external high volume services online end to end
# Convert the 'onlineE2E' and 'online_enabledNA' columns to numeric values (in case they're stored as strings)
si['onlineE2E'] = pd.to_numeric(si['onlineE2E'], errors='coerce')
si['online_enabledNA'] = pd.to_numeric(si['online_enabledNA'], errors='coerce')

# Create a list to store results
results = []

# Loop through each fiscal year
for fiscal_year in si['fiscal_yr'].unique():
    # Filter for the current fiscal year and external high volume services
    fiscal_year_data = si[(si['fiscal_yr'] == fiscal_year) & (si['external'] == 1) & (si['highvolume'] == 1)]
    
    # High volume services online end to end (onlineE2E == 1)
    highvol_E2E = fiscal_year_data[fiscal_year_data['onlineE2E'] == 1]['service_id'].nunique()
    
    # High volume online enabled NA services where online_enabledNA < 6
    highvol_enabledNA = fiscal_year_data[fiscal_year_data['online_enabledNA'] < 6]['service_id'].nunique()
    
    # Calculate the share of high volume services online end to end
    if highvol_enabledNA != 0:
        high_vol_E2E = (highvol_E2E / highvol_enabledNA) * 100
    else:
        high_vol_E2E = 0  # If no services are online-enabled, set share to 0
    
    # Append the result for the fiscal year
    results.append([fiscal_year, high_vol_E2E])

# Convert results to DataFrame
high_vol_E2E_table = pd.DataFrame(results, columns=['Fiscal_Year', 'Share_of_High_Volume_Services_Online_End-to-End_(%)'])

# Display the resulting table
print(high_vol_E2E_table)


                     Fiscal_Year  \
0                      2018-2019   
1                      2022-2023   
2                      2019-2020   
3                      2020-2021   
4                      2021-2022   
5                      2023-2024   
6                      2024-2025   
7  Timestamp:2025-04-01_14:23:25   

   Share_of_High_Volume_Services_Online_End-to-End_(%)  
0                                          32.323232    
1                                          51.020408    
2                                          39.639640    
3                                          55.140187    
4                                          52.941176    
5                                          56.122449    
6                                           0.000000    
7                                           0.000000    


In [None]:
# metric 18: share of external high volume services which have at least one point online 
# Convert the 'onl_morepoints' column to numeric values (in case it's stored as strings)
si['onl_morepoints'] = pd.to_numeric(si['onl_morepoints'], errors='coerce')

# Create a list to store results
results = []

# Loop through each fiscal year
for fiscal_year in si['fiscal_yr'].unique():
    # Filter for the current fiscal year and external high volume services
    fiscal_year_data = si[(si['fiscal_yr'] == fiscal_year) & (si['external'] == 1) & (si['highvolume'] == 1)]
    
    # High volume services with at least one point online (onl_morepoints == 1)
    highvol_1point_count = fiscal_year_data[fiscal_year_data['onl_morepoints'] == 1]['service_id'].nunique()
    
    # High volume services (all)
    highvol_all_count = fiscal_year_data['service_id'].nunique()
    
    # Calculate the share of high volume services with at least one point online
    if highvol_all_count != 0:
        share_highvol_1point = (highvol_1point_count / highvol_all_count) * 100
    else:
        share_highvol_1point = 0  # If no high volume services, set share to 0
    
    # Append the result for the fiscal year
    results.append([fiscal_year, share_highvol_1point])

# Convert results to DataFrame
highvol_1point_share_table = pd.DataFrame(results, columns=['Fiscal_Year', 'Share_of_High_Volume_Services_with_At_Least_One_Point_Online_(%)'])

# Display the resulting table
print(highvol_1point_share_table)

                     Fiscal_Year  \
0                      2018-2019   
1                      2022-2023   
2                      2019-2020   
3                      2020-2021   
4                      2021-2022   
5                      2023-2024   
6                      2024-2025   
7  Timestamp:2025-04-01_14:23:25   

   Share_of_High_Volume_Services_with_At_Least_One_Point_Online_(%)  
0                                          71.794872                 
1                                          76.724138                 
2                                          76.190476                 
3                                          77.235772                 
4                                          79.104478                 
5                                          75.213675                 
6                                           0.000000                 
7                                           0.000000                 


In [None]:
# metric 19: high volume services meeting service standards
# Create a list to store results
results = []

# Loop through each fiscal year
for fiscal_year in si['fiscal_yr'].unique():
    # Filter for the current fiscal year and external high volume services
    fiscal_year_data = si[(si['fiscal_yr'] == fiscal_year) & (si['external'] == 1) & (si['highvolume'] == 1)]
    
    # High volume services that meet service standards (STDS_metsome == 1)
    high_vol_ser_metstds_count = fiscal_year_data[fiscal_year_data['STDS_metsome'] == 1]['service_id'].nunique()
    
    # High volume services (all)
    highvol_all_count = fiscal_year_data['service_id'].nunique()
    
    # Calculate the share of high volume services meeting service standards
    if highvol_all_count != 0:
        high_vol_ser_metstds = (high_vol_ser_metstds_count / highvol_all_count) * 100
    else:
        high_vol_ser_metstds = 0  # If no high volume services, set share to 0
    
    # Append the result for the fiscal year
    results.append([fiscal_year, high_vol_ser_metstds])

# Convert results to DataFrame
high_vol_ser_metstds_table = pd.DataFrame(results, columns=['Fiscal_Year', 'Share_of_High_Volume_Services_Meeting_Service_Standards_(%)'])

# Display the resulting table
print(high_vol_ser_metstds_table)


                     Fiscal_Year  \
0                      2018-2019   
1                      2022-2023   
2                      2019-2020   
3                      2020-2021   
4                      2021-2022   
5                      2023-2024   
6                      2024-2025   
7  Timestamp:2025-04-01_14:23:25   

   Share_of_High_Volume_Services_Meeting_Service_Standards_(%)  
0                                          44.444444            
1                                          54.310345            
2                                          49.206349            
3                                          45.528455            
4                                          39.552239            
5                                          52.136752            
6                                           0.000000            
7                                           0.000000            
