In [1]:
import pandas as pd
import numpy as np
import re
import pytz
import os
from pathlib import Path
import sys
sys.path.append("/home/jovyan/shared/service-data")
import requests

from src.clean import clean_percentage, normalize_string, standardize_column_names, clean_fiscal_yr
from src.load import load_csv_from_raw
from src.export import export_to_csv
from src.merge import merge_si, merge_ss

# Directory anchors: `base_dir` is the current working directory and
# `parent_dir` is the folder one level above it.
base_dir = Path(os.getcwd())
parent_dir = base_dir.parent

In [38]:
# Download the published "service" recombinant schema from open.canada.ca and
# flatten it into one row per (resource, field) pair.
url = 'https://open.canada.ca/data/en/recombinant-published-schema/service.json'

# The timeout keeps Restart & Run All from hanging on a stalled connection;
# raise_for_status() surfaces an HTTP error here rather than as a confusing
# JSON-decode or KeyError further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

dd = pd.json_normalize(data)

# One row per resource, then one row per field within each resource.
dd_r = dd.explode('resources')
dd_r = pd.json_normalize(dd_r['resources'])
dd_r = dd_r.explode('fields').reset_index(drop=True)

# Prefix resource-level columns with 'resource_'. Columns that already carry
# the prefix (resource_name) are left untouched. Selecting them by name rather
# than by position keeps this correct if the JSON key order ever changes.
resource_prefix_cols = dd_r.columns[~dd_r.columns.str.startswith('resource_')]
dd_r = dd_r.rename(columns={col: 'resource_' + col for col in resource_prefix_cols})

# Expand each field definition into its own columns; the nested 'choices.*'
# columns are discarded.
dd_rf = pd.json_normalize(dd_r['resource_fields'])
dd_rf = dd_rf.loc[:, ~dd_rf.columns.str.startswith('choices.')]

# Every field-level column gets the 'field_' prefix.
field_prefix_cols = dd_rf.columns
dd_rf = dd_rf.add_prefix('field_')

# dd_r was re-indexed 0..n-1 above and dd_rf was built from it row by row,
# so merging on the index lines the two frames up one-to-one.
dd_r = dd_r.drop(columns=['resource_fields'])
dd_r = dd_r.merge(dd_rf, left_index=True, right_index=True)

dd_r = standardize_column_names(dd_r)

dd_r

Unnamed: 0,resource_name,resource_title_en,resource_title_fr,field_id,field_datastore_type,field_obligation_en,field_obligation_fr,field_label_en,field_label_fr,field_description_en,field_description_fr,field_validation_en,field_validation_fr,field_character_limit
0,service,Service Identification Information & Metrics,Information et paramètres sur l’identification...,fiscal_yr,text,Mandatory,Obligatoire,Fiscal Year,Année financière,Identifies the fiscal year (April 1 to March 3...,Indique l'exercice financier (1 avril au 31 ma...,This field must not be empty.,Ce champ ne doit pas être vide.,
1,service,Service Identification Information & Metrics,Information et paramètres sur l’identification...,service_id,text,Mandatory,Obligatoire,Service ID Number,Numéro d'identification du service,The unique number assigned to a service in the...,Le numéro unique attribué à un service dans le...,This field must not be empty.\nThis field cann...,Ce champ ne doit pas être vide.\nCe champ ne p...,
2,service,Service Identification Information & Metrics,Information et paramètres sur l’identification...,service_name_en,text,Mandatory,Obligatoire,Service Name (English),Nom du service (en anglais),Identifies the official name of the service.,Indique le nom officiel du service.,This field must not be empty.\nThis field has ...,Ce champ ne doit pas être vide.\nCe champ ne p...,350.0
3,service,Service Identification Information & Metrics,Information et paramètres sur l’identification...,service_name_fr,text,Mandatory,Obligatoire,Service Name (French),Nom du service (en français),Identifies the official name of the service.,Indique le nom officiel du service.,This field must not be empty.\nThis field has ...,Ce champ ne doit pas être vide.\nCe champ ne p...,350.0
4,service,Service Identification Information & Metrics,Information et paramètres sur l’identification...,service_description_en,text,Mandatory,Obligatoire,Service Description (English),Description du service (en anglais),"Provides a brief description of the service, i...","Offre une brève description du service, en lan...",This field must not be empty.\nThis field has ...,Ce champ ne doit pas être vide.\nCe champ ne p...,1800.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,service-std,Service Standards & Performance Results,Normes de service et résultats du rendement,standards_targets_uri_fr,text,Mandatory,Obligatoire,URL to Service Standards and Targets (French),URL vers les normes relatives aux services et ...,Identifies the departmental webpage (Canada.ca...,Indique la page Web ministérielle (Canada.ca) ...,This field must not be empty.\nThis field has ...,Ce champ ne doit pas être vide.\nCe champ ne p...,1500.0
66,service-std,Service Standards & Performance Results,Normes de service et résultats du rendement,performance_results_uri_en,text,Optional,Facultatif,URL to Real-Time Performance Results (English),URL aux résultats de rendement en temps réel (...,Identifies the departmental webpage where the ...,Identifie la site Web du service sur laquelle ...,This field has a maximum length of 1500 charac...,Ce champ ne peut excéder une longueur maximale...,1500.0
67,service-std,Service Standards & Performance Results,Normes de service et résultats du rendement,performance_results_uri_fr,text,Optional,Facultatif,URL to Real-Time Performance Results (French),URL aux résultats de rendement en temps réel (...,Identifies the departmental webpage where the ...,Identifie la site Web du service sur laquelle ...,This field has a maximum length of 1500 charac...,Ce champ ne peut excéder une longueur maximale...,1500.0
68,service-std,Service Standards & Performance Results,Normes de service et résultats du rendement,owner_org,text,Mandatory,Obligatoire,Department ID,ID du ministère,Identifies the applied ID of the department or...,Identifie la ID d'usage du ministère ou de l'o...,,,


In [3]:
# si_2018 = load_csv_from_raw('si_2018.csv')

# si_2018.iloc[:, 19:24].head()

In [4]:
# si = merge_si()
# ss = merge_ss()

In [5]:
# --- Disabled cell (everything below is commented out) ---
# Restricts the service inventory (`si`) and service standards (`ss`) to rows
# flagged by `service_scope_ext_or_ent`. Requires `si` and `ss` to exist
# (the `merge_si()` / `merge_ss()` calls above are also commented out).
# NOTE(review): the column is used directly as a row mask, so it is presumably
# boolean - confirm before re-enabling.
# si = si.loc[si['service_scope_ext_or_ent']]

# si_inscope = si.set_index(['fiscal_yr','service_id'])

# NOTE(review): a left merge followed by the notna() filter keeps only `ss` rows
# whose (fiscal_yr, service_id) matched a row in `si_inscope`.
# ss_inscope = ss.set_index(['fiscal_yr', 'service_id']).merge(si_inscope['service_scope_ext_or_ent'], how='left', left_index=True, right_index=True)
# ss_inscope = ss_inscope.loc[ss_inscope['service_scope_ext_or_ent'].notna()].reset_index()


In [6]:
# ss = ss_inscope

In [7]:
# --- Disabled cell (everything below is commented out) ---
# Requires `si` and `ss` from the earlier (also commented-out) cells.
# # MAF Question 1: Existence of service standards
# # As service standards are required under the Policy on Service and Digital, what is the percentage of services that have service standards?

# # Select relevant columns from service inventory
# maf1 = si.loc[:, ['fiscal_yr', 'service_id', 'department_en','department_fr', 'org_id']]

# # Deduplicate service standards to prevent one-to-many expansion
# ss_unique = ss[['fiscal_yr', 'service_id']].drop_duplicates()

# # Determine whether each service has a standard by checking for existence in 'service standards'
# # Merge with 'ss' to check if (fiscal_yr, service_id) exists
# maf1 = maf1.merge(
#     ss_unique,  # Use de-duplicated version to check
#     on=['fiscal_yr', 'service_id'],  # Merge on fiscal year and service ID
#     how='left',  # Keep all 'maf1' records, add matches from 'ss'
#     indicator=True  # Adds a column "_merge" to show if a match was found
# )

# # Create boolean column: True if the service exists in 'ss', otherwise False
# maf1['service_std_tf'] = maf1['_merge'] == 'both'

# # Drop the '_merge' column (no longer needed)
# maf1 = maf1.drop(columns=['_merge'])

# # Group by department and fiscal year, counting services with and without standards
# maf1 = maf1.groupby(['fiscal_yr', 'department_en', 'department_fr', 'org_id']).agg(
#     service_with_std_count=('service_std_tf', 'sum'),  # Count services that have standards (True = 1)
#     service_count_maf1=('service_id', 'count')  # Count all services
# ).reset_index()

# # Score = percentage of a department's services (per fiscal year) that have at least one standard.
# # NOTE(review): 'count' only counts non-null service_id values; a group whose
# # service_id values are all null would give a zero denominator here.
# maf1['maf1_score'] = (maf1['service_with_std_count']/maf1['service_count_maf1'])*100

In [8]:
# --- Disabled cell (everything below is commented out) ---
# MAF Question 2: percentage of service standards whose target was met
# (target_met == 'Y'), per department and fiscal year. Requires `ss`.
# NOTE(review): dropna() has no `subset`, so a row is dropped when ANY of the six
# selected columns is missing, not only when target_met is missing.
# maf2 = ss.loc[:, ['fiscal_yr', 'service_standard_id', 'department_en','department_fr', 'org_id', 'target_met']].dropna()
    
# Numerator: standards with target met; denominator: all remaining standards.
# maf2_num = maf2[maf2['target_met']=='Y'].groupby(['fiscal_yr', 'department_en','department_fr', 'org_id'])['service_standard_id'].count().reset_index()
# maf2_denom = maf2.groupby(['fiscal_yr', 'department_en','department_fr', 'org_id'])['service_standard_id'].count().reset_index()

# NOTE(review): groups with no 'Y' rows are absent from maf2_num, so after the
# outer merge their service_standard_id_met is NaN and maf2_score becomes NaN
# rather than 0 - fill with 0 if a zero score is intended.
# maf2 = pd.merge(
#     maf2_num,
#     maf2_denom,
#     suffixes=['_met','_total'],
#     on=['fiscal_yr', 'department_en','department_fr', 'org_id'],
#     how='outer'
# )

# maf2['maf2_score'] = (maf2['service_standard_id_met']/maf2['service_standard_id_total'])*100

# Displays the total number of standards counted for fiscal year 2023-2024.
# maf2.loc[maf2['fiscal_yr'] == '2023-2024', ['service_standard_id_total']].sum()

In [9]:


# --- Disabled cell (everything below is commented out) ---
# Builds a bilingual `ifoi` table from the English and French CSVs under
# parent_dir / "inputs", with the _en/_fr columns interleaved.
# ifoi_en = pd.read_csv(parent_dir / "inputs" / "ifoi_en.csv")
# ifoi_fr = pd.read_csv(parent_dir / "inputs" / "ifoi_fr.csv")

# Use each file's first column as the index and tag the remaining columns by language.
# ifoi_en = ifoi_en.set_index(ifoi_en.columns[0], drop=True).add_suffix('_en')
# ifoi_fr = ifoi_fr.set_index(ifoi_fr.columns[0], drop=True).add_suffix('_fr')

# NOTE(review): the next two calls do not assign their result, so they have no
# effect; if they were assigned they would move a second column into the index.
# They look like leftovers and can probably be removed - confirm.
# ifoi_en.set_index(ifoi_en.columns[0], drop=True)
# ifoi_fr.set_index(ifoi_fr.columns[0], drop=True)

# Column-wise concat aligns the two frames on their index values.
# ifoi = pd.concat([ifoi_en, ifoi_fr], axis=1)

# # Extract column lists
# en_cols = ifoi_en.columns.tolist()
# fr_cols = ifoi_fr.columns.tolist()

# # Interleave them by index
# NOTE(review): zip() stops at the shorter list, so if the two files have a
# different number of columns the extra ones are silently left out of `ifoi`.
# merged_cols = [col for pair in zip(en_cols, fr_cols) for col in pair]

# # Apply new column order
# ifoi = standardize_column_names(ifoi[merged_cols].reset_index())

# ifoi