In [1]:
import pandas as pd
import numpy as np
import re
import pytz
import os
from pathlib import Path
import sys
sys.path.append("/home/jovyan/shared/service-data")
import requests

from src.clean import clean_percentage, normalize_string, standardize_column_names, clean_fiscal_yr
from src.load import load_csv_from_raw
from src.export import export_to_csv
from src.merge import merge_si, merge_ss

# Paths are resolved from the kernel's current working directory, so they
# depend on where the notebook server/kernel was started.
base_dir = Path.cwd()
# Directory one level above the working directory.
parent_dir = Path.cwd().parent

In [42]:
# Build a bilingual lookup of choice codes from the published "service" schema.
# Result (`ddf_pivot`): one row per (table, field, code) with `en` / `fr` text.
url = 'https://open.canada.ca/data/en/recombinant-published-schema/service.json'

# Bound the wait (seconds) and fail loudly on an HTTP error, so a bad response
# is reported here instead of as an obscure JSON/pandas error further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# Flatten the schema: one row per resource, then one row per field.
data_dict = pd.json_normalize(data)
data_dict = data_dict.explode('resources').reset_index(drop=True)
data_dict = pd.json_normalize(data_dict['resources'])
data_dict = data_dict.explode('fields').reset_index(drop=True)

# Expand each field dict into columns and join it back to its resource row.
# Both frames share the same RangeIndex, so the index join is row-for-row.
ddf = pd.json_normalize(data_dict['fields'])
ddf = ddf.merge(data_dict, left_index=True, right_index=True)

# Long format: one row per (field, flattened "choices.*" column); rows where
# the field does not define that choice are dropped.
choice_cols = [col for col in ddf.columns if col.startswith('choices.')]
ddf = ddf.melt(
    id_vars=['resource_name', 'title.en', 'title.fr', 'id', 'label.en', 'label.fr'],
    value_vars=choice_cols,
).dropna(subset=['value'])

# The melted column names are split on '.': segment 1 is used as the choice
# code and segment 2 as the language key. Names with no third segment yield
# NaN for `en_fr` and are dropped.
variable_parts = ddf['variable'].str.split('.')
ddf = ddf.assign(
    code=variable_parts.str[1],
    en_fr=variable_parts.str[2],
).dropna(subset=['en_fr'])

# Wide format: language keys become columns. `pivot` raises if the same
# (resource_name, id, code, en_fr) combination appears more than once.
ddf_pivot = (
    ddf.pivot(index=['resource_name', 'id', 'code'], columns='en_fr', values='value')
    .reset_index()
    .rename_axis(columns=None)  # drop the leftover 'en_fr' columns-axis name
    .rename(columns={'id': 'field', 'resource_name': 'table'})
)

ddf_pivot.info()




<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1149 entries, 0 to 1148
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   table   1149 non-null   object
 1   field   1149 non-null   object
 2   code    1149 non-null   object
 3   en      1149 non-null   object
 4   fr      1149 non-null   object
dtypes: object(5)
memory usage: 45.0+ KB


In [46]:
# Build two tables from the published "service" schema:
#   dd_field_names - one row per field, without the flattened choice columns
#   dd_choices     - one row per (resource_name, id, code) with `en` / `fr` text
url = 'https://open.canada.ca/data/en/recombinant-published-schema/service.json'

# Bound the wait (seconds) and fail loudly on an HTTP error, so a bad response
# is reported here instead of as an obscure JSON/pandas error further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# Flatten the schema: one row per resource, then one row per field.
data_dict = pd.json_normalize(data)
data_dict = data_dict.explode('resources').reset_index(drop=True)
data_dict = pd.json_normalize(data_dict['resources'])
data_dict = data_dict.explode('fields').reset_index(drop=True)

# Expand each field dict into columns and attach them to the resource columns.
# Both frames share the same RangeIndex, so the index join is row-for-row.
data_dict_fields = pd.json_normalize(data_dict['fields'])
data_dict = data_dict.merge(data_dict_fields, left_index=True, right_index=True)

# prep dictionary: everything except the flattened choices and the raw
# `fields` dict column.
is_choice_col = data_dict.columns.str.startswith('choices.')
dd_field_names = data_dict.loc[:, ~is_choice_col].drop(columns=['fields'])

# prep choices file: long format, one row per (field, "choices.*" column);
# rows where the field does not define that choice are dropped.
dd_choices = data_dict.melt(
    id_vars=['resource_name', 'title.en', 'title.fr', 'id', 'label.en', 'label.fr'],
    value_vars=list(data_dict.columns[is_choice_col]),
).dropna(subset=['value'])

# The melted column names are split on '.': segment 1 is used as the choice
# code and segment 2 as the language key. Names with no third segment yield
# NaN for `en_fr` and are dropped.
variable_parts = dd_choices['variable'].str.split('.')
dd_choices = dd_choices.assign(
    code=variable_parts.str[1],
    en_fr=variable_parts.str[2],
).dropna(subset=['en_fr'])

# Wide format: language keys become columns. `pivot` raises if the same
# (resource_name, id, code, en_fr) combination appears more than once.
dd_choices = (
    dd_choices.pivot(index=['resource_name', 'id', 'code'], columns='en_fr', values='value')
    .reset_index()
    .rename_axis(columns=None)  # drop the leftover 'en_fr' columns-axis name
)

# Standardize column names
dd_field_names = standardize_column_names(dd_field_names)
dd_choices = standardize_column_names(dd_choices)


en_fr,resource_name,id,code,en,fr
0,service,automated_decision_system,N,No,Non
1,service,automated_decision_system,Y,Yes,Oui
2,service,client_feedback_channel,EML,Email,Courriel
3,service,client_feedback_channel,FAX,Fax,Télécopieur
4,service,client_feedback_channel,NON,No feedback collected,Aucune rétroaction possible
...,...,...,...,...,...
1144,service-std,target_met,Y,Yes,Oui
1145,service-std,type,ACS,Access,Accès
1146,service-std,type,ACY,Accuracy,Exactitude
1147,service-std,type,OTH,Other,Autre


In [None]:
# si_2018 = load_csv_from_raw('si_2018.csv')

# si_2018.iloc[:, 19:24].head()

In [None]:
# si = merge_si()
# ss = merge_ss()

In [None]:
# si = si.loc[si['service_scope_ext_or_ent']]

# si_inscope = si.set_index(['fiscal_yr','service_id'])

# ss_inscope = ss.set_index(['fiscal_yr', 'service_id']).merge(si_inscope['service_scope_ext_or_ent'], how='left', left_index=True, right_index=True)
# ss_inscope = ss_inscope.loc[ss_inscope['service_scope_ext_or_ent'].notna()].reset_index()


In [None]:
# ss = ss_inscope

In [None]:
# # MAF Question 1: Existence of service standards
# # As service standards are required under the Policy on Service and Digital, what is the percentage of services that have service standards?

# # Select relevant columns from service inventory
# maf1 = si.loc[:, ['fiscal_yr', 'service_id', 'department_en','department_fr', 'org_id']]

# # Deduplicate service standards to prevent one-to-many expansion
# ss_unique = ss[['fiscal_yr', 'service_id']].drop_duplicates()

# # Determine whether each service has a standard by checking for existence in 'service standards'
# # Merge with 'ss' to check if (fiscal_yr, service_id) exists
# maf1 = maf1.merge(
#     ss_unique,  # Use de-duplicated version to check
#     on=['fiscal_yr', 'service_id'],  # Merge on fiscal year and service ID
#     how='left',  # Keep all 'maf1' records, add matches from 'ss'
#     indicator=True  # Adds a column "_merge" to show if a match was found
# )

# # Create boolean column: True if the service exists in 'ss', otherwise False
# maf1['service_std_tf'] = maf1['_merge'] == 'both'

# # Drop the '_merge' column (no longer needed)
# maf1 = maf1.drop(columns=['_merge'])

# # Group by department and fiscal year, counting services with and without standards
# maf1 = maf1.groupby(['fiscal_yr', 'department_en', 'department_fr', 'org_id']).agg(
#     service_with_std_count=('service_std_tf', 'sum'),  # Count services that have standards (True = 1)
#     service_count_maf1=('service_id', 'count')  # Count all services
# ).reset_index()

# maf1['maf1_score'] = (maf1['service_with_std_count']/maf1['service_count_maf1'])*100

In [None]:
# NOTE(review): this cell is fully commented out (disabled draft).
# If re-enabled: `maf2_num` only contains groups with at least one
# target_met == 'Y' row, so after the outer merge those groups with none have
# NaN in `service_standard_id_met` and `maf2_score` is NaN rather than 0 --
# confirm whether a fill with 0 is intended.
# maf2 = ss.loc[:, ['fiscal_yr', 'service_standard_id', 'department_en','department_fr', 'org_id', 'target_met']].dropna()
    
# maf2_num = maf2[maf2['target_met']=='Y'].groupby(['fiscal_yr', 'department_en','department_fr', 'org_id'])['service_standard_id'].count().reset_index()
# maf2_denom = maf2.groupby(['fiscal_yr', 'department_en','department_fr', 'org_id'])['service_standard_id'].count().reset_index()

# maf2 = pd.merge(
#     maf2_num,
#     maf2_denom,
#     suffixes=['_met','_total'],
#     on=['fiscal_yr', 'department_en','department_fr', 'org_id'],
#     how='outer'
# )

# maf2['maf2_score'] = (maf2['service_standard_id_met']/maf2['service_standard_id_total'])*100

# maf2.loc[maf2['fiscal_yr'] == '2023-2024', ['service_standard_id_total']].sum()

In [None]:


# NOTE(review): this cell is fully commented out (disabled draft).
# It reads the English and French IFOI inputs, indexes each by its first
# column, suffixes the remaining columns with _en / _fr, and interleaves them.
# ifoi_en = pd.read_csv(parent_dir / "inputs" / "ifoi_en.csv")
# ifoi_fr = pd.read_csv(parent_dir / "inputs" / "ifoi_fr.csv")

# ifoi_en = ifoi_en.set_index(ifoi_en.columns[0], drop=True).add_suffix('_en')
# ifoi_fr = ifoi_fr.set_index(ifoi_fr.columns[0], drop=True).add_suffix('_fr')

# NOTE(review): the next two calls do not assign their result, so they would
# have no effect if re-enabled. By this point the original first column is
# already the index, so `columns[0]` is the first suffixed data column --
# these look like leftovers that can be removed; confirm.
# ifoi_en.set_index(ifoi_en.columns[0], drop=True)
# ifoi_fr.set_index(ifoi_fr.columns[0], drop=True)

# ifoi = pd.concat([ifoi_en, ifoi_fr], axis=1)

# # Extract column lists
# en_cols = ifoi_en.columns.tolist()
# fr_cols = ifoi_fr.columns.tolist()

# # Interleave them by index
# merged_cols = [col for pair in zip(en_cols, fr_cols) for col in pair]

# # Apply new column order
# ifoi = standardize_column_names(ifoi[merged_cols].reset_index())

# ifoi