In [1]:
import pandas as pd
import numpy as np
import re
import pytz
import os
from pathlib import Path

In [2]:
# Anchor every path on the folder one level above the current working directory
base_dir = Path.cwd()
parent_dir = base_dir.parent

outputs_dir = parent_dir / "outputs"
inputs_dir = parent_dir / "inputs"

# Registry of CSV locations, keyed by short dataset name
data_files = {
    # files under outputs/ and src/
    "si": outputs_dir / "si.csv",
    "ss": outputs_dir / "ss.csv",
    "qa_issues": parent_dir / "src" / "qa_issues_descriptions.csv",
    "dept": outputs_dir / "utils" / "dept.csv",
    # files under inputs/
    "rbpo": inputs_dir / "rbpo.csv",
    "org_var": inputs_dir / "org_var.csv",
    "ifoi_en": inputs_dir / "ifoi_en.csv",
    "ifoi_fr": inputs_dir / "ifoi_fr.csv",
}

# Both frames are read with identical parser settings: semicolon-delimited, and
# with the default NA markers disabled plus an empty na_values list, so no
# string in the files is turned into NaN on load.
csv_options = {"sep": ';', "na_values": [], "keep_default_na": False}
si = pd.read_csv(data_files["si"], **csv_options)
ss = pd.read_csv(data_files["ss"], **csv_options)

In [101]:
# biggest swings in service volumes
#
# Builds one row per (org_id, department_en, service_id) with the summed
# num_applications_total per fiscal year, then ranks services by the change
# between the last two fiscal-year columns that remain after trimming.

# Take an explicit copy of the trimmed selection: assigning a column onto the
# result of a bare `.iloc[:-1]` slice triggers pandas' SettingWithCopyWarning.
sv = (
    si.loc[:, ['fiscal_yr', 'org_id', 'department_en', 'service_id', 'num_applications_total']]
    .iloc[:-1]  # cut last row, is timestamp
    .copy()
)

# Volumes must be numeric before they can be summed in the pivot
sv['num_applications_total'] = pd.to_numeric(sv['num_applications_total'])

# Wide layout: one column per fiscal year; combinations with no data become 0
sv = (
    sv.pivot_table(
        values='num_applications_total',
        index=['org_id', 'department_en', 'service_id'],
        columns='fiscal_yr',
        aggfunc='sum',
    )
    .fillna(0)
    .reset_index()
)

# Drop the last fiscal-year column before comparing.
# NOTE(review): presumably the most recent year is incomplete - confirm.
# `.copy()` makes sv_diff an independent frame, so adding `diff_latest` below
# is not a write onto a slice of `sv`.
sv_diff = sv.iloc[:, :-1].copy()

# Change between the two most recent remaining fiscal years (latest minus previous)
sv_diff['diff_latest'] = sv_diff.iloc[:, -1] - sv_diff.iloc[:, -2]

# Largest increases first, largest decreases last
sv_diff = sv_diff.sort_values(by='diff_latest', ascending=False)

sv_diff

fiscal_yr,org_id,department_en,service_id,2018-2019,2019-2020,2020-2021,2021-2022,2022-2023,2023-2024,diff_latest
2211,46,Canada Revenue Agency,3728,0.0,0.0,0.0,0.0,0.0,45126410.0,45126410.0
1706,26,Canada Border Services Agency,669,57653256.0,55801982.0,8678302.0,23381642.0,70361874.0,89154252.0,18792378.0
1609,228,National Research Council Canada,1677,0.0,0.0,0.0,0.0,57000000.0,69000000.0,12000000.0
2173,46,Canada Revenue Agency,1110,1525.0,1035.0,16908.0,60202.0,131499.0,6142697.0,6011198.0
1724,26,Canada Border Services Agency,728,21739228.0,20207714.0,19892542.0,21910803.0,22976516.0,25317100.0,2340584.0
...,...,...,...,...,...,...,...,...,...,...
2174,46,Canada Revenue Agency,1111,43107477.0,39999259.0,42481757.0,39880293.0,40803035.0,39760412.0,-1042623.0
1848,282,Public Service Commission of Canada,1195,1000000.0,488504.0,1242967.0,1128947.0,1128947.0,10234.0,-1118713.0
1494,151,Financial Consumer Agency of Canada,1726,0.0,0.0,10243558.0,9830975.0,10104531.0,2504.0,-10102027.0
2224,46,Canada Revenue Agency,SRV03577,0.0,0.0,0.0,0.0,32992344.0,0.0,-32992344.0
