# Quality assurance and automated service data review

This notebook reviews published service data for common mistakes. It relies on the outputs of gc-service-data-script (`si.csv` and `ss.csv`) to function.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Import service inventory (si) and service standards (ss) to dataframes.
# Both files are read as semicolon-delimited CSVs.
si = pd.read_csv('si.csv', sep=';')
ss = pd.read_csv('ss.csv', sep=';')

# Extract date of generation from the timestamp on the last line of si.
# The first field of that line is split on ': ', and the part of the
# remainder before the first '_' is parsed as the date.
date = pd.to_datetime(si.iloc[-1, 0].split(': ')[1].split('_')[0])

# Remove the last line with the datestamp from both dataframes.
# .copy() makes each result an independent dataframe, so the columns
# assigned to si in later cells do not raise SettingWithCopyWarning.
si = si.iloc[:-1].copy()
ss = ss.iloc[:-1].copy()


In [3]:
# Record is reported for a fiscal year that is incomplete or in the future.
# 'fiscal_yr_end_date' is April 1 of the year that follows the '-' in fiscal_yr.
# NOTE(review): presumably this is the first day after a fiscal year ending March 31 - confirm.
si['fiscal_yr_end_date'] = pd.to_datetime(si['fiscal_yr'].str.split('-').str[1] + '-04-01')

# Strict comparison: when the data was generated on April 1 itself, the fiscal
# year that closed the day before is already complete and must not be flagged.
si['qa_fiscal_yr_in_future'] = si['fiscal_yr_end_date'] > date

In [4]:
# Record has contradiction between client feedback channels and online interaction points for feedback.
# na=False treats a missing client_feedback_channel as "ONL not listed"; without it,
# str.contains returns NaN for those rows and the `~` / `&` operations below break.
si['qa_client_feedback_contradiction'] = (

    # Service accepts client feedback via the online channel (ONL) but online issue resolution or feedback is not applicable or not activated
    (
        si['client_feedback_channel'].str.contains('ONL', na=False) &
        (
            si['os_issue_resolution_feedback'].isna() |
            (si['os_issue_resolution_feedback'] == 'N')
        )
    ) |
    # Service has not listed the online channel (ONL) for client feedback but online issue resolution or feedback is activated
    (
        (~si['client_feedback_channel'].str.contains('ONL', na=False)) &
        (si['os_issue_resolution_feedback'] == 'Y')
    )
)

# To inspect the flagged records, uncomment:
# si.loc[si['qa_client_feedback_contradiction'], ['client_feedback_channel', 'os_issue_resolution_feedback', 'qa_client_feedback_contradiction']]

In [5]:
# Service standards have volume but no volume indicated at service level.

# Sum the service standard volumes per fiscal year and service.
ss_vol_by_service = (
    ss.groupby(['fiscal_yr', 'service_id'])['total_volume']
    .sum()
    .reset_index()
    .rename(columns={'total_volume':'total_volume_ss'})
)

# Attach the summed volume to each service record; services without any
# service standard rows get a volume of 0.
# Dropping a pre-existing 'total_volume_ss' first keeps this cell re-runnable:
# otherwise a second run would merge the column again, produce
# 'total_volume_ss_x' / 'total_volume_ss_y', and fail on the lookup below.
si = (
    si.drop(columns=['total_volume_ss'], errors='ignore')
    .merge(ss_vol_by_service, on=['fiscal_yr', 'service_id'], how='left')
    .fillna({'total_volume_ss': 0})
)

# NOTE(review): a missing (NaN) num_applications_total is not flagged here,
# only an explicit 0 - confirm that this is the intended rule.
si['qa_ss_vol_without_si_vol'] = (
    (si['total_volume_ss'] > 0) & (si['num_applications_total'] == 0)
)

In [6]:
# Compare service IDs to service names over time