<span style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">An Exception was encountered at '<a href="#papermill-error-cell">In [12]</a>'.</span>

<img src="../images/AzPTravel_PPM.png">

## Validation Report Creation Script

#### This script reads the output of the "us_vdf*" and "us_vcomments*" data files and summarizes  them into a report: Validations Review _{BU}.xlsx

#### When finished, it will be able to be run on its own or as part of the pipeline wherever called (ie from a papermill script or other user inteface).


### User Variables
- These are overwritten if inherited from run_control.ipynb.
- Feel Free to reset them for a manual run if you like
- Do not save without percode = "-f"

In [1]:
commit_message = "Development and testing."
# Give a brief reason for the run.

run_control = 1
#run_type = 0 - Lite run with no reporting, not recommended.
#run_type = 1 - Lite run with normal reporting, default setting.
#run_type = 2 - Heavy run with full reporting, available for audits and troubleshooting.
#run_type = 5 - A default setting. Indicates the script is being run by an outside process without an inherited value

percode = "2021.Q1"
# Data Collection Code, this controls file paths and output names
# "-f" is the value indicating a bad inheritance from run with arg

s_format = "p"
# denotes the source data format x == Excel; j == json, p == parquet

#----------
# do not edit - this either inherits the full instance timestamp from the papermill book or captures the run time of this script.
from datetime import datetime  # datetime options
inst_datetime = datetime.now().strftime("%m%d%Y%H%M%S")

In [2]:
# Parameters
run_control = 1
percode = "2021.Q1"
commit_message = "New AUS needed to be converted from json"
inst_datetime = "05262021102758"


#### Notebook display options

In [3]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

#### import packages

In [4]:
#### Packages used

import os # System commands
import sys # System commands

import warnings # custom warnigns options

import glob # Directory operations
import getpass # Work with parquet
import json # Work with json

import matplotlib.pyplot as plt #Plots and Graphs
import numpy as np # Series and math
import pandas as pd #DataFrame and math

#excel operations
import re
import nicexcel as nl # Excel operations
import xlsxwriter # Excel operations
import openpyxl # Excel operations


#### Default Variables, these govern logic, do not edit.

In [5]:
default_dc = "20XX.QX"
default_rc = 0 #extra lite mode
dummy_perc = "33Q3" # bad inheritance

#### Script determining run context ie, manual, run_control.ipynb, or other.

In [6]:
if run_control == 5:
    run_control = default_rc 
else:
    run_control = run_control

try:
    if sys.argv[1] == "-f":
        percode = percode
    else:
        percode = sys.argv[1]

except IndexError:
    percode = default_dc
except NameError:
    percode = default_dc


#### style settings

In [7]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


## Begin Input Read
- Make paths used in script
- all paths should be modular and easily replaced ahead of operations.

In [8]:
#root directory
rt_path = f'//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/{str(percode)}'
#live sources directory
ls_path = os.path.join( rt_path, 'live_sources')
#report ouput dir
rep_path = os.path.abspath(os.path.join(rt_path, '..','..',f"{percode}", 'Validation Reports'))
#report ouput archive dir
rep_arch_path = os.path.abspath(os.path.join(rep_path, '..','..',f"{percode}",  'Archive'))


#### Make sources used in script
- all sources should be modular and easily replaced ahead of operations.

In [9]:
rep_xls = os.path.join(rep_path ,"Validations Review_xx.xlsx"   )

#### Get a list of only source files in the path that start with "us_dat".
#### Logic determines the source file types.

#### User instructions:
- Make sure that you have 1 file per source in this folder.
    -  For instance, do not have two files for Portugal. If there is an update, archive the old one.
- Do not overwrite files in the archive.
    - Rename newly archived files, no strict convention, we keep track of these by the modified date.
- It is ok to have multiple sources in one file.

In [10]:
files = os.listdir(ls_path)
files = [files.lower() for files in files]

files_sour = [f for f in files if f[-5:]  == '.json' or  f[-8:] == '.parquet' and f[:7] != 'us_orig']

files_sour

['us_dat_au_05142021143612.parquet',
 'us_dat_ca_05202021134242.parquet',
 'us_dat_ch_05182021192455.parquet',
 'us_dat_cz_05262021090243.parquet',
 'us_dat_es_05202021105141.parquet',
 'us_dat_gr_05252021115404.parquet',
 'us_dat_it_05242021111520.parquet',
 'us_dat_pl_05262021084450.parquet',
 'us_dat_pt_05202021121056.parquet',
 'us_orig_au_05142021143611.json',
 'us_survey_au_02172021174808.parquet',
 'us_survey_au_05142021143750.parquet',
 'us_survey_ca_05202021135610.parquet',
 'us_survey_ch_05182021192905.parquet',
 'us_survey_cz_05262021090440.parquet',
 'us_survey_es_05202021105431.parquet',
 'us_survey_gr_05252021115827.parquet',
 'us_survey_it_05242021111759.parquet',
 'us_survey_pl_05262021084708.parquet',
 'us_survey_pt_05202021121304.parquet',
 'us_vcomments_au_02172021174434.parquet',
 'us_vcomments_au_02172021174440.parquet',
 'us_vcomments_au_05142021143612.parquet',
 'us_vcomments_au_05142021143617.parquet',
 'us_vcomments_au_mnual_creation.parquet',
 'us_vcomments_ca

#### Make a list of BUs that have comments available

In [11]:
list_jfiles = glob.glob(os.path.join(ls_path, 'us_vcomments_*.json'))
list_pfiles = glob.glob(os.path.join(ls_path, 'us_vcomments_*.parquet'))

list_files = []
    
for i in list_jfiles : list_files.append(i)    
for i in list_pfiles : list_files.append(i)


for idx, i in enumerate(list_jfiles):
    list_jfiles[idx] = list_jfiles[idx][-23:-19]
    list_jfiles[idx] = list_jfiles[idx].replace('_','')

for idx, i in enumerate(list_pfiles):
    list_pfiles[idx] = list_pfiles[idx][-26:-22]
    list_pfiles[idx] = list_pfiles[idx].replace('_','')

list_bus = []
    
for i in list_jfiles : list_bus.append(i)    
for i in list_pfiles : list_bus.append(i)


BU_set = set(list_bus)
BU_list = list(BU_set)

list_files


['//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/2021.Q1\\live_sources\\us_vcomments_AU_02172021174434.parquet',
 '//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/2021.Q1\\live_sources\\us_vcomments_AU_02172021174440.parquet',
 '//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/2021.Q1\\live_sources\\us_vcomments_AU_05142021143612.parquet',
 '//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/2021.Q1\\live_sources\\us_vcomments_AU_05142021143617.parquet',
 '//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/2021.Q1\\live_sources\\us_vcomments_AU_mnual_creation.parquet',
 '//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/2021.Q1\\live_

<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [12]:

datafiles = [  "us_vcomment", "us_vdf" ]


for idx, b in enumerate(BU_list):

    list_files_a = []
    latest_valcomments = []
    list_files_a = list(filter(lambda x: "us_vcomments" in  x and f"_{b}" in  x , list_files))

    print(list_files_a)
    recent_vers_a = max(list_files_a, key=os.path.getctime)
    try:
        latest_valcomments.append([f'us_vcomment',pd.read_json(recent_vers_a, orient="table")])
    except UnicodeDecodeError:
        latest_valcomments.append([f'us_vcomment',pd.read_parquet(recent_vers_a, engine="pyarrow")])

    list_files_b = glob.glob(os.path.join(ls_path, f'us_vdf_{BU_list[idx]}*.json'))
    recent_vers_b = max(list_files_b, key=os.path.getctime)

    with open(recent_vers_b, 'r') as fp:
        data_dict = json.load(fp)
    latest_vdfdata_sets = { key: pd.DataFrame(data_dict[key]) for key in data_dict  }

    valcomm_dict = {}
    valdf_dict = {}
    valcomdf = []
    valcomdf = pd.DataFrame(latest_valcomments[0][1])

    for idx2, row in valcomdf.iterrows():

        valname = row["Validation Rule"][0:25]
        sheetname = valname + '-' + str(idx2)
        valcomm_dict["{0}".format(sheetname)] = pd.DataFrame(row).reindex()
        try:
            valdf_dict[sheetname] = data_dict[str(idx2)]
        except KeyError:
            pass
        
    writer = pd.ExcelWriter(rep_xls, engine='xlsxwriter')
    
    for i, j  in zip(valdf_dict,valcomm_dict ):
        valdf_dict[i] = pd.DataFrame(valdf_dict[i])
        valdf_dict[i].index.name = 'Row Number'
        valcomm_dict[i].to_excel(writer, sheet_name=i,header=False, startrow=0, startcol=0)
        valdf_dict[i].to_excel(writer, sheet_name= i , startrow=10, startcol=0)
    
    fin_name = rep_xls.replace("_xx",f"_{BU_list[idx]}")
    arch_name = fin_name.replace(f"Validations Review_{BU_list[idx]}",f"\Archive\Validations Review_{BU_list[idx]}{inst_datetime}")
    
    try:
        os.remove(arch_name)
    except FileNotFoundError:
        pass
    try:
        os.rename(fin_name, arch_name )
    except FileNotFoundError:
        print(f"New BU {BU_list[idx]}")
        pass

    writer.save()
    os.rename(rep_xls,rep_xls.replace("_xx",f"_{BU_list[idx]}"))


['//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/2021.Q1\\live_sources\\us_vcomments_PT_05202021121056.parquet']


['//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/2021.Q1\\live_sources\\us_vcomments_ES_05202021105141.parquet', '//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/2021.Q1\\live_sources\\us_vcomments_ES_05202021105142.parquet']


['//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/2021.Q1\\live_sources\\us_vcomments_PL_05262021084450.parquet']


FileCreateError: [Errno 13] Permission denied: '\\\\hecate\\Insurance_US\\Product Development\\Product Management\\Global PPM\\Reporting\\Data Collection\\2021.Q1\\Validation Reports\\Validations Review_xx.xlsx'

In [None]:
### Open the folder with the documents saved

In [None]:
os.startfile(rep_path)