<img src="../images/AzPTravel_PPM.png">

## Finance Comparison Report

#### This script reads the output of the Upload Assistant and summarizes all finance views with comments along with each report.

#### This can be run on its own or as part of the pipeline wherever called (ie from a papermill script or other user inteface).

### User Variables
- These are overwritten if inherited from run_control.ipynb.
- Feel Free to reset them for a manual run if you like
- Do not save without percode = "-f"

In [37]:
commit_message = "Development and testing."
# Give a brief reason for the run.

run_control = 1
#run_type = 0 - Lite run with no reporting, not recommended.
#run_type = 1 - Lite run with normal reporting, default setting.
#run_type = 2 - Heavy run with full reporting, available for audits and troubleshooting.
#run_type = 5 - A default setting. Indicates the script is being run by an outside process without an inherited value

percode = "2021.Q1"
# Data Collection Code, this controls file paths and output names
# "-f" is the value indicating a bad inheritance from run with arg

s_format = "p"
# denotes the source data format x == Excel; j == json, p == parquet

#----------
# do not edit - this either inherits the full instance timestamp from the papermill book or captures the run time of this script.
from datetime import datetime
inst_datetime = datetime.now().strftime("%m%d%Y%H%M%S")

#### Notebook display options

In [38]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

#### import packages

In [39]:
#### Packages used

import os
import sys

from datetime import datetime
import warnings

import glob
import getpass
import json

import matplotlib.pyplot as plt
import openpyxl
import numpy as np
import pandas as pd

import re
import nicexcel as nl
import xlsxwriter

global df

#### Default Variables, these govern logic, do not edit.

In [40]:
default_dc = "20XX.QX"
default_rc = 0 #extra lite mode
dummy_perc = "33Q3" # bad inheritance

#### Script determining run context ie, manual, run_control.ipynb, or other.

In [41]:
if run_control == 5:
    run_control = default_rc 
else:
    run_control = run_control

try:
    if sys.argv[1] == "-f":
        percode = percode
    else:
        percode = sys.argv[1]

except IndexError:
    percode = default_dc
except NameError:
    percode = default_dc


#### style settings

In [42]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

#### Make paths for the source folder

In [43]:
rt_path = f'//hecate/Insurance_US/Product Development/Product Management/Global PPM/Reporting/Data Collection/Production/{str(percode)}'
ls_path = os.path.join( rt_path, 'live_sources')
rep_path = os.path.abspath(os.path.join(rt_path, '..','..',f"{percode}", 'Standard Reports'))
rep_arch_path = os.path.abspath(os.path.join(rt_path, '..','..',f"{percode}", 'Standard Reports', 'Archive'))


#### Get a list of only source files in the path that start with "us_dat".
#### Logic determines the source file types.

#### User instructions:
- Make sure that you have 1 file per source in this folder.
    -  For instance, do not have two files for Portugal. If there is an update, archive the old one.
- Do not overwrite files in the archive.
    - Rename newly archived files, no strict convention, we keep track of these by the modified date.
- It is ok to have multiple sources in one file.

In [44]:
files = os.listdir(ls_path)
files = [files.lower() for files in files]

files_sour = [f for f in files if f[-5:]  == '.json' or  f[-8:] == '.parquet' and f[:7] != 'us_orig']

files_sour

['us_dat_nz+_05142021114427.parquet',
 'us_dat_nz+_05162021153809.parquet',
 'us_dat_us_05122021094008.parquet',
 'us_survey_us_05122021094728.parquet',
 'us_vcomments_nz+_05142021114428.json',
 'us_vcomments_nz+_05142021114428.parquet',
 'us_vcomments_nz+_05162021153810.parquet',
 'us_vcomments_us_05122021094708.json',
 'us_vcomments_us_05122021094708.parquet',
 'us_vdf_nz+_05142021114428.json',
 'us_vdf_nz+_05162021153810.json',
 'us_vdf_us_05122021094708.json']

In [45]:
list_jfiles = glob.glob(os.path.join(ls_path, 'us_vcomments_*.json'))
list_pfiles = glob.glob(os.path.join(ls_path, 'us_vcomments_*.parquet'))

for idx, i in enumerate(list_jfiles):
    list_jfiles[idx] = list_jfiles[idx][-23:-19]
    list_jfiles[idx] = list_jfiles[idx].replace('_','')

for idx, i in enumerate(list_pfiles):
    list_pfiles[idx] = list_pfiles[idx][-26:-22]
    list_pfiles[idx] = list_pfiles[idx].replace('_','')

list_files = []
    
for i in list_jfiles : list_files.append(i)    
for i in list_pfiles : list_files.append(i)


BU_set = set(list_files)
BU_list = list(BU_set)

BU_list


['US', 'NZ+']

In [46]:
files_cmnts = [i for i in files_sour if "vcomments" in i  and "parq" in i ]

files_cmnts

['us_vcomments_nz+_05142021114428.parquet',
 'us_vcomments_nz+_05162021153810.parquet',
 'us_vcomments_us_05122021094708.parquet']

In [47]:
cols = ['Business Unit', 'Validation Rule', 'Comments', 'Row Counts', 'Check Type']


latest_valcomments = pd.DataFrame(columns = cols)


for idx, i in enumerate(BU_list):
    list_files_a = glob.glob(os.path.join(ls_path, f'us_vcomments_{i}*.parquet'))
    recent_vers_a = max(list_files_a, key=os.path.getctime)
    idf  =  pd.read_parquet(recent_vers_a, engine = "pyarrow")
    idf.insert(0, "Business Unit",BU_list[idx])
    latest_valcomments = latest_valcomments.append(idf)

latest_valcomments = latest_valcomments.reset_index(drop=True)

latest_valcomments

Unnamed: 0,Business Unit,Validation Rule,Comments,Row Counts,Check Type
0,US,Finance Reporting.,Please add any commentary on differences from ...,1,Written Revenue
1,US,Finance Reporting.,Please add any commentary on differences from ...,1,Earned Revenue
2,US,Finance Reporting.,Please add any commentary on differences from ...,1,Loss Ratio
3,US,Finance Reporting.,Please add any commentary on differences from ...,1,Commission Ratio
4,US,Finance Reporting.,Please add any commentary on differences from ...,1,Expense Ratio
5,US,Reporting Date From,Please replace this text with an explanation o...,10,Row Check
6,US,Units of Risk (Earned),Please replace this text with an explanation o...,2,Row Check
7,US,Number of Policies (Earned),Please replace this text with an explanation o...,12,Row Check
8,US,Reporting Date To,Please replace this text with an explanation o...,10,Row Check
9,US,Date of Analysis,Please replace this text with an explanation o...,10,Row Check


In [48]:
latest_vdfdata_sets = []

for idx, i in enumerate(BU_list):
    list_files_b = glob.glob(os.path.join(ls_path, f'us_vdf_{i}*.json'))
    recent_vers_b = max(list_files_b, key=os.path.getctime)
    with open(recent_vers_b, 'r') as fp:
        data_dict = json.load(fp)
        dfslist =   { key: pd.DataFrame(data_dict[key]) for key in data_dict  }
        for i in dfslist: latest_vdfdata_sets.append(dfslist[i])

latest_vdfdata_sets


[         Country (LC) Submission Written Revenue net of Taxes  \
 0  United States                                      29,302    
 
   (EUR) Submission Written Revenue net of Taxes  \
 0                                       24,832    
 
   (EUR) Finance Sales Gross Written (EUR) Difference  
 0                      145,310,310     -145,285,478   ,
          Country (LC): Submission Earned Revenues net of Taxes  \
 0  United States                                       22,054    
 
   (EUR): Submission Earned Revenues net of Taxes (EUR) Finance Net Earnings  \
 0                                        18,690                 98,303,270    
 
   (EUR) Difference  
 0     -98,284,580   ,
          Country Submission Loss Ratio Finance Loss Ratio Difference (%)
 0  United States                 19.5%              18.4%           1.1%,
          Country Submission Commission Ratio Finance Commission Ratio  \
 0  United States                       26.9%                    47.7%   
 
   Di

In [49]:
latest_valcomments["Applies to"] = latest_valcomments["Business Unit"]
latest_valcomments

Unnamed: 0,Business Unit,Validation Rule,Comments,Row Counts,Check Type,Applies to
0,US,Finance Reporting.,Please add any commentary on differences from ...,1,Written Revenue,US
1,US,Finance Reporting.,Please add any commentary on differences from ...,1,Earned Revenue,US
2,US,Finance Reporting.,Please add any commentary on differences from ...,1,Loss Ratio,US
3,US,Finance Reporting.,Please add any commentary on differences from ...,1,Commission Ratio,US
4,US,Finance Reporting.,Please add any commentary on differences from ...,1,Expense Ratio,US
5,US,Reporting Date From,Please replace this text with an explanation o...,10,Row Check,US
6,US,Units of Risk (Earned),Please replace this text with an explanation o...,2,Row Check,US
7,US,Number of Policies (Earned),Please replace this text with an explanation o...,12,Row Check,US
8,US,Reporting Date To,Please replace this text with an explanation o...,10,Row Check,US
9,US,Date of Analysis,Please replace this text with an explanation o...,10,Row Check,US


In [50]:
for idx, i in enumerate(latest_valcomments["Business Unit"]):
    if "+" in  latest_valcomments["Business Unit"][idx]:
        latest_valcomments.iat[idx , 5]  = "Multiple Business Units"
    else:
        latest_valcomments.iat[idx , 5]   = "Business Unit Only"


In [51]:
for idx, i  in  enumerate(latest_vdfdata_sets):
            c = len(i.columns)
            i.insert(c , "Submission Comment", latest_valcomments["Comments"][idx])
            i.insert(c +1, "Applies to", latest_valcomments["Applies to"][idx])

print(latest_vdfdata_sets)

[         Country (LC) Submission Written Revenue net of Taxes  \
0  United States                                      29,302    

  (EUR) Submission Written Revenue net of Taxes  \
0                                       24,832    

  (EUR) Finance Sales Gross Written (EUR) Difference  \
0                      145,310,310     -145,285,478    

                                  Submission Comment          Applies to  
0  Please add any commentary on differences from ...  Business Unit Only  ,          Country (LC): Submission Earned Revenues net of Taxes  \
0  United States                                       22,054    

  (EUR): Submission Earned Revenues net of Taxes (EUR) Finance Net Earnings  \
0                                        18,690                 98,303,270    

  (EUR) Difference                                 Submission Comment  \
0     -98,284,580   Please add any commentary on differences from ...   

           Applies to  
0  Business Unit Only  ,          Coun

In [52]:
wrcols = ['Country', '(LC) Submission Written Revenue net of Taxes',
       '(EUR) Submission Written Revenue net of Taxes',
       '(EUR) Finance Sales Gross Written', '(EUR) Difference', "Submission Comment","Applies to" ]

ercols = ['Country', '(LC): Submission Earned Revenues net of Taxes',
       '(EUR): Submission Earned Revenues net of Taxes',
       '(EUR) Finance Net Earnings', '(EUR) Difference', "Submission Comment","Applies to" ]

lrcols = ['Country', 'Submission Loss Ratio', 'Finance Loss Ratio',
       'Difference (%)', "Submission Comment","Applies to" ]

crcols = ['Country', 'Submission Commission Ratio', 'Finance Commission Ratio',
       'Difference (%)', "Submission Comment","Applies to" ]

etcols = ['Country', 'Submission Expense Ratio', 'Finance Expense Ratio',
       'Difference (%)', "Submission Comment","Applies to" ]

Written_Revenue_Report = pd.DataFrame(columns = wrcols )
Earned_Revenue_Report = pd.DataFrame(columns = ercols )
Loss_Ratio_Report = pd.DataFrame(columns = lrcols )
Commission_Ratio_Report = pd.DataFrame(columns = crcols )
Expense_Ratio_Report = pd.DataFrame(columns = etcols )


for idx, i in enumerate(latest_vdfdata_sets):
    try:
        Written_Revenue_Report = Written_Revenue_Report.append(i[wrcols])
    except:
        pass
    try:
        Earned_Revenue_Report = Earned_Revenue_Report.append(i[ercols])
    except:
        pass
    try:
        Loss_Ratio_Report = Loss_Ratio_Report.append(i[lrcols])
    except:
        pass
    try:
        Commission_Ratio_Report = Commission_Ratio_Report.append(i[crcols])
    except:
        pass
    try:
        Expense_Ratio_Report = Expense_Ratio_Report.append(i[etcols])
    except:
        pass

In [53]:
fin_rep_dict = { "Written Revenue Comparison" : Written_Revenue_Report , "Earned Revenue Comparison" : 
                Earned_Revenue_Report , "Loss Ratio Comparison" : Loss_Ratio_Report,
                "Commission Ratio Comparison" : Commission_Ratio_Report, "Expense Ratio Comparison"  :
                Expense_Ratio_Report  }

fin_rep_dict.keys()

dict_keys(['Written Revenue Comparison', 'Earned Revenue Comparison', 'Loss Ratio Comparison', 'Commission Ratio Comparison', 'Expense Ratio Comparison'])

In [54]:
filename =  os.path.join(rep_path, "Finance Comparison.xlsx")
archname = os.path.join(rep_path,rep_arch_path ,f"Finance Comparison_{inst_datetime}.xlsx")

os.copyfile(filename, archname)

while not os.path.isfile(filename):
    df = pd.DataFrame(fin_rep_dict['Written Revenue Comparison'])
    df.to_excel(filename, sheet_name='Written Revenue Comparison') 

    
    
writer = pd.ExcelWriter( filename, engine='xlsxwriter')

for k, v  in fin_rep_dict.items():
    v = pd.DataFrame(v)
    b = pd.DataFrame({k: k}, index = [0])
    b.to_excel(writer, sheet_name=k,header=False, startrow=3, startcol=4 , index = False)
    v.to_excel(writer, sheet_name= k , startrow=7, startcol=0 , index = False)

writer.save()

print(filename)



AttributeError: module 'os' has no attribute 'copyfile'