# Input job parameters

In [1]:
import json 

# Load the job parameters from the JSON file. The context manager closes the
# file handle as soon as parsing finishes instead of leaving it open until
# garbage collection.
with open('data/pat_job_test.json') as f:
    job_para = json.load(f)
# Bare last expression so the notebook displays the loaded dict.
job_para

{'portfolio_name': 'PAT_Test_AIG',
 'type_of_rating': 'PSOLD',
 'coverage': 'Building + Contents + Time Element',
 'peril_subline': 'All_Perils',
 'subject_premium': 100000000.0,
 'loss_alae_ratio': 1,
 'average_accident_date': '1/1/2022',
 'trend_factor': 1.035,
 'additional_coverage': 2,
 'deductible_treatment': 'Retains Limit',
 'server': 'DFWCAT-RMS5SQL1',
 'edm_database': 'RMS_EPL_AIG_PAT_Testing_202107_EDM181',
 'rdm_database': 'RMS_EPL_AIG_PAT_Testing_202107_RDM181',
 'portinfoid': 6,
 'perilid': 5,
 'analysisid': 5}

# Create job

In [2]:
import numpy as np
import pandas as pd

from pat import PatJob, PatFlag

# Build the PAT job from the parameter dict loaded in the first cell.
job = PatJob(job_para)
# Presumably pulls the EDM/RDM inputs from the server/databases named in
# job_para -- TODO confirm against pat.PatJob.extract_edm_rdm.
job.extract_edm_rdm()

# Check data

In [3]:
# Run the job's data checks; what is validated is defined in pat.PatJob and is
# not visible in this notebook.
job.check_data()

# Net of Fac

In [4]:
# Splitting PseudoPolicies by FAC layering.
# Only run when job.df_fac has at least one entry; with no FAC data the step
# is skipped entirely.
if len(job.df_fac) > 0:
    job.net_of_fac()

# Allocate with PSOLD

In [5]:
# Run the PSOLD allocation step. Presumably this populates job.df_pat, which
# the comparison cell reads -- TODO confirm against pat.PatJob.
job.allocate_with_psold()

# Comparison

In [12]:
# df1: output of this run, renumbered 0..n-1.
df1 = job.df_pat.reset_index(drop=True)

# df2: reference output previously saved to CSV. LocationIDStack is forced to
# str on read; the first CSV column is consumed as the index and then dropped.
reference_csv = r'C:\_Working\PAT_20201019\__temp\dfpat.csv'
df2 = pd.read_csv(reference_csv, dtype={'LocationIDStack': str}, index_col=0)
df2 = df2.reset_index(drop=True)

# Remove the spaces around the underscore in the reference IDs.
df2.PseudoPolicyID = df2.PseudoPolicyID.str.replace(' _ ', '_')

# Relabel df1's columns positionally with the reference names.
# NOTE(review): no sort is applied, so both frames are assumed to already be in
# the same row and column order -- confirm before trusting the comparison.
df1.columns = df2.columns

print(df1.shape, df2.shape)

# Side-by-side dtypes of the two frames, one row per column name.
dtypes_df1 = pd.DataFrame(df1.dtypes, columns=['DF1'])
dtypes_df2 = pd.DataFrame(df2.dtypes, columns=['DF2'])
dtypes_df1.join(dtypes_df2, how='outer')

(101757, 13) (101757, 13)


Unnamed: 0,DF1,DF2
Limit,float64,float64
Retention,float64,float64
Allocated.Premium,float64,float64
Participation,float64,float64
LossRatio,float64,int64
AOI,float64,float64
LocationIDStack,object,object
Rating.Group,int64,int64
Original.Policy.ID,int64,int64
PseudoPolicyID,object,object


In [13]:
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype

# Column-by-column comparison of df1 (this run) against df2 (reference).
#   string columns : 'YES' when every row agrees (NaN treated as ""), else 'NO'
#   numeric columns: largest relative difference, plus NaN counts per frame
#   anything else  : 'UNK'
# Rows are collected in a list and turned into a DataFrame once; growing a
# frame with DataFrame.append is quadratic and the method no longer exists in
# pandas 2.x.
records = []
for c in df1.columns:
    col1, col2 = df1[c], df2[c]
    if is_string_dtype(col1):
        # A single differing row is a mismatch. The earlier np.all(...) test
        # only reported 'NO' when *every* row differed, hiding partial
        # mismatches behind a 'YES'.
        differs = (col1.fillna(value="") != col2.fillna(value="")).any()
        records.append({'name': c, 'type': col1.dtype,
                        'match': 'NO' if differs else 'YES'})
    elif is_numeric_dtype(col1):
        x = col1.astype('float')
        y = col2.astype('float')
        # Relative difference against the larger of the two values. Rows whose
        # ratio is NaN (0/0, or NaN on either side) are skipped by Series.max,
        # which is why the NaN counts are reported alongside.
        dif = (np.abs(x - y) / np.maximum(x, y)).max()
        records.append({'name': c, 'type': col1.dtype, 'match': dif,
                        'df1_nan': col1.isna().sum(),
                        'df2_nan': col2.isna().sum()})
    else:
        records.append({'name': c, 'type': col1.dtype, 'match': 'UNK'})

cmp = pd.DataFrame(records, columns=['name', 'type', 'match', 'df1_nan', 'df2_nan'])
cmp

Unnamed: 0,name,type,match,df1_nan,df2_nan
0,Limit,float64,0.0,0.0,0.0
1,Retention,float64,0.0,0.0,0.0
2,Allocated.Premium,float64,0.000122,72.0,72.0
3,Participation,float64,0.0,0.0,0.0
4,LossRatio,float64,0.0,0.0,0.0
5,AOI,float64,0.0,0.0,0.0
6,LocationIDStack,object,YES,,
7,Rating.Group,int64,0.0,0.0,0.0
8,Original.Policy.ID,int64,0.0,0.0,0.0
9,PseudoPolicyID,object,YES,,


# Test Area

In [None]:
# NOTE(review): the return value is discarded and, not being the last
# expression of the cell, is not displayed -- presumably the method reports on
# its own; confirm against pat.PatJob.
job.get_validation_counts()

# Three validation frames; only their shapes are printed here.
df_a, df_b, df_c = job.get_validation_data()
print(df_a.shape, df_b.shape, df_c.shape)

# Export the second frame ordered by location stack, then AOI, without the index.
loc_correction_csv = r'C:\_Working\PAT_20201019\__temp\loc_correction.csv'
df_b_sorted = df_b.sort_values(['LocationIDStack', 'AOI'])
df_b_sorted.to_csv(loc_correction_csv, index=False)