In [None]:
import numpy as np
import pandas as pd
import itertools
import csv
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
#from sklearn.datasets import load_digits, make_moons, make_blobs
#from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score, precision_recall_curve
#import matplotlib.pylab as pylab
#from matplotlib.colors import ListedColormap
import torch.distributions
import configparser
config = configparser.ConfigParser()
config.read('config_parameters.ini')
from loader_LSTM import format_and_label, rearrange_columns, get_dataloaders


# Seed PyTorch and NumPy with the same value so repeated runs give the same numbers.
seed = 2019
torch.manual_seed(seed)
np.random.seed(seed)

# Validation and test sets, both stored as semicolon-separated CSV files.
data_valid, data_test = (
    pd.read_csv(csv_path, sep=';') for csv_path in ('data_valid.csv', 'data_test.csv')
)

In [None]:
from LSTM_model import LSTM_Model, train, save_model
# Rebuild the network and restore the trained weights.
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the predictions are converted with .numpy() further down,
# which requires CPU tensors anyway.
net = LSTM_Model()
net.load_state_dict(torch.load("weights/Weight_LSTM_Model_2048_0.2.pt", map_location="cpu"))
# Put the module in evaluation mode (kept as the last expression of the cell so
# the notebook still displays the model summary).
net.eval()

In [None]:
# Mean and standard deviation of the payment amounts, taken from the first row
# of the scaling-parameter file; they are used to undo the standardisation of
# the predicted payments.
scale_param = pd.read_csv('scale_param.csv', sep=';')
amount_mean, amount_std = (
    scale_param[stat_name].values[0] for stat_name in ('amount_mean', 'amount_std')
)

In [None]:
def pred_dev(data, data_copy, n_dev=11): #to predict development
    """Run the trained LSTM on ``data`` and attach its predictions to both inputs.

    Relies on the notebook-level objects ``net`` (the loaded model) and
    ``amount_mean`` / ``amount_std`` (the payment scaling parameters).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame handed to ``format_and_label`` / ``rearrange_columns`` to build
        the network input (payments on the standardised scale).
    data_copy : pandas.DataFrame
        Counterpart of ``data`` with unstandardised payments. It is expected to
        be row-aligned with ``data`` (same default integer index).
    n_dev : int, default 11
        Number of predicted development periods per row, i.e. the number of
        ``Ind_Pay_pred_*`` and ``Pay_pred_*`` columns created.

    Returns
    -------
    data_v1 : pandas.DataFrame
        ``data`` + ``Ind_Pay_pred_1..n_dev`` + ``Pay_pred_1..n_dev`` with the
        payments exactly as returned by the network (standardised scale).
    data_v2 : pandas.DataFrame
        ``data_copy`` + the same ``Ind_Pay_pred_*`` columns + ``Pay_pred_*``
        mapped back to the original scale (``value * amount_std + amount_mean``).
    """
    data_label = format_and_label(data) #data with labeled columns to be recognized by the LSTM
    x, y = rearrange_columns(data_label) #identify input and target (the target is not used here)
    with torch.no_grad():
        Ind_pay_pred, pay_pred = net.predict(x) #get prediction
    print(Ind_pay_pred.shape)

    n_rows = len(data)
    periods = range(1, n_dev + 1)

    def _prediction_frame(values, prefix):
        # One float64 column per development period, named <prefix><period>.
        return pd.DataFrame(data=values, index=range(n_rows), dtype=np.float64,
                            columns=[prefix + str(i) for i in periods])

    # arrange the tensors dimensions: one row per input row, one column per period
    proba = Ind_pay_pred.reshape(n_rows, n_dev).numpy()
    pay_standardised = pay_pred.reshape(n_rows, n_dev).numpy()

    # inverse standardize payment in one vectorised step, into a NEW array so
    # the network output is not overwritten; the original dtype is kept so the
    # numbers are identical to a column-by-column in-place update.
    pay_rescaled = (pay_standardised * amount_std + amount_mean).astype(pay_standardised.dtype)

    proba_frame = _prediction_frame(proba, 'Ind_Pay_pred_')
    #data with predicted payments and probabilities
    data_v1 = pd.concat([data, proba_frame, _prediction_frame(pay_standardised, 'Pay_pred_')], axis=1)
    #data with unstandardized predicted payment
    data_v2 = pd.concat([data_copy, proba_frame, _prediction_frame(pay_rescaled, 'Pay_pred_')], axis=1)

    return data_v1, data_v2

# -----------------------------------------------
# Load the copies that hold the unstandardised payments, then run the
# predictions: validation set first, test set second.
data_valid_copy = pd.read_csv('data_valid_copy.csv', sep=';')
data_test_copy = pd.read_csv('data_test_copy.csv', sep=';')

(data_valid_v1, data_valid_v2), (data_test_v1, data_test_v2) = (
    pred_dev(data_valid, data_valid_copy),
    pred_dev(data_test, data_test_copy),
)



## Data preparation for comparison plots

In [None]:
# Column-name lists used to select and compare columns.
# In every *_pred / *_true list, element 0 is the shared period-0 column
# ('Ind_Pay_0' or 'Pay00') and elements 1..11 are the per-period names.
dev = np.arange(0,12)

# payment indicator columns
Ind_pay = [f'Ind_Pay_{k}' for k in dev[1:]]
Ind_pay_pred = ["Ind_Pay_0", *(f'Ind_Pay_pred_{k}' for k in dev[1:])]
Ind_pay_true = ['Ind_Pay_0', *(name + '_true' for name in Ind_pay)]

# payment amount columns
pay = [f'Pay_{k}' for k in dev[1:]]
pay_pred = ["Pay00", *(f'Pay_pred_{k}' for k in dev[1:])]
pay_true = ["Pay00", *(name + '_true' for name in pay)]

In [None]:
# Raw ("brute") simulated cash flows for the validation and test sets.
data_valid_brute = pd.read_csv('./Datasets/Simulated_Cashflow_valid_brute_LSTM.csv', sep=';')
data_test_brute = pd.read_csv('./Datasets/Simulated_Cashflow_test_brute_LSTM.csv', sep=';')

# Keep only the true payment columns followed by the true indicator columns.
data_v, data_t = (
    brute_frame.loc[:, pay_true + Ind_pay_true]
    for brute_frame in (data_valid_brute, data_test_brute)
)


In [None]:
# Put the predictions (unstandardised scale) and the true values side by side.
data_valid_comp = pd.concat([data_valid_v2, data_v], axis=1)
data_test_comp = pd.concat([data_test_v2, data_t], axis=1)

# NOTE(review): reserve_data addresses these frames by column POSITION
# (index_pay / index_pay_pred), so the order of select_columns must not change.
select_columns = Ind_pay_true + Ind_pay_pred + pay_true + pay_pred
# .copy() makes the plotting frames independent of the *_comp frames: new
# columns are assigned to them later on, which would otherwise raise
# pandas' SettingWithCopyWarning.
data_v_plot = data_valid_comp[select_columns].copy()
data_t_plot = data_test_comp[select_columns].copy()

## Plots 

In [None]:
from matplotlib.ticker import ScalarFormatter

class ScalarFormatterClass(ScalarFormatter):
    """ScalarFormatter whose tick-label format is pinned to one decimal place."""

    _ONE_DECIMAL = "%1.1f"

    def _set_format(self):
        # Override of ScalarFormatter._set_format: always use the fixed format.
        self.format = self._ONE_DECIMAL

In [None]:
# Predicted vs. true payment amounts on the test set, one panel per selected period.
# rcParams are set BEFORE the figure is created: figure.figsize is only read
# when a figure is instantiated, so setting it afterwards would leave this
# figure at the previous size and only affect the next one.
plt.rcParams['figure.figsize'] = [15,12]
plt.rcParams["font.weight"] = "bold"
fig = plt.figure()

fontdict={'fontsize': 25,
          'weight' : 'bold'}

fontdicty={'fontsize': 18,
          'weight' : 'bold',
          'verticalalignment': 'baseline',
          'horizontalalignment': 'center'}

fontdictx={'fontsize': 18,
          'weight' : 'bold',
          'horizontalalignment': 'center'}
plt.subplots_adjust(wspace=0.4, hspace=0.3)

# Element 0 is a placeholder so that the loop index can double as the 1-based subplot number.
range_i = [0,1,2,4,6,8,10]
# Raw strings: "\h" is not a valid escape sequence in a normal string literal.
axe_y_name = [r"$\hat{Y}_2$",r"$\hat{Y}_3$",r"$\hat{Y}_5$",r"$\hat{Y}_7$",r"$\hat{Y}_9$",r"$\hat{Y}_{11}$"]
axe_x_name = ["$Y_2$","$Y_3$","$Y_5$","$Y_7$","$Y_9$","$Y_{11}$"]


print("payment columns are indexed 0 to 11 for periods 1 to 12")
for i in range(1,len(range_i)):
    ax = fig.add_subplot(3,3,i)
    true_col = pay_true[range_i[i]]
    pred_col = pay_pred[range_i[i]]
    # Only rows with a non-zero true payment in this period are plotted.
    subdata = data_t_plot.loc[data_t_plot.loc[:,true_col]!=0]
    ax.scatter(true_col,pred_col,data=subdata, alpha =0.2 )
    # Reference diagonal (prediction == truth), spanning the current y-range.
    ax.plot(ax.get_ylim(),ax.get_ylim(), color="black")
    print(true_col) #payment columns are indexed 0 to 11 for periods 1 to 12
    # One formatter per axis: a formatter instance is bound to the axis it is
    # installed on, so sharing one between x and y makes the first axis use
    # the other axis' state.
    yScalarFormatter = ScalarFormatterClass(useMathText=True)
    yScalarFormatter.set_powerlimits((0,0))
    ax.yaxis.set_major_formatter(yScalarFormatter)
    xScalarFormatter = ScalarFormatterClass(useMathText=True)
    xScalarFormatter.set_powerlimits((0,0))
    ax.xaxis.set_major_formatter(xScalarFormatter)
    ax.tick_params(axis='x',labelsize=13)
    ax.tick_params(axis='y',labelsize=13)
    ax.set_xlabel(axe_x_name[i-1], fontdict=fontdicty, position=(0.5,0.5), fontsize = 17)
    ax.set_ylabel(axe_y_name[i-1], fontdict=fontdicty, position=(0,0.5), fontsize = 17)
    ax.xaxis.labelpad=17
    if i==6:
        # Last panel only: fixed x-range.
        ax.set_xlim(-30000,50000)


In [None]:
# Box plots of the predicted indicator probability, split by the true indicator
# value, for the validation set (orange) and the test set (skyblue).
# rcParams are set BEFORE the figure is created: figure.figsize is only read
# when a figure is instantiated, so setting it afterwards would leave this
# figure at the previous size and only affect the next one.
plt.rcParams['figure.figsize'] = [15,3.5]
plt.rcParams["font.weight"] = "bold"
fig = plt.figure()

fontdict={'fontsize': 25,
          'weight' : 'bold'}

fontdicty={'fontsize': 18,
          'verticalalignment': 'baseline',
          'horizontalalignment': 'center'}

fontdictx={'fontsize': 18,
          'horizontalalignment': 'center'}
plt.subplots_adjust(wspace=0.3, hspace=0.4)
# Outlier marker style (despite the name, the marker face is lightcoral).
green_diamond = dict(markerfacecolor="lightcoral", marker='D',markersize=2)

# Element 0 is a placeholder so that the loop index can double as the 1-based subplot number.
range_i = [0,1,2,4,8] 
# Raw strings: "\h" is not a valid escape sequence in a normal string literal.
axe_y_name = [r"$\hat{p}_2$",r"$\hat{p}_3$",r"$\hat{p}_5$",r"$\hat{p}_9$"]
axe_x_name = ["         $I_2$","         $I_3$","         $I_5$","         $I_9$"]

print("Indicator columns are indexed 0 to 11 for periods 1 to 12")

for i in range(1,len(range_i)): 
    ax = fig.add_subplot(1,4,i)  
    true_col = Ind_pay_true[range_i[i]]
    pred_col = Ind_pay_pred[range_i[i]]
    # (frame, true indicator value, x position, box colour). The order of the
    # calls is kept (positions 1, 5, 3, 7) because the tick labels set below
    # are matched to the ticks in the order the boxes were added.
    box_specs = [
        (data_v_plot, 0, 1, "orange"),
        (data_v_plot, 1, 5, "orange"),
        (data_t_plot, 0, 3, "skyblue"),
        (data_t_plot, 1, 7, "skyblue"),
    ]
    for frame, indicator_value, position, face_colour in box_specs:
        subset = frame[frame.loc[:,true_col]==indicator_value]
        ax.boxplot(subset.loc[:,pred_col],flierprops=green_diamond,widths=0.6,positions=[position],boxprops=dict(facecolor=face_colour,color="black"),patch_artist=True)
    print(pred_col,true_col)
    # Vertical separator between the "indicator = 0" and "indicator = 1" groups.
    ax.axvline(x=4,color="black")
    ax.set_xticklabels(["    " + axe_x_name[i-1]+"=0", " " + axe_x_name[i-1]+"=1","", ""],rotation=0, fontsize = 15)
    ax.set_ylabel(axe_y_name[i-1], fontsize = 15)

In [None]:
# ROC curves of the predicted indicator probabilities for selected periods:
# thick orange line = validation set, thinner default-colour line = test set.
# rcParams are set BEFORE the figure is created: figure.figsize is only read
# when a figure is instantiated, so setting it afterwards would leave this
# figure at the previous size and only affect the next one.
plt.rcParams['figure.figsize'] = [15,7]
plt.rcParams["font.weight"] = "bold"
fig = plt.figure()

fontdict={'fontsize': 25,
          'weight' : 'bold'}

fontdicty={'fontsize': 18,
          'weight' : 'bold',
          'verticalalignment': 'baseline',
          'horizontalalignment': 'center'}

fontdictx={'fontsize': 18,
          'weight' : 'bold',
          'horizontalalignment': 'center'}
plt.subplots_adjust(wspace=0.3, hspace=0.4)

# Element 0 is a placeholder so that the loop index can double as the 1-based subplot number.
range_i = [0,1,2,4,8]

titles = ["$I_2$","$I_3$","$I_5$","$I_9$"]

# NOTE(review): both curves carry the label 'MT' and no legend is drawn.
curve_specs = (
    (data_v_plot, dict(color="orange", linewidth=4)),
    (data_t_plot, dict(linewidth=2.5)),
)

for i in range(1,len(range_i)):
    ax = fig.add_subplot(2,4,i)
    true_col = Ind_pay_true[range_i[i]]
    proba_col = Ind_pay_pred[range_i[i]]

    for frame, line_style in curve_specs:
        y_val = frame.loc[:,true_col]
        y_proba_val = frame.loc[:,proba_col]
        fpr_rf, tpr_rf, thresholds = roc_curve(y_val, y_proba_val)

        # Dashed diagonal as the reference line, then the ROC curve itself.
        ax.plot([0, 1], [0, 1], 'k--')
        ax.plot(fpr_rf, tpr_rf, label='MT', **line_style)

    ax.set_xlabel('False positive rate',fontsize=14)
    ax.set_ylabel('True positive rate',fontsize=14)
    ax.set_title('ROC curve of ' +titles[i-1],fontsize=14)


## Expected predicted payments

In [None]:
# Expected payment per period = predicted amount x predicted indicator value,
# stored in new columns E_pay_pred_1 .. E_pay_pred_11 of both plotting frames.
ESP_pay = [f'E_pay_pred_{k}' for k in dev[1:]]
for expected_col, amount_col, proba_col in zip(ESP_pay, pay_pred[1:], Ind_pay_pred[1:]):
    data_v_plot[expected_col] = data_v_plot[amount_col]*data_v_plot[proba_col]
    data_t_plot[expected_col] = data_t_plot[amount_col]*data_t_plot[proba_col]

In [None]:
# Column POSITIONS (0-based) inside the *_plot frames, as used by reserve_data:
# index_pay      -> the 12 payment columns summed as true/observed payments,
# index_pay_pred -> the 11 predicted columns used for the not-yet-observed periods.
# NOTE(review): these are hard-coded positions; they must stay in sync with the
# column layout of data_v_plot / data_t_plot - confirm when that layout changes.
index_pay = list(range(27,39))
index_pay_pred = list(range(52,63))

def reserve_data(data_name,data_plot):
    """Add ultimate, paid-to-date and reserve columns to ``data_plot`` (in place).

    For a row with ``Nb_Dev == d`` (expected to lie in 1..12; presumably the
    number of development periods already observed):

    * ``PAID_AT_ED``   = sum of the first ``d`` columns of ``index_pay``
    * ``Pay_Ult_Pred`` = ``PAID_AT_ED`` + sum of the columns of
      ``index_pay_pred`` from position ``d - 1`` onwards
    * ``Pay_Ult_true`` = sum of all ``index_pay`` columns
    * ``Pred_res``     = ``Pay_Ult_Pred - PAID_AT_ED``
    * ``true_res``     = ``Pay_Ult_true - PAID_AT_ED``

    Missing values are ignored in the sums.

    Parameters
    ----------
    data_name : pandas.DataFrame
        Frame providing the ``Nb_Dev`` column; row-aligned with ``data_plot``.
    data_plot : pandas.DataFrame
        Frame whose columns at positions ``index_pay`` / ``index_pay_pred``
        hold the payments and the predicted payments. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data_plot`` object, with the columns above plus ``Nb_Dev``.

    Raises
    ------
    ValueError
        If the two frames do not have the same number of rows.
    """
    nb_dev = data_name['Nb_Dev'].to_numpy()
    if len(nb_dev) != len(data_plot):
        raise ValueError("data_name and data_plot must have the same number of rows")

    paid = data_plot.iloc[:, index_pay].to_numpy(dtype=float)
    predicted = data_plot.iloc[:, index_pay_pred].to_numpy(dtype=float)
    observed_periods = nb_dev[:, None]

    # Per-row column masks, replacing the row-by-row Python loop:
    # the first Nb_Dev payment columns are observed ...
    is_observed = np.arange(paid.shape[1]) < observed_periods
    # ... and the predicted columns from position Nb_Dev - 1 onwards complete them.
    is_projected = np.arange(predicted.shape[1]) >= observed_periods - 1

    # nansum mirrors pandas' default of skipping missing values when summing.
    paid_at_ed = np.nansum(np.where(is_observed, paid, 0.0), axis=1)
    projected = np.nansum(np.where(is_projected, predicted, 0.0), axis=1)

    data_plot['Pay_Ult_Pred'] = paid_at_ed + projected
    data_plot['PAID_AT_ED'] = paid_at_ed
    #verification: for rows with Nb_Dev == 12 both totals must be equal
    fully_developed = data_name['Nb_Dev']==12
    print(data_plot[fully_developed]['Pay_Ult_Pred'].sum(), data_plot[fully_developed]['PAID_AT_ED'].sum())
    data_plot['Pay_Ult_true'] = data_plot.iloc[:, index_pay].sum(axis=1)
    data_plot['Pred_res'] = data_plot['Pay_Ult_Pred']-data_plot['PAID_AT_ED']
    data_plot['true_res'] = data_plot['Pay_Ult_true']-data_plot['PAID_AT_ED']
    data_plot['Nb_Dev']= data_name['Nb_Dev']
    return data_plot

In [None]:
# Reserve columns for the validation set. reserve_data modifies data_v_plot in
# place and returns it, so data_v_res and data_v_plot are the same object.
data_v_res = reserve_data(data_valid,data_v_plot)

In [None]:
# Reserve columns for the test set. reserve_data modifies data_t_plot in
# place and returns it, so data_t_res and data_t_plot are the same object.
data_t_res = reserve_data(data_test,data_t_plot)

In [None]:
def ratio(data):
    """Print predicted-to-true ratios of the total reserve and the total ultimate.

    Only rows whose ``Nb_Dev`` differs from 12 enter the totals. Nothing is
    returned; the two ratios are printed, reserve first, ultimate second.
    """
    open_rows = data.loc[data['Nb_Dev'] != 12]
    ratio_specs = (
        ("ratio reserve", 'Pred_res', 'true_res'),
        ("ratio ultime", 'Pay_Ult_Pred', 'Pay_Ult_true'),
    )
    for label, predicted_col, true_col in ratio_specs:
        print(label, open_rows[predicted_col].sum()/open_rows[true_col].sum())

In [None]:
# Predicted / true ratios on the validation set (rows with Nb_Dev != 12 only).
ratio(data_v_res)

In [None]:
# Predicted / true ratios on the test set (rows with Nb_Dev != 12 only).
ratio(data_t_res)

In [None]:
# Predicted vs. true reserve on the test set, one panel per line of business (1..4).
# rcParams are set BEFORE the figure is created: figure.figsize is only read
# when a figure is instantiated, so setting it afterwards would leave this
# figure at the previous size and only affect the next one.
plt.rcParams['figure.figsize'] = [15,12]
plt.rcParams["font.weight"] = "bold"
fig = plt.figure()

fontdict={'fontsize': 25,
          'weight' : 'bold'}

fontdicty={'fontsize': 18,
          'weight' : 'bold',
          'verticalalignment': 'baseline',
          'horizontalalignment': 'center'}

fontdictx={'fontsize': 18,
          'weight' : 'bold',
          'horizontalalignment': 'center'}
plt.subplots_adjust(wspace=0.4, hspace=0.3)
# data_res starts as an alias of data_t_res, so the LoB column is also added to
# data_t_res itself; the filter on the next line then creates a separate frame.
data_res = data_t_res
data_res['LoB'] = data_test['LoB']
# Rows with Nb_Dev == 12 are left out of the plot.
data_res = data_res[data_res['Nb_Dev']!=12]

for i in range(1,5):
    ax = fig.add_subplot(2,2,i)
    subdata= data_res[data_res['LoB']==i]
    ax.scatter(subdata['true_res'],subdata['Pred_res'], alpha=0.3 )
    # Reference diagonal (prediction == truth), spanning the current y-range.
    ax.plot(ax.get_ylim(),ax.get_ylim(), color="black")
    ax.set_title('Line of business ' + str(i),fontsize = 16,fontdict=fontdicty)
    ax.set_ylabel("Expected reserve", position=(0,0.5), fontsize = 18)
    ax.set_xlabel("Observed reserve", position=(0.5,0), fontsize = 18)
    ax.xaxis.labelpad=15