In [4]:
import os
import sys
from dotenv import load_dotenv
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np

scripts_path = os.getcwd()
env_file_path = os.path.join(scripts_path, 'env.txt')
load_dotenv(env_file_path)
main_path = os.getenv("MAIN_PATH")

sys.path.append(main_path)
data_path = os.path.join(main_path, 'data')

In [181]:
samples_file = os.path.join(data_path, 'EW_BusinessXai_v2.xlsx')
samples = pd.read_excel(samples_file, sheet_name = 'EW_MODULI_SCORE_CORPORATE')
predictions = pd.read_excel(samples_file, sheet_name = 'EW_SCORE_CORPORATE')
kpi_map = pd.read_excel(os.path.join(data_path, 'mapping descrizioni KPI_v6.1.xlsx'))
kpi_map = kpi_map.rename(columns={'ID':'KPI'})
kpi_map = kpi_map[kpi_map.NOME_COLONNA_OUTPUT=='VALORE'].drop(['TIPOLOGIA_OUTPUT','COL_TO_TRANSPOSE','VALUE','CONSTANT_TO_ADD','TO_DROP','DESCRIZIONE','DATA_INIZIO_VALIDITA','DATA_FINE_VALIDITA'], axis = 1)

weights = pd.read_excel(os.path.join(data_path, 'logit_weights.xlsx'), sheet_name='weights')
weights = weights.set_index('Variabile').join(kpi_map[(kpi_map.KPI.isin(samples.KPI_CD.unique()))].set_index('CAMPO')).reset_index().sort_values(['MODELLO','MODULO_DS']).set_index('KPI')

data = samples.pivot(index = 'ID', columns = 'KPI_CD', values='VALORE_QT')

In [None]:
logit = lambda coefs, intercept, samples: 1/(1+np.exp(-1*np.matmul(coefs,samples.T)+intercept))

In [185]:
model = 'CORPORATE' 
module = 'Andamentale Interno'

module_weights = weights[(weights.MODELLO == model) & (weights.MODULO_DS == module)]
intercept = module_weights[module_weights.Variabile == 'intercept'].Coefficiente.values[0]
coefs = module_weights[module_weights.Variabile!='intercept'].Coefficiente.sort_index()

input_data = data.loc[:,coefs.index]

In [186]:
logit(coefs.values.reshape(1,-1), intercept, input_data.values)

array([[0.98897997, 0.99129118]])

In [111]:
lr = LogisticRegression()
lr.coef_ = coefs.values
lr.intercept_ = intercept

In [199]:
contrib = lambda coefs, intercept, samples: (logit(coefs, intercept, samples)* 1/(1+np.exp(1*np.matmul(coefs,samples.T)+intercept))).T*samples
contrib(coefs.values.reshape(1,-1), intercept, input_data.values)

array([[-0.57195239,  0.2482833 , -0.01197736, -0.29806582, -0.04801041,
        -0.57395842],
       [-0.57859581, -0.03660696,  0.19790218, -0.30152795, -0.04856807,
         0.03541411]])

In [187]:
contrib = lambda coefs, samples: coefs * samples
contrib(coefs.values, input_data.values)

array([[ 0.49364184,  0.31837781, -0.24095205,  0.14342043,  0.1247628 ,
         0.46582615,  0.27976271, -0.13957967],
       [ 0.49364184,  0.43579543,  0.24929573,  0.14342043,  0.1247628 ,
         0.09587228,  0.27976271, -0.13957967]])

In [189]:
module = 'Centrale Rischi'

module_weights = weights[(weights.MODELLO == model) & (weights.MODULO_DS == module)]
intercept = module_weights[module_weights.Variabile == 'Intercept'].Coefficiente.values[0]
coefs = module_weights[module_weights.Variabile!='Intercept'].Coefficiente.sort_index()

input_data = data.loc[:,coefs.index]
logit(coefs.values.reshape(1,-1), intercept, input_data.values)

array([[0.98530215, 0.98802965]])

In [190]:
module = 'Bilanci'
module_data = samples[(samples.MODELLO == model) & (samples.MODULO_DS == module)].pivot(index = 'ID', columns = 'KPI_CD', values='VALORE_QT')

module_weights = weights[(weights.MODELLO == model) & (weights.MODULO_DS == module)]
intercept = module_weights[module_weights.Variabile == 'Intercept'].Coefficiente.values[0]
coefs = module_weights[module_weights.Variabile!='Intercept'].Coefficiente.sort_index()

input_data = data.loc[:,coefs.index]
logit(coefs.values.reshape(1,-1), intercept, input_data.values)

array([[0.97059142, 0.96280157]])