In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import feature_column
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import log_loss
from DenseFeatureMixer import DenseFeatureMixer
from enum import Enum

In [2]:
# Load the raw loans workbook.
# A forward slash is used as the separator: '\p' inside a normal string
# literal is an invalid escape sequence (Python warns about it), and a
# backslash-separated path only resolves on Windows.
df = pd.read_excel('Datasets/prestamos.xlsx')

# Discard the columns that the rest of the notebook never reads.
df = df.drop(columns=['Descripción CIIU', 'ID', 'Fecha', 'Próxima cuota'])

# Give the remaining columns the short names used by the later cells.
df = df.rename(columns={
    'Capital en moneda de origen': 'capital',
    'Plazo': 'plazo',
    'Cód CIIU': 'ciiu',
    'Capital en pesos': 'capital_pesos',
    'Moneda': 'dolares',
    'Tipo de operación': 'tipo_operacion',
    'Tasa de interés': 'tasa_interes',
})

In [3]:
# Flip the sign of both capital columns (presumably the workbook stores
# them as negative amounts — confirm against the source data).
# NOTE: this step is not idempotent; re-running the cell flips the sign back.
df['capital'] = df['capital'] * -1
df['capital_pesos'] = df['capital_pesos'] * -1

# Keep only the two loan operation types handled below. The explicit
# .copy() makes the filtered frame independent of the original, so the
# column assignments in later cells do not raise SettingWithCopyWarning.
df = df[df['tipo_operacion'].isin(['Préstamo plazo fijo', 'Préstamo amortizable'])].copy()

In [4]:
# Encode categorical values as integers.
# The results are assigned back to the frame instead of calling
# `df[col].replace(..., inplace=True)`: that form mutates a temporary
# Series, and under pandas Copy-on-Write it leaves `df` unchanged.
df['dolares'] = df['dolares'].replace({2222: 1})
df['Vencido'] = df['Vencido'].replace({'NO': 0, 'SI': 1})
df['tipo_operacion'] = df['tipo_operacion'].replace(
    {'Préstamo plazo fijo': 1, 'Préstamo amortizable': 2}
)

# Binary label: 1 when the loan is 30 or more days overdue, otherwise 0.
df['moroso'] = np.where(df['Días vencido'] >= 30, 1, 0)

In [5]:
# Ordinal code for each loan-term bucket, listed from shortest to longest.
# The 0 -> 0 entry leaves an existing 0 value unchanged.
dict_plazos = {
    0: 0,
    'Menores de 30 días': 1,
    'Entre 31 y 90 días': 2,
    'Entre 91 y 180 días': 3,
    'Entre 181 y 366 días': 4,
    'Entre 1 y 3 años': 5,
    'Mayores de 3 años': 6,
}

# Assign the result back rather than using a chained in-place replace,
# which does not update `df` under pandas Copy-on-Write.
df['plazo'] = df['plazo'].replace(dict_plazos)

# Renumber the rows 0..n-1, discarding the old index.
df = df.reset_index(drop=True)


In [6]:
# Rename the label column to 'output_1', the name the following cells use
# for the target.
df = df.rename(columns={'moroso': 'output_1'})

# Preview the first rows of the prepared frame.
df.head()

Unnamed: 0,dolares,capital,capital_pesos,ciiu,Vencido,Días vencido,tipo_operacion,tasa_interes,plazo,output_1
0,1,76000.0,2248156.0,71110,0,0,1,7.0,2,0
1,1,50000.0,1479050.0,51430,0,0,1,7.5,2,0
2,1,40000.0,1183240.0,51430,0,0,1,7.5,3,0
3,1,33000.0,976173.0,52310,0,0,1,5.8,2,0
4,1,20000.0,591620.0,72900,1,11,1,8.0,2,0


In [7]:
# Target vector.
y = df['output_1']

# Feature matrix: every column except the target and 'Días vencido',
# the column the target was computed from.
x = df.drop(columns=['output_1', 'Días vencido'])

In [8]:
# Categorical column names, kept in alphabetical order.
cat = sorted(['plazo', 'ciiu'])

# Numeric column names.
num = [
    'capital_pesos',
    'tasa_interes',
    'capital',
]

In [9]:
# Hold out 20% of the rows for testing. A fixed random_state makes the
# split, and everything trained on it, reproducible across
# Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [10]:
# Fit the project-local DenseFeatureMixer on the training split, passing the
# numeric column list, the categorical column list and dim=2.
# Presumably `dim` is the embedding width per categorical column (the
# transformed frame shown later has two *_embedding_* columns per
# categorical) — confirm against DenseFeatureMixer.
# NOTE(review): the training log printed by this cell shows losses in the
# 1e8-1e9 range with accuracy around 0.5. That pattern suggests the numeric
# inputs reach the network unscaled — confirm whether DenseFeatureMixer
# normalises the `num` columns.
emb = DenseFeatureMixer()
fl = emb.fit(x_train,y_train,num,cat,dim=2)

Epoch 1/20
Consider rewriting this model with the Functional API.
Consider rewriting this model with the Functional API.
61/61 - 1s - loss: 8017249280.0000 - accuracy: 0.4923 - 771ms/epoch - 13ms/step
Epoch 2/20
61/61 - 0s - loss: 4342791168.0000 - accuracy: 0.5082 - 82ms/epoch - 1ms/step
Epoch 3/20
61/61 - 0s - loss: 3743936000.0000 - accuracy: 0.5031 - 86ms/epoch - 1ms/step
Epoch 4/20
61/61 - 0s - loss: 2982135808.0000 - accuracy: 0.5051 - 82ms/epoch - 1ms/step
Epoch 5/20
61/61 - 0s - loss: 2336510464.0000 - accuracy: 0.4964 - 81ms/epoch - 1ms/step
Epoch 6/20
61/61 - 0s - loss: 2078374528.0000 - accuracy: 0.4619 - 82ms/epoch - 1ms/step
Epoch 7/20
61/61 - 0s - loss: 1167692672.0000 - accuracy: 0.5051 - 82ms/epoch - 1ms/step
Epoch 8/20
61/61 - 0s - loss: 1103905920.0000 - accuracy: 0.5149 - 81ms/epoch - 1ms/step
Epoch 9/20
61/61 - 0s - loss: 707388224.0000 - accuracy: 0.4815 - 84ms/epoch - 1ms/step
Epoch 10/20
61/61 - 0s - loss: 825771584.0000 - accuracy: 0.5123 - 86ms/epoch - 1ms/step

In [11]:
# Apply the fitted mixer to the training features. Judging by the preview
# displayed below, the result keeps the input columns and appends
# <column>_embedding_<i> columns for 'ciiu' and 'plazo'.
nn = emb.transform(x_train)

In [12]:
# Preview the first rows of the transformed training frame.
nn.head()

Unnamed: 0,dolares,capital,capital_pesos,ciiu,Vencido,tipo_operacion,tasa_interes,plazo,ciiu_embedding_0,ciiu_embedding_1,plazo_embedding_0,plazo_embedding_1
1473,1,40000.0,1222120.0,29300,1,1,6.2,3,0.349339,0.637272,-0.408447,0.145236
1567,1,40000.0,1328560.0,51220,0,1,5.75,4,-0.411168,0.752558,-0.408447,0.145236
324,1,10000.0,305830.0,51502,0,1,6.4,2,0.072546,0.214251,-0.042658,0.255313
1343,1,30000.0,857880.0,93090,0,1,8.0,4,-0.085189,-0.030009,-0.408447,0.145236
430,1,57000.0,1641486.0,51430,0,1,6.0,2,-0.208908,-0.835183,-0.042658,0.255313
