In [1]:
from IPython.core.display import HTML
display(HTML("<style>pre { white-space: pre !important; }</style>"))
from pyspark.sql import functions as F, DataFrame
import datetime as dt
from datetime import date, datetime, timedelta
from dateutil.relativedelta import relativedelta
from pyspark.sql.window import Window
import pyspark.sql.types as t
from decimal import Decimal
from pyspark.sql.functions import regexp_replace

In [2]:
from dataproc_sdk.dataproc_sdk_datiopysparksession.datiopysparksession import DatioPysparkSession
datioSparkSession = DatioPysparkSession().get_or_create()

from dataproc_sdk.dataproc_sdk_datiopysparksession import datiopysparksession
dataproc = datiopysparksession.DatioPysparkSession().get_or_create()

from dataproc_sdk.dataproc_sdk_schema.datioschema import DatioSchema
from dataproc_sdk.dataproc_sdk_datiofilesystem.datiofilesystem import DatioFileSystem

In [3]:
# para evitar problemas de tipología de datos
spark.conf.set("spark.sql.parquet.enableVectorizedReader", "false")

In [4]:
# para coger todas las columnas del fichero independientemente que hay fotos que o las tengan
spark.conf.set('spark.sql.parquet.mergeSchema', 'true')

# Configuracion

### Paths in

In [5]:
# limites fijados por el inversor, ya tratados en nuestro proceso (notebook previo de procesamiento del launchpad)
path_limites = '/data/sandboxes/dslb/data/Joystick/TITULIZACIONES/limites/current/'

In [6]:
# base de datos de partida la salida de titulizaciones de joystick
path_titulizaciones = '/data/sandboxes/dslb/data/Joystick/mrr/joystick_mrr'

In [7]:
# adaptar los sectores que tenemos a nivel sector-subsector
path_catalogo_sector_project = '/data/sandboxes/dslb/data/Joystick/TITULIZACIONES/catalogos/sectores_proyecto.csv'

In [8]:
# tabla para el cost income
path_ci = "/data/master/xivg/data/t_xivg_cost_income"
# '/data/master/finance/investments_cost/xivg/data/t_xivg_cost_income/' (OLD)

### Paths out

In [9]:
root_path = '/data/sandboxes/dslb/data/Joystick/TITULIZACIONES/cartera_optima/closing_date='+format(date.today().strftime("%Y-%m-%d"))
root_path

'/data/sandboxes/dslb/data/Joystick/TITULIZACIONES/cartera_optima/closing_date=2024-10-22'

In [10]:
path_facilities = root_path + '/facilities'

In [11]:
path_limites_only = root_path + '/limites'

In [12]:
path_constantes = root_path + '/constants'

## Diccionarios
Se genera un diccionario para los valores de rating

In [13]:
# non_ig: marca limites peores a rating BB+ (numérico: 18) (BB+1 escala larga)
# rating que tomamos depende del tipo de titulizacion que se quiera realizar
non_ig_limit ={
    'corporate_loan': {'categoria':'gf_ma_expanded_master_scale_id', 'valor':'ma_expanded_master_scale_number'}, # rating regulatorio para tipologia corporate
    'project_finance': {'categoria':'g_lmscl_internal_ratg_type', 'valor':'ind_rating'} # rating interno para tipologia project
}

# Funciones

In [14]:
# calculamos la fecha más reciente de la ruta tomando como campo de particion el pasado como parámetro
def last_partition (p_path:str, campo:str):
    
    datio_path = DatioFileSystem().get().qualify(p_path)
    fs = datio_path.fileSystem()
    path = datio_path.path()
    path_list = fs.listStatus(path)
    paths = [path.getPath().toString() for path in path_list] #listado de todos los paths de la ruta pasada
    
    l_fechas = [element.split(campo+'=')[1] for element in paths if campo in element] #listado de todas las fechas
    return max(l_fechas) # fecha mayor

# Orígenes de Datos

## 1. Límites
información con los límites del inversor activos en la foto actual
- Ya filtrados para los que aplican a la titulizacion en curso (Activos, Project/Corporate y STS)

In [15]:
limites =dataproc.read().parquet(path_limites)

In [16]:
limites.show(5,False)

+-------------------------+----------+------------------+--------------+---------------------------------------+-------------+--------------+-----------+-------------------+--------------------+-----------+-----------+------------+-------------+--------+------------+
|name_list_desc           |limit_date|limit_type        |concept1_desc |concept1_value                         |concept2_desc|concept2_value|limit_value|corporate_loan_flag|project_finance_flag|limit_scope|active_flag|visual_order|complex_limit|id_limit|closing_date|
+-------------------------+----------+------------------+--------------+---------------------------------------+-------------+--------------+-----------+-------------------+--------------------+-----------+-----------+------------+-------------+--------+------------+
|escenario model verano IV|2024-06-24|customer_subsector|subsector_desc|Paper, plastic, metal & glass packaging|null         |null          |0.5        |1                  |0                   |po

### Tipo Titulizacion
pueden ser Corporate/Project

In [17]:
tipo_titulizacion = limites.where(F.col('limit_type')=='portfolio_type').select('corporate_loan_flag','project_finance_flag')
corporate_flag = tipo_titulizacion.select('corporate_loan_flag').collect()[0].corporate_loan_flag
project_flag = tipo_titulizacion.select('project_finance_flag').collect()[0].project_finance_flag

if(corporate_flag==1):
    tipo_titulizacion = 'corporate_loan'
    print('titulizacion de corporate loan')
if(project_flag==1):
    tipo_titulizacion = 'project_finance'
    print('titulizacion de project finance')   

titulizacion de corporate loan


### Fecha Launchpad
para saber al fecha de carga que estamos usando

In [18]:
fecha_launchpad = limites.select('closing_date').distinct().collect()[0].closing_date
print('fecha del launchpad:', fecha_launchpad)

fecha del launchpad: 2024-10-15


## 2. Tabla Titulizaciones
información que se comparte como punto de partida para generar titulizaciones

In [19]:
fecha_titus = last_partition (path_titulizaciones, 'clan_date')
fecha_titus

'20241021'

In [20]:
titus = dataproc.read().parquet(path_titulizaciones
                               ).where(F.col('clan_date')==fecha_titus)

In [21]:
# titus.printSchema()

In [22]:
titus.count()

1615

In [23]:
titus.show(5,False)

+-------------+------------------+---------+------------------------+-------------------------+------------------------------------------+--------------------------+--------------+------------+-----------+-----------------+---------------+----------------------------+-----------+------------------------+------------------+------------------------+---------------------------+------------------------------+-------------------------------+---------------+-------------------------+------------------+---------------+--------------------+------------------------------+------------------------------+----------------+--------------------+-----------------+---------------------+---------------------+-------------------------+---------------+-----------------------------+--------------------------+------------------------+----------------------------+---------------------+--------------------+----------------------+---------------------------+--------------------------+--------------------------

In [24]:
key_t = ['delta_file_id','delta_file_band_id','branch_id']
if (titus.groupBy(*key_t).agg(F.count('delta_file_id').alias('n')).where(F.col('n')>1).count()>1):
    print('Hay duplicados a nivel facility')

In [25]:
# SI HUBIERA DUPLICADOS a nivel key_t, si hay duplicados a nivel 'delta_file_id','delta_file_band_id', unificamos registros a este nivel
# si queremos ultimo status por operacion
# key_fac = ['delta_file_id','delta_file_band_id','branch_id']
# window = Window.partitionBy(*key_fac).orderBy(F.col("clan_date").desc())

# titus = dataproc.read().parquet(path_titulizaciones
#                                ).withColumn("rn", F.row_number().over(window)
#                                ).where(F.col("rn") == 1).drop('rn')  

# analizar fechas usadas de las 2 bases de datos origen
# titus.select('clan_date','basemoto_date','ifrs9_date').distinct().orderBy('clan_date','basemoto_date','ifrs9_date').show(50,False)

# si leemos csv
# titus = spark.read.option('header','True').option('delimiter',',').csv(path_titulizaciones_csv)

In [26]:
# columnas disponibles
# sorted(titus.columns)

**PDTE: datos de garante bei pdte de una incidencia debido al descenso del volumen con esta tipologia**

In [27]:
cols_bei=['non_bei_guaranteed_amount','bei_guaranteed_amount']
titus.select(*cols_bei).show(5,False)

+-------------------------+---------------------+
|non_bei_guaranteed_amount|bei_guaranteed_amount|
+-------------------------+---------------------+
|25978982.88831483        |null                 |
|null                     |null                 |
|null                     |null                 |
|133976420.17000000       |null                 |
|0E-8                     |null                 |
+-------------------------+---------------------+
only showing top 5 rows



In [28]:
# sorted(titus.columns)

## 3. Catálogo Sector Proyecto
Se carga el catálogo con la relación Sector - Subsector

In [29]:
sector_proj = spark.read.option('header','True').option('delimiter',';').csv(path_catalogo_sector_project
                        ).withColumn('project_sector_desc', F.trim('project_sector_desc'))

In [30]:
sector_proj.show(5,False)

+------------------------------------------+----------------------+
|project_sector_desc                       |project_subsector_desc|
+------------------------------------------+----------------------+
|PF-ENE/O&G/GAS/GAS STORAGE                |GAS                   |
|PF-ENE/O&G/GAS/GAS TRANSPORTATION NETWORKS|GAS                   |
|PF-ENE/O&G/GAS/GAS VESSEL                 |GAS                   |
|PF-ENE/O&G/GAS/LIQUEFACTION PLANT         |GAS                   |
|PF-ENE/O&G/GAS/REGASIFICATION PLANT       |GAS                   |
+------------------------------------------+----------------------+
only showing top 5 rows



In [31]:
print('Número de sectores de proyecto: ',sector_proj.select('project_sector_desc').distinct().count())

Número de sectores de proyecto:  36


## 4. Cost Income
costes de explotación

In [32]:
path_ci

'/data/master/xivg/data/t_xivg_cost_income'

In [33]:
fecha_income = last_partition (path_ci, 'gf_cutoff_date')

In [34]:
df = dataproc.read().parquet(path_ci).show(5,False)

+-------------------+-------------------+----------------------------+----------------------------+--------------------------------+-------------------+----------------+-------------------+--------------+
|gf_head_office_desc|gf_business_area_id|gf_total_gross_margin_amount|gf_operating_expenses_amount|gf_customer_contract_control_per|gf_audit_date      |gf_odate_date_id|gf_run_id          |gf_cutoff_date|
+-------------------+-------------------+----------------------------+----------------------------+--------------------------------+-------------------+----------------+-------------------+--------------+
|ARGENTINA          |GTB                |3232348799.57               |-62819751.520000            |0.019434707                     |2024-07-03 08:49:38|20240702        |202407030847401rde7|2019-02-28    |
|CHILE              |GF                 |8385224776.68               |-272375836.490000           |0.032482831                     |2024-07-03 08:49:38|20240702        |20240703084

In [35]:
area = 'GF'

df_ic = dataproc.read().parquet(path_ci).where(F.col('gf_cutoff_date')==fecha_income
                                              ).where(F.col('gf_business_area_id')==area
                                              ).select('gf_customer_contract_control_per','gf_head_office_desc')
df_ic.orderBy('gf_customer_contract_control_per').show(5,False)

+--------------------------------+-------------------+
|gf_customer_contract_control_per|gf_head_office_desc|
+--------------------------------+-------------------+
|0.003649668                     |COLOMBIA           |
|0.047890240                     |MEXICO             |
|0.048922224                     |ARGENTINA          |
|0.056122127                     |PERU               |
|0.075001987                     |VENEZUELA          |
+--------------------------------+-------------------+
only showing top 5 rows



In [36]:
# +-----------------------------+------------------+
# |customer_contract_control_per|head_office_des_id|
# +-----------------------------+------------------+
# |0.003649668                  |COLOMBIA          |
# |0.047890240                  |MEXICO            |
# |0.048922224                  |ARGENTINA         |
# |0.056122127                  |PERU              |
# |0.075001987                  |VENEZUELA         |

# Generación Columnas
Nuevas columnas para resolver los límites marcados por el inversor

## 1. Subsector proyecto
Añadimos una columna para indicar el subsector

In [37]:
# tenemos mas sectores en las titulizaciones que en el catálogo de relación sector-subsector
print('Número de sectores de proyecto en titulizaciones: ',titus.select('project_sector_desc').distinct().count())

Número de sectores de proyecto en titulizaciones:  62


In [38]:
# sectores incluidos en la tabla de titulizaciones
# titus.groupBy('project_sector_desc').count().orderBy('project_sector_desc').show(10,False)

In [39]:
titus_1 = titus.withColumn('project_sector_desc', F.trim('project_sector_desc')
                          ).join(sector_proj,['project_sector_desc'],'left').fillna('No Informado')

In [40]:
titus_1.show(5,False)

+------------------------------------------+-------------+------------------+---------+------------------------+-------------------------+--------------------------+--------------+------------+-----------+-----------------+---------------+----------------------------+-----------+------------------------+------------------+------------------------+---------------------------+------------------------------+-------------------------------+---------------+-------------------------+------------------+---------------+--------------------+------------------------------+------------------------------+----------------+--------------------+-----------------+---------------------+---------------------+-------------------------+---------------+-----------------------------+--------------------------+------------------------+----------------------------+---------------------+--------------------+----------------------+---------------------------+--------------------------+--------------------------

In [41]:
# listado con la tipologia sector-subsector generado en la tabla de titulizaciones
# titus_1.groupBy('project_sector_desc','project_subsector_desc'
#              ).count().orderBy('project_sector_desc','project_subsector_desc'
#              ).show(100,False)

In [42]:
# si queremos el sector-subsector de la actividad del cliente en lugar del proyecto
# titus.groupBy('g_asset_allocation_sector_desc','g_asset_allocation_subsec_desc'
#              ).count().orderBy('g_asset_allocation_sector_desc','g_asset_allocation_subsec_desc'
#              ).show(100,False)

## 2. Marca ICO
flag para indicar si es un tipo de préstamo con financiación ICO

In [43]:
# deal_purpose_type="ICO España"
titus_2 = titus_1.withColumn('ico_flag', F.when(F.trim(F.col('deal_purpose_type'))=="ICO España",1).otherwise(0))
titus_2.groupBy('deal_purpose_type','ico_flag').count().show(20,False)                        

+--------------------------+--------+-----+
|deal_purpose_type         |ico_flag|count|
+--------------------------+--------+-----+
|Other uses                |0       |47   |
|Working Capital line      |0       |19   |
|Capex line                |0       |76   |
|Back-Up line              |0       |8    |
|Project Finance           |0       |307  |
|Purchase of Assets        |0       |2    |
|Pre-Export Credit Line    |0       |4    |
|ICO España                |1       |4    |
|General Corporate Purposes|0       |958  |
|Acquisition line          |0       |58   |
|Asset Finance             |0       |12   |
|Intra-Group Loan          |0       |1    |
|Debt Service Cover line   |0       |30   |
|Public Finance            |0       |1    |
|Bridge                    |0       |10   |
|No Informado              |0       |78   |
+--------------------------+--------+-----+



## 3. Marca Non-IG
flag para indicar que el rating es BB+1 o peor
- depende del tipo de titulizacion:Corporate/Project el rating que se toma es distinto

In [44]:
col_rating_categ = non_ig_limit[tipo_titulizacion]['categoria']
col_rating_categ

'gf_ma_expanded_master_scale_id'

In [45]:
col_rating_pos = non_ig_limit[tipo_titulizacion]['valor']
col_rating_pos

'ma_expanded_master_scale_number'

In [46]:
n_rating = [x[col_rating_pos] for x in titus.select(col_rating_pos).where(F.col(col_rating_categ)=='BB+1').distinct().collect()][0]
print('BB+1 en valor numérico:',n_rating)

BB+1 en valor numérico: 18


In [47]:
# titus.select('ind_rating','g_lmscl_internal_ratg_type').distinct().orderBy('ind_rating').show(100,False)

In [48]:
# los triple b tampoco tienen que ir marcados porque su rating es mejor que BB+1
titus_3 = titus_2.withColumn('non_ig_flag', F.when(((F.col(col_rating_pos)>=n_rating)&(~(F.col(col_rating_categ).like('BBB%')))),1
                                                   ).otherwise(0))

In [49]:
titus_3.groupBy('non_ig_flag').count().show()

+-----------+-----+
|non_ig_flag|count|
+-----------+-----+
|          1|  717|
|          0|  898|
+-----------+-----+



In [50]:
# investment grade: sería el opuesto a non_ig_flag, pero hay ciertos rating que no vienen reflejados, así usamos diccionario
titus_3.where(F.col('non_ig_flag')==1).groupBy(col_rating_pos,col_rating_categ
                                              ).count().orderBy(col_rating_pos,col_rating_categ).show(50,False)

+-------------------------------+------------------------------+-----+
|ma_expanded_master_scale_number|gf_ma_expanded_master_scale_id|count|
+-------------------------------+------------------------------+-----+
|18                             |BB+1                          |3    |
|19                             |BB+2                          |84   |
|21                             |BB1                           |28   |
|22                             |BB2                           |36   |
|24                             |BB-1                          |4    |
|25                             |BB-2                          |40   |
|27                             |B+1                           |13   |
|28                             |B+2                           |14   |
|29                             |B+3                           |20   |
|31                             |B1                            |1    |
|32                             |B2                            |3    |
|33   

## 4. Marca Proyecto en construccion
flag para indicar si el proyecto está en construcción

In [51]:
titus_4 = titus_3.withColumn('building_project_flag', F.when(F.trim(F.col('gf_pf_project_const_type'))=='S',1).otherwise(0))
titus_4.groupBy('building_project_flag','gf_pf_project_const_type').count().show(20,False)

+---------------------+------------------------+-----+
|building_project_flag|gf_pf_project_const_type|count|
+---------------------+------------------------+-----+
|0                    |No Informado            |1140 |
|0                    |N                       |260  |
|1                    |S                       |215  |
+---------------------+------------------------+-----+



## 5. Marca Workout
flag para indicar si está en foco de mora
- watch_list_clasification_type (si es 1 o 2 -> esta en foco workout)

In [52]:
titus_5 = titus_4.withColumn('workout_flag', F.when(F.trim(F.col('watch_list_clasification_type'))!=0,1).otherwise(0))
titus_5.groupBy('workout_flag','watch_list_clasification_type').count().show(20,False)

+------------+-----------------------------+-----+
|workout_flag|watch_list_clasification_type|count|
+------------+-----------------------------+-----+
|0           |0                            |1600 |
|1           |1                            |1    |
|1           |2                            |14   |
+------------+-----------------------------+-----+



## 6. Flag cumple pago STS
convertir columna booleana a formato flag
- (*) problemas en MicroStrategy si no cambiamos a este formato y mejor para hacer conteos

In [53]:
titus_6 = titus_5.withColumn('sts_payment_flag',F.when(F.col('sts_payment_condition')=='true', 1).otherwise(0))
titus_6.groupBy('sts_payment_flag','sts_payment_condition').count().orderBy('sts_payment_flag','sts_payment_condition').show(50,False)

+----------------+---------------------+-----+
|sts_payment_flag|sts_payment_condition|count|
+----------------+---------------------+-----+
|0               |false                |65   |
|1               |true                 |1550 |
+----------------+---------------------+-----+



## 7. Flag condicion RW en STS
convertir columna booleana a formato flag
- (*) problemas en MicroStrategy si no cambiamos a este formato y mejor para hacer conteos

In [54]:
titus_7 = titus_6.withColumn('sts_sm_rw_flag',F.when(F.col('sts_sm_rw_condition')=='true', 1).otherwise(0))
titus_7.groupBy('sts_sm_rw_flag','sts_sm_rw_condition').count().orderBy('sts_sm_rw_flag','sts_sm_rw_condition').show(50,False)

+--------------+-------------------+-----+
|sts_sm_rw_flag|sts_sm_rw_condition|count|
+--------------+-------------------+-----+
|0             |false              |26   |
|1             |true               |1589 |
+--------------+-------------------+-----+



## 8. Flag ESG
Añadimos marca al tablon de datos

**usando marca generada en titulizaciones**

In [55]:
titus_8 = titus_7.withColumn('esg_linked_flag',F.when(F.col('esg_linked')==1,1).otherwise(0))
titus_8.groupBy('esg_linked_flag','esg_linked').count().orderBy('esg_linked_flag').show(50,False)

+---------------+----------+-----+
|esg_linked_flag|esg_linked|count|
+---------------+----------+-----+
|0              |null      |35   |
|0              |0         |1335 |
|1              |1         |245  |
+---------------+----------+-----+



## 9. Flag BEI
generamos flag para saber si el garante el el Banco Europeo Inversiones BEI

In [56]:
titus_9 = titus_8.withColumn('bei_flag',F.when(((F.col('bei_guaranteed_amount')!=0)&(F.col('bei_guaranteed_amount').isNotNull())),1).otherwise(0))
titus_9.groupBy('bei_flag','bei_guaranteed_amount').count().orderBy('bei_flag').show(50,False)

+--------+---------------------+-----+
|bei_flag|bei_guaranteed_amount|count|
+--------+---------------------+-----+
|0       |null                 |1604 |
|0       |0E-8                 |11   |
+--------+---------------------+-----+



## 10. Fecha de los datos
dejamos una columna formato fecha de los datos cogidos

In [57]:
mascara = "%Y-%m-%d"
fecha = str(fecha_titus)[0:4] + '-' + str(fecha_titus)[4:6] + '-' + str(fecha_titus)[6:8]
fecha_datos = datetime.strptime(fecha, mascara).date()

In [58]:
titus_10 = titus_9.withColumn('data_date',F.lit(fecha_datos))
titus_10.select('data_date').distinct().show()

+----------+
| data_date|
+----------+
|2024-10-21|
+----------+



# Constantes

In [59]:
constantes = ['tax_rate', # disponible Datio
              'coupon','crr_b','crr_c','crr_d','crr_e','crr_upper_A', # pasados en el launchpad
             'lgd', # por launchpad si es project finance y dato desde Datio si es tipo corporate loan
             'detach', # launchpad
             'ratio_cet1', # disponible Datio(joystick)
             'attachment_point', # launchpad
             'ci_ratio',] # disponible Datio (joystick?), ponderado por producto y geografia]

In [60]:
# inicializamos lista de constantes Datio 
collect_df=[]

## Incluidas en Limites
Recogemos las constantes incluidas en el launchpad de los limites

In [61]:
df_constantes = limites.where(F.col('limit_type')=='constant_type'
                             ).select(F.col('concept1_desc').alias('constant_type'),
                                      F.col('limit_value').alias('constant_value'))
df_constantes.show()

+----------------+--------------+
|   constant_type|constant_value|
+----------------+--------------+
|               C|         -1.85|
|         upper_A|           0.0|
|               B|          3.56|
|               E|          0.07|
|          Detach|           1.0|
|               D|          0.55|
|Attachment_Point|           0.1|
|          Coupon|          0.05|
+----------------+--------------+



## 1. Tasa impositiva
porcentaje de impuestos a pagar por los préstamos

In [62]:
# Usando Rubik
from rubik.load.rorc import Values as RORCvalues
get_rorc_values = RORCvalues(path="/data", dataproc=dataproc)
tax_rate = get_rorc_values.TaxRate()
tax_rate

0.3

In [63]:
collect_df.append({"constant_type" : 'tax_rate', "constant_value" : tax_rate})

## 2. Ratio CET1
porcentaje sobre los activos ponderado por riesgo

In [64]:
# Usando Rubik
from rubik.load.rorc import Values as RORCvalues
get_rorc_values = RORCvalues(path="/data", dataproc=dataproc)
ratio_cet1 = get_rorc_values.CET1()
ratio_cet1

0.12

In [65]:
collect_df.append({"constant_type" : 'ratio_cet1', "constant_value" : ratio_cet1})

## 3. LGD
Loss given default (porcentaje de pérdida condicional)
- Si es coporate loan: se calcula el dato a posteriori de saber la cartera a titulizar

In [66]:
# dependiendo de la tipología de la titulización

# (corporate_loan) lgd dentro de titulizaciones: el valor de la LGD ponderada de la cartera subyacente a titulizar
# adj_lgd_ma_mitig_per : viene a nivel operación, hay que dejar valor ponderado de todos los préstamos que entren en la cartera óptima a titulizar

# (project finance) : se coge el marcado en el launchpad aunque esté marcado tb por variable a titulizar gf_pf_final_lgd_amount 

In [67]:
lgd = titus.agg(F.avg(F.col("adj_lgd_ma_mitig_per")).cast('float').alias('lgd')
               ).collect()[0].lgd
lgd

0.40471550822257996

In [68]:
collect_df.append({"constant_type" : 'lgd', "constant_value" : lgd})

## 4. CI RATIO
Ratio cost-to-income (entra en la fórmula del RORC, son los gastos de explotación medios del área, expresados en porcentaje del margen bruto)

In [69]:
ci_ratio = df_ic.where(F.trim(F.col('gf_head_office_desc'))=='ESPAÑA'
                      ).select(F.col('gf_customer_contract_control_per').cast('float')
                              ).collect()[0].gf_customer_contract_control_per
ci_ratio

0.09063776582479477

In [70]:
collect_df.append({"constant_type" : 'ci_ratio', "constant_value" : ci_ratio})

# Persistimos en Sbx

## Tabla de constantes
Incluimos en esta tabla:
- las constantes incluidas en el launchpad de limites
- las constantes generadas

In [71]:
df = spark.createDataFrame(collect_df)

In [72]:
df_constantes_t = df.union(df_constantes).withColumn('closing_date', F.lit(fecha_launchpad)) #incluimos la fecha de la particion del launchpad usada

In [73]:
df_constantes_t.show()

+----------------+-------------------+------------+
|   constant_type|     constant_value|closing_date|
+----------------+-------------------+------------+
|        tax_rate|                0.3|  2024-10-15|
|      ratio_cet1|               0.12|  2024-10-15|
|             lgd|0.40471550822257996|  2024-10-15|
|        ci_ratio|0.09063776582479477|  2024-10-15|
|               C| -1.850000023841858|  2024-10-15|
|         upper_A|                0.0|  2024-10-15|
|               B|  3.559999942779541|  2024-10-15|
|               E|0.07000000029802322|  2024-10-15|
|          Detach|                1.0|  2024-10-15|
|               D|  0.550000011920929|  2024-10-15|
|Attachment_Point|0.10000000149011612|  2024-10-15|
|          Coupon|0.05000000074505806|  2024-10-15|
+----------------+-------------------+------------+



In [74]:
df_constantes_t.write.parquet(path_constantes, mode='overwrite')

In [75]:
path_constantes

'/data/sandboxes/dslb/data/Joystick/TITULIZACIONES/cartera_optima/closing_date=2024-10-22/constants'

## Tabla de limites
Incluimos en esta tabla exclusivamente limites

In [76]:
limites.where(F.col('limit_type')!='constant_type').write.parquet(path_limites_only, mode='overwrite')

In [77]:
path_limites_only

'/data/sandboxes/dslb/data/Joystick/TITULIZACIONES/cartera_optima/closing_date=2024-10-22/limites'

## Tabla de facilities
Incluimos todas las operaciones disponibles a carterizar

In [78]:
titus_10.write.parquet(path_facilities,mode='overwrite')

In [79]:
path_facilities

'/data/sandboxes/dslb/data/Joystick/TITULIZACIONES/cartera_optima/closing_date=2024-10-22/facilities'

# TEST

In [80]:
path = '/data/sandboxes/dslb/data/Joystick/TITULIZACIONES/cartera_optima/'
part = 'closing_date'
f = last_partition(path,part)

In [81]:
path_l = path + part + '='+ str(f) + '/facilities'
path_l

'/data/sandboxes/dslb/data/Joystick/TITULIZACIONES/cartera_optima/closing_date=2024-10-22/facilities'

In [82]:
# si quiere leer otra partición de diferente dia
# path_facilities = '/data/sandboxes/dslb/data/Joystick/TITULIZACIONES/cartera_optima/closing_date=2024-08-02/facilities'

In [83]:
facilities = dataproc.read().parquet(path_l)
facilities.show(5,False)

+------------------------------------------+-------------+------------------+---------+------------------------+-------------------------+--------------------------+--------------+------------+-----------+-----------------+---------------+----------------------------+-----------+------------------------+------------------+------------------------+---------------------------+------------------------------+-------------------------------+---------------+-------------------------+------------------+---------------+--------------------+------------------------------+------------------------------+----------------+--------------------+-----------------+---------------------+---------------------+-------------------------+---------------+-----------------------------+--------------------------+------------------------+----------------------------+---------------------+--------------------+----------------------+---------------------------+--------------------------+--------------------------

In [84]:
# marca según el rating interno
# facilities.groupBy('non_ig_flag').count().show()

In [85]:
# sorted(facilities.columns)