**Análisis de las columnas de rating para el cálculo de non_ig**

In [1]:
# HTML wraps raw markup so it can be rendered in the notebook output area.
from IPython.core.display import HTML
# Inject CSS so <pre> output does not wrap; wide DataFrame.show() tables stay aligned.
# NOTE(review): `display` is not imported in this cell — presumably provided by the notebook environment; confirm.
display(HTML("<style>pre { white-space: pre !important; }</style>"))
from pyspark.sql import functions as F, DataFrame
import datetime as dt
from datetime import date, datetime, timedelta
from dateutil.relativedelta import relativedelta
from pyspark.sql.window import Window
import pyspark.sql.types as t
from decimal import Decimal
from pyspark.sql.functions import regexp_replace

In [2]:
# Obtain the PySpark session through the dataproc SDK wrapper.
from dataproc_sdk.dataproc_sdk_datiopysparksession.datiopysparksession import DatioPysparkSession
datioSparkSession = DatioPysparkSession().get_or_create()

# Same get_or_create() call reached through the module path; `dataproc` is the
# handle used to read parquet data further down in this notebook.
# NOTE(review): `datioSparkSession` is not referenced in the visible cells — confirm whether both handles are needed.
from dataproc_sdk.dataproc_sdk_datiopysparksession import datiopysparksession
dataproc = datiopysparksession.DatioPysparkSession().get_or_create()

# SDK helpers: DatioFileSystem is used by last_partition to list partition directories.
from dataproc_sdk.dataproc_sdk_schema.datioschema import DatioSchema
from dataproc_sdk.dataproc_sdk_datiofilesystem.datiofilesystem import DatioFileSystem

In [3]:
# Disable the vectorized Parquet reader to avoid data-type problems when reading.
# NOTE(review): `spark` is not defined in the visible cells — presumably injected by the notebook kernel; confirm.
spark.conf.set("spark.sql.parquet.enableVectorizedReader", "false")

In [4]:
# Merge Parquet schemas so that every column is read even when some snapshots do not contain it.
spark.conf.set('spark.sql.parquet.mergeSchema', 'true')

# Configuracion

### Paths in

In [5]:
# Starting dataset: the securitisations output produced by Joystick.
path_titulizaciones = '/data/sandboxes/dslb/data/Joystick/mrr/joystick_mrr'

## Diccionarios
Se genera un diccionario para los valores de rating

In [6]:
# non_ig flags ratings that are worse than the BB+ limit.
# Which rating column is used depends on the type of securitisation being built.
non_ig_limit = dict(
    corporate_loan='gf_ma_expanded_master_scale_id',
    project_finance='g_lmscl_internal_ratg_type',
)

In [7]:
# NO SE USA: cogemos directamente la columna ind_rating
rating_dict = {"AAA": "01","AA+": "02","AA": "03","AA-": "04","A+": "05","A": "06","A-": "07","BBB+": "08","BBB+1": "09","BBB+2": "10",
               "BBB": "11","BBB1": "12","BBB2": "13","BBB-": "14","BBB-1": "15","BBB-2": "16","BB+": "17","BB+1": "18","BB+2": "19",
               "BB": "20","BB1": "21","BB2": "22","BB-": "23","BB-1": "24","BB-2": "25","B+": "26","B+1": "27","B+2": "28","B+3": "29",
               "B": "30","B1": "31","B2": "32","B3": "33","B-": "34","B-1": "35","B-2": "36","B-3": "37","CCC+": "38","CCC": "39",
               "CCC-": "40","CC+": "41","CC": "42","CC-": "43","C+": "44","C": "45","C-": "46","D": "47","D1": "48","D2": "49",
               "No Rating": "50","XXXX": "50","null":"50","No Informado":"50"}

# Funciones

In [8]:
# Find the most recent partition value under a path, using the given field as
# the partition column name.
def last_partition(p_path: str, campo: str) -> str:
    """Return the greatest value of partition ``campo`` found directly under ``p_path``.

    The children of ``p_path`` are listed through the Datio filesystem and only
    entries whose own name starts with ``<campo>=`` are considered. Values are
    compared as strings, so the result is the lexicographic maximum.

    Args:
        p_path: Directory whose immediate children are the partition folders.
        campo: Partition column name (the part before ``=`` in the folder name).

    Returns:
        The largest partition value, as the string that follows ``<campo>=``.

    Raises:
        ValueError: If no child of ``p_path`` is a ``<campo>=...`` entry.
    """
    datio_path = DatioFileSystem().get().qualify(p_path)
    fs = datio_path.fileSystem()
    statuses = fs.listStatus(datio_path.path())

    prefix = campo + '='
    valores = []
    for status in statuses:
        # Look only at the entry's own name: the parent path may itself contain
        # the field name, which must not make marker files look like partitions.
        leaf = status.getPath().toString().rstrip('/').rsplit('/', 1)[-1]
        if leaf.startswith(prefix):
            valores.append(leaf[len(prefix):])

    if not valores:
        raise ValueError(f"No '{prefix}' partitions found under {p_path}")
    return max(valores)

# Tabla Titulizaciones
Información que se comparte como punto de partida para generar las titulizaciones.

In [9]:
# Most recent 'clan_date' partition available under the securitisations path.
fecha_titus = last_partition(p_path=path_titulizaciones, campo='clan_date')
fecha_titus

'20240926'

In [10]:
# Disabled manual override to pin a specific snapshot instead of the latest one.
# NOTE(review): this would assign an int, whereas last_partition yields a string — confirm before enabling.
# fecha_titus=20240430

In [11]:
# Load the securitisations dataset, keeping only the rows of the selected snapshot date.
titus = (
    dataproc.read()
    .parquet(path_titulizaciones)
    .filter(F.col('clan_date') == fecha_titus)
)

In [12]:
# Uncomment to inspect the full schema of the loaded dataset.
# titus.printSchema()

In [13]:
# Number of rows in the selected snapshot.
titus.count()

1610

In [14]:
# Preview the first rows without truncating cell contents.
titus.show(n=5, truncate=False)

+-------------+------------------+---------+------------------------+-------------------------+------------------------------------------+--------------------------+--------------+------------+-----------+-----------------+---------------+----------------------------+-----------+------------------------+------------------+------------------------+---------------------------+------------------------------+-------------------------------+---------------+-------------------------+------------------+---------------+--------------------+------------------------------+------------------------------+----------------+--------------------+-----------------+---------------------+---------------------+-------------------------+---------------+-----------------------------+--------------------------+------------------------+----------------------------+---------------------+--------------------+----------------------+---------------------------+--------------------------+--------------------------

## Rating
Análisis rating para el cálculo del non_ig

In [15]:
# Rating-related columns under analysis; ind_inv_grade already tells whether a
# row is investment grade or not.
cols_rating = [
    'ind_inv_grade',                    # investment-grade flag
    'ind_rating',                       # numeric index of the internal rating
    'g_lmscl_internal_ratg_type',       # internal rating, long
    'g_smscl_internal_ratg_type',       # internal rating, short
    'ma_expanded_master_scale_number',  # numeric index of the regulatory rating
    'gf_ma_expanded_master_scale_id',   # regulatory rating
]

In [16]:
# Preview only the rating-related columns, untruncated.
(
    titus
    .select(*cols_rating)
    .show(n=5, truncate=False)
)

+-------------+----------+--------------------------+--------------------------+-------------------------------+------------------------------+
|ind_inv_grade|ind_rating|g_lmscl_internal_ratg_type|g_smscl_internal_ratg_type|ma_expanded_master_scale_number|gf_ma_expanded_master_scale_id|
+-------------+----------+--------------------------+--------------------------+-------------------------------+------------------------------+
|0            |null      |null                      |null                      |33                             |B3                            |
|1            |06        |A                         |A                         |06                             |A                             |
|0            |12        |BBB1                      |BBB                       |15                             |BBB-1                         |
|1            |09        |BBB+1                     |BBB+                      |10                             |BBB+2                   

### Interno

In [17]:
# Which numeric index goes with the internal rating 'BB+1'?
(
    titus
    .select('ind_rating', 'g_lmscl_internal_ratg_type')
    .distinct()
    .filter(F.trim(F.col('g_lmscl_internal_ratg_type')) == 'BB+1')
    .show()
)

+----------+--------------------------+
|ind_rating|g_lmscl_internal_ratg_type|
+----------+--------------------------+
|        18|                      BB+1|
+----------+--------------------------+



In [18]:
# Count rows whose internal rating index is strictly greater than 18.
# NOTE(review): ind_rating is displayed as zero-padded text (e.g. '06'), so this
# presumably relies on an implicit cast; rows with a null index never match.
titus.filter(F.col('ind_rating') > 18).count()

387

In [19]:
# Full mapping between the internal numeric index and the internal rating label.
(
    titus
    .select('ind_rating', 'g_lmscl_internal_ratg_type')
    .distinct()
    .orderBy('ind_rating')
    .show(n=50, truncate=False)
)

+----------+--------------------------+
|ind_rating|g_lmscl_internal_ratg_type|
+----------+--------------------------+
|null      |null                      |
|null      |BBB3                      |
|null      |BBB6                      |
|null      |BBB4                      |
|null      |BBB5                      |
|01        |AAA                       |
|02        |AA+                       |
|03        |AA                        |
|04        |AA-                       |
|05        |A+                        |
|06        |A                         |
|07        |A-                        |
|09        |BBB+1                     |
|10        |BBB+2                     |
|12        |BBB1                      |
|13        |BBB2                      |
|15        |BBB-1                     |
|16        |BBB-2                     |
|18        |BB+1                      |
|19        |BB+2                      |
|20        |BB                        |
|21        |BB1                       |


### Regulatorio

In [17]:
# Which numeric index goes with the regulatory rating 'BB+1'?
(
    titus
    .select('ma_expanded_master_scale_number', 'gf_ma_expanded_master_scale_id')
    .distinct()
    .filter(F.trim(F.col('gf_ma_expanded_master_scale_id')) == 'BB+1')
    .show()
)

+-------------------------------+------------------------------+
|ma_expanded_master_scale_number|gf_ma_expanded_master_scale_id|
+-------------------------------+------------------------------+
|                             18|                          BB+1|
+-------------------------------+------------------------------+



In [18]:
# Count rows whose regulatory rating index is strictly greater than 18.
# NOTE(review): the index is displayed as zero-padded text (e.g. '06'), so this
# presumably relies on an implicit cast — confirm the column type.
titus.filter(F.col('ma_expanded_master_scale_number') > 18).count()

725

In [22]:
# Full mapping between the regulatory numeric index and the regulatory rating label.
(
    titus
    .select('ma_expanded_master_scale_number', 'gf_ma_expanded_master_scale_id')
    .distinct()
    .orderBy('ma_expanded_master_scale_number')
    .show(n=50, truncate=False)
)

+-------------------------------+------------------------------+
|ma_expanded_master_scale_number|gf_ma_expanded_master_scale_id|
+-------------------------------+------------------------------+
|01                             |AAA                           |
|02                             |AA+                           |
|03                             |AA                            |
|04                             |AA-                           |
|05                             |A+                            |
|06                             |A                             |
|07                             |A-                            |
|09                             |BBB+1                         |
|10                             |BBB+2                         |
|12                             |BBB1                          |
|13                             |BBB2                          |
|15                             |BBB-1                         |
|16                      

### Indicador ind_inv
Se ha calculado un indicador que dice si es investment grade o no (dependiendo del valor del rating interno).

**ahora mismo habría que ampliar la marca 'ind_inv_grade' hasta BB+1 o peor con g_lmscl_internal_ratg_type**

In [24]:
# For rows flagged as investment grade, list every combination of internal and
# regulatory rating (index and label) that occurs.
(
    titus
    .filter(F.col('ind_inv_grade') == 1)
    .select(
        'ind_rating',
        'g_lmscl_internal_ratg_type',
        'ma_expanded_master_scale_number',
        'gf_ma_expanded_master_scale_id',
    )
    .distinct()
    .orderBy('ind_rating', 'ma_expanded_master_scale_number')
    .show(n=100, truncate=False)
)

+----------+--------------------------+-------------------------------+------------------------------+
|ind_rating|g_lmscl_internal_ratg_type|ma_expanded_master_scale_number|gf_ma_expanded_master_scale_id|
+----------+--------------------------+-------------------------------+------------------------------+
|01        |AAA                       |01                             |AAA                           |
|02        |AA+                       |02                             |AA+                           |
|03        |AA                        |03                             |AA                            |
|04        |AA-                       |04                             |AA-                           |
|05        |A+                        |05                             |A+                            |
|05        |A+                        |06                             |A                             |
|05        |A+                        |10                             |BB

ERROR:root:Exception while sending command.
Traceback (most recent call last):
  File "/opt/spark/dist/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1207, in send_command
    raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/spark/dist/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1033, in send_command
    response = connection.send_command(command)
  File "/opt/spark/dist/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1211, in send_command
    raise Py4JNetworkError(
py4j.protocol.Py4JNetworkError: Error while receiving
ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:46135)
Traceback (most recent call last):
  File "/opt/spark/dist/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 977, in _get_connection
  