**Análisis de las columnas de rating para el cálculo de non_ig**

In [None]:
from IPython.core.display import HTML
# Inject a CSS rule so <pre> blocks keep their whitespace unwrapped
# (presumably so wide `show()` tables are not line-wrapped in the output).
# NOTE(review): `display` is not imported in the visible cells; this relies on
# the notebook kernel exposing it as a builtin — confirm.
display(HTML("<style>pre { white-space: pre !important; }</style>"))
from pyspark.sql import functions as F, DataFrame
import datetime as dt
from datetime import date, datetime, timedelta
from dateutil.relativedelta import relativedelta
from pyspark.sql.window import Window
import pyspark.sql.types as t
from decimal import Decimal
from pyspark.sql.functions import regexp_replace

In [None]:
# Obtain the Datio-managed PySpark session through the SDK.
from dataproc_sdk.dataproc_sdk_datiopysparksession.datiopysparksession import DatioPysparkSession
datioSparkSession = DatioPysparkSession().get_or_create()

# NOTE(review): the same DatioPysparkSession class is reached here through the
# module import and get_or_create() is called a second time. Presumably both
# names refer to the same session — confirm and keep only one of them.
from dataproc_sdk.dataproc_sdk_datiopysparksession import datiopysparksession
dataproc = datiopysparksession.DatioPysparkSession().get_or_create()

from dataproc_sdk.dataproc_sdk_schema.datioschema import DatioSchema
from dataproc_sdk.dataproc_sdk_datiofilesystem.datiofilesystem import DatioFileSystem

In [None]:
# Disable the vectorized Parquet reader to avoid data-type problems when reading.
# NOTE(review): `spark` is not defined in the visible cells; presumably it is
# injected by the notebook kernel — confirm.
spark.conf.set("spark.sql.parquet.enableVectorizedReader", "false")

In [None]:
# Merge Parquet schemas so that every column of the dataset is picked up,
# even when some snapshots do not contain all of them.
spark.conf.set('spark.sql.parquet.mergeSchema', 'true')

# Configuración

### Paths in

In [None]:
# Starting dataset: the securitisations output produced by Joystick.
path_titulizaciones = "/data/sandboxes/dslb/data/Joystick/mrr/joystick_mrr"

## Diccionarios
Se genera un diccionario para los valores de rating

In [None]:
# non_ig: flags limits rated worse than BB+.
# The rating column to use depends on the type of securitisation being built,
# so this maps securitisation type -> name of the rating column.
non_ig_limit = dict(
    corporate_loan='gf_ma_expanded_master_scale_id',
    project_finance='g_lmscl_internal_ratg_type',
)

In [None]:
# NOT USED: the ind_rating column is read directly instead.
# Maps each rating label to a zero-padded two-digit rank string ("01".."49",
# in the order listed below); the "no rating" aliases all share "50".
rating_dict = dict(
    [
        (label, "{:02d}".format(rank))
        for rank, label in enumerate(
            (
                "AAA", "AA+", "AA", "AA-", "A+", "A", "A-",
                "BBB+", "BBB+1", "BBB+2", "BBB", "BBB1", "BBB2", "BBB-", "BBB-1", "BBB-2",
                "BB+", "BB+1", "BB+2", "BB", "BB1", "BB2", "BB-", "BB-1", "BB-2",
                "B+", "B+1", "B+2", "B+3", "B", "B1", "B2", "B3", "B-", "B-1", "B-2", "B-3",
                "CCC+", "CCC", "CCC-", "CC+", "CC", "CC-", "C+", "C", "C-",
                "D", "D1", "D2",
            ),
            start=1,
        )
    ]
    + [(alias, "50") for alias in ("No Rating", "XXXX", "null", "No Informado")]
)

# Funciones

In [None]:
# calculamos la fecha más reciente de la ruta tomando como campo de particion el pasado como parámetro
def last_partition(p_path: str, campo: str) -> str:
    """Return the greatest value of partition field ``campo`` found under ``p_path``.

    The entries returned by ``listStatus`` for ``p_path`` are scanned and only
    those whose path contains the full ``<campo>=`` marker are considered.
    Values are compared as strings, so the result is the lexicographic maximum
    (which matches chronological order for fixed-width dates such as
    ``yyyymmdd``).

    Args:
        p_path: Path whose listed entries are the partition folders.
        campo: Name of the partition field, e.g. ``'clan_date'``.

    Returns:
        The greatest partition value, exactly as it appears in the path.

    Raises:
        ValueError: If no listed entry contains ``<campo>=``.
    """
    datio_path = DatioFileSystem().get().qualify(p_path)
    fs = datio_path.fileSystem()
    statuses = fs.listStatus(datio_path.path())
    # Full path strings of every entry listed under p_path.
    entries = [status.getPath().toString() for status in statuses]

    marker = campo + '='
    # Filter on the full "<campo>=" marker (not just the field name): an entry
    # that merely contains the field name, such as "clan_date_backup", would
    # otherwise make split(...)[1] raise IndexError.
    values = [entry.split(marker)[1] for entry in entries if marker in entry]
    if not values:
        # max([]) would raise a ValueError with an opaque message; keep the
        # exception type but say what was being looked for and where.
        raise ValueError(
            "No '{}' partitions found under '{}'".format(marker, p_path)
        )
    return max(values)

# Tabla Titulizaciones
Información que se comparte como punto de partida para generar titulizaciones.

In [None]:
# Greatest `clan_date` partition value available under the securitisations path.
fecha_titus = last_partition(p_path=path_titulizaciones, campo='clan_date')
fecha_titus

In [None]:
# Presumably a manual override to pin a specific snapshot instead of the
# computed one (uncomment to use):
# fecha_titus=20240430

In [None]:
# Read the securitisations dataset and keep only the `fecha_titus` snapshot.
titus = (
    dataproc.read()
    .parquet(path_titulizaciones)
    .where(F.col('clan_date') == fecha_titus)
)

In [None]:
# Uncomment to print the schema of the loaded snapshot:
# titus.printSchema()

In [None]:
# Number of rows in the selected clan_date snapshot.
titus.count()

In [None]:
# Preview the first 5 rows without truncating column values.
titus.show(n=5, truncate=False)

## Rating
Análisis rating para el cálculo del non_ig

In [None]:
# Rating-related columns under analysis.
# ind_inv_grade already tells whether the row is investment grade or not.
cols_rating = [
    'ind_inv_grade',                    # flag: is investment grade
    'ind_rating',                       # numeric index of the internal rating
    'g_lmscl_internal_ratg_type',       # internal rating, long
    'g_smscl_internal_ratg_type',       # internal rating, short
    'ma_expanded_master_scale_number',  # numeric index of the regulatory rating
    'gf_ma_expanded_master_scale_id',   # regulatory rating
]

In [None]:
# Preview the rating columns for the first 5 rows, untruncated.
titus.select(*cols_rating).show(n=5, truncate=False)

### Interno

In [None]:
# Internal scale: distinct (ind_rating, label) pairs whose trimmed label is 'BB+1'.
(
    titus
    .select('ind_rating', 'g_lmscl_internal_ratg_type')
    .distinct()
    .where(F.trim(F.col('g_lmscl_internal_ratg_type')) == 'BB+1')
    .show()
)

In [None]:
# Count rows whose internal numeric rating index is strictly greater than 18.
# NOTE(review): the notebook text speaks of "BB+1 or worse" and rating_dict maps
# BB+1 to "18"; if ind_rating follows that scale, `> 18` leaves BB+1 itself out.
# Confirm whether `>= 18` (or `> 17`, i.e. worse than BB+) is what is intended.
titus.where(F.col('ind_rating')>18).count()

In [None]:
# Internal scale: every distinct (ind_rating, label) pair, ordered by index.
(
    titus
    .select('ind_rating', 'g_lmscl_internal_ratg_type')
    .distinct()
    .orderBy('ind_rating')
    .show(n=50, truncate=False)
)

### Regulatorio

In [None]:
# Regulatory scale: distinct (number, id) pairs whose trimmed id is 'BB+1'.
(
    titus
    .select('ma_expanded_master_scale_number', 'gf_ma_expanded_master_scale_id')
    .distinct()
    .where(F.trim(F.col('gf_ma_expanded_master_scale_id')) == 'BB+1')
    .show()
)

In [None]:
# Count rows whose regulatory numeric scale index is strictly greater than 18.
# NOTE(review): if this index follows the same scale as rating_dict (BB+1 = "18"),
# `> 18` leaves BB+1 itself out, while the notebook text speaks of "BB+1 or
# worse" — confirm the intended threshold.
titus.where(F.col('ma_expanded_master_scale_number')>18).count()

In [None]:
# Regulatory scale: every distinct (number, id) pair, ordered by number.
(
    titus
    .select('ma_expanded_master_scale_number', 'gf_ma_expanded_master_scale_id')
    .distinct()
    .orderBy('ma_expanded_master_scale_number')
    .show(n=50, truncate=False)
)

### Indicador ind_inv
Se ha calculado un indicador que dice si es investment grade o no (dependiendo del valor del rating interno).

**ahora mismo habría que ampliar la marca 'ind_inv_grade' hasta BB+1 o peor con g_lmscl_internal_ratg_type**

In [None]:
# Distinct internal/regulatory rating combinations among rows with ind_inv_grade == 1,
# ordered by internal index and then by regulatory number.
(
    titus
    .where(F.col('ind_inv_grade') == 1)
    .select(
        'ind_rating',
        'g_lmscl_internal_ratg_type',
        'ma_expanded_master_scale_number',
        'gf_ma_expanded_master_scale_id',
    )
    .distinct()
    .orderBy('ind_rating', 'ma_expanded_master_scale_number')
    .show(n=100, truncate=False)
)