In [2]:
import os

import pandas as pd
import plotly.express as px
import polars as pl

from freqSevEda import FreqSevEDA
from polars_visualization import pl_config as config

# Data overview

- The dataset has 1,580 entries. Grouping by Loc ID and Año Póliza shows that each policy is annual.

In [4]:
# Location of the source workbook. The default is the original author's local
# path; set the CASOCAS_DATA_PATH environment variable to point at the file on
# another machine instead of editing this cell.
path = os.environ.get(
    "CASOCAS_DATA_PATH",
    r"C:\Users\abrah\OneDrive\Desktop\casoCAS\archivo\data\final-dataset.xlsx",
)
df = pl.read_excel(path)

# Print the frame inside the project's `config()` context
# (presumably it adjusts polars display options -- confirm in polars_visualization).
with config():
    print(df)

shape: (1_580, 21)
| Loc ID | Año Póliza | Exposicion | Suma Asegurada | Prima   | Evento ID | Numero Siniestros | Mes  | Duración de la inundación (día… | Severidad de la inundación (es… | Precipitación (mm) | Incremento del Nivel del Río (… | Monto de siniestro | Latitud | Longitud | Ciudad    | Pais         | Codigo pais | Continente | Sub continente     | Continente intermedio |
|--------|------------|------------|----------------|---------|-----------|-------------------|------|---------------------------------|---------------------------------|--------------------|---------------------------------|--------------------|---------|----------|-----------|--------------|-------------|------------|--------------------|-----------------------|
| loc001 | 2,014      | 1          | 87,223,100     | 659,145 | null      | 0                 | null | null                            | null                            | null               | null                            | 0                  | 

In [6]:
# Build the frequency/severity explorer, telling it which columns hold the
# exposure, the claim count and the claim amount.
eda = FreqSevEDA(
    df,
    exposure="Exposicion",
    claimNb="Numero Siniestros",
    claimAmount="Monto de siniestro",
)

# Render the interactive view (a column dropdown plus an output area).
eda.interactive_graph()

Dropdown(description='Columns:', options=('-', 'Loc ID', 'Año Póliza', 'Exposicion', 'Suma Asegurada', 'Prima'…

Output()

# Description by columns

- There are negative values for TIV (Suma Asegurada). Is this a mistake, and how should we deal with those values? Only one location, loc068, has a negative TIV. In this case we take the absolute value of the TIV column so that we don't lose information.
- The data covers the years 2014 to 2023.
- Is the premium in US dollars (USD)?
- Flood duration ranges between 4 and 10 days.
- Severity values range between 2 and 5. Why is there no value of 1?
- Precipitation ranges between 100 mm and 310 mm.
- River level rise ranges between 1.2 m and 4.5 m.

In [7]:
# Summary statistics of the data as loaded, i.e. before the TIV correction below.
with config():
    describe_table = df.describe()
    print(describe_table)

# Fix the TIV values: replace "Suma Asegurada" with its absolute value so the
# negative entries become positive. `df` is rebound because later cells read it.
tiv_abs = pl.col("Suma Asegurada").abs()
df = df.with_columns(tiv_abs)


shape: (9, 22)
| statistic  | Loc ID | Año Póliza | Exposicion | Suma Asegurada    | Prima          | Evento ID | Numero Siniestros | Mes       | Duración de la inundación (día… | Severidad de la inundación (es… | Precipitación (mm) | Incremento del Nivel del Río (… | Monto de siniestro | Latitud   | Longitud  | Ciudad | Pais      | Codigo pais | Continente | Sub continente            | Continente intermedio |
|------------|--------|------------|------------|-------------------|----------------|-----------|-------------------|-----------|---------------------------------|---------------------------------|--------------------|---------------------------------|--------------------|-----------|-----------|--------|-----------|-------------|------------|---------------------------|-----------------------|
| count      | 1580   | 1,580.000  | 1,580.000  | 1,580.000         | 1,580.000      | 188.000   | 1,580.000         | 188.000   | 188.000                         | 188.000               

# Information against Año Póliza

- Average precipitation (Precipitación Promedio) tends to increase over the years.

In [15]:
# Per-country totals and averages computed in the group-by.
country_totals = [
    pl.col("Exposicion").sum().alias("Exposicion"),
    pl.col("Suma Asegurada").sum(),
    pl.col("Prima").sum().alias("Prima Ganada"),
    # Distinct events per country; null event ids (rows without an event) are ignored.
    pl.col("Evento ID").drop_nulls().n_unique().alias("Número Eventos"),
    pl.col("Numero Siniestros").sum(),
    pl.col("Duración de la inundación (días)").mean().alias("Duración Promedio"),
    pl.col("Severidad de la inundación (escala 1-5)").mean().alias("Magnitud Inundaciones Promedio"),
    pl.col("Precipitación (mm)").mean().alias("Precipitación Promedio"),
    pl.col("Incremento del Nivel del Río (m)").mean().alias("Incremento Promedio"),
    pl.col("Monto de siniestro").sum().alias("Incurrido"),
]

# Ratios derived from the aggregated totals.
country_ratios = [
    # Loss ratio: incurred losses over earned premium.
    (pl.col("Incurrido") / pl.col("Prima Ganada")).alias("Indice de Siniestralidad"),
    # Premium rate per thousand of sum insured.
    (pl.col("Prima Ganada") / pl.col("Suma Asegurada") * 1000).alias("Tasa por Mil"),
    # Incurred losses over sum insured.
    (pl.col("Incurrido") / pl.col("Suma Asegurada")).alias("TPR"),
    # Claims per unit of exposure.
    (pl.col("Numero Siniestros") / pl.col("Exposicion")).alias("Frecuencia"),
    # Average incurred amount per claim.
    (pl.col("Incurrido") / pl.col("Numero Siniestros")).alias("Severidad"),
]

# Pure premium = frequency x severity (i.e. incurred per unit of exposure).
pure_premium = (pl.col("Frecuencia") * pl.col("Severidad")).alias("Prima Pura")

# NOTE(review): "Prima Pura" is per unit of exposure while "Prima Ganada" is the
# country total, so this ratio equals loss ratio / exposure -- confirm this is
# the intended comparison.
pure_premium_share = (pl.col("Prima Pura") / pl.col("Prima Ganada")).alias("proportion")

# Last expression of the cell: the summary table, lowest pure premium first.
(
    df.group_by("Pais")
    .agg(country_totals)
    .with_columns(country_ratios)
    .with_columns(pure_premium)
    .with_columns(pure_premium_share)
    .sort("Prima Pura", descending=False)
)


Pais,Exposicion,Suma Asegurada,Prima Ganada,Número Eventos,Numero Siniestros,Duración Promedio,Magnitud Inundaciones Promedio,Precipitación Promedio,Incremento Promedio,Incurrido,Indice de Siniestralidad,Tasa por Mil,TPR,Frecuencia,Severidad,Prima Pura,proportion
str,i64,i64,i64,u32,i64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64,f64
"""Argentina""",70,5444474010,40719191,2,7,5.428571,2.857143,147.142857,2.1,1230000,0.030207,7.478994,0.000226,0.1,175714.285714,17571.428571,0.000432
"""Brazil""",70,10707066620,66963252,2,7,4.571429,2.571429,151.428571,1.871429,1400000,0.020907,6.254117,0.000131,0.1,200000.0,20000.0,0.000299
"""South Africa""",80,6805602900,58991325,2,8,7.0,4.0,170.0,2.3,1660000,0.02814,8.668053,0.000244,0.1,207500.0,20750.0,0.000352
"""New Zealand""",80,8436502580,80166073,2,8,5.0,3.0,155.0,1.8,1680000,0.020956,9.502287,0.000199,0.1,210000.0,21000.0,0.000262
"""Chile""",60,5048979620,22885281,2,6,5.0,2.5,145.0,1.75,1320000,0.057679,4.532655,0.000261,0.1,220000.0,22000.0,0.000961
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""India""",60,22292004080,162439295,2,6,8.666667,5.0,256.666667,4.0,8500000,0.052327,7.286886,0.000381,0.1,1.4167e6,141666.666667,0.000872
"""Japan""",40,15972841700,115625035,2,4,8.0,4.5,230.0,3.0,6500000,0.056216,7.238852,0.000407,0.1,1.625e6,162500.0,0.001405
"""Germany""",30,9245259740,86489322,2,6,9.0,5.0,210.0,3.0,5090000,0.058851,9.354991,0.000551,0.2,848333.333333,169666.666667,0.001962
"""Indonesia""",50,25781744720,148689033,2,5,9.4,5.0,306.0,4.26,10500000,0.070617,5.767222,0.000407,0.1,2.1e6,210000.0,0.001412
